X86FrameLowering.cpp 146 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
737783779378037813782378337843785
  1. //===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains the X86 implementation of TargetFrameLowering class.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "X86FrameLowering.h"
  13. #include "X86InstrBuilder.h"
  14. #include "X86InstrInfo.h"
  15. #include "X86MachineFunctionInfo.h"
  16. #include "X86Subtarget.h"
  17. #include "X86TargetMachine.h"
  18. #include "llvm/ADT/SmallSet.h"
  19. #include "llvm/ADT/Statistic.h"
  20. #include "llvm/Analysis/EHPersonalities.h"
  21. #include "llvm/CodeGen/MachineFrameInfo.h"
  22. #include "llvm/CodeGen/MachineFunction.h"
  23. #include "llvm/CodeGen/MachineInstrBuilder.h"
  24. #include "llvm/CodeGen/MachineModuleInfo.h"
  25. #include "llvm/CodeGen/MachineRegisterInfo.h"
  26. #include "llvm/CodeGen/WinEHFuncInfo.h"
  27. #include "llvm/IR/DataLayout.h"
  28. #include "llvm/IR/Function.h"
  29. #include "llvm/MC/MCAsmInfo.h"
  30. #include "llvm/MC/MCObjectFileInfo.h"
  31. #include "llvm/MC/MCSymbol.h"
  32. #include "llvm/Support/Debug.h"
  33. #include "llvm/Target/TargetOptions.h"
  34. #include <cstdlib>
// Debug category string for this file.
// NOTE(review): presumably consumed by the STATISTIC declarations below and by
// the debug-output macros from llvm/Support/Debug.h — confirm.
#define DEBUG_TYPE "x86-fl"

// Counters for stack probes emitted in the prologue; each string literal is
// the description attached to its counter.
STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
          "Number of extra stack probes generated in prologue");

using namespace llvm;
  40. X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
  41. MaybeAlign StackAlignOverride)
  42. : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
  43. STI.is64Bit() ? -8 : -4),
  44. STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  45. // Cache a bunch of frame-related predicates for this subtarget.
  46. SlotSize = TRI->getSlotSize();
  47. Is64Bit = STI.is64Bit();
  48. IsLP64 = STI.isTarget64BitLP64();
  49. // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
  50. Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  51. StackPtr = TRI->getStackRegister();
  52. }
  53. bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  54. return !MF.getFrameInfo().hasVarSizedObjects() &&
  55. !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
  56. !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
  57. }
  58. /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
  59. /// call frame pseudos can be simplified. Having a FP, as in the default
  60. /// implementation, is not sufficient here since we can't always use it.
  61. /// Use a more nuanced condition.
  62. bool
  63. X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  64. return hasReservedCallFrame(MF) ||
  65. MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
  66. (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
  67. TRI->hasBasePointer(MF);
  68. }
  69. // needsFrameIndexResolution - Do we need to perform FI resolution for
  70. // this function. Normally, this is required only when the function
  71. // has any stack objects. However, FI resolution actually has another job,
  72. // not apparent from the title - it resolves callframesetup/destroy
  73. // that were not simplified earlier.
  74. // So, this is required for x86 functions that have push sequences even
  75. // when there are no stack objects.
  76. bool
  77. X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
  78. return MF.getFrameInfo().hasStackObjects() ||
  79. MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
  80. }
  81. /// hasFP - Return true if the specified function should have a dedicated frame
  82. /// pointer register. This is true if the function has variable sized allocas
  83. /// or if frame pointer elimination is disabled.
  84. bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
  85. const MachineFrameInfo &MFI = MF.getFrameInfo();
  86. return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
  87. TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
  88. MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
  89. MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
  90. MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
  91. MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
  92. MFI.hasStackMap() || MFI.hasPatchPoint() ||
  93. (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
  94. }
  95. static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
  96. if (IsLP64) {
  97. if (isInt<8>(Imm))
  98. return X86::SUB64ri8;
  99. return X86::SUB64ri32;
  100. } else {
  101. if (isInt<8>(Imm))
  102. return X86::SUB32ri8;
  103. return X86::SUB32ri;
  104. }
  105. }
  106. static unsigned getADDriOpcode(bool IsLP64, int64_t Imm) {
  107. if (IsLP64) {
  108. if (isInt<8>(Imm))
  109. return X86::ADD64ri8;
  110. return X86::ADD64ri32;
  111. } else {
  112. if (isInt<8>(Imm))
  113. return X86::ADD32ri8;
  114. return X86::ADD32ri;
  115. }
  116. }
  117. static unsigned getSUBrrOpcode(bool IsLP64) {
  118. return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
  119. }
  120. static unsigned getADDrrOpcode(bool IsLP64) {
  121. return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
  122. }
  123. static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  124. if (IsLP64) {
  125. if (isInt<8>(Imm))
  126. return X86::AND64ri8;
  127. return X86::AND64ri32;
  128. }
  129. if (isInt<8>(Imm))
  130. return X86::AND32ri8;
  131. return X86::AND32ri;
  132. }
  133. static unsigned getLEArOpcode(bool IsLP64) {
  134. return IsLP64 ? X86::LEA64r : X86::LEA32r;
  135. }
  136. static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
  137. if (Use64BitReg) {
  138. if (isUInt<32>(Imm))
  139. return X86::MOV32ri64;
  140. if (isInt<32>(Imm))
  141. return X86::MOV64ri32;
  142. return X86::MOV64ri;
  143. }
  144. return X86::MOV32ri;
  145. }
  146. static bool isEAXLiveIn(MachineBasicBlock &MBB) {
  147. for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
  148. unsigned Reg = RegMask.PhysReg;
  149. if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
  150. Reg == X86::AH || Reg == X86::AL)
  151. return true;
  152. }
  153. return false;
  154. }
  155. /// Check if the flags need to be preserved before the terminators.
  156. /// This would be the case, if the eflags is live-in of the region
  157. /// composed by the terminators or live-out of that region, without
  158. /// being defined by a terminator.
  159. static bool
  160. flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  161. for (const MachineInstr &MI : MBB.terminators()) {
  162. bool BreakNext = false;
  163. for (const MachineOperand &MO : MI.operands()) {
  164. if (!MO.isReg())
  165. continue;
  166. Register Reg = MO.getReg();
  167. if (Reg != X86::EFLAGS)
  168. continue;
  169. // This terminator needs an eflags that is not defined
  170. // by a previous another terminator:
  171. // EFLAGS is live-in of the region composed by the terminators.
  172. if (!MO.isDef())
  173. return true;
  174. // This terminator defines the eflags, i.e., we don't need to preserve it.
  175. // However, we still need to check this specific terminator does not
  176. // read a live-in value.
  177. BreakNext = true;
  178. }
  179. // We found a definition of the eflags, no need to preserve them.
  180. if (BreakNext)
  181. return false;
  182. }
  183. // None of the terminators use or define the eflags.
  184. // Check if they are live-out, that would imply we need to preserve them.
  185. for (const MachineBasicBlock *Succ : MBB.successors())
  186. if (Succ->isLiveIn(X86::EFLAGS))
  187. return true;
  188. return false;
  189. }
/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
///
/// \param MBB        Block that receives the new instructions.
/// \param MBBI       Insertion point handed to every BuildMI call.
/// \param DL         Debug location attached to the emitted instructions.
/// \param NumBytes   Signed adjustment. A negative value is treated as a
///                   subtraction and tagged FrameSetup; any other value is an
///                   addition tagged FrameDestroy.
/// \param InEpilogue Forwarded to BuildStackAdjustment; when set, the inline
///                   stack-probe path is not taken.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL,
                                    int64_t NumBytes, bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  // From here on Offset is the magnitude; isSub carries the direction.
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  // Largest amount handled by one immediate adjustment: 2^31 - 1.
  uint64_t Chunk = (1LL << 31) - 1;

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // It's ok to not take into account large chunks when probing, as the
  // allocation is split in smaller chunks anyway.
  if (EmitInlineStackProbe && !InEpilogue) {

    // This pseudo-instruction is going to be expanded, potentially using a
    // loop, by inlineStackProbe().
    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
    return;
  } else if (Offset > Chunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);

    // For a subtraction, RAX/EAX is used directly unless it is live into the
    // block; otherwise ask the register info for a dead caller-saved register
    // (Reg stays 0 if none is found).
    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);

    unsigned AddSubRROpc =
        isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
    if (Reg) {
      // Materialize the offset in Reg, then add/sub it to/from SP.
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      // NOTE(review): unlike the MOV above, this add/sub is not tagged with
      // Flag — confirm whether that is intentional.
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * Chunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Is64Bit && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Kill)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      // NOTE(review): the ADD, XCHG and MOV below are not tagged with Flag,
      // whereas the PUSH and the immediate MOV above are — confirm intent.
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   StackPtr, false, 0);
      return;
    }
    // Otherwise (no register available and Offset <= 8 * Chunk) fall through
    // to the chunked loop below.
  }

  // Emit the adjustment in pieces of at most Chunk bytes each.
  while (Offset) {
    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub
        ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
        : TRI->findDeadCallerSavedReg(MBB, MBBI);
      if (Reg) {
        unsigned Opc = isSub
          ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
          : (Is64Bit ? X86::POP64r : X86::POP32r);
        // The pushed register is marked undef (only the SP change is wanted);
        // the popped register is marked as a def.
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        Offset -= ThisVal;
        continue;
      }
      // No dead register for the pop: use a regular adjustment instead.
    }

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}
  291. MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
  292. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  293. const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  294. assert(Offset != 0 && "zero offset stack adjustment requested");
  295. // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  296. // is tricky.
  297. bool UseLEA;
  298. if (!InEpilogue) {
  299. // Check if inserting the prologue at the beginning
  300. // of MBB would require to use LEA operations.
  301. // We need to use LEA operations if EFLAGS is live in, because
  302. // it means an instruction will read it before it gets defined.
  303. UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  304. } else {
  305. // If we can use LEA for SP but we shouldn't, check that none
  306. // of the terminators uses the eflags. Otherwise we will insert
  307. // a ADD that will redefine the eflags and break the condition.
  308. // Alternatively, we could move the ADD, but this may not be possible
  309. // and is an optimization anyway.
  310. UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
  311. if (UseLEA && !STI.useLeaForSP())
  312. UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
  313. // If that assert breaks, that means we do not do the right thing
  314. // in canUseAsEpilogue.
  315. assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
  316. "We shouldn't have allowed this insertion point");
  317. }
  318. MachineInstrBuilder MI;
  319. if (UseLEA) {
  320. MI = addRegOffset(BuildMI(MBB, MBBI, DL,
  321. TII.get(getLEArOpcode(Uses64BitFramePtr)),
  322. StackPtr),
  323. StackPtr, false, Offset);
  324. } else {
  325. bool IsSub = Offset < 0;
  326. uint64_t AbsOffset = IsSub ? -Offset : Offset;
  327. const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
  328. : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
  329. MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
  330. .addReg(StackPtr)
  331. .addImm(AbsOffset);
  332. MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  333. }
  334. return MI;
  335. }
  336. int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
  337. MachineBasicBlock::iterator &MBBI,
  338. bool doMergeWithPrevious) const {
  339. if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
  340. (!doMergeWithPrevious && MBBI == MBB.end()))
  341. return 0;
  342. MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
  343. PI = skipDebugInstructionsBackward(PI, MBB.begin());
  344. // It is assumed that ADD/SUB/LEA instruction is succeded by one CFI
  345. // instruction, and that there are no DBG_VALUE or other instructions between
  346. // ADD/SUB/LEA and its corresponding CFI instruction.
  347. /* TODO: Add support for the case where there are multiple CFI instructions
  348. below the ADD/SUB/LEA, e.g.:
  349. ...
  350. add
  351. cfi_def_cfa_offset
  352. cfi_offset
  353. ...
  354. */
  355. if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
  356. PI = std::prev(PI);
  357. unsigned Opc = PI->getOpcode();
  358. int Offset = 0;
  359. if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
  360. Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
  361. PI->getOperand(0).getReg() == StackPtr){
  362. assert(PI->getOperand(1).getReg() == StackPtr);
  363. Offset = PI->getOperand(2).getImm();
  364. } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
  365. PI->getOperand(0).getReg() == StackPtr &&
  366. PI->getOperand(1).getReg() == StackPtr &&
  367. PI->getOperand(2).getImm() == 1 &&
  368. PI->getOperand(3).getReg() == X86::NoRegister &&
  369. PI->getOperand(5).getReg() == X86::NoRegister) {
  370. // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
  371. Offset = PI->getOperand(4).getImm();
  372. } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
  373. Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
  374. PI->getOperand(0).getReg() == StackPtr) {
  375. assert(PI->getOperand(1).getReg() == StackPtr);
  376. Offset = -PI->getOperand(2).getImm();
  377. } else
  378. return 0;
  379. PI = MBB.erase(PI);
  380. if (PI != MBB.end() && PI->isCFIInstruction()) {
  381. auto CIs = MBB.getParent()->getFrameInstructions();
  382. MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
  383. if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
  384. CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
  385. PI = MBB.erase(PI);
  386. }
  387. if (!doMergeWithPrevious)
  388. MBBI = skipDebugInstructionsForward(PI, MBB.end());
  389. return Offset;
  390. }
  391. void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
  392. MachineBasicBlock::iterator MBBI,
  393. const DebugLoc &DL,
  394. const MCCFIInstruction &CFIInst) const {
  395. MachineFunction &MF = *MBB.getParent();
  396. unsigned CFIIndex = MF.addFrameInst(CFIInst);
  397. BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
  398. .addCFIIndex(CFIIndex);
  399. }
  400. /// Emits Dwarf Info specifying offsets of callee saved registers and
  401. /// frame pointer. This is called only when basic block sections are enabled.
  402. void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
  403. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  404. MachineFunction &MF = *MBB.getParent();
  405. if (!hasFP(MF)) {
  406. emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
  407. return;
  408. }
  409. const MachineModuleInfo &MMI = MF.getMMI();
  410. const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  411. const Register FramePtr = TRI->getFrameRegister(MF);
  412. const Register MachineFramePtr =
  413. STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
  414. : FramePtr;
  415. unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
  416. // Offset = space for return address + size of the frame pointer itself.
  417. unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
  418. BuildCFI(MBB, MBBI, DebugLoc{},
  419. MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
  420. emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
  421. }
  422. void X86FrameLowering::emitCalleeSavedFrameMoves(
  423. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  424. const DebugLoc &DL, bool IsPrologue) const {
  425. MachineFunction &MF = *MBB.getParent();
  426. MachineFrameInfo &MFI = MF.getFrameInfo();
  427. MachineModuleInfo &MMI = MF.getMMI();
  428. const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  429. // Add callee saved registers to move list.
  430. const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  431. // Calculate offsets.
  432. for (const CalleeSavedInfo &I : CSI) {
  433. int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
  434. Register Reg = I.getReg();
  435. unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
  436. if (IsPrologue) {
  437. BuildCFI(MBB, MBBI, DL,
  438. MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
  439. } else {
  440. BuildCFI(MBB, MBBI, DL,
  441. MCCFIInstruction::createRestore(nullptr, DwarfReg));
  442. }
  443. }
  444. }
  445. void X86FrameLowering::emitStackProbe(
  446. MachineFunction &MF, MachineBasicBlock &MBB,
  447. MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
  448. Optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  449. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  450. if (STI.isTargetWindowsCoreCLR()) {
  451. if (InProlog) {
  452. BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
  453. .addImm(0 /* no explicit stack size */);
  454. } else {
  455. emitStackProbeInline(MF, MBB, MBBI, DL, false);
  456. }
  457. } else {
  458. emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
  459. }
  460. }
  461. bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
  462. return STI.isOSWindows() && !STI.isTargetWin64();
  463. }
  464. void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
  465. MachineBasicBlock &PrologMBB) const {
  466. auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
  467. return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
  468. });
  469. if (Where != PrologMBB.end()) {
  470. DebugLoc DL = PrologMBB.findDebugLoc(Where);
  471. emitStackProbeInline(MF, PrologMBB, Where, DL, true);
  472. Where->eraseFromParent();
  473. }
  474. }
  475. void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
  476. MachineBasicBlock &MBB,
  477. MachineBasicBlock::iterator MBBI,
  478. const DebugLoc &DL,
  479. bool InProlog) const {
  480. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  481. if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
  482. emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
  483. else
  484. emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
  485. }
  486. void X86FrameLowering::emitStackProbeInlineGeneric(
  487. MachineFunction &MF, MachineBasicBlock &MBB,
  488. MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  489. MachineInstr &AllocWithProbe = *MBBI;
  490. uint64_t Offset = AllocWithProbe.getOperand(0).getImm();
  491. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  492. const X86TargetLowering &TLI = *STI.getTargetLowering();
  493. assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
  494. "different expansion expected for CoreCLR 64 bit");
  495. const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  496. uint64_t ProbeChunk = StackProbeSize * 8;
  497. uint64_t MaxAlign =
  498. TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
  499. // Synthesize a loop or unroll it, depending on the number of iterations.
  500. // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left
  501. // between the unaligned rsp and current rsp.
  502. if (Offset > ProbeChunk) {
  503. emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
  504. MaxAlign % StackProbeSize);
  505. } else {
  506. emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
  507. MaxAlign % StackProbeSize);
  508. }
  509. }
  510. void X86FrameLowering::emitStackProbeInlineGenericBlock(
  511. MachineFunction &MF, MachineBasicBlock &MBB,
  512. MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
  513. uint64_t AlignOffset) const {
  514. const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  515. const bool HasFP = hasFP(MF);
  516. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  517. const X86TargetLowering &TLI = *STI.getTargetLowering();
  518. const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
  519. const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  520. const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  521. uint64_t CurrentOffset = 0;
  522. assert(AlignOffset < StackProbeSize);
  523. // If the offset is so small it fits within a page, there's nothing to do.
  524. if (StackProbeSize < Offset + AlignOffset) {
  525. MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
  526. .addReg(StackPtr)
  527. .addImm(StackProbeSize - AlignOffset)
  528. .setMIFlag(MachineInstr::FrameSetup);
  529. if (!HasFP && NeedsDwarfCFI) {
  530. BuildCFI(MBB, MBBI, DL,
  531. MCCFIInstruction::createAdjustCfaOffset(
  532. nullptr, StackProbeSize - AlignOffset));
  533. }
  534. MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  535. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
  536. .setMIFlag(MachineInstr::FrameSetup),
  537. StackPtr, false, 0)
  538. .addImm(0)
  539. .setMIFlag(MachineInstr::FrameSetup);
  540. NumFrameExtraProbe++;
  541. CurrentOffset = StackProbeSize - AlignOffset;
  542. }
  543. // For the next N - 1 pages, just probe. I tried to take advantage of
  544. // natural probes but it implies much more logic and there was very few
  545. // interesting natural probes to interleave.
  546. while (CurrentOffset + StackProbeSize < Offset) {
  547. MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
  548. .addReg(StackPtr)
  549. .addImm(StackProbeSize)
  550. .setMIFlag(MachineInstr::FrameSetup);
  551. MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  552. if (!HasFP && NeedsDwarfCFI) {
  553. BuildCFI(
  554. MBB, MBBI, DL,
  555. MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
  556. }
  557. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
  558. .setMIFlag(MachineInstr::FrameSetup),
  559. StackPtr, false, 0)
  560. .addImm(0)
  561. .setMIFlag(MachineInstr::FrameSetup);
  562. NumFrameExtraProbe++;
  563. CurrentOffset += StackProbeSize;
  564. }
  565. // No need to probe the tail, it is smaller than a Page.
  566. uint64_t ChunkSize = Offset - CurrentOffset;
  567. MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
  568. .addReg(StackPtr)
  569. .addImm(ChunkSize)
  570. .setMIFlag(MachineInstr::FrameSetup);
  571. // No need to adjust Dwarf CFA offset here, the last position of the stack has
  572. // been defined
  573. MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  574. }
  575. void X86FrameLowering::emitStackProbeInlineGenericLoop(
  576. MachineFunction &MF, MachineBasicBlock &MBB,
  577. MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
  578. uint64_t AlignOffset) const {
  579. assert(Offset && "null offset");
  580. const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  581. const bool HasFP = hasFP(MF);
  582. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  583. const X86TargetLowering &TLI = *STI.getTargetLowering();
  584. const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  585. const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  586. if (AlignOffset) {
  587. if (AlignOffset < StackProbeSize) {
  588. // Perform a first smaller allocation followed by a probe.
  589. const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset);
  590. MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr)
  591. .addReg(StackPtr)
  592. .addImm(AlignOffset)
  593. .setMIFlag(MachineInstr::FrameSetup);
  594. MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  595. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
  596. .setMIFlag(MachineInstr::FrameSetup),
  597. StackPtr, false, 0)
  598. .addImm(0)
  599. .setMIFlag(MachineInstr::FrameSetup);
  600. NumFrameExtraProbe++;
  601. Offset -= AlignOffset;
  602. }
  603. }
  604. // Synthesize a loop
  605. NumFrameLoopProbe++;
  606. const BasicBlock *LLVM_BB = MBB.getBasicBlock();
  607. MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  608. MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  609. MachineFunction::iterator MBBIter = ++MBB.getIterator();
  610. MF.insert(MBBIter, testMBB);
  611. MF.insert(MBBIter, tailMBB);
  612. Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
  613. : Is64Bit ? X86::R11D
  614. : X86::EAX;
  615. BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
  616. .addReg(StackPtr)
  617. .setMIFlag(MachineInstr::FrameSetup);
  618. // save loop bound
  619. {
  620. const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
  621. const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, BoundOffset);
  622. BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
  623. .addReg(FinalStackProbed)
  624. .addImm(BoundOffset)
  625. .setMIFlag(MachineInstr::FrameSetup);
  626. // while in the loop, use loop-invariant reg for CFI,
  627. // instead of the stack pointer, which changes during the loop
  628. if (!HasFP && NeedsDwarfCFI) {
  629. // x32 uses the same DWARF register numbers as x86-64,
  630. // so there isn't a register number for r11d, we must use r11 instead
  631. const Register DwarfFinalStackProbed =
  632. STI.isTarget64BitILP32()
  633. ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
  634. : FinalStackProbed;
  635. BuildCFI(MBB, MBBI, DL,
  636. MCCFIInstruction::createDefCfaRegister(
  637. nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
  638. BuildCFI(MBB, MBBI, DL,
  639. MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
  640. }
  641. }
  642. // allocate a page
  643. {
  644. const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
  645. BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr)
  646. .addReg(StackPtr)
  647. .addImm(StackProbeSize)
  648. .setMIFlag(MachineInstr::FrameSetup);
  649. }
  650. // touch the page
  651. addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
  652. .setMIFlag(MachineInstr::FrameSetup),
  653. StackPtr, false, 0)
  654. .addImm(0)
  655. .setMIFlag(MachineInstr::FrameSetup);
  656. // cmp with stack pointer bound
  657. BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
  658. .addReg(StackPtr)
  659. .addReg(FinalStackProbed)
  660. .setMIFlag(MachineInstr::FrameSetup);
  661. // jump
  662. BuildMI(testMBB, DL, TII.get(X86::JCC_1))
  663. .addMBB(testMBB)
  664. .addImm(X86::COND_NE)
  665. .setMIFlag(MachineInstr::FrameSetup);
  666. testMBB->addSuccessor(testMBB);
  667. testMBB->addSuccessor(tailMBB);
  668. // BB management
  669. tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
  670. tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  671. MBB.addSuccessor(testMBB);
  672. // handle tail
  673. const unsigned TailOffset = Offset % StackProbeSize;
  674. MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
  675. if (TailOffset) {
  676. const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset);
  677. BuildMI(*tailMBB, TailMBBIter, DL, TII.get(Opc), StackPtr)
  678. .addReg(StackPtr)
  679. .addImm(TailOffset)
  680. .setMIFlag(MachineInstr::FrameSetup);
  681. }
  682. // after the loop, switch back to stack pointer for CFI
  683. if (!HasFP && NeedsDwarfCFI) {
  684. // x32 uses the same DWARF register numbers as x86-64,
  685. // so there isn't a register number for esp, we must use rsp instead
  686. const Register DwarfStackPtr =
  687. STI.isTarget64BitILP32()
  688. ? Register(getX86SubSuperRegister(StackPtr, 64))
  689. : Register(StackPtr);
  690. BuildCFI(*tailMBB, TailMBBIter, DL,
  691. MCCFIInstruction::createDefCfaRegister(
  692. nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
  693. }
  694. // Update Live In information
  695. recomputeLiveIns(*testMBB);
  696. recomputeLiveIns(*tailMBB);
  697. }
  698. void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
  699. MachineFunction &MF, MachineBasicBlock &MBB,
  700. MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  701. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  702. assert(STI.is64Bit() && "different expansion needed for 32 bit");
  703. assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
  704. const TargetInstrInfo &TII = *STI.getInstrInfo();
  705. const BasicBlock *LLVM_BB = MBB.getBasicBlock();
  706. // RAX contains the number of bytes of desired stack adjustment.
  707. // The handling here assumes this value has already been updated so as to
  708. // maintain stack alignment.
  709. //
  710. // We need to exit with RSP modified by this amount and execute suitable
  711. // page touches to notify the OS that we're growing the stack responsibly.
  712. // All stack probing must be done without modifying RSP.
  713. //
  714. // MBB:
  715. // SizeReg = RAX;
  716. // ZeroReg = 0
  717. // CopyReg = RSP
  718. // Flags, TestReg = CopyReg - SizeReg
  719. // FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  720. // LimitReg = gs magic thread env access
  721. // if FinalReg >= LimitReg goto ContinueMBB
  722. // RoundBB:
  723. // RoundReg = page address of FinalReg
  724. // LoopMBB:
  725. // LoopReg = PHI(LimitReg,ProbeReg)
  726. // ProbeReg = LoopReg - PageSize
  727. // [ProbeReg] = 0
  728. // if (ProbeReg > RoundReg) goto LoopMBB
  729. // ContinueMBB:
  730. // RSP = RSP - RAX
  731. // [rest of original MBB]
  732. // Set up the new basic blocks
  733. MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  734. MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  735. MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  736. MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  737. MF.insert(MBBIter, RoundMBB);
  738. MF.insert(MBBIter, LoopMBB);
  739. MF.insert(MBBIter, ContinueMBB);
  740. // Split MBB and move the tail portion down to ContinueMBB.
  741. MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  742. ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  743. ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  744. // Some useful constants
  745. const int64_t ThreadEnvironmentStackLimit = 0x10;
  746. const int64_t PageSize = 0x1000;
  747. const int64_t PageMask = ~(PageSize - 1);
  748. // Registers we need. For the normal case we use virtual
  749. // registers. For the prolog expansion we use RAX, RCX and RDX.
  750. MachineRegisterInfo &MRI = MF.getRegInfo();
  751. const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  752. const Register SizeReg = InProlog ? X86::RAX
  753. : MRI.createVirtualRegister(RegClass),
  754. ZeroReg = InProlog ? X86::RCX
  755. : MRI.createVirtualRegister(RegClass),
  756. CopyReg = InProlog ? X86::RDX
  757. : MRI.createVirtualRegister(RegClass),
  758. TestReg = InProlog ? X86::RDX
  759. : MRI.createVirtualRegister(RegClass),
  760. FinalReg = InProlog ? X86::RDX
  761. : MRI.createVirtualRegister(RegClass),
  762. RoundedReg = InProlog ? X86::RDX
  763. : MRI.createVirtualRegister(RegClass),
  764. LimitReg = InProlog ? X86::RCX
  765. : MRI.createVirtualRegister(RegClass),
  766. JoinReg = InProlog ? X86::RCX
  767. : MRI.createVirtualRegister(RegClass),
  768. ProbeReg = InProlog ? X86::RCX
  769. : MRI.createVirtualRegister(RegClass);
  770. // SP-relative offsets where we can save RCX and RDX.
  771. int64_t RCXShadowSlot = 0;
  772. int64_t RDXShadowSlot = 0;
  773. // If inlining in the prolog, save RCX and RDX.
  774. if (InProlog) {
  775. // Compute the offsets. We need to account for things already
  776. // pushed onto the stack at this point: return address, frame
  777. // pointer (if used), and callee saves.
  778. X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  779. const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
  780. const bool HasFP = hasFP(MF);
  781. // Check if we need to spill RCX and/or RDX.
  782. // Here we assume that no earlier prologue instruction changes RCX and/or
  783. // RDX, so checking the block live-ins is enough.
  784. const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
  785. const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
  786. int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
  787. // Assign the initial slot to both registers, then change RDX's slot if both
  788. // need to be spilled.
  789. if (IsRCXLiveIn)
  790. RCXShadowSlot = InitSlot;
  791. if (IsRDXLiveIn)
  792. RDXShadowSlot = InitSlot;
  793. if (IsRDXLiveIn && IsRCXLiveIn)
  794. RDXShadowSlot += 8;
  795. // Emit the saves if needed.
  796. if (IsRCXLiveIn)
  797. addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
  798. RCXShadowSlot)
  799. .addReg(X86::RCX);
  800. if (IsRDXLiveIn)
  801. addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
  802. RDXShadowSlot)
  803. .addReg(X86::RDX);
  804. } else {
  805. // Not in the prolog. Copy RAX to a virtual reg.
  806. BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  807. }
  808. // Add code to MBB to check for overflow and set the new target stack pointer
  809. // to zero if so.
  810. BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
  811. .addReg(ZeroReg, RegState::Undef)
  812. .addReg(ZeroReg, RegState::Undef);
  813. BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  814. BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
  815. .addReg(CopyReg)
  816. .addReg(SizeReg);
  817. BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
  818. .addReg(TestReg)
  819. .addReg(ZeroReg)
  820. .addImm(X86::COND_B);
  821. // FinalReg now holds final stack pointer value, or zero if
  822. // allocation would overflow. Compare against the current stack
  823. // limit from the thread environment block. Note this limit is the
  824. // lowest touched page on the stack, not the point at which the OS
  825. // will cause an overflow exception, so this is just an optimization
  826. // to avoid unnecessarily touching pages that are below the current
  827. // SP but already committed to the stack by the OS.
  828. BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
  829. .addReg(0)
  830. .addImm(1)
  831. .addReg(0)
  832. .addImm(ThreadEnvironmentStackLimit)
  833. .addReg(X86::GS);
  834. BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  835. // Jump if the desired stack pointer is at or above the stack limit.
  836. BuildMI(&MBB, DL, TII.get(X86::JCC_1)).addMBB(ContinueMBB).addImm(X86::COND_AE);
  837. // Add code to roundMBB to round the final stack pointer to a page boundary.
  838. RoundMBB->addLiveIn(FinalReg);
  839. BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
  840. .addReg(FinalReg)
  841. .addImm(PageMask);
  842. BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);
  843. // LimitReg now holds the current stack limit, RoundedReg page-rounded
  844. // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
  845. // and probe until we reach RoundedReg.
  846. if (!InProlog) {
  847. BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
  848. .addReg(LimitReg)
  849. .addMBB(RoundMBB)
  850. .addReg(ProbeReg)
  851. .addMBB(LoopMBB);
  852. }
  853. LoopMBB->addLiveIn(JoinReg);
  854. addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
  855. false, -PageSize);
  856. // Probe by storing a byte onto the stack.
  857. BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
  858. .addReg(ProbeReg)
  859. .addImm(1)
  860. .addReg(0)
  861. .addImm(0)
  862. .addReg(0)
  863. .addImm(0);
  864. LoopMBB->addLiveIn(RoundedReg);
  865. BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
  866. .addReg(RoundedReg)
  867. .addReg(ProbeReg);
  868. BuildMI(LoopMBB, DL, TII.get(X86::JCC_1)).addMBB(LoopMBB).addImm(X86::COND_NE);
  869. MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
  870. // If in prolog, restore RDX and RCX.
  871. if (InProlog) {
  872. if (RCXShadowSlot) // It means we spilled RCX in the prologue.
  873. addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
  874. TII.get(X86::MOV64rm), X86::RCX),
  875. X86::RSP, false, RCXShadowSlot);
  876. if (RDXShadowSlot) // It means we spilled RDX in the prologue.
  877. addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
  878. TII.get(X86::MOV64rm), X86::RDX),
  879. X86::RSP, false, RDXShadowSlot);
  880. }
  881. // Now that the probing is done, add code to continueMBB to update
  882. // the stack pointer for real.
  883. ContinueMBB->addLiveIn(SizeReg);
  884. BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
  885. .addReg(X86::RSP)
  886. .addReg(SizeReg);
  887. // Add the control flow edges we need.
  888. MBB.addSuccessor(ContinueMBB);
  889. MBB.addSuccessor(RoundMBB);
  890. RoundMBB->addSuccessor(LoopMBB);
  891. LoopMBB->addSuccessor(ContinueMBB);
  892. LoopMBB->addSuccessor(LoopMBB);
  893. // Mark all the instructions added to the prolog as frame setup.
  894. if (InProlog) {
  895. for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
  896. BeforeMBBI->setFlag(MachineInstr::FrameSetup);
  897. }
  898. for (MachineInstr &MI : *RoundMBB) {
  899. MI.setFlag(MachineInstr::FrameSetup);
  900. }
  901. for (MachineInstr &MI : *LoopMBB) {
  902. MI.setFlag(MachineInstr::FrameSetup);
  903. }
  904. for (MachineBasicBlock::iterator CMBBI = ContinueMBB->begin();
  905. CMBBI != ContinueMBBI; ++CMBBI) {
  906. CMBBI->setFlag(MachineInstr::FrameSetup);
  907. }
  908. }
  909. }
  910. void X86FrameLowering::emitStackProbeCall(
  911. MachineFunction &MF, MachineBasicBlock &MBB,
  912. MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
  913. Optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  914. bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
  915. // FIXME: Add indirect thunk support and remove this.
  916. if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
  917. report_fatal_error("Emitting stack probe calls on 64-bit with the large "
  918. "code model and indirect thunks not yet implemented.");
  919. unsigned CallOp;
  920. if (Is64Bit)
  921. CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  922. else
  923. CallOp = X86::CALLpcrel32;
  924. StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);
  925. MachineInstrBuilder CI;
  926. MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);
  927. // All current stack probes take AX and SP as input, clobber flags, and
  928. // preserve all registers. x86_64 probes leave RSP unmodified.
  929. if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
  930. // For the large code model, we have to call through a register. Use R11,
  931. // as it is scratch in all supported calling conventions.
  932. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
  933. .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  934. CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  935. } else {
  936. CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
  937. .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  938. }
  939. unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  940. unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  941. CI.addReg(AX, RegState::Implicit)
  942. .addReg(SP, RegState::Implicit)
  943. .addReg(AX, RegState::Define | RegState::Implicit)
  944. .addReg(SP, RegState::Define | RegState::Implicit)
  945. .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
  946. MachineInstr *ModInst = CI;
  947. if (STI.isTargetWin64() || !STI.isOSWindows()) {
  948. // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
  949. // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
  950. // themselves. They also does not clobber %rax so we can reuse it when
  951. // adjusting %rsp.
  952. // All other platforms do not specify a particular ABI for the stack probe
  953. // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
  954. ModInst =
  955. BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
  956. .addReg(SP)
  957. .addReg(AX);
  958. }
  959. // DebugInfo variable locations -- if there's an instruction number for the
  960. // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
  961. // modifies SP.
  962. if (InstrNum) {
  963. if (STI.isTargetWin64() || !STI.isOSWindows()) {
  964. // Label destination operand of the subtract.
  965. MF.makeDebugValueSubstitution(*InstrNum,
  966. {ModInst->getDebugInstrNum(), 0});
  967. } else {
  968. // Label the call. The operand number is the penultimate operand, zero
  969. // based.
  970. unsigned SPDefOperand = ModInst->getNumOperands() - 2;
  971. MF.makeDebugValueSubstitution(
  972. *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
  973. }
  974. }
  975. if (InProlog) {
  976. // Apply the frame setup flag to all inserted instrs.
  977. for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
  978. ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
  979. }
  980. }
  981. static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  982. // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  983. // and might require smaller successive adjustments.
  984. const uint64_t Win64MaxSEHOffset = 128;
  985. uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  986. // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  987. return SEHFrameOffset & -16;
  988. }
  989. // If we're forcing a stack realignment we can't rely on just the frame
  990. // info, we need to know the ABI stack alignment as well in case we
  991. // have a call out. Otherwise just make sure we have some alignment - we'll
  992. // go with the minimum SlotSize.
  993. uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  994. const MachineFrameInfo &MFI = MF.getFrameInfo();
  995. Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
  996. Align StackAlign = getStackAlign();
  997. if (MF.getFunction().hasFnAttribute("stackrealign")) {
  998. if (MFI.hasCalls())
  999. MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
  1000. else if (MaxAlign < SlotSize)
  1001. MaxAlign = Align(SlotSize);
  1002. }
  1003. return MaxAlign.value();
  1004. }
  1005. void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
  1006. MachineBasicBlock::iterator MBBI,
  1007. const DebugLoc &DL, unsigned Reg,
  1008. uint64_t MaxAlign) const {
  1009. uint64_t Val = -MaxAlign;
  1010. unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
  1011. MachineFunction &MF = *MBB.getParent();
  1012. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  1013. const X86TargetLowering &TLI = *STI.getTargetLowering();
  1014. const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  1015. const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
  1016. // We want to make sure that (in worst case) less than StackProbeSize bytes
  1017. // are not probed after the AND. This assumption is used in
  1018. // emitStackProbeInlineGeneric.
  1019. if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
  1020. {
  1021. NumFrameLoopProbe++;
  1022. MachineBasicBlock *entryMBB =
  1023. MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  1024. MachineBasicBlock *headMBB =
  1025. MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  1026. MachineBasicBlock *bodyMBB =
  1027. MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  1028. MachineBasicBlock *footMBB =
  1029. MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  1030. MachineFunction::iterator MBBIter = MBB.getIterator();
  1031. MF.insert(MBBIter, entryMBB);
  1032. MF.insert(MBBIter, headMBB);
  1033. MF.insert(MBBIter, bodyMBB);
  1034. MF.insert(MBBIter, footMBB);
  1035. const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  1036. Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
  1037. : Is64Bit ? X86::R11D
  1038. : X86::EAX;
  1039. // Setup entry block
  1040. {
  1041. entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
  1042. BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
  1043. .addReg(StackPtr)
  1044. .setMIFlag(MachineInstr::FrameSetup);
  1045. MachineInstr *MI =
  1046. BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
  1047. .addReg(FinalStackProbed)
  1048. .addImm(Val)
  1049. .setMIFlag(MachineInstr::FrameSetup);
  1050. // The EFLAGS implicit def is dead.
  1051. MI->getOperand(3).setIsDead();
  1052. BuildMI(entryMBB, DL,
  1053. TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
  1054. .addReg(FinalStackProbed)
  1055. .addReg(StackPtr)
  1056. .setMIFlag(MachineInstr::FrameSetup);
  1057. BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
  1058. .addMBB(&MBB)
  1059. .addImm(X86::COND_E)
  1060. .setMIFlag(MachineInstr::FrameSetup);
  1061. entryMBB->addSuccessor(headMBB);
  1062. entryMBB->addSuccessor(&MBB);
  1063. }
  1064. // Loop entry block
  1065. {
  1066. const unsigned SUBOpc =
  1067. getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
  1068. BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
  1069. .addReg(StackPtr)
  1070. .addImm(StackProbeSize)
  1071. .setMIFlag(MachineInstr::FrameSetup);
  1072. BuildMI(headMBB, DL,
  1073. TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
  1074. .addReg(FinalStackProbed)
  1075. .addReg(StackPtr)
  1076. .setMIFlag(MachineInstr::FrameSetup);
  1077. // jump
  1078. BuildMI(headMBB, DL, TII.get(X86::JCC_1))
  1079. .addMBB(footMBB)
  1080. .addImm(X86::COND_B)
  1081. .setMIFlag(MachineInstr::FrameSetup);
  1082. headMBB->addSuccessor(bodyMBB);
  1083. headMBB->addSuccessor(footMBB);
  1084. }
  1085. // setup loop body
  1086. {
  1087. addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
  1088. .setMIFlag(MachineInstr::FrameSetup),
  1089. StackPtr, false, 0)
  1090. .addImm(0)
  1091. .setMIFlag(MachineInstr::FrameSetup);
  1092. const unsigned SUBOpc =
  1093. getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
  1094. BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
  1095. .addReg(StackPtr)
  1096. .addImm(StackProbeSize)
  1097. .setMIFlag(MachineInstr::FrameSetup);
  1098. // cmp with stack pointer bound
  1099. BuildMI(bodyMBB, DL,
  1100. TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
  1101. .addReg(FinalStackProbed)
  1102. .addReg(StackPtr)
  1103. .setMIFlag(MachineInstr::FrameSetup);
  1104. // jump
  1105. BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
  1106. .addMBB(bodyMBB)
  1107. .addImm(X86::COND_B)
  1108. .setMIFlag(MachineInstr::FrameSetup);
  1109. bodyMBB->addSuccessor(bodyMBB);
  1110. bodyMBB->addSuccessor(footMBB);
  1111. }
  1112. // setup loop footer
  1113. {
  1114. BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
  1115. .addReg(FinalStackProbed)
  1116. .setMIFlag(MachineInstr::FrameSetup);
  1117. addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
  1118. .setMIFlag(MachineInstr::FrameSetup),
  1119. StackPtr, false, 0)
  1120. .addImm(0)
  1121. .setMIFlag(MachineInstr::FrameSetup);
  1122. footMBB->addSuccessor(&MBB);
  1123. }
  1124. recomputeLiveIns(*headMBB);
  1125. recomputeLiveIns(*bodyMBB);
  1126. recomputeLiveIns(*footMBB);
  1127. recomputeLiveIns(MBB);
  1128. }
  1129. } else {
  1130. MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
  1131. .addReg(Reg)
  1132. .addImm(Val)
  1133. .setMIFlag(MachineInstr::FrameSetup);
  1134. // The EFLAGS implicit def is dead.
  1135. MI->getOperand(3).setIsDead();
  1136. }
  1137. }
  1138. bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
  1139. // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  1140. // clobbered by any interrupt handler.
  1141. assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
  1142. "MF used frame lowering for wrong subtarget");
  1143. const Function &Fn = MF.getFunction();
  1144. const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  1145. return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
  1146. }
  1147. /// Return true if we need to use the restricted Windows x64 prologue and
  1148. /// epilogue code patterns that can be described with WinCFI (.seh_*
  1149. /// directives).
  1150. bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
  1151. return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  1152. }
  1153. bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
  1154. return !isWin64Prologue(MF) && MF.needsFrameMoves();
  1155. }
  /// emitPrologue - Push callee-saved registers onto the stack, which
  /// automatically adjust the stack pointer. Adjust the stack pointer to allocate
  /// space for local variables. Also emit labels used by the exception handler to
  /// generate the exception handling frames.
  ///
  /// \param MF  The function whose prologue is being emitted.
  /// \param MBB The block receiving the prologue; instructions are inserted
  ///            starting at MBB.begin(). It may be an EH funclet entry block,
  ///            in which case a reduced funclet prologue is emitted.
  ///
  /// As side effects this updates the frame info's stack size and offset
  /// adjustment, may mark the red zone as used, and records on \p MF whether
  /// any WinCFI directive was emitted.

  /*
    Here's a gist of what gets emitted:

    ; Establish frame pointer, if needed
    [if needs FP]
        push  %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        .seh_pushreg %rbp
        mov  %rsp, %rbp
        .cfi_def_cfa_register %rbp

    ; Spill general-purpose registers
    [for all callee-saved GPRs]
        pushq %<reg>
        [if not needs FP]
           .cfi_def_cfa_offset (offset from RETADDR)
        .seh_pushreg %<reg>

    ; If the required stack alignment > default stack alignment
    ; rsp needs to be re-aligned.  This creates a "re-alignment gap"
    ; of unknown size in the stack frame.
    [if stack needs re-alignment]
        and  $MASK, %rsp

    ; Allocate space for locals
    [if target is Windows and allocated space > 4096 bytes]
        ; Windows needs special care for allocations larger
        ; than one page.
        mov $NNN, %rax
        call ___chkstk_ms/___chkstk
        sub  %rax, %rsp
    [else]
        sub  $NNN, %rsp

    [if needs FP]
        .seh_stackalloc (size of XMM spill slots)
        .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
    [else]
        .seh_stackalloc NNN

    ; Spill XMMs
    ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
    ; they may get spilled on any platform, if the current function
    ; calls @llvm.eh.unwind.init
    [if needs FP]
        [for all callee-saved XMM registers]
            movaps  %<xmm reg>, -MMM(%rbp)
        [for all callee-saved XMM registers]
            .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
                ; i.e. the offset relative to (%rbp - SEHFrameOffset)
    [else]
        [for all callee-saved XMM registers]
            movaps  %<xmm reg>, KKK(%rsp)
        [for all callee-saved XMM registers]
            .seh_savexmm %<xmm reg>, KKK

    .seh_endprologue

    [if needs base pointer]
        mov  %rsp, %rbx
        [if needs to restore base pointer]
            mov %rsp, -MMM(%rbp)

    ; Emit CFI info
    [if needs FP]
        [for all callee-saved registers]
            .cfi_offset %<reg>, (offset from %rbp)
    [else]
        .cfi_def_cfa_offset (offset from RETADDR)
        [for all callee-saved registers]
            .cfi_offset %<reg>, (offset from %rsp)

    Notes:
    - .seh directives are emitted only for Windows 64 ABI
    - .cv_fpo directives are emitted on win32 when emitting CodeView
    - .cfi directives are emitted for all other ABIs
    - for 32-bit code, substitute %e?? registers for %r??
  */

  void X86FrameLowering::emitPrologue(MachineFunction &MF,
                                      MachineBasicBlock &MBB) const {
    assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
           "MF used frame lowering for wrong subtarget");
    MachineBasicBlock::iterator MBBI = MBB.begin();
    MachineFrameInfo &MFI = MF.getFrameInfo();
    const Function &Fn = MF.getFunction();
    MachineModuleInfo &MMI = MF.getMMI();
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
    uint64_t StackSize = MFI.getStackSize();    // Number of bytes to allocate.

    // Classify the block and function: EH funclet entry, CoreCLR personality,
    // frame-pointer use, and which flavour of unwind information (Win64 SEH,
    // Win32 FPO, DWARF) has to accompany the prologue instructions.
    bool IsFunclet = MBB.isEHFuncletEntry();
    EHPersonality Personality = EHPersonality::Unknown;
    if (Fn.hasPersonalityFn())
      Personality = classifyEHPersonality(Fn.getPersonalityFn());
    bool FnHasClrFunclet =
        MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
    bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
    bool HasFP = hasFP(MF);
    bool IsWin64Prologue = isWin64Prologue(MF);
    bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
    // FIXME: Emit FPO data for EH funclets.
    bool NeedsWinFPO =
        !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag();
    bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
    bool NeedsDwarfCFI = needsDwarfCFI(MF);
    Register FramePtr = TRI->getFrameRegister(MF);
    // On 64-bit ILP32 targets the 64-bit super-register of the frame pointer is
    // used for the push/CFI operations below.
    const Register MachineFramePtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
    Register BasePtr = TRI->getBaseRegister();
    // Set to true as soon as any SEH_* pseudo is emitted; reported to MF at the
    // end of this function.
    bool HasWinCFI = false;

    // Debug location must be unknown since the first debug location is used
    // to determine the end of the prologue.
    DebugLoc DL;

    // Space reserved for stack-based arguments when making a (ABI-guaranteed)
    // tail call.
    unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
    if (TailCallArgReserveSize && IsWin64Prologue)
      report_fatal_error("Can't handle guaranteed tail call under win64 yet");

    const bool EmitStackProbeCall =
        STI.getTargetLowering()->hasStackProbeSymbol(MF);
    unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);

    // For Swift async functions with a frame pointer, tag the incoming frame
    // pointer according to the configured SwiftAsyncFramePointer mode.
    if (HasFP && X86FI->hasSwiftAsyncContext()) {
      switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
      case SwiftAsyncFramePointerMode::DeploymentBased:
        if (STI.swiftAsyncContextIsDynamicallySet()) {
          // The special symbol below is absolute and has a *value* suitable to be
          // combined with the frame pointer directly.
          BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
              .addUse(MachineFramePtr)
              .addUse(X86::RIP)
              .addImm(1)
              .addUse(X86::NoRegister)
              .addExternalSymbol("swift_async_extendedFramePointerFlags",
                                 X86II::MO_GOTPCREL)
              .addUse(X86::NoRegister);
          break;
        }
        LLVM_FALLTHROUGH;

      case SwiftAsyncFramePointerMode::Always:
        // Unconditionally set bit 60 of the frame pointer.
        BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
            .addUse(MachineFramePtr)
            .addImm(60)
            .setMIFlag(MachineInstr::FrameSetup);
        break;

      case SwiftAsyncFramePointerMode::Never:
        break;
      }
    }

    // Re-align the stack on 64-bit if the x86-interrupt calling convention is
    // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
    // stack alignment.
    if (Fn.getCallingConv() == CallingConv::X86_INTR && Is64Bit &&
        Fn.arg_size() == 2) {
      StackSize += 8;
      MFI.setStackSize(StackSize);
      emitSPUpdate(MBB, MBBI, DL, -8, /*InEpilogue=*/false);
    }

    // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
    // function, and use up to 128 bytes of stack space, don't have a frame
    // pointer, calls, or dynamic alloca then we do not need to adjust the
    // stack pointer (we fit in the Red Zone). We also check that we don't
    // push and pop from the stack.
    if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
        !MFI.hasVarSizedObjects() &&             // No dynamic alloca.
        !MFI.adjustsStack() &&                   // No calls.
        !EmitStackProbeCall &&                   // No stack probes.
        !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
        !MF.shouldSplitStack()) {                // Regular stack
      // MinSize is the part of the frame that is still allocated explicitly:
      // callee-saved area, tail-call reserve, and the saved frame pointer.
      uint64_t MinSize =
          X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta();
      if (HasFP) MinSize += SlotSize;
      X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
      // Shrink the explicit allocation by up to 128 bytes of red zone.
      StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
      MFI.setStackSize(StackSize);
    }

    // Insert stack pointer adjustment for later moving of return addr.  Only
    // applies to tail call optimized functions where the callee argument stack
    // size is bigger than the callers.
    if (TailCallArgReserveSize != 0) {
      BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
                           /*InEpilogue=*/false)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    // Mapping for machine moves:
    //
    //   DST: VirtualFP AND
    //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
    //        ELSE                        => DW_CFA_def_cfa
    //
    //   SRC: VirtualFP AND
    //        DST: Register               => DW_CFA_def_cfa_register
    //
    //   ELSE
    //        OFFSET < 0                  => DW_CFA_offset_extended_sf
    //        REG < 64                    => DW_CFA_offset + Reg
    //        ELSE                        => DW_CFA_offset_extended

    uint64_t NumBytes = 0;
    int stackGrowth = -SlotSize;

    // Find the funclet establisher parameter
    Register Establisher = X86::NoRegister;
    if (IsClrFunclet)
      Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
    else if (IsFunclet)
      Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;

    if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
      // Immediately spill establisher into the home slot.
      // The runtime cares about this.
      // MOV64mr %rdx, 16(%rsp)
      unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
          .addReg(Establisher)
          .setMIFlag(MachineInstr::FrameSetup);
      MBB.addLiveIn(Establisher);
    }

    if (HasFP) {
      assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");

      // Calculate required stack adjustment.
      uint64_t FrameSize = StackSize - SlotSize;
      // If required, include space for extra hidden slot for stashing base pointer.
      if (X86FI->getRestoreBasePointer())
        FrameSize += SlotSize;

      NumBytes = FrameSize -
                 (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);

      // Callee-saved registers are pushed on stack before the stack is realigned.
      if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
        NumBytes = alignTo(NumBytes, MaxAlign);

      // Save EBP/RBP into the appropriate stack slot.
      BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
        .addReg(MachineFramePtr, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);

      if (NeedsDwarfCFI) {
        // Mark the place where EBP/RBP was saved.
        // Define the current CFA rule to use the provided offset.
        assert(StackSize);
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth));

        // Change the rule for the FramePtr to be an "offset" rule.
        unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
        BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createOffset(
                                    nullptr, DwarfFramePtr, 2 * stackGrowth));
      }

      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
            .addImm(FramePtr)
            .setMIFlag(MachineInstr::FrameSetup);
      }

      if (!IsFunclet) {
        if (X86FI->hasSwiftAsyncContext()) {
          const auto &Attrs = MF.getFunction().getAttributes();

          // Before we update the live frame pointer we have to ensure there's a
          // valid (or null) asynchronous context in its slot just before FP in
          // the frame record, so store it now.
          if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
            // We have an initial context in r14, store it just before the frame
            // pointer.
            MBB.addLiveIn(X86::R14);
            BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
                .addReg(X86::R14)
                .setMIFlag(MachineInstr::FrameSetup);
          } else {
            // No initial context, store null so that there's no pointer that
            // could be misused.
            BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i8))
                .addImm(0)
                .setMIFlag(MachineInstr::FrameSetup);
          }

          if (NeedsWinCFI) {
            HasWinCFI = true;
            BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
                .addImm(X86::R14)
                .setMIFlag(MachineInstr::FrameSetup);
          }

          // Point the frame pointer 8 bytes above RSP (i.e. at the slot pushed
          // before the context), then drop RSP by another 8 bytes.
          BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
              .addUse(X86::RSP)
              .addImm(1)
              .addUse(X86::NoRegister)
              .addImm(8)
              .addUse(X86::NoRegister)
              .setMIFlag(MachineInstr::FrameSetup);
          BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP)
              .addUse(X86::RSP)
              .addImm(8)
              .setMIFlag(MachineInstr::FrameSetup);
        }

        if (!IsWin64Prologue && !IsFunclet) {
          // Update EBP with the new base value.
          if (!X86FI->hasSwiftAsyncContext())
            BuildMI(MBB, MBBI, DL,
                    TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
                    FramePtr)
                .addReg(StackPtr)
                .setMIFlag(MachineInstr::FrameSetup);

          if (NeedsDwarfCFI) {
            // Mark effective beginning of when frame pointer becomes valid.
            // Define the current CFA to use the EBP/RBP register.
            unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
            BuildCFI(
                MBB, MBBI, DL,
                MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
          }

          if (NeedsWinFPO) {
            // .cv_fpo_setframe $FramePtr
            HasWinCFI = true;
            BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
                .addImm(FramePtr)
                .addImm(0)
                .setMIFlag(MachineInstr::FrameSetup);
          }
        }
      }
    } else {
      assert(!IsFunclet && "funclets without FPs not yet implemented");
      NumBytes = StackSize -
                 (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
    }

    // Update the offset adjustment, which is mainly used by codeview to translate
    // from ESP to VFRAME relative local variable offsets.
    if (!IsFunclet) {
      if (HasFP && TRI->hasStackRealignment(MF))
        MFI.setOffsetAdjustment(-NumBytes);
      else
        MFI.setOffsetAdjustment(-StackSize);
    }

    // For EH funclets, only allocate enough space for outgoing calls. Save the
    // NumBytes value that we would've used for the parent frame.
    unsigned ParentFrameNumBytes = NumBytes;
    if (IsFunclet)
      NumBytes = getWinEHFuncletFrameSize(MF);

    // Skip the callee-saved push instructions.
    bool PushedRegs = false;
    int StackOffset = 2 * stackGrowth;

    while (MBBI != MBB.end() &&
           MBBI->getFlag(MachineInstr::FrameSetup) &&
           (MBBI->getOpcode() == X86::PUSH32r ||
            MBBI->getOpcode() == X86::PUSH64r)) {
      PushedRegs = true;
      Register Reg = MBBI->getOperand(0).getReg();
      // Advance first so the CFI/SEH directives are inserted after the push.
      ++MBBI;

      if (!HasFP && NeedsDwarfCFI) {
        // Mark callee-saved push instruction.
        // Define the current CFA rule to use the provided offset.
        assert(StackSize);
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset));
        StackOffset += stackGrowth;
      }

      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
            .addImm(Reg)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    }

    // Realign stack after we pushed callee-saved registers (so that we'll be
    // able to calculate their offsets from the frame pointer).
    // Don't do this for Win64, it needs to realign the stack after the prologue.
    if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) {
      assert(HasFP && "There should be a frame pointer if stack is realigned.");
      BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);

      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
            .addImm(MaxAlign)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    }

    // If there is an SUB32ri of ESP immediately before this instruction, merge
    // the two. This can be the case when tail call elimination is enabled and
    // the callee has more arguments than the caller.
    NumBytes -= mergeSPUpdates(MBB, MBBI, true);

    // Adjust stack pointer: ESP -= numbytes.

    // Windows and cygwin/mingw require a prologue helper routine when allocating
    // more than 4K bytes on the stack.  Windows uses __chkstk and cygwin/mingw
    // uses __alloca.  __alloca and the 32-bit version of __chkstk will probe the
    // stack and adjust the stack pointer in one go.  The 64-bit version of
    // __chkstk is only responsible for probing the stack.  The 64-bit prologue is
    // responsible for adjusting the stack pointer.  Touching the stack at 4K
    // increments is necessary to ensure that the guard pages used by the OS
    // virtual memory manager are allocated in correct sequence.
    uint64_t AlignedNumBytes = NumBytes;
    if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
      AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
    if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
      assert(!X86FI->getUsesRedZone() &&
             "The Red Zone is not accounted for in stack probes");

      // Check whether EAX is livein for this block.
      bool isEAXAlive = isEAXLiveIn(MBB);

      if (isEAXAlive) {
        if (Is64Bit) {
          // Save RAX
          BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
            .addReg(X86::RAX, RegState::Kill)
            .setMIFlag(MachineInstr::FrameSetup);
        } else {
          // Save EAX
          BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
            .addReg(X86::EAX, RegState::Kill)
            .setMIFlag(MachineInstr::FrameSetup);
        }
      }

      if (Is64Bit) {
        // Handle the 64-bit Windows ABI case where we need to call __chkstk.
        // Function prologue is responsible for adjusting the stack pointer.
        // The push of RAX above already accounts for 8 of the bytes.
        int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
        BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
            .addImm(Alloc)
            .setMIFlag(MachineInstr::FrameSetup);
      } else {
        // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
        // We'll also use 4 already allocated bytes for EAX.
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
            .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      }

      // Call __chkstk, __chkstk_ms, or __alloca.
      emitStackProbe(MF, MBB, MBBI, DL, true);

      if (isEAXAlive) {
        // Restore RAX/EAX
        // The saved value sits at the top of the newly allocated area.
        MachineInstr *MI;
        if (Is64Bit)
          MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
                            StackPtr, false, NumBytes - 8);
        else
          MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
                            StackPtr, false, NumBytes - 4);
        MI->setFlag(MachineInstr::FrameSetup);
        MBB.insert(MBBI, MI);
      }
    } else if (NumBytes) {
      emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
    }

    // Record the stack allocation in the Windows unwind information.
    if (NeedsWinCFI && NumBytes) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
          .addImm(NumBytes)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    int SEHFrameOffset = 0;
    // Register the frame/base pointer setup below is derived from: the
    // establisher in a funclet, the stack pointer otherwise.
    unsigned SPOrEstablisher;
    if (IsFunclet) {
      if (IsClrFunclet) {
        // The establisher parameter passed to a CLR funclet is actually a pointer
        // to the (mostly empty) frame of its nearest enclosing funclet; we have
        // to find the root function establisher frame by loading the PSPSym from
        // the intermediate frame.
        unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
        MachinePointerInfo NoInfo;
        MBB.addLiveIn(Establisher);
        addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
                     Establisher, false, PSPSlotOffset)
            .addMemOperand(MF.getMachineMemOperand(
                NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
        ;
        // Save the root establisher back into the current funclet's (mostly
        // empty) frame, in case a sub-funclet or the GC needs it.
        addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
                     false, PSPSlotOffset)
            .addReg(Establisher)
            .addMemOperand(MF.getMachineMemOperand(
                NoInfo,
                MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
                SlotSize, Align(SlotSize)));
      }
      SPOrEstablisher = Establisher;
    } else {
      SPOrEstablisher = StackPtr;
    }

    if (IsWin64Prologue && HasFP) {
      // Set RBP to a small fixed offset from RSP. In the funclet case, we base
      // this calculation on the incoming establisher, which holds the value of
      // RSP from the parent frame at the end of the prologue.
      SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
      if (SEHFrameOffset)
        addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
                     SPOrEstablisher, false, SEHFrameOffset);
      else
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
            .addReg(SPOrEstablisher);

      // If this is not a funclet, emit the CFI describing our frame pointer.
      if (NeedsWinCFI && !IsFunclet) {
        assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
            .addImm(FramePtr)
            .addImm(SEHFrameOffset)
            .setMIFlag(MachineInstr::FrameSetup);
        if (isAsynchronousEHPersonality(Personality))
          MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
      }
    } else if (IsFunclet && STI.is32Bit()) {
      // Reset EBP / ESI to something good for funclets.
      MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
      // If we're a catch funclet, we can be returned to via catchret. Save ESP
      // into the registration node so that the runtime will restore it for us.
      if (!MBB.isCleanupFuncletEntry()) {
        assert(Personality == EHPersonality::MSVC_CXX);
        Register FrameReg;
        int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
        int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
        // ESP is the first field, so no extra displacement is needed.
        addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
                     false, EHRegOffset)
            .addReg(X86::ESP);
      }
    }

    // Walk over the remaining FrameSetup instructions. When WinCFI is needed,
    // every store of an FR64-class register to a stack slot gets a matching
    // SEH_SaveXMM inserted right after it.
    while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
      const MachineInstr &FrameInstr = *MBBI;
      ++MBBI;

      if (NeedsWinCFI) {
        int FI;
        if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
          if (X86::FR64RegClass.contains(Reg)) {
            int Offset;
            Register IgnoredFrameReg;
            if (IsWin64Prologue && IsFunclet)
              Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
            else
              Offset =
                  getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
                  SEHFrameOffset;

            HasWinCFI = true;
            assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
            BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
                .addImm(Reg)
                .addImm(Offset)
                .setMIFlag(MachineInstr::FrameSetup);
          }
        }
      }
    }

    // Terminate the SEH prologue only if some WinCFI directive was emitted.
    if (NeedsWinCFI && HasWinCFI)
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
          .setMIFlag(MachineInstr::FrameSetup);

    if (FnHasClrFunclet && !IsFunclet) {
      // Save the so-called Initial-SP (i.e. the value of the stack pointer
      // immediately after the prolog)  into the PSPSlot so that funclets
      // and the GC can recover it.
      unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
      auto PSPInfo = MachinePointerInfo::getFixedStack(
          MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
                   PSPSlotOffset)
          .addReg(StackPtr)
          .addMemOperand(MF.getMachineMemOperand(
              PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
              SlotSize, Align(SlotSize)));
    }

    // Realign stack after we spilled callee-saved registers (so that we'll be
    // able to calculate their offsets from the frame pointer).
    // Win64 requires aligning the stack after the prologue.
    if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
      assert(HasFP && "There should be a frame pointer if stack is realigned.");
      BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
    }

    // We already dealt with stack realignment and funclets above.
    if (IsFunclet && STI.is32Bit())
      return;

    // If we need a base pointer, set it up here. It's whatever the value
    // of the stack pointer is at this point. Any variable size objects
    // will be allocated after this, so we can still use the base pointer
    // to reference locals.
    if (TRI->hasBasePointer(MF)) {
      // Update the base pointer with the current stack pointer.
      unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
      BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
        .addReg(SPOrEstablisher)
        .setMIFlag(MachineInstr::FrameSetup);
      if (X86FI->getRestoreBasePointer()) {
        // Stash value of base pointer.  Saving RSP instead of EBP shortens
        // dependence chain. Used by SjLj EH.
        unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
        addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
                     FramePtr, true, X86FI->getRestoreBasePointerOffset())
          .addReg(SPOrEstablisher)
          .setMIFlag(MachineInstr::FrameSetup);
      }

      if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
        // Stash the value of the frame pointer relative to the base pointer for
        // Win32 EH. This supports Win32 EH, which does the inverse of the above:
        // it recovers the frame pointer from the base pointer rather than the
        // other way around.
        unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
        Register UsedReg;
        int Offset =
            getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
                .getFixed();
        assert(UsedReg == BasePtr);
        addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
            .addReg(FramePtr)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    }

    if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
      // Mark end of stack pointer adjustment.
      if (!HasFP && NumBytes) {
        // Define the current CFA rule to use the provided offset.
        assert(StackSize);
        BuildCFI(
            MBB, MBBI, DL,
            MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth));
      }

      // Emit DWARF info specifying the offsets of the callee-saved registers.
      emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
    }

    // X86 Interrupt handling function cannot assume anything about the direction
    // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
    // in each prologue of interrupt handler function.
    //
    // FIXME: Create "cld" instruction only in these cases:
    // 1. The interrupt handling function uses any of the "rep" instructions.
    // 2. Interrupt handling function calls another function.
    //
    if (Fn.getCallingConv() == CallingConv::X86_INTR)
      BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
          .setMIFlag(MachineInstr::FrameSetup);

    // At this point we know if the function has WinCFI or not.
    MF.setHasWinCFI(HasWinCFI);
  }
  1770. bool X86FrameLowering::canUseLEAForSPInEpilogue(
  1771. const MachineFunction &MF) const {
  1772. // We can't use LEA instructions for adjusting the stack pointer if we don't
  1773. // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
  1774. // to deallocate the stack.
  1775. // This means that we can use LEA for SP in two situations:
  1776. // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
  1777. // 2. We *have* a frame pointer which means we are permitted to use LEA.
  1778. return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
  1779. }
  1780. static bool isFuncletReturnInstr(MachineInstr &MI) {
  1781. switch (MI.getOpcode()) {
  1782. case X86::CATCHRET:
  1783. case X86::CLEANUPRET:
  1784. return true;
  1785. default:
  1786. return false;
  1787. }
  1788. llvm_unreachable("impossible");
  1789. }
  1790. // CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
  1791. // stack. It holds a pointer to the bottom of the root function frame. The
  1792. // establisher frame pointer passed to a nested funclet may point to the
  1793. // (mostly empty) frame of its parent funclet, but it will need to find
  1794. // the frame of the root function to access locals. To facilitate this,
  1795. // every funclet copies the pointer to the bottom of the root function
  1796. // frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
  1797. // same offset for the PSPSym in the root function frame that's used in the
  1798. // funclets' frames allows each funclet to dynamically accept any ancestor
  1799. // frame as its establisher argument (the runtime doesn't guarantee the
  1800. // immediate parent for some reason lost to history), and also allows the GC,
  1801. // which uses the PSPSym for some bookkeeping, to find it in any funclet's
  1802. // frame with only a single offset reported for the entire method.
  1803. unsigned
  1804. X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
  1805. const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
  1806. Register SPReg;
  1807. int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
  1808. /*IgnoreSPUpdates*/ true)
  1809. .getFixed();
  1810. assert(Offset >= 0 && SPReg == TRI->getStackRegister());
  1811. return static_cast<unsigned>(Offset);
  1812. }
  1813. unsigned
  1814. X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
  1815. const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  1816. // This is the size of the pushed CSRs.
  1817. unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  1818. // This is the size of callee saved XMMs.
  1819. const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
  1820. unsigned XMMSize = WinEHXMMSlotInfo.size() *
  1821. TRI->getSpillSize(X86::VR128RegClass);
  1822. // This is the amount of stack a funclet needs to allocate.
  1823. unsigned UsedSize;
  1824. EHPersonality Personality =
  1825. classifyEHPersonality(MF.getFunction().getPersonalityFn());
  1826. if (Personality == EHPersonality::CoreCLR) {
  1827. // CLR funclets need to hold enough space to include the PSPSym, at the
  1828. // same offset from the stack pointer (immediately after the prolog) as it
  1829. // resides at in the main function.
  1830. UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
  1831. } else {
  1832. // Other funclets just need enough stack for outgoing call arguments.
  1833. UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
  1834. }
  1835. // RBP is not included in the callee saved register block. After pushing RBP,
  1836. // everything is 16 byte aligned. Everything we allocate before an outgoing
  1837. // call must also be 16 byte aligned.
  1838. unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
  1839. // Subtract out the size of the callee saved registers. This is how much stack
  1840. // each funclet will allocate.
  1841. return FrameSizeMinusRBP + XMMSize - CSSize;
  1842. }
  1843. static bool isTailCallOpcode(unsigned Opc) {
  1844. return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
  1845. Opc == X86::TCRETURNmi ||
  1846. Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNdi64 ||
  1847. Opc == X86::TCRETURNmi64;
  1848. }
  1849. void X86FrameLowering::emitEpilogue(MachineFunction &MF,
  1850. MachineBasicBlock &MBB) const {
  1851. const MachineFrameInfo &MFI = MF.getFrameInfo();
  1852. X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  1853. MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
  1854. MachineBasicBlock::iterator MBBI = Terminator;
  1855. DebugLoc DL;
  1856. if (MBBI != MBB.end())
  1857. DL = MBBI->getDebugLoc();
  1858. // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
  1859. const bool Is64BitILP32 = STI.isTarget64BitILP32();
  1860. Register FramePtr = TRI->getFrameRegister(MF);
  1861. Register MachineFramePtr =
  1862. Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
  1863. bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  1864. bool NeedsWin64CFI =
  1865. IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
  1866. bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
  1867. // Get the number of bytes to allocate from the FrameInfo.
  1868. uint64_t StackSize = MFI.getStackSize();
  1869. uint64_t MaxAlign = calculateMaxStackAlign(MF);
  1870. unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  1871. unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
  1872. bool HasFP = hasFP(MF);
  1873. uint64_t NumBytes = 0;
  1874. bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
  1875. !MF.getTarget().getTargetTriple().isOSWindows()) &&
  1876. MF.needsFrameMoves();
  1877. if (IsFunclet) {
  1878. assert(HasFP && "EH funclets without FP not yet implemented");
  1879. NumBytes = getWinEHFuncletFrameSize(MF);
  1880. } else if (HasFP) {
  1881. // Calculate required stack adjustment.
  1882. uint64_t FrameSize = StackSize - SlotSize;
  1883. NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
  1884. // Callee-saved registers were pushed on stack before the stack was
  1885. // realigned.
  1886. if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
  1887. NumBytes = alignTo(FrameSize, MaxAlign);
  1888. } else {
  1889. NumBytes = StackSize - CSSize - TailCallArgReserveSize;
  1890. }
  1891. uint64_t SEHStackAllocAmt = NumBytes;
  1892. // AfterPop is the position to insert .cfi_restore.
  1893. MachineBasicBlock::iterator AfterPop = MBBI;
  1894. if (HasFP) {
  1895. if (X86FI->hasSwiftAsyncContext()) {
  1896. // Discard the context.
  1897. int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
  1898. emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true);
  1899. }
  1900. // Pop EBP.
  1901. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
  1902. MachineFramePtr)
  1903. .setMIFlag(MachineInstr::FrameDestroy);
  1904. // We need to reset FP to its untagged state on return. Bit 60 is currently
  1905. // used to show the presence of an extended frame.
  1906. if (X86FI->hasSwiftAsyncContext()) {
  1907. BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8),
  1908. MachineFramePtr)
  1909. .addUse(MachineFramePtr)
  1910. .addImm(60)
  1911. .setMIFlag(MachineInstr::FrameDestroy);
  1912. }
  1913. if (NeedsDwarfCFI) {
  1914. unsigned DwarfStackPtr =
  1915. TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
  1916. BuildCFI(MBB, MBBI, DL,
  1917. MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize));
  1918. if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
  1919. unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
  1920. BuildCFI(MBB, AfterPop, DL,
  1921. MCCFIInstruction::createRestore(nullptr, DwarfFramePtr));
  1922. --MBBI;
  1923. --AfterPop;
  1924. }
  1925. --MBBI;
  1926. }
  1927. }
  1928. MachineBasicBlock::iterator FirstCSPop = MBBI;
  1929. // Skip the callee-saved pop instructions.
  1930. while (MBBI != MBB.begin()) {
  1931. MachineBasicBlock::iterator PI = std::prev(MBBI);
  1932. unsigned Opc = PI->getOpcode();
  1933. if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
  1934. if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
  1935. (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
  1936. (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
  1937. (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)))
  1938. break;
  1939. FirstCSPop = PI;
  1940. }
  1941. --MBBI;
  1942. }
  1943. MBBI = FirstCSPop;
  1944. if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
  1945. emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
  1946. if (MBBI != MBB.end())
  1947. DL = MBBI->getDebugLoc();
  1948. // If there is an ADD32ri or SUB32ri of ESP immediately before this
  1949. // instruction, merge the two instructions.
  1950. if (NumBytes || MFI.hasVarSizedObjects())
  1951. NumBytes += mergeSPUpdates(MBB, MBBI, true);
  1952. // If dynamic alloca is used, then reset esp to point to the last callee-saved
  1953. // slot before popping them off! Same applies for the case, when stack was
  1954. // realigned. Don't do this if this was a funclet epilogue, since the funclets
  1955. // will not do realignment or dynamic stack allocation.
  1956. if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
  1957. !IsFunclet) {
  1958. if (TRI->hasStackRealignment(MF))
  1959. MBBI = FirstCSPop;
  1960. unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
  1961. uint64_t LEAAmount =
  1962. IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
  1963. if (X86FI->hasSwiftAsyncContext())
  1964. LEAAmount -= 16;
  1965. // There are only two legal forms of epilogue:
  1966. // - add SEHAllocationSize, %rsp
  1967. // - lea SEHAllocationSize(%FramePtr), %rsp
  1968. //
  1969. // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
  1970. // However, we may use this sequence if we have a frame pointer because the
  1971. // effects of the prologue can safely be undone.
  1972. if (LEAAmount != 0) {
  1973. unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
  1974. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
  1975. FramePtr, false, LEAAmount);
  1976. --MBBI;
  1977. } else {
  1978. unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
  1979. BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
  1980. .addReg(FramePtr);
  1981. --MBBI;
  1982. }
  1983. } else if (NumBytes) {
  1984. // Adjust stack pointer back: ESP += numbytes.
  1985. emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
  1986. if (!HasFP && NeedsDwarfCFI) {
  1987. // Define the current CFA rule to use the provided offset.
  1988. BuildCFI(MBB, MBBI, DL,
  1989. MCCFIInstruction::cfiDefCfaOffset(
  1990. nullptr, CSSize + TailCallArgReserveSize + SlotSize));
  1991. }
  1992. --MBBI;
  1993. }
  1994. // Windows unwinder will not invoke function's exception handler if IP is
  1995. // either in prologue or in epilogue. This behavior causes a problem when a
  1996. // call immediately precedes an epilogue, because the return address points
  1997. // into the epilogue. To cope with that, we insert an epilogue marker here,
  1998. // then replace it with a 'nop' if it ends up immediately after a CALL in the
  1999. // final emitted code.
  2000. if (NeedsWin64CFI && MF.hasWinCFI())
  2001. BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
  2002. if (!HasFP && NeedsDwarfCFI) {
  2003. MBBI = FirstCSPop;
  2004. int64_t Offset = -CSSize - SlotSize;
  2005. // Mark callee-saved pop instruction.
  2006. // Define the current CFA rule to use the provided offset.
  2007. while (MBBI != MBB.end()) {
  2008. MachineBasicBlock::iterator PI = MBBI;
  2009. unsigned Opc = PI->getOpcode();
  2010. ++MBBI;
  2011. if (Opc == X86::POP32r || Opc == X86::POP64r) {
  2012. Offset += SlotSize;
  2013. BuildCFI(MBB, MBBI, DL,
  2014. MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset));
  2015. }
  2016. }
  2017. }
  2018. // Emit DWARF info specifying the restores of the callee-saved registers.
  2019. // For epilogue with return inside or being other block without successor,
  2020. // no need to generate .cfi_restore for callee-saved registers.
  2021. if (NeedsDwarfCFI && !MBB.succ_empty())
  2022. emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
  2023. if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
  2024. // Add the return addr area delta back since we are not tail calling.
  2025. int Offset = -1 * X86FI->getTCReturnAddrDelta();
  2026. assert(Offset >= 0 && "TCDelta should never be positive");
  2027. if (Offset) {
  2028. // Check for possible merge with preceding ADD instruction.
  2029. Offset += mergeSPUpdates(MBB, Terminator, true);
  2030. emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
  2031. }
  2032. }
  2033. // Emit tilerelease for AMX kernel.
  2034. if (X86FI->hasVirtualTileReg())
  2035. BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
  2036. }
  2037. StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
  2038. int FI,
  2039. Register &FrameReg) const {
  2040. const MachineFrameInfo &MFI = MF.getFrameInfo();
  2041. bool IsFixed = MFI.isFixedObjectIndex(FI);
  2042. // We can't calculate offset from frame pointer if the stack is realigned,
  2043. // so enforce usage of stack/base pointer. The base pointer is used when we
  2044. // have dynamic allocas in addition to dynamic realignment.
  2045. if (TRI->hasBasePointer(MF))
  2046. FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
  2047. else if (TRI->hasStackRealignment(MF))
  2048. FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
  2049. else
  2050. FrameReg = TRI->getFrameRegister(MF);
  2051. // Offset will hold the offset from the stack pointer at function entry to the
  2052. // object.
  2053. // We need to factor in additional offsets applied during the prologue to the
  2054. // frame, base, and stack pointer depending on which is used.
  2055. int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
  2056. const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  2057. unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  2058. uint64_t StackSize = MFI.getStackSize();
  2059. bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  2060. int64_t FPDelta = 0;
  2061. // In an x86 interrupt, remove the offset we added to account for the return
  2062. // address from any stack object allocated in the caller's frame. Interrupts
  2063. // do not have a standard return address. Fixed objects in the current frame,
  2064. // such as SSE register spills, should not get this treatment.
  2065. if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
  2066. Offset >= 0) {
  2067. Offset += getOffsetOfLocalArea();
  2068. }
  2069. if (IsWin64Prologue) {
  2070. assert(!MFI.hasCalls() || (StackSize % 16) == 8);
  2071. // Calculate required stack adjustment.
  2072. uint64_t FrameSize = StackSize - SlotSize;
  2073. // If required, include space for extra hidden slot for stashing base pointer.
  2074. if (X86FI->getRestoreBasePointer())
  2075. FrameSize += SlotSize;
  2076. uint64_t NumBytes = FrameSize - CSSize;
  2077. uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
  2078. if (FI && FI == X86FI->getFAIndex())
  2079. return StackOffset::getFixed(-SEHFrameOffset);
  2080. // FPDelta is the offset from the "traditional" FP location of the old base
  2081. // pointer followed by return address and the location required by the
  2082. // restricted Win64 prologue.
  2083. // Add FPDelta to all offsets below that go through the frame pointer.
  2084. FPDelta = FrameSize - SEHFrameOffset;
  2085. assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
  2086. "FPDelta isn't aligned per the Win64 ABI!");
  2087. }
  2088. if (FrameReg == TRI->getFramePtr()) {
  2089. // Skip saved EBP/RBP
  2090. Offset += SlotSize;
  2091. // Account for restricted Windows prologue.
  2092. Offset += FPDelta;
  2093. // Skip the RETADDR move area
  2094. int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  2095. if (TailCallReturnAddrDelta < 0)
  2096. Offset -= TailCallReturnAddrDelta;
  2097. return StackOffset::getFixed(Offset);
  2098. }
  2099. // FrameReg is either the stack pointer or a base pointer. But the base is
  2100. // located at the end of the statically known StackSize so the distinction
  2101. // doesn't really matter.
  2102. if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
  2103. assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
  2104. return StackOffset::getFixed(Offset + StackSize);
  2105. }
  2106. int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
  2107. Register &FrameReg) const {
  2108. const MachineFrameInfo &MFI = MF.getFrameInfo();
  2109. const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  2110. const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
  2111. const auto it = WinEHXMMSlotInfo.find(FI);
  2112. if (it == WinEHXMMSlotInfo.end())
  2113. return getFrameIndexReference(MF, FI, FrameReg).getFixed();
  2114. FrameReg = TRI->getStackRegister();
  2115. return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
  2116. it->second;
  2117. }
  2118. StackOffset
  2119. X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
  2120. Register &FrameReg,
  2121. int Adjustment) const {
  2122. const MachineFrameInfo &MFI = MF.getFrameInfo();
  2123. FrameReg = TRI->getStackRegister();
  2124. return StackOffset::getFixed(MFI.getObjectOffset(FI) -
  2125. getOffsetOfLocalArea() + Adjustment);
  2126. }
  2127. StackOffset
  2128. X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
  2129. int FI, Register &FrameReg,
  2130. bool IgnoreSPUpdates) const {
  2131. const MachineFrameInfo &MFI = MF.getFrameInfo();
  2132. // Does not include any dynamic realign.
  2133. const uint64_t StackSize = MFI.getStackSize();
  2134. // LLVM arranges the stack as follows:
  2135. // ...
  2136. // ARG2
  2137. // ARG1
  2138. // RETADDR
  2139. // PUSH RBP <-- RBP points here
  2140. // PUSH CSRs
  2141. // ~~~~~~~ <-- possible stack realignment (non-win64)
  2142. // ...
  2143. // STACK OBJECTS
  2144. // ... <-- RSP after prologue points here
  2145. // ~~~~~~~ <-- possible stack realignment (win64)
  2146. //
  2147. // if (hasVarSizedObjects()):
  2148. // ... <-- "base pointer" (ESI/RBX) points here
  2149. // DYNAMIC ALLOCAS
  2150. // ... <-- RSP points here
  2151. //
  2152. // Case 1: In the simple case of no stack realignment and no dynamic
  2153. // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
  2154. // with fixed offsets from RSP.
  2155. //
  2156. // Case 2: In the case of stack realignment with no dynamic allocas, fixed
  2157. // stack objects are addressed with RBP and regular stack objects with RSP.
  2158. //
  2159. // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
  2160. // to address stack arguments for outgoing calls and nothing else. The "base
  2161. // pointer" points to local variables, and RBP points to fixed objects.
  2162. //
  2163. // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
  2164. // answer we give is relative to the SP after the prologue, and not the
  2165. // SP in the middle of the function.
  2166. if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
  2167. !STI.isTargetWin64())
  2168. return getFrameIndexReference(MF, FI, FrameReg);
  2169. // If !hasReservedCallFrame the function might have SP adjustement in the
  2170. // body. So, even though the offset is statically known, it depends on where
  2171. // we are in the function.
  2172. if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
  2173. return getFrameIndexReference(MF, FI, FrameReg);
  2174. // We don't handle tail calls, and shouldn't be seeing them either.
  2175. assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
  2176. "we don't handle this case!");
  2177. // This is how the math works out:
  2178. //
  2179. // %rsp grows (i.e. gets lower) left to right. Each box below is
  2180. // one word (eight bytes). Obj0 is the stack slot we're trying to
  2181. // get to.
  2182. //
  2183. // ----------------------------------
  2184. // | BP | Obj0 | Obj1 | ... | ObjN |
  2185. // ----------------------------------
  2186. // ^ ^ ^ ^
  2187. // A B C E
  2188. //
  2189. // A is the incoming stack pointer.
  2190. // (B - A) is the local area offset (-8 for x86-64) [1]
  2191. // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
  2192. //
  2193. // |(E - B)| is the StackSize (absolute value, positive). For a
  2194. // stack that grown down, this works out to be (B - E). [3]
  2195. //
  2196. // E is also the value of %rsp after stack has been set up, and we
  2197. // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
  2198. // (C - E) == (C - A) - (B - A) + (B - E)
  2199. // { Using [1], [2] and [3] above }
  2200. // == getObjectOffset - LocalAreaOffset + StackSize
  2201. return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
  2202. }
  2203. bool X86FrameLowering::assignCalleeSavedSpillSlots(
  2204. MachineFunction &MF, const TargetRegisterInfo *TRI,
  2205. std::vector<CalleeSavedInfo> &CSI) const {
  2206. MachineFrameInfo &MFI = MF.getFrameInfo();
  2207. X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  2208. unsigned CalleeSavedFrameSize = 0;
  2209. unsigned XMMCalleeSavedFrameSize = 0;
  2210. auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
  2211. int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
  2212. int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  2213. if (TailCallReturnAddrDelta < 0) {
  2214. // create RETURNADDR area
  2215. // arg
  2216. // arg
  2217. // RETADDR
  2218. // { ...
  2219. // RETADDR area
  2220. // ...
  2221. // }
  2222. // [EBP]
  2223. MFI.CreateFixedObject(-TailCallReturnAddrDelta,
  2224. TailCallReturnAddrDelta - SlotSize, true);
  2225. }
  2226. // Spill the BasePtr if it's used.
  2227. if (this->TRI->hasBasePointer(MF)) {
  2228. // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
  2229. if (MF.hasEHFunclets()) {
  2230. int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
  2231. X86FI->setHasSEHFramePtrSave(true);
  2232. X86FI->setSEHFramePtrSaveIndex(FI);
  2233. }
  2234. }
  2235. if (hasFP(MF)) {
  2236. // emitPrologue always spills frame register the first thing.
  2237. SpillSlotOffset -= SlotSize;
  2238. MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
  2239. // The async context lives directly before the frame pointer, and we
  2240. // allocate a second slot to preserve stack alignment.
  2241. if (X86FI->hasSwiftAsyncContext()) {
  2242. SpillSlotOffset -= SlotSize;
  2243. MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
  2244. SpillSlotOffset -= SlotSize;
  2245. }
  2246. // Since emitPrologue and emitEpilogue will handle spilling and restoring of
  2247. // the frame register, we can delete it from CSI list and not have to worry
  2248. // about avoiding it later.
  2249. Register FPReg = TRI->getFrameRegister(MF);
  2250. for (unsigned i = 0; i < CSI.size(); ++i) {
  2251. if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
  2252. CSI.erase(CSI.begin() + i);
  2253. break;
  2254. }
  2255. }
  2256. }
  2257. // Assign slots for GPRs. It increases frame size.
  2258. for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
  2259. Register Reg = I.getReg();
  2260. if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
  2261. continue;
  2262. SpillSlotOffset -= SlotSize;
  2263. CalleeSavedFrameSize += SlotSize;
  2264. int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
  2265. I.setFrameIdx(SlotIndex);
  2266. }
  2267. X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
  2268. MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
  2269. // Assign slots for XMMs.
  2270. for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
  2271. Register Reg = I.getReg();
  2272. if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
  2273. continue;
  2274. // If this is k-register make sure we lookup via the largest legal type.
  2275. MVT VT = MVT::Other;
  2276. if (X86::VK16RegClass.contains(Reg))
  2277. VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
  2278. const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
  2279. unsigned Size = TRI->getSpillSize(*RC);
  2280. Align Alignment = TRI->getSpillAlign(*RC);
  2281. // ensure alignment
  2282. assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
  2283. SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
  2284. // spill into slot
  2285. SpillSlotOffset -= Size;
  2286. int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
  2287. I.setFrameIdx(SlotIndex);
  2288. MFI.ensureMaxAlignment(Alignment);
  2289. // Save the start offset and size of XMM in stack frame for funclets.
  2290. if (X86::VR128RegClass.contains(Reg)) {
  2291. WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
  2292. XMMCalleeSavedFrameSize += Size;
  2293. }
  2294. }
  2295. return true;
  2296. }
  2297. bool X86FrameLowering::spillCalleeSavedRegisters(
  2298. MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
  2299. ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  2300. DebugLoc DL = MBB.findDebugLoc(MI);
  2301. // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
  2302. // for us, and there are no XMM CSRs on Win32.
  2303. if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
  2304. return true;
  2305. // Push GPRs. It increases frame size.
  2306. const MachineFunction &MF = *MBB.getParent();
  2307. unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
  2308. for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
  2309. Register Reg = I.getReg();
  2310. if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
  2311. continue;
  2312. const MachineRegisterInfo &MRI = MF.getRegInfo();
  2313. bool isLiveIn = MRI.isLiveIn(Reg);
  2314. if (!isLiveIn)
  2315. MBB.addLiveIn(Reg);
  2316. // Decide whether we can add a kill flag to the use.
  2317. bool CanKill = !isLiveIn;
  2318. // Check if any subregister is live-in
  2319. if (CanKill) {
  2320. for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg) {
  2321. if (MRI.isLiveIn(*AReg)) {
  2322. CanKill = false;
  2323. break;
  2324. }
  2325. }
  2326. }
  2327. // Do not set a kill flag on values that are also marked as live-in. This
  2328. // happens with the @llvm-returnaddress intrinsic and with arguments
  2329. // passed in callee saved registers.
  2330. // Omitting the kill flags is conservatively correct even if the live-in
  2331. // is not used after all.
  2332. BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, getKillRegState(CanKill))
  2333. .setMIFlag(MachineInstr::FrameSetup);
  2334. }
  2335. // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
  2336. // It can be done by spilling XMMs to stack frame.
  2337. for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
  2338. Register Reg = I.getReg();
  2339. if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
  2340. continue;
  2341. // If this is k-register make sure we lookup via the largest legal type.
  2342. MVT VT = MVT::Other;
  2343. if (X86::VK16RegClass.contains(Reg))
  2344. VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
  2345. // Add the callee-saved register as live-in. It's killed at the spill.
  2346. MBB.addLiveIn(Reg);
  2347. const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
  2348. TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI);
  2349. --MI;
  2350. MI->setFlag(MachineInstr::FrameSetup);
  2351. ++MI;
  2352. }
  2353. return true;
  2354. }
  2355. void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
  2356. MachineBasicBlock::iterator MBBI,
  2357. MachineInstr *CatchRet) const {
  2358. // SEH shouldn't use catchret.
  2359. assert(!isAsynchronousEHPersonality(classifyEHPersonality(
  2360. MBB.getParent()->getFunction().getPersonalityFn())) &&
  2361. "SEH should not use CATCHRET");
  2362. const DebugLoc &DL = CatchRet->getDebugLoc();
  2363. MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
  2364. // Fill EAX/RAX with the address of the target block.
  2365. if (STI.is64Bit()) {
  2366. // LEA64r CatchRetTarget(%rip), %rax
  2367. BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
  2368. .addReg(X86::RIP)
  2369. .addImm(0)
  2370. .addReg(0)
  2371. .addMBB(CatchRetTarget)
  2372. .addReg(0);
  2373. } else {
  2374. // MOV32ri $CatchRetTarget, %eax
  2375. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
  2376. .addMBB(CatchRetTarget);
  2377. }
  2378. // Record that we've taken the address of CatchRetTarget and no longer just
  2379. // reference it in a terminator.
  2380. CatchRetTarget->setHasAddressTaken();
  2381. }
  2382. bool X86FrameLowering::restoreCalleeSavedRegisters(
  2383. MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
  2384. MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  2385. if (CSI.empty())
  2386. return false;
  2387. if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
  2388. // Don't restore CSRs in 32-bit EH funclets. Matches
  2389. // spillCalleeSavedRegisters.
  2390. if (STI.is32Bit())
  2391. return true;
  2392. // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
  2393. // funclets. emitEpilogue transforms these to normal jumps.
  2394. if (MI->getOpcode() == X86::CATCHRET) {
  2395. const Function &F = MBB.getParent()->getFunction();
  2396. bool IsSEH = isAsynchronousEHPersonality(
  2397. classifyEHPersonality(F.getPersonalityFn()));
  2398. if (IsSEH)
  2399. return true;
  2400. }
  2401. }
  2402. DebugLoc DL = MBB.findDebugLoc(MI);
  2403. // Reload XMMs from stack frame.
  2404. for (const CalleeSavedInfo &I : CSI) {
  2405. Register Reg = I.getReg();
  2406. if (X86::GR64RegClass.contains(Reg) ||
  2407. X86::GR32RegClass.contains(Reg))
  2408. continue;
  2409. // If this is k-register make sure we lookup via the largest legal type.
  2410. MVT VT = MVT::Other;
  2411. if (X86::VK16RegClass.contains(Reg))
  2412. VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
  2413. const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
  2414. TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI);
  2415. }
  2416. // POP GPRs.
  2417. unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
  2418. for (const CalleeSavedInfo &I : CSI) {
  2419. Register Reg = I.getReg();
  2420. if (!X86::GR64RegClass.contains(Reg) &&
  2421. !X86::GR32RegClass.contains(Reg))
  2422. continue;
  2423. BuildMI(MBB, MI, DL, TII.get(Opc), Reg)
  2424. .setMIFlag(MachineInstr::FrameDestroy);
  2425. }
  2426. return true;
  2427. }
  2428. void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
  2429. BitVector &SavedRegs,
  2430. RegScavenger *RS) const {
  2431. TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  2432. // Spill the BasePtr if it's used.
  2433. if (TRI->hasBasePointer(MF)){
  2434. Register BasePtr = TRI->getBaseRegister();
  2435. if (STI.isTarget64BitILP32())
  2436. BasePtr = getX86SubSuperRegister(BasePtr, 64);
  2437. SavedRegs.set(BasePtr);
  2438. }
  2439. }
  2440. static bool
  2441. HasNestArgument(const MachineFunction *MF) {
  2442. const Function &F = MF->getFunction();
  2443. for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
  2444. I != E; I++) {
  2445. if (I->hasNestAttr() && !I->use_empty())
  2446. return true;
  2447. }
  2448. return false;
  2449. }
  2450. /// GetScratchRegister - Get a temp register for performing work in the
  2451. /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
  2452. /// and the properties of the function either one or two registers will be
  2453. /// needed. Set primary to true for the first register, false for the second.
  2454. static unsigned
  2455. GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
  2456. CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
  2457. // Erlang stuff.
  2458. if (CallingConvention == CallingConv::HiPE) {
  2459. if (Is64Bit)
  2460. return Primary ? X86::R14 : X86::R13;
  2461. else
  2462. return Primary ? X86::EBX : X86::EDI;
  2463. }
  2464. if (Is64Bit) {
  2465. if (IsLP64)
  2466. return Primary ? X86::R11 : X86::R12;
  2467. else
  2468. return Primary ? X86::R11D : X86::R12D;
  2469. }
  2470. bool IsNested = HasNestArgument(&MF);
  2471. if (CallingConvention == CallingConv::X86_FastCall ||
  2472. CallingConvention == CallingConv::Fast ||
  2473. CallingConvention == CallingConv::Tail) {
  2474. if (IsNested)
  2475. report_fatal_error("Segmented stacks does not support fastcall with "
  2476. "nested function.");
  2477. return Primary ? X86::EAX : X86::ECX;
  2478. }
  2479. if (IsNested)
  2480. return Primary ? X86::EDX : X86::EAX;
  2481. return Primary ? X86::ECX : X86::EAX;
  2482. }
  2483. // The stack limit in the TCB is set to this many bytes above the actual stack
  2484. // limit.
  2485. static const uint64_t kSplitStackAvailable = 256;
  2486. void X86FrameLowering::adjustForSegmentedStacks(
  2487. MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  2488. MachineFrameInfo &MFI = MF.getFrameInfo();
  2489. uint64_t StackSize;
  2490. unsigned TlsReg, TlsOffset;
  2491. DebugLoc DL;
  2492. // To support shrink-wrapping we would need to insert the new blocks
  2493. // at the right place and update the branches to PrologueMBB.
  2494. assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
  2495. unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
  2496. assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
  2497. "Scratch register is live-in");
  2498. if (MF.getFunction().isVarArg())
  2499. report_fatal_error("Segmented stacks do not support vararg functions.");
  2500. if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
  2501. !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
  2502. !STI.isTargetDragonFly())
  2503. report_fatal_error("Segmented stacks not supported on this platform.");
  2504. // Eventually StackSize will be calculated by a link-time pass; which will
  2505. // also decide whether checking code needs to be injected into this particular
  2506. // prologue.
  2507. StackSize = MFI.getStackSize();
  2508. // Do not generate a prologue for leaf functions with a stack of size zero.
  2509. // For non-leaf functions we have to allow for the possibility that the
  2510. // callis to a non-split function, as in PR37807. This function could also
  2511. // take the address of a non-split function. When the linker tries to adjust
  2512. // its non-existent prologue, it would fail with an error. Mark the object
  2513. // file so that such failures are not errors. See this Go language bug-report
  2514. // https://go-review.googlesource.com/c/go/+/148819/
  2515. if (StackSize == 0 && !MFI.hasTailCall()) {
  2516. MF.getMMI().setHasNosplitStack(true);
  2517. return;
  2518. }
  2519. MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
  2520. MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
  2521. X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  2522. bool IsNested = false;
  2523. // We need to know if the function has a nest argument only in 64 bit mode.
  2524. if (Is64Bit)
  2525. IsNested = HasNestArgument(&MF);
  2526. // The MOV R10, RAX needs to be in a different block, since the RET we emit in
  2527. // allocMBB needs to be last (terminating) instruction.
  2528. for (const auto &LI : PrologueMBB.liveins()) {
  2529. allocMBB->addLiveIn(LI);
  2530. checkMBB->addLiveIn(LI);
  2531. }
  2532. if (IsNested)
  2533. allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
  2534. MF.push_front(allocMBB);
  2535. MF.push_front(checkMBB);
  2536. // When the frame size is less than 256 we just compare the stack
  2537. // boundary directly to the value of the stack pointer, per gcc.
  2538. bool CompareStackPointer = StackSize < kSplitStackAvailable;
  2539. // Read the limit off the current stacklet off the stack_guard location.
  2540. if (Is64Bit) {
  2541. if (STI.isTargetLinux()) {
  2542. TlsReg = X86::FS;
  2543. TlsOffset = IsLP64 ? 0x70 : 0x40;
  2544. } else if (STI.isTargetDarwin()) {
  2545. TlsReg = X86::GS;
  2546. TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
  2547. } else if (STI.isTargetWin64()) {
  2548. TlsReg = X86::GS;
  2549. TlsOffset = 0x28; // pvArbitrary, reserved for application use
  2550. } else if (STI.isTargetFreeBSD()) {
  2551. TlsReg = X86::FS;
  2552. TlsOffset = 0x18;
  2553. } else if (STI.isTargetDragonFly()) {
  2554. TlsReg = X86::FS;
  2555. TlsOffset = 0x20; // use tls_tcb.tcb_segstack
  2556. } else {
  2557. report_fatal_error("Segmented stacks not supported on this platform.");
  2558. }
  2559. if (CompareStackPointer)
  2560. ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
  2561. else
  2562. BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
  2563. .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
  2564. BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
  2565. .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
  2566. } else {
  2567. if (STI.isTargetLinux()) {
  2568. TlsReg = X86::GS;
  2569. TlsOffset = 0x30;
  2570. } else if (STI.isTargetDarwin()) {
  2571. TlsReg = X86::GS;
  2572. TlsOffset = 0x48 + 90*4;
  2573. } else if (STI.isTargetWin32()) {
  2574. TlsReg = X86::FS;
  2575. TlsOffset = 0x14; // pvArbitrary, reserved for application use
  2576. } else if (STI.isTargetDragonFly()) {
  2577. TlsReg = X86::FS;
  2578. TlsOffset = 0x10; // use tls_tcb.tcb_segstack
  2579. } else if (STI.isTargetFreeBSD()) {
  2580. report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
  2581. } else {
  2582. report_fatal_error("Segmented stacks not supported on this platform.");
  2583. }
  2584. if (CompareStackPointer)
  2585. ScratchReg = X86::ESP;
  2586. else
  2587. BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
  2588. .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
  2589. if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
  2590. STI.isTargetDragonFly()) {
  2591. BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
  2592. .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
  2593. } else if (STI.isTargetDarwin()) {
  2594. // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
  2595. unsigned ScratchReg2;
  2596. bool SaveScratch2;
  2597. if (CompareStackPointer) {
  2598. // The primary scratch register is available for holding the TLS offset.
  2599. ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
  2600. SaveScratch2 = false;
  2601. } else {
  2602. // Need to use a second register to hold the TLS offset
  2603. ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
  2604. // Unfortunately, with fastcc the second scratch register may hold an
  2605. // argument.
  2606. SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
  2607. }
  2608. // If Scratch2 is live-in then it needs to be saved.
  2609. assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
  2610. "Scratch register is live-in and not saved");
  2611. if (SaveScratch2)
  2612. BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
  2613. .addReg(ScratchReg2, RegState::Kill);
  2614. BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
  2615. .addImm(TlsOffset);
  2616. BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
  2617. .addReg(ScratchReg)
  2618. .addReg(ScratchReg2).addImm(1).addReg(0)
  2619. .addImm(0)
  2620. .addReg(TlsReg);
  2621. if (SaveScratch2)
  2622. BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
  2623. }
  2624. }
  2625. // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
  2626. // It jumps to normal execution of the function body.
  2627. BuildMI(checkMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_A);
  2628. // On 32 bit we first push the arguments size and then the frame size. On 64
  2629. // bit, we pass the stack frame size in r10 and the argument size in r11.
  2630. if (Is64Bit) {
  2631. // Functions with nested arguments use R10, so it needs to be saved across
  2632. // the call to _morestack
  2633. const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
  2634. const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
  2635. const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
  2636. const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
  2637. if (IsNested)
  2638. BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
  2639. BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
  2640. .addImm(StackSize);
  2641. BuildMI(allocMBB, DL,
  2642. TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
  2643. Reg11)
  2644. .addImm(X86FI->getArgumentStackSize());
  2645. } else {
  2646. BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
  2647. .addImm(X86FI->getArgumentStackSize());
  2648. BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
  2649. .addImm(StackSize);
  2650. }
  2651. // __morestack is in libgcc
  2652. if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
  2653. // Under the large code model, we cannot assume that __morestack lives
  2654. // within 2^31 bytes of the call site, so we cannot use pc-relative
  2655. // addressing. We cannot perform the call via a temporary register,
  2656. // as the rax register may be used to store the static chain, and all
  2657. // other suitable registers may be either callee-save or used for
  2658. // parameter passing. We cannot use the stack at this point either
  2659. // because __morestack manipulates the stack directly.
  2660. //
  2661. // To avoid these issues, perform an indirect call via a read-only memory
  2662. // location containing the address.
  2663. //
  2664. // This solution is not perfect, as it assumes that the .rodata section
  2665. // is laid out within 2^31 bytes of each function body, but this seems
  2666. // to be sufficient for JIT.
  2667. // FIXME: Add retpoline support and remove the error here..
  2668. if (STI.useIndirectThunkCalls())
  2669. report_fatal_error("Emitting morestack calls on 64-bit with the large "
  2670. "code model and thunks not yet implemented.");
  2671. BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
  2672. .addReg(X86::RIP)
  2673. .addImm(0)
  2674. .addReg(0)
  2675. .addExternalSymbol("__morestack_addr")
  2676. .addReg(0);
  2677. MF.getMMI().setUsesMorestackAddr(true);
  2678. } else {
  2679. if (Is64Bit)
  2680. BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
  2681. .addExternalSymbol("__morestack");
  2682. else
  2683. BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
  2684. .addExternalSymbol("__morestack");
  2685. }
  2686. if (IsNested)
  2687. BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
  2688. else
  2689. BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
  2690. allocMBB->addSuccessor(&PrologueMBB);
  2691. checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
  2692. checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
  2693. #ifdef EXPENSIVE_CHECKS
  2694. MF.verify();
  2695. #endif
  2696. }
  2697. /// Lookup an ERTS parameter in the !hipe.literals named metadata node.
  2698. /// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
  2699. /// to fields it needs, through a named metadata node "hipe.literals" containing
  2700. /// name-value pairs.
  2701. static unsigned getHiPELiteral(
  2702. NamedMDNode *HiPELiteralsMD, const StringRef LiteralName) {
  2703. for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
  2704. MDNode *Node = HiPELiteralsMD->getOperand(i);
  2705. if (Node->getNumOperands() != 2) continue;
  2706. MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
  2707. ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
  2708. if (!NodeName || !NodeVal) continue;
  2709. ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
  2710. if (ValConst && NodeName->getString() == LiteralName) {
  2711. return ValConst->getZExtValue();
  2712. }
  2713. }
  2714. report_fatal_error("HiPE literal " + LiteralName
  2715. + " required but not provided");
  2716. }
  2717. // Return true if there are no non-ehpad successors to MBB and there are no
  2718. // non-meta instructions between MBBI and MBB.end().
  2719. static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
  2720. MachineBasicBlock::const_iterator MBBI) {
  2721. return llvm::all_of(
  2722. MBB.successors(),
  2723. [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
  2724. std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
  2725. return MI.isMetaInstruction();
  2726. });
  2727. }
  2728. /// Erlang programs may need a special prologue to handle the stack size they
  2729. /// might need at runtime. That is because Erlang/OTP does not implement a C
  2730. /// stack but uses a custom implementation of hybrid stack/heap architecture.
  2731. /// (for more information see Eric Stenman's Ph.D. thesis:
  2732. /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
  2733. ///
  2734. /// CheckStack:
  2735. /// temp0 = sp - MaxStack
  2736. /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
  2737. /// OldStart:
  2738. /// ...
  2739. /// IncStack:
  2740. /// call inc_stack # doubles the stack space
  2741. /// temp0 = sp - MaxStack
  2742. /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
  2743. void X86FrameLowering::adjustForHiPEPrologue(
  2744. MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  2745. MachineFrameInfo &MFI = MF.getFrameInfo();
  2746. DebugLoc DL;
  2747. // To support shrink-wrapping we would need to insert the new blocks
  2748. // at the right place and update the branches to PrologueMBB.
  2749. assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
  2750. // HiPE-specific values
  2751. NamedMDNode *HiPELiteralsMD = MF.getMMI().getModule()
  2752. ->getNamedMetadata("hipe.literals");
  2753. if (!HiPELiteralsMD)
  2754. report_fatal_error(
  2755. "Can't generate HiPE prologue without runtime parameters");
  2756. const unsigned HipeLeafWords
  2757. = getHiPELiteral(HiPELiteralsMD,
  2758. Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
  2759. const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
  2760. const unsigned Guaranteed = HipeLeafWords * SlotSize;
  2761. unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs ?
  2762. MF.getFunction().arg_size() - CCRegisteredArgs : 0;
  2763. unsigned MaxStack = MFI.getStackSize() + CallerStkArity*SlotSize + SlotSize;
  2764. assert(STI.isTargetLinux() &&
  2765. "HiPE prologue is only supported on Linux operating systems.");
  2766. // Compute the largest caller's frame that is needed to fit the callees'
  2767. // frames. This 'MaxStack' is computed from:
  2768. //
  2769. // a) the fixed frame size, which is the space needed for all spilled temps,
  2770. // b) outgoing on-stack parameter areas, and
  2771. // c) the minimum stack space this function needs to make available for the
  2772. // functions it calls (a tunable ABI property).
  2773. if (MFI.hasCalls()) {
  2774. unsigned MoreStackForCalls = 0;
  2775. for (auto &MBB : MF) {
  2776. for (auto &MI : MBB) {
  2777. if (!MI.isCall())
  2778. continue;
  2779. // Get callee operand.
  2780. const MachineOperand &MO = MI.getOperand(0);
  2781. // Only take account of global function calls (no closures etc.).
  2782. if (!MO.isGlobal())
  2783. continue;
  2784. const Function *F = dyn_cast<Function>(MO.getGlobal());
  2785. if (!F)
  2786. continue;
  2787. // Do not update 'MaxStack' for primitive and built-in functions
  2788. // (encoded with names either starting with "erlang."/"bif_" or not
  2789. // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
  2790. // "_", such as the BIF "suspend_0") as they are executed on another
  2791. // stack.
  2792. if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
  2793. F->getName().find_first_of("._") == StringRef::npos)
  2794. continue;
  2795. unsigned CalleeStkArity =
  2796. F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
  2797. if (HipeLeafWords - 1 > CalleeStkArity)
  2798. MoreStackForCalls = std::max(MoreStackForCalls,
  2799. (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
  2800. }
  2801. }
  2802. MaxStack += MoreStackForCalls;
  2803. }
  2804. // If the stack frame needed is larger than the guaranteed then runtime checks
  2805. // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
  2806. if (MaxStack > Guaranteed) {
  2807. MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
  2808. MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
  2809. for (const auto &LI : PrologueMBB.liveins()) {
  2810. stackCheckMBB->addLiveIn(LI);
  2811. incStackMBB->addLiveIn(LI);
  2812. }
  2813. MF.push_front(incStackMBB);
  2814. MF.push_front(stackCheckMBB);
  2815. unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
  2816. unsigned LEAop, CMPop, CALLop;
  2817. SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
  2818. if (Is64Bit) {
  2819. SPReg = X86::RSP;
  2820. PReg = X86::RBP;
  2821. LEAop = X86::LEA64r;
  2822. CMPop = X86::CMP64rm;
  2823. CALLop = X86::CALL64pcrel32;
  2824. } else {
  2825. SPReg = X86::ESP;
  2826. PReg = X86::EBP;
  2827. LEAop = X86::LEA32r;
  2828. CMPop = X86::CMP32rm;
  2829. CALLop = X86::CALLpcrel32;
  2830. }
  2831. ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
  2832. assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
  2833. "HiPE prologue scratch register is live-in");
  2834. // Create new MBB for StackCheck:
  2835. addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
  2836. SPReg, false, -MaxStack);
  2837. // SPLimitOffset is in a fixed heap location (pointed by BP).
  2838. addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
  2839. .addReg(ScratchReg), PReg, false, SPLimitOffset);
  2840. BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_AE);
  2841. // Create new MBB for IncStack:
  2842. BuildMI(incStackMBB, DL, TII.get(CALLop)).
  2843. addExternalSymbol("inc_stack_0");
  2844. addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
  2845. SPReg, false, -MaxStack);
  2846. addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
  2847. .addReg(ScratchReg), PReg, false, SPLimitOffset);
  2848. BuildMI(incStackMBB, DL, TII.get(X86::JCC_1)).addMBB(incStackMBB).addImm(X86::COND_LE);
  2849. stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
  2850. stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
  2851. incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
  2852. incStackMBB->addSuccessor(incStackMBB, {1, 100});
  2853. }
  2854. #ifdef EXPENSIVE_CHECKS
  2855. MF.verify();
  2856. #endif
  2857. }
  2858. bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
  2859. MachineBasicBlock::iterator MBBI,
  2860. const DebugLoc &DL,
  2861. int Offset) const {
  2862. if (Offset <= 0)
  2863. return false;
  2864. if (Offset % SlotSize)
  2865. return false;
  2866. int NumPops = Offset / SlotSize;
  2867. // This is only worth it if we have at most 2 pops.
  2868. if (NumPops != 1 && NumPops != 2)
  2869. return false;
  2870. // Handle only the trivial case where the adjustment directly follows
  2871. // a call. This is the most common one, anyway.
  2872. if (MBBI == MBB.begin())
  2873. return false;
  2874. MachineBasicBlock::iterator Prev = std::prev(MBBI);
  2875. if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
  2876. return false;
  2877. unsigned Regs[2];
  2878. unsigned FoundRegs = 0;
  2879. const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  2880. const MachineOperand &RegMask = Prev->getOperand(1);
  2881. auto &RegClass =
  2882. Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
  2883. // Try to find up to NumPops free registers.
  2884. for (auto Candidate : RegClass) {
  2885. // Poor man's liveness:
  2886. // Since we're immediately after a call, any register that is clobbered
  2887. // by the call and not defined by it can be considered dead.
  2888. if (!RegMask.clobbersPhysReg(Candidate))
  2889. continue;
  2890. // Don't clobber reserved registers
  2891. if (MRI.isReserved(Candidate))
  2892. continue;
  2893. bool IsDef = false;
  2894. for (const MachineOperand &MO : Prev->implicit_operands()) {
  2895. if (MO.isReg() && MO.isDef() &&
  2896. TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
  2897. IsDef = true;
  2898. break;
  2899. }
  2900. }
  2901. if (IsDef)
  2902. continue;
  2903. Regs[FoundRegs++] = Candidate;
  2904. if (FoundRegs == (unsigned)NumPops)
  2905. break;
  2906. }
  2907. if (FoundRegs == 0)
  2908. return false;
  2909. // If we found only one free register, but need two, reuse the same one twice.
  2910. while (FoundRegs < (unsigned)NumPops)
  2911. Regs[FoundRegs++] = Regs[0];
  2912. for (int i = 0; i < NumPops; ++i)
  2913. BuildMI(MBB, MBBI, DL,
  2914. TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
  2915. return true;
  2916. }
  2917. MachineBasicBlock::iterator X86FrameLowering::
  2918. eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
  2919. MachineBasicBlock::iterator I) const {
  2920. bool reserveCallFrame = hasReservedCallFrame(MF);
  2921. unsigned Opcode = I->getOpcode();
  2922. bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
  2923. DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
  2924. uint64_t Amount = TII.getFrameSize(*I);
  2925. uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
  2926. I = MBB.erase(I);
  2927. auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
  2928. // Try to avoid emitting dead SP adjustments if the block end is unreachable,
  2929. // typically because the function is marked noreturn (abort, throw,
  2930. // assert_fail, etc).
  2931. if (isDestroy && blockEndIsUnreachable(MBB, I))
  2932. return I;
  2933. if (!reserveCallFrame) {
  2934. // If the stack pointer can be changed after prologue, turn the
  2935. // adjcallstackup instruction into a 'sub ESP, <amt>' and the
  2936. // adjcallstackdown instruction into 'add ESP, <amt>'
  2937. // We need to keep the stack aligned properly. To do this, we round the
  2938. // amount of space needed for the outgoing arguments up to the next
  2939. // alignment boundary.
  2940. Amount = alignTo(Amount, getStackAlign());
  2941. const Function &F = MF.getFunction();
  2942. bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  2943. bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
  2944. // If we have any exception handlers in this function, and we adjust
  2945. // the SP before calls, we may need to indicate this to the unwinder
  2946. // using GNU_ARGS_SIZE. Note that this may be necessary even when
  2947. // Amount == 0, because the preceding function may have set a non-0
  2948. // GNU_ARGS_SIZE.
  2949. // TODO: We don't need to reset this between subsequent functions,
  2950. // if it didn't change.
  2951. bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
  2952. if (HasDwarfEHHandlers && !isDestroy &&
  2953. MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
  2954. BuildCFI(MBB, InsertPos, DL,
  2955. MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
  2956. if (Amount == 0)
  2957. return I;
  2958. // Factor out the amount that gets handled inside the sequence
  2959. // (Pushes of argument for frame setup, callee pops for frame destroy)
  2960. Amount -= InternalAmt;
  2961. // TODO: This is needed only if we require precise CFA.
  2962. // If this is a callee-pop calling convention, emit a CFA adjust for
  2963. // the amount the callee popped.
  2964. if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
  2965. BuildCFI(MBB, InsertPos, DL,
  2966. MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
  2967. // Add Amount to SP to destroy a frame, or subtract to setup.
  2968. int64_t StackAdjustment = isDestroy ? Amount : -Amount;
  2969. if (StackAdjustment) {
  2970. // Merge with any previous or following adjustment instruction. Note: the
  2971. // instructions merged with here do not have CFI, so their stack
  2972. // adjustments do not feed into CfaAdjustment.
  2973. StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
  2974. StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
  2975. if (StackAdjustment) {
  2976. if (!(F.hasMinSize() &&
  2977. adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
  2978. BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
  2979. /*InEpilogue=*/false);
  2980. }
  2981. }
  2982. if (DwarfCFI && !hasFP(MF)) {
  2983. // If we don't have FP, but need to generate unwind information,
  2984. // we need to set the correct CFA offset after the stack adjustment.
  2985. // How much we adjust the CFA offset depends on whether we're emitting
  2986. // CFI only for EH purposes or for debugging. EH only requires the CFA
  2987. // offset to be correct at each call site, while for debugging we want
  2988. // it to be more precise.
  2989. int64_t CfaAdjustment = -StackAdjustment;
  2990. // TODO: When not using precise CFA, we also need to adjust for the
  2991. // InternalAmt here.
  2992. if (CfaAdjustment) {
  2993. BuildCFI(MBB, InsertPos, DL,
  2994. MCCFIInstruction::createAdjustCfaOffset(nullptr,
  2995. CfaAdjustment));
  2996. }
  2997. }
  2998. return I;
  2999. }
  3000. if (InternalAmt) {
  3001. MachineBasicBlock::iterator CI = I;
  3002. MachineBasicBlock::iterator B = MBB.begin();
  3003. while (CI != B && !std::prev(CI)->isCall())
  3004. --CI;
  3005. BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
  3006. }
  3007. return I;
  3008. }
  3009. bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  3010. assert(MBB.getParent() && "Block is not attached to a function!");
  3011. const MachineFunction &MF = *MBB.getParent();
  3012. if (!MBB.isLiveIn(X86::EFLAGS))
  3013. return true;
  3014. const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  3015. return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
  3016. }
  3017. bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  3018. assert(MBB.getParent() && "Block is not attached to a function!");
  3019. // Win64 has strict requirements in terms of epilogue and we are
  3020. // not taking a chance at messing with them.
  3021. // I.e., unless this block is already an exit block, we can't use
  3022. // it as an epilogue.
  3023. if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
  3024. return false;
  3025. // Swift async context epilogue has a BTR instruction that clobbers parts of
  3026. // EFLAGS.
  3027. const MachineFunction &MF = *MBB.getParent();
  3028. if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
  3029. return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
  3030. if (canUseLEAForSPInEpilogue(*MBB.getParent()))
  3031. return true;
  3032. // If we cannot use LEA to adjust SP, we may need to use ADD, which
  3033. // clobbers the EFLAGS. Check that we do not need to preserve it,
  3034. // otherwise, conservatively assume this is not
  3035. // safe to insert the epilogue here.
  3036. return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
  3037. }
  3038. bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
  3039. // If we may need to emit frameless compact unwind information, give
  3040. // up as this is currently broken: PR25614.
  3041. bool CompactUnwind =
  3042. MF.getMMI().getContext().getObjectFileInfo()->getCompactUnwindSection() !=
  3043. nullptr;
  3044. return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
  3045. !CompactUnwind) &&
  3046. // The lowering of segmented stack and HiPE only support entry
  3047. // blocks as prologue blocks: PR26107. This limitation may be
  3048. // lifted if we fix:
  3049. // - adjustForSegmentedStacks
  3050. // - adjustForHiPEPrologue
  3051. MF.getFunction().getCallingConv() != CallingConv::HiPE &&
  3052. !MF.shouldSplitStack();
  3053. }
  3054. MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
  3055. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  3056. const DebugLoc &DL, bool RestoreSP) const {
  3057. assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
  3058. assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
  3059. assert(STI.is32Bit() && !Uses64BitFramePtr &&
  3060. "restoring EBP/ESI on non-32-bit target");
  3061. MachineFunction &MF = *MBB.getParent();
  3062. Register FramePtr = TRI->getFrameRegister(MF);
  3063. Register BasePtr = TRI->getBaseRegister();
  3064. WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
  3065. X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  3066. MachineFrameInfo &MFI = MF.getFrameInfo();
  3067. // FIXME: Don't set FrameSetup flag in catchret case.
  3068. int FI = FuncInfo.EHRegNodeFrameIndex;
  3069. int EHRegSize = MFI.getObjectSize(FI);
  3070. if (RestoreSP) {
  3071. // MOV32rm -EHRegSize(%ebp), %esp
  3072. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
  3073. X86::EBP, true, -EHRegSize)
  3074. .setMIFlag(MachineInstr::FrameSetup);
  3075. }
  3076. Register UsedReg;
  3077. int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
  3078. int EndOffset = -EHRegOffset - EHRegSize;
  3079. FuncInfo.EHRegNodeEndOffset = EndOffset;
  3080. if (UsedReg == FramePtr) {
  3081. // ADD $offset, %ebp
  3082. unsigned ADDri = getADDriOpcode(false, EndOffset);
  3083. BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
  3084. .addReg(FramePtr)
  3085. .addImm(EndOffset)
  3086. .setMIFlag(MachineInstr::FrameSetup)
  3087. ->getOperand(3)
  3088. .setIsDead();
  3089. assert(EndOffset >= 0 &&
  3090. "end of registration object above normal EBP position!");
  3091. } else if (UsedReg == BasePtr) {
  3092. // LEA offset(%ebp), %esi
  3093. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
  3094. FramePtr, false, EndOffset)
  3095. .setMIFlag(MachineInstr::FrameSetup);
  3096. // MOV32rm SavedEBPOffset(%esi), %ebp
  3097. assert(X86FI->getHasSEHFramePtrSave());
  3098. int Offset =
  3099. getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
  3100. .getFixed();
  3101. assert(UsedReg == BasePtr);
  3102. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
  3103. UsedReg, true, Offset)
  3104. .setMIFlag(MachineInstr::FrameSetup);
  3105. } else {
  3106. llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
  3107. }
  3108. return MBBI;
  3109. }
  3110. int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
  3111. return TRI->getSlotSize();
  3112. }
  3113. Register
  3114. X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
  3115. return TRI->getDwarfRegNum(StackPtr, true);
  3116. }
  3117. namespace {
  3118. // Struct used by orderFrameObjects to help sort the stack objects.
  3119. struct X86FrameSortingObject {
  3120. bool IsValid = false; // true if we care about this Object.
  3121. unsigned ObjectIndex = 0; // Index of Object into MFI list.
  3122. unsigned ObjectSize = 0; // Size of Object in bytes.
  3123. Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
  3124. unsigned ObjectNumUses = 0; // Object static number of uses.
  3125. };
  3126. // The comparison function we use for std::sort to order our local
  3127. // stack symbols. The current algorithm is to use an estimated
  3128. // "density". This takes into consideration the size and number of
  3129. // uses each object has in order to roughly minimize code size.
  3130. // So, for example, an object of size 16B that is referenced 5 times
  3131. // will get higher priority than 4 4B objects referenced 1 time each.
  3132. // It's not perfect and we may be able to squeeze a few more bytes out of
  3133. // it (for example : 0(esp) requires fewer bytes, symbols allocated at the
  3134. // fringe end can have special consideration, given their size is less
  3135. // important, etc.), but the algorithmic complexity grows too much to be
  3136. // worth the extra gains we get. This gets us pretty close.
  3137. // The final order leaves us with objects with highest priority going
  3138. // at the end of our list.
  3139. struct X86FrameSortingComparator {
  3140. inline bool operator()(const X86FrameSortingObject &A,
  3141. const X86FrameSortingObject &B) const {
  3142. uint64_t DensityAScaled, DensityBScaled;
  3143. // For consistency in our comparison, all invalid objects are placed
  3144. // at the end. This also allows us to stop walking when we hit the
  3145. // first invalid item after it's all sorted.
  3146. if (!A.IsValid)
  3147. return false;
  3148. if (!B.IsValid)
  3149. return true;
  3150. // The density is calculated by doing :
  3151. // (double)DensityA = A.ObjectNumUses / A.ObjectSize
  3152. // (double)DensityB = B.ObjectNumUses / B.ObjectSize
  3153. // Since this approach may cause inconsistencies in
  3154. // the floating point <, >, == comparisons, depending on the floating
  3155. // point model with which the compiler was built, we're going
  3156. // to scale both sides by multiplying with
  3157. // A.ObjectSize * B.ObjectSize. This ends up factoring away
  3158. // the division and, with it, the need for any floating point
  3159. // arithmetic.
  3160. DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
  3161. static_cast<uint64_t>(B.ObjectSize);
  3162. DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
  3163. static_cast<uint64_t>(A.ObjectSize);
  3164. // If the two densities are equal, prioritize highest alignment
  3165. // objects. This allows for similar alignment objects
  3166. // to be packed together (given the same density).
  3167. // There's room for improvement here, also, since we can pack
  3168. // similar alignment (different density) objects next to each
  3169. // other to save padding. This will also require further
  3170. // complexity/iterations, and the overall gain isn't worth it,
  3171. // in general. Something to keep in mind, though.
  3172. if (DensityAScaled == DensityBScaled)
  3173. return A.ObjectAlignment < B.ObjectAlignment;
  3174. return DensityAScaled < DensityBScaled;
  3175. }
  3176. };
  3177. } // namespace
  3178. // Order the symbols in the local stack.
  3179. // We want to place the local stack objects in some sort of sensible order.
  3180. // The heuristic we use is to try and pack them according to static number
  3181. // of uses and size of object in order to minimize code size.
  3182. void X86FrameLowering::orderFrameObjects(
  3183. const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
  3184. const MachineFrameInfo &MFI = MF.getFrameInfo();
  3185. // Don't waste time if there's nothing to do.
  3186. if (ObjectsToAllocate.empty())
  3187. return;
  3188. // Create an array of all MFI objects. We won't need all of these
  3189. // objects, but we're going to create a full array of them to make
  3190. // it easier to index into when we're counting "uses" down below.
  3191. // We want to be able to easily/cheaply access an object by simply
  3192. // indexing into it, instead of having to search for it every time.
  3193. std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
  3194. // Walk the objects we care about and mark them as such in our working
  3195. // struct.
  3196. for (auto &Obj : ObjectsToAllocate) {
  3197. SortingObjects[Obj].IsValid = true;
  3198. SortingObjects[Obj].ObjectIndex = Obj;
  3199. SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
  3200. // Set the size.
  3201. int ObjectSize = MFI.getObjectSize(Obj);
  3202. if (ObjectSize == 0)
  3203. // Variable size. Just use 4.
  3204. SortingObjects[Obj].ObjectSize = 4;
  3205. else
  3206. SortingObjects[Obj].ObjectSize = ObjectSize;
  3207. }
  3208. // Count the number of uses for each object.
  3209. for (auto &MBB : MF) {
  3210. for (auto &MI : MBB) {
  3211. if (MI.isDebugInstr())
  3212. continue;
  3213. for (const MachineOperand &MO : MI.operands()) {
  3214. // Check to see if it's a local stack symbol.
  3215. if (!MO.isFI())
  3216. continue;
  3217. int Index = MO.getIndex();
  3218. // Check to see if it falls within our range, and is tagged
  3219. // to require ordering.
  3220. if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
  3221. SortingObjects[Index].IsValid)
  3222. SortingObjects[Index].ObjectNumUses++;
  3223. }
  3224. }
  3225. }
  3226. // Sort the objects using X86FrameSortingAlgorithm (see its comment for
  3227. // info).
  3228. llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
  3229. // Now modify the original list to represent the final order that
  3230. // we want. The order will depend on whether we're going to access them
  3231. // from the stack pointer or the frame pointer. For SP, the list should
  3232. // end up with the END containing objects that we want with smaller offsets.
  3233. // For FP, it should be flipped.
  3234. int i = 0;
  3235. for (auto &Obj : SortingObjects) {
  3236. // All invalid items are sorted at the end, so it's safe to stop.
  3237. if (!Obj.IsValid)
  3238. break;
  3239. ObjectsToAllocate[i++] = Obj.ObjectIndex;
  3240. }
  3241. // Flip it if we're accessing off of the FP.
  3242. if (!TRI->hasStackRealignment(MF) && hasFP(MF))
  3243. std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
  3244. }
  3245. unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
  3246. // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
  3247. unsigned Offset = 16;
  3248. // RBP is immediately pushed.
  3249. Offset += SlotSize;
  3250. // All callee-saved registers are then pushed.
  3251. Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
  3252. // Every funclet allocates enough stack space for the largest outgoing call.
  3253. Offset += getWinEHFuncletFrameSize(MF);
  3254. return Offset;
  3255. }
  3256. void X86FrameLowering::processFunctionBeforeFrameFinalized(
  3257. MachineFunction &MF, RegScavenger *RS) const {
  3258. // Mark the function as not having WinCFI. We will set it back to true in
  3259. // emitPrologue if it gets called and emits CFI.
  3260. MF.setHasWinCFI(false);
  3261. // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
  3262. // aligned. The format doesn't support misaligned stack adjustments.
  3263. if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
  3264. MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize));
  3265. // If this function isn't doing Win64-style C++ EH, we don't need to do
  3266. // anything.
  3267. if (STI.is64Bit() && MF.hasEHFunclets() &&
  3268. classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
  3269. EHPersonality::MSVC_CXX) {
  3270. adjustFrameForMsvcCxxEh(MF);
  3271. }
  3272. }
  3273. void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
  3274. // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
  3275. // relative to RSP after the prologue. Find the offset of the last fixed
  3276. // object, so that we can allocate a slot immediately following it. If there
  3277. // were no fixed objects, use offset -SlotSize, which is immediately after the
  3278. // return address. Fixed objects have negative frame indices.
  3279. MachineFrameInfo &MFI = MF.getFrameInfo();
  3280. WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
  3281. int64_t MinFixedObjOffset = -SlotSize;
  3282. for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
  3283. MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
  3284. for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
  3285. for (WinEHHandlerType &H : TBME.HandlerArray) {
  3286. int FrameIndex = H.CatchObj.FrameIndex;
  3287. if (FrameIndex != INT_MAX) {
  3288. // Ensure alignment.
  3289. unsigned Align = MFI.getObjectAlign(FrameIndex).value();
  3290. MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
  3291. MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
  3292. MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
  3293. }
  3294. }
  3295. }
  3296. // Ensure alignment.
  3297. MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
  3298. int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
  3299. int UnwindHelpFI =
  3300. MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
  3301. EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
  3302. // Store -2 into UnwindHelp on function entry. We have to scan forwards past
  3303. // other frame setup instructions.
  3304. MachineBasicBlock &MBB = MF.front();
  3305. auto MBBI = MBB.begin();
  3306. while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
  3307. ++MBBI;
  3308. DebugLoc DL = MBB.findDebugLoc(MBBI);
  3309. addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
  3310. UnwindHelpFI)
  3311. .addImm(-2);
  3312. }
  3313. void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
  3314. MachineFunction &MF, RegScavenger *RS) const {
  3315. if (STI.is32Bit() && MF.hasEHFunclets())
  3316. restoreWinEHStackPointersInParent(MF);
  3317. }
  3318. void X86FrameLowering::restoreWinEHStackPointersInParent(
  3319. MachineFunction &MF) const {
  3320. // 32-bit functions have to restore stack pointers when control is transferred
  3321. // back to the parent function. These blocks are identified as eh pads that
  3322. // are not funclet entries.
  3323. bool IsSEH = isAsynchronousEHPersonality(
  3324. classifyEHPersonality(MF.getFunction().getPersonalityFn()));
  3325. for (MachineBasicBlock &MBB : MF) {
  3326. bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
  3327. if (NeedsRestore)
  3328. restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
  3329. /*RestoreSP=*/IsSEH);
  3330. }
  3331. }