//===- RegAllocGreedy.cpp - greedy register allocator --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the RAGreedy function pass for register allocation in
// optimized builds.
//
//===----------------------------------------------------------------------===//
#include "RegAllocGreedy.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "RegAllocEvictionAdvisor.h"
#include "SpillPlacement.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/Spiller.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <queue>
#include <tuple>
#include <utility>

using namespace llvm;
#define DEBUG_TYPE "regalloc"

STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits,  "Number of split local live ranges");
STATISTIC(NumEvicted,      "Number of interferences evicted");

static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
    "split-spill-mode", cl::Hidden,
    cl::desc("Spill mode for splitting live ranges"),
    cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
               clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
               clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
    cl::init(SplitEditor::SM_Speed));

static cl::opt<unsigned>
LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
                             cl::desc("Last chance recoloring max depth"),
                             cl::init(5));

static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
    "lcr-max-interf", cl::Hidden,
    cl::desc("Last chance recoloring maximum number of considered"
             " interference at a time"),
    cl::init(8));

static cl::opt<bool> ExhaustiveSearch(
    "exhaustive-register-search", cl::NotHidden,
    cl::desc("Exhaustive Search for registers bypassing the depth "
             "and interference cutoffs of last chance recoloring"),
    cl::Hidden);

static cl::opt<bool> EnableDeferredSpilling(
    "enable-deferred-spilling", cl::Hidden,
    cl::desc("Instead of spilling a variable right away, defer the actual "
             "code insertion to the end of the allocation. That way the "
             "allocator might still find a suitable coloring for this "
             "variable because of other evicted variables."),
    cl::init(false));

// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
                 cl::desc("Cost for first time use of callee-saved register."),
                 cl::init(0), cl::Hidden);
static cl::opt<bool> EnableAdvancedRASplitCost(
    "consider-local-interval-cost", cl::Hidden,
    cl::desc("Consider the cost of local intervals created by a split "
             "candidate when choosing the best split candidate."),
    cl::init(false));
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                       createGreedyRegisterAllocator);

char RAGreedy::ID = 0;
char &llvm::RAGreedyID = RAGreedy::ID;

INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
                      "Greedy Register Allocator", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis)
INITIALIZE_PASS_END(RAGreedy, "greedy",
                    "Greedy Register Allocator", false, false)

#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
    "RS_New",
    "RS_Assign",
    "RS_Split",
    "RS_Split2",
    "RS_Spill",
    "RS_Memory",
    "RS_Done"
};
#endif
// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
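// A sketch of how this is typically applied (the exact comparison direction
// varies by call site): a new candidate only displaces the current best when
// it wins by roughly 2%, e.g.
//   if (NewCost < BestCost * Hysteresis)
//     BestCost = NewCost;   // only if strictly ~2% better than the incumbent
// so near-ties keep the previously chosen alternative stable.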
FunctionPass* llvm::createGreedyRegisterAllocator() {
  return new RAGreedy();
}

namespace llvm {
FunctionPass* createGreedyRegisterAllocator(
    std::function<bool(const TargetRegisterInfo &TRI,
                       const TargetRegisterClass &RC)> Ftor);
}

FunctionPass* llvm::createGreedyRegisterAllocator(
    std::function<bool(const TargetRegisterInfo &TRI,
                       const TargetRegisterClass &RC)> Ftor) {
  return new RAGreedy(Ftor);
}

RAGreedy::RAGreedy(RegClassFilterFunc F):
  MachineFunctionPass(ID),
  RegAllocBase(F) {
}
void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineBlockFrequencyInfo>();
  AU.addPreserved<MachineBlockFrequencyInfo>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveDebugVariables>();
  AU.addPreserved<LiveDebugVariables>();
  AU.addRequired<LiveStacks>();
  AU.addPreserved<LiveStacks>();
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addRequired<VirtRegMap>();
  AU.addPreserved<VirtRegMap>();
  AU.addRequired<LiveRegMatrix>();
  AU.addPreserved<LiveRegMatrix>();
  AU.addRequired<EdgeBundles>();
  AU.addRequired<SpillPlacement>();
  AU.addRequired<MachineOptimizationRemarkEmitterPass>();
  AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
//===----------------------------------------------------------------------===//
//                     LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//

bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
  LiveInterval &LI = LIS->getInterval(VirtReg);
  if (VRM->hasPhys(VirtReg)) {
    Matrix->unassign(LI);
    aboutToRemoveInterval(LI);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  // Nonetheless, clear the live-range so that the debug
  // dump will show the right state for that VirtReg.
  LI.clear();
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
  if (!VRM->hasPhys(VirtReg))
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  Matrix->unassign(LI);
  RegAllocBase::enqueue(&LI);
}

void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
  ExtraInfo->LRE_DidCloneVirtReg(New, Old);
}
void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
  // Cloning a register we haven't even heard about yet? Just ignore it.
  if (!Info.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it
  // to be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned and
  // inherit the same stage as the parent.
  Info[Old].Stage = RS_Assign;
  Info.grow(New.id());
  Info[New] = Info[Old];
}
void RAGreedy::releaseMemory() {
  SpillerInstance.reset();
  GlobalCand.clear();
}

void RAGreedy::enqueueImpl(LiveInterval *LI) { enqueue(Queue, LI); }

void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const unsigned Size = LI->getSize();
  const Register Reg = LI->reg();
  assert(Reg.isVirtual() && "Can only enqueue virtual registers");
  unsigned Prio;

  auto Stage = ExtraInfo->getOrInitStage(Reg);
  if (Stage == RS_New) {
    Stage = RS_Assign;
    ExtraInfo->setStage(Reg, Stage);
  }

  if (Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else if (Stage == RS_Memory) {
    // Memory operands should be considered last.
    // Change the priority such that memory operands are assigned in
    // the reverse order that they came in.
    // TODO: Make this a member variable and probably do something about hints.
    static unsigned MemOp = 0;
    Prio = MemOp++;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    bool ReverseLocal = TRI->reverseLocalAssignment();
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    bool ForceGlobal = !ReverseLocal &&
      (Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC));

    if (Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
        LIS->intervalIsInOneMBB(*LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocal)
        Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short LRGs to be assigned first
        // to one of the cheap registers. This could be much faster for very
        // large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
      }
      Prio |= RC.AllocationPriority << 24;
    } else {
      // Allocate global and split ranges in long->short order. Long ranges that
      // don't fit should be spilled (or split) ASAP so they don't create
      // interference. Mark a bit to prioritize global above local ranges.
      Prio = (1u << 29) + Size;
      Prio |= RC.AllocationPriority << 24;
    }
    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }
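  // A sketch of the priority word assembled above (assuming the bit positions
  // used in this function; the low bits hold the size or instruction distance):
  //   bit 31     : RS_Assign ranges, above RS_Split and RS_Memory ranges
  //   bit 30     : the range has a known physical register hint
  //   bit 29     : global range, above local single-block ranges
  //   bits 24-28 : RC.AllocationPriority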
  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
  CurQueue.push(std::make_pair(Prio, ~Reg));
}

LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }

LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
  if (CurQueue.empty())
    return nullptr;
  LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
  CurQueue.pop();
  return LI;
}

//===----------------------------------------------------------------------===//
//                            Direct Assignment
//===----------------------------------------------------------------------===//

/// tryAssign - Try to assign VirtReg to an available register.
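/// In outline, the search below proceeds as follows: a hint register is
/// returned immediately when it is interference-free; otherwise the first
/// interference-free register in Order is remembered, a cheap eviction is
/// attempted to recover a missed simple hint, and, if the chosen register
/// carries a nonzero cost, tryEvict looks for a cheaper alternative.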
MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
                               AllocationOrder &Order,
                               SmallVectorImpl<Register> &NewVRegs,
                               const SmallVirtRegSet &FixedRegisters) {
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    assert(*I);
    if (!Matrix->checkInterference(VirtReg, *I)) {
      if (I.isHint())
        return *I;
      else
        PhysReg = *I;
    }
  }
  if (!PhysReg.isValid())
    return PhysReg;

  // PhysReg is available, but there may be a better choice.

  // If we missed a simple hint, try to cheaply evict interference from the
  // preferred register.
  if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
    if (Order.isHint(Hint)) {
      MCRegister PhysHint = Hint.asMCReg();
      LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');

      if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
                                                 FixedRegisters)) {
        evictInterference(VirtReg, PhysHint, NewVRegs);
        return PhysHint;
      }
      // Record the missed hint; we may be able to recover
      // at the end if the surrounding allocation changed.
      SetOfBrokenHints.insert(&VirtReg);
    }

  // Try to evict interference from a cheaper alternative.
  uint8_t Cost = RegCosts[PhysReg];

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
                    << (unsigned)Cost << '\n');
  MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost,
                                 FixedRegisters);
  return CheapReg ? CheapReg : PhysReg;
}
//===----------------------------------------------------------------------===//
//                         Interference eviction
//===----------------------------------------------------------------------===//

Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg,
                                              Register PrevReg) const {
  auto Order =
      AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    if ((*I).id() == PrevReg.id())
      continue;

    MCRegUnitIterator Units(*I, TRI);
    for (; Units.isValid(); ++Units) {
      // Instantiate a "subquery", not to be confused with the Queries array.
      LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]);
      if (subQ.checkInterference())
        break;
    }
    // If no units have interference, break out with the current PhysReg.
    if (!Units.isValid())
      PhysReg = *I;
  }
  if (PhysReg)
    LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
                      << printReg(PrevReg, TRI) << " to "
                      << printReg(PhysReg, TRI) << '\n');
  return PhysReg;
}
/// Return true if all interferences between VirtReg and PhysReg between
/// Start and End can be evicted.
///
/// \param VirtReg Live range that is about to be assigned.
/// \param PhysReg Desired register for assignment.
/// \param Start   Start of range to look for interferences.
/// \param End     End of range to look for interferences.
/// \param MaxCost Only look for cheaper candidates and update with new cost
///                when returning true.
/// \return True when interference can be evicted cheaper than MaxCost.
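/// Note: EvictionCost compares lexicographically, broken hints before maximum
/// interference weight; a sketch of the comparison, assuming the definition in
/// RegAllocEvictionAdvisor.h:
///   std::tie(BrokenHints, MaxWeight) < std::tie(O.BrokenHints, O.MaxWeight)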
bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg,
                                           MCRegister PhysReg, SlotIndex Start,
                                           SlotIndex End,
                                           EvictionCost &MaxCost) const {
  EvictionCost Cost;

  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);

    // Check if any interfering live range is heavier than MaxWeight.
    for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
      // Check if the interference overlaps the segment of interest.
      if (!Intf->overlaps(Start, End))
        continue;

      // Cannot evict non-virtual-register interference.
      if (!Register::isVirtualRegister(Intf->reg()))
        return false;

      // Never evict spill products. They cannot split or spill.
      if (ExtraInfo->getStage(*Intf) == RS_Done)
        return false;

      // Would this break a satisfied hint?
      bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
      // Update eviction cost.
      Cost.BrokenHints += BreaksHint;
      Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
      // Abort if this would be too expensive.
      if (!(Cost < MaxCost))
        return false;
    }
  }

  if (Cost.MaxWeight == 0)
    return false;

  MaxCost = Cost;
  return true;
}
/// Return the physical register that is the best candidate for eviction by a
/// local split interval that will be created between Start and End.
///
/// \param Order           The allocation order.
/// \param VirtReg         Live range that is about to be assigned.
/// \param Start           Start of range to look for interferences.
/// \param End             End of range to look for interferences.
/// \param BestEvictweight The eviction cost of that eviction.
/// \return The PhysReg which is the best candidate for eviction and the
/// eviction cost in BestEvictweight.
MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
                                              const LiveInterval &VirtReg,
                                              SlotIndex Start, SlotIndex End,
                                              float *BestEvictweight) const {
  EvictionCost BestEvictCost;
  BestEvictCost.setMax();
  BestEvictCost.MaxWeight = VirtReg.weight();
  MCRegister BestEvicteePhys;

  // Go over all physical registers and find the best candidate for eviction.
  for (MCRegister PhysReg : Order.getOrder()) {
    if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End,
                                     BestEvictCost))
      continue;

    // Best so far.
    BestEvicteePhys = PhysReg;
  }
  *BestEvictweight = BestEvictCost.MaxWeight;
  return BestEvicteePhys;
}
/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());

  LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
                    << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<LiveInterval*, 8> Intfs;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // We usually have the interfering VRegs cached so collectInterferingVRegs()
    // should be fast; we may need to recalculate it when different physregs
    // overlap the same register unit, since different SubRanges may have been
    // queried against it.
    ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (LiveInterval *Intf : Intfs) {
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg()))
      continue;

    LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg());

    Matrix->unassign(*Intf);
    assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraInfo->setCascade(Intf->reg(), Cascade);
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg());
  }
}
/// Returns true if the given \p PhysReg is a callee saved register and has not
/// been used for allocation yet.
bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
  MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
  if (!CSR)
    return false;

  return !Matrix->isPhysRegUsed(PhysReg);
}

Optional<unsigned>
RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
                                       const AllocationOrder &Order,
                                       unsigned CostPerUseLimit) const {
  unsigned OrderLimit = Order.getOrder().size();

  if (CostPerUseLimit < uint8_t(~0u)) {
    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
    uint8_t MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
                        << MinCost << ", no cheaper registers to be found.\n");
      return None;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
                        << " regs.\n");
    }
  }
  return OrderLimit;
}
bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
                                                 MCRegister PhysReg) const {
  if (RegCosts[PhysReg] >= CostPerUseLimit)
    return false;
  // The first use of a callee-saved register in a function has cost 1.
  // Don't start using a CSR when the CostPerUseLimit is low.
  if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
    LLVM_DEBUG(
        dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
               << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
               << '\n');
    return false;
  }
  return true;
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order   Physregs to try.
/// @return        Physreg to assign VirtReg, or 0.
MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
                              SmallVectorImpl<Register> &NewVRegs,
                              uint8_t CostPerUseLimit,
                              const SmallVirtRegSet &FixedRegisters) {
  NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
                     TimePassesIsEnabled);

  MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
      VirtReg, Order, CostPerUseLimit, FixedRegisters);
  if (BestPhys.isValid())
    evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}
//===----------------------------------------------------------------------===//
//                              Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in PhysReg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = 0;
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = (BI.LiveOut &&
               !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
                  ? SpillPlacement::PrefReg
                  : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
        BC.Entry = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.first() < BI.FirstInstr) {
        BC.Entry = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.first() < BI.LastInstr) {
        ++Ins;
      }

      // Abort if the spill cannot be inserted at the MBB's start.
      if (((BC.Entry == SpillPlacement::MustSpill) ||
           (BC.Entry == SpillPlacement::PrefSpill)) &&
          SlotIndex::isEarlierInstr(BI.FirstInstr,
                                    SA->getFirstSplitPoint(BC.Number)))
        return false;
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
        BC.Exit = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.last() > BI.LastInstr) {
        BC.Exit = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.last() > BI.FirstInstr) {
        ++Ins;
      }
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias, it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}
/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;

  for (unsigned Number : Blocks) {
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(makeArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Abort if the spill cannot be inserted at the MBB's start.
    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
    auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
    if (FirstNonDebugInstr != MBB->end() &&
        SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
                                  SA->getFirstSplitPoint(Number)))
      return false;

    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      SpillPlacer->addConstraints(makeArrayRef(BCS, B));
      B = 0;
    }
  }

  SpillPlacer->addConstraints(makeArrayRef(BCS, B));
  SpillPlacer->addLinks(makeArrayRef(TBS, T));
  return true;
}
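/// growRegion - Iteratively grow the region of bundles that prefer a register
/// around the use blocks: SpillPlacer reports newly positive bundles via
/// getRecentPositive(), the through blocks on their periphery are added as
/// constraints, and the placement is re-solved until a fixed point is reached.
/// (SpillPlacement is implemented upstream as a Hopfield network solver.)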
bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  while (true) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (unsigned Bundle : NewBundles) {
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      for (unsigned Block : Blocks) {
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg) {
      if (!addThroughConstraints(Cand.Intf, NewBlocks))
        return false;
    } else
      // Provide a strong negative bias on through blocks to prevent unwanted
      // liveness on loop backedges.
      SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  LLVM_DEBUG(dbgs() << ", v=" << Visited);
  return true;
}
/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions. Compact
/// regions can be computed without looking at interference. They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, MCRegister::NoRegister);

  LLVM_DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return false;
  }

  SpillPlacer->finish();

  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  LLVM_DEBUG({
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  return true;
}

/// calcSpillCost - Compute how expensive it would be to split the live range in
/// SA around all use blocks instead of forming bundle regions.
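/// The model is simple (a sketch of the loop below): one reload or store per
/// use block, i.e. Cost = sum over use blocks B of freq(B), counted twice for
/// blocks where the value is live-through but also redefined, since those
/// need both a load and a store.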
BlockFrequency RAGreedy::calcSpillCost() {
  BlockFrequency Cost = 0;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block.
    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
      Cost += SpillPlacer->getBlockFrequency(Number);
  }
  return Cost;
}
/// Check if splitting Evictee will create a local split interval in
/// basic block number BBNumber that may cause a bad eviction chain. This is
/// intended to prevent bad eviction sequences like:
/// movl %ebp, 8(%esp)            # 4-byte Spill
/// movl %ecx, %ebp
/// movl %ebx, %ecx
/// movl %edi, %ebx
/// movl %edx, %edi
/// cltd
/// idivl %esi
/// movl %edi, %edx
/// movl %ebx, %edi
/// movl %ecx, %ebx
/// movl %ebp, %ecx
/// movl 16(%esp), %ebp           # 4-byte Reload
///
/// Such sequences are created in 2 scenarios:
///
/// Scenario #1:
/// %0 is evicted from physreg0 by %1.
/// Evictee %0 is intended for region splitting with split candidate
/// physreg0 (the reg %0 was evicted from).
/// Region splitting creates a local interval because of interference with the
/// evictor %1 (normally region splitting creates 2 intervals, the "by reg"
/// and "by stack" intervals; a local interval is also created when
/// interference occurs).
/// One of the split intervals ends up evicting %2 from physreg1.
/// Evictee %2 is intended for region splitting with split candidate
/// physreg1.
/// One of the split intervals ends up evicting %3 from physreg2, etc.
///
/// Scenario #2:
/// %0 is evicted from physreg0 by %1.
/// %2 is evicted from physreg2 by %3, etc.
/// Evictee %0 is intended for region splitting with split candidate
/// physreg1.
/// Region splitting creates a local interval because of interference with the
/// evictor %1.
/// One of the split intervals ends up evicting back the original evictor %1
/// from physreg0 (the reg %0 was evicted from).
/// Another evictee %2 is intended for region splitting with split candidate
/// physreg1.
/// One of the split intervals ends up evicting %3 from physreg2, etc.
///
/// \param Evictee  The register considered to be split.
/// \param Cand     The split candidate that determines the physical register
///                 we are splitting for and the interferences.
/// \param BBNumber The number of a BB for which the region split process will
///                 create a local split interval.
/// \param Order    The physical registers that may get evicted by a split
///                 artifact of Evictee.
/// \return True if splitting Evictee may cause a bad eviction chain, false
/// otherwise.
bool RAGreedy::splitCanCauseEvictionChain(Register Evictee,
                                          GlobalSplitCandidate &Cand,
                                          unsigned BBNumber,
                                          const AllocationOrder &Order) {
  EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee);
  unsigned Evictor = VregEvictorInfo.first;
  MCRegister PhysReg = VregEvictorInfo.second;

  // No actual evictor.
  if (!Evictor || !PhysReg)
    return false;

  float MaxWeight = 0;
  MCRegister FutureEvictedPhysReg =
      getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee),
                               Cand.Intf.first(), Cand.Intf.last(), &MaxWeight);

  // The bad eviction chain occurs when either the split candidate is the
  // evicting reg or one of the split artifacts will evict the evicting reg.
  if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg))
    return false;

  Cand.Intf.moveToBlock(BBNumber);

  // Check to see if the Evictor contains interference (with Evictee) in the
  // given BB. If so, this interference caused the eviction of Evictee from
  // PhysReg. This suggests that we will create a local interval during the
  // region split to avoid this interference, and that local interval may
  // cause a bad eviction chain.
  if (!LIS->hasInterval(Evictor))
    return false;
  LiveInterval &EvictorLI = LIS->getInterval(Evictor);
  if (EvictorLI.FindSegmentContaining(Cand.Intf.first()) == EvictorLI.end())
    return false;

  // Now, check to see if the local interval we will create is going to be
  // expensive enough to evict somebody. If so, this may cause a bad eviction
  // chain.
  float splitArtifactWeight =
      VRAI->futureWeight(LIS->getInterval(Evictee),
                         Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
  if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight)
    return false;

  return true;
}
/// Check if splitting VirtRegToSplit will create a local split interval
/// in basic block number BBNumber that may cause a spill.
///
/// \param VirtRegToSplit The register considered to be split.
/// \param Cand           The split candidate that determines the physical
///                       register we are splitting for and the interferences.
/// \param BBNumber       The number of a BB for which the region split process
///                       will create a local split interval.
/// \param Order          The physical registers that may get evicted by a
///                       split artifact of VirtRegToSplit.
/// \return True if splitting VirtRegToSplit may cause a spill, false
/// otherwise.
bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit,
                                       GlobalSplitCandidate &Cand,
                                       unsigned BBNumber,
                                       const AllocationOrder &Order) {
  Cand.Intf.moveToBlock(BBNumber);

  // Check if the local interval will find a non-interfering assignment.
  for (auto PhysReg : Order.getOrder()) {
    if (!Matrix->checkInterference(Cand.Intf.first().getPrevIndex(),
                                   Cand.Intf.last(), PhysReg))
      return false;
  }

  // The local interval is not able to find a non-interfering assignment
  // and not able to evict a less worthy interval; therefore, it can cause a
  // spill.
  return true;
}
/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
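/// A rough model of the total (a sketch of the loops below): each block entry
/// or exit whose register/stack placement disagrees with the bundle solution
/// adds freq(Block); a live-through block with interference adds
/// 2 * freq(Block); and, when the advanced split cost is enabled, a potential
/// eviction chain or local spill adds another 2 * freq(Block).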
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
                                             const AllocationOrder &Order,
                                             bool *CanCauseEvictionChain) {
  BlockFrequency GlobalCost = 0;
  const BitVector &LiveBundles = Cand.LiveBundles;
  Register VirtRegToSplit = SA->getParent().reg();
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
    bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
    unsigned Ins = 0;

    Cand.Intf.moveToBlock(BC.Number);
    // Check whether a local interval is going to be created during the region
    // split. Calculate the advanced split cost (the cost of local intervals)
    // if the option is enabled.
    if (EnableAdvancedRASplitCost && Cand.Intf.hasInterference() && BI.LiveIn &&
        BI.LiveOut && RegIn && RegOut) {

      if (CanCauseEvictionChain &&
          splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) {
        // This interference causes our eviction from this assignment; we might
        // evict somebody else and eventually someone will spill. Add that cost.
        // See splitCanCauseEvictionChain for a detailed description of the
        // scenarios.
        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);

        *CanCauseEvictionChain = true;

      } else if (splitCanCauseLocalSpill(VirtRegToSplit, Cand, BC.Number,
                                         Order)) {
        // This interference causes the local interval to spill. Add that cost.
        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
      }
    }

    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  for (unsigned Number : Cand.ActiveBlocks) {
    bool RegIn  = LiveBundles[Bundles->getBundle(Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);

        // Check whether a local interval is going to be created during the
        // region split.
        if (EnableAdvancedRASplitCost && CanCauseEvictionChain &&
            splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) {
          // This interference causes our eviction from this assignment; we
          // might evict somebody else. Add that cost.
          // See splitCanCauseEvictionChain for a detailed description of the
          // scenarios.
          GlobalCost += SpillPlacer->getBlockFrequency(Number);
          GlobalCost += SpillPlacer->getBlockFrequency(Number);

          *CanCauseEvictionChain = true;
        }
      }
      continue;
    }
    // live-in / stack-out or stack-in / live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}
/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit    The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
///                  must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
                    << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  Register Reg = SA->getParent().reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }
  1040. // Handle live-through blocks. The relevant live-through blocks are stored in
  1041. // the ActiveBlocks list with each candidate. We need to filter out
  1042. // duplicates.
  1043. BitVector Todo = SA->getThroughBlocks();
  1044. for (unsigned UsedCand : UsedCands) {
  1045. ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
  1046. for (unsigned Number : Blocks) {
  1047. if (!Todo.test(Number))
  1048. continue;
  1049. Todo.reset(Number);
  1050. unsigned IntvIn = 0, IntvOut = 0;
  1051. SlotIndex IntfIn, IntfOut;
  1052. unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
  1053. if (CandIn != NoCand) {
  1054. GlobalSplitCandidate &Cand = GlobalCand[CandIn];
  1055. IntvIn = Cand.IntvIdx;
  1056. Cand.Intf.moveToBlock(Number);
  1057. IntfIn = Cand.Intf.first();
  1058. }
  1059. unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
  1060. if (CandOut != NoCand) {
  1061. GlobalSplitCandidate &Cand = GlobalCand[CandOut];
  1062. IntvOut = Cand.IntvIdx;
  1063. Cand.Intf.moveToBlock(Number);
  1064. IntfOut = Cand.Intf.last();
  1065. }
  1066. if (!IntvIn && !IntvOut)
  1067. continue;
  1068. SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
  1069. }
  1070. }
  1071. ++NumGlobalSplits;
  1072. SmallVector<unsigned, 8> IntvMap;
  1073. SE->finish(&IntvMap);
  1074. DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
  1075. unsigned OrigBlocks = SA->getNumLiveBlocks();
  1076. // Sort out the new intervals created by splitting. We get four kinds:
  1077. // - Remainder intervals should not be split again.
  1078. // - Candidate intervals can be assigned to Cand.PhysReg.
  1079. // - Block-local splits are candidates for local splitting.
  1080. // - DCE leftovers should go back on the queue.
  1081. for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
  1082. const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
  1083. // Ignore old intervals from DCE.
  1084. if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
  1085. continue;
  1086. // Remainder interval. Don't try splitting again, spill if it doesn't
  1087. // allocate.
  1088. if (IntvMap[I] == 0) {
  1089. ExtraInfo->setStage(Reg, RS_Spill);
  1090. continue;
  1091. }
  1092. // Global intervals. Allow repeated splitting as long as the number of live
  1093. // blocks is strictly decreasing.
  1094. if (IntvMap[I] < NumGlobalIntvs) {
  1095. if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
  1096. LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
  1097. << " blocks as original.\n");
  1098. // Don't allow repeated splitting as a safe guard against looping.
  1099. ExtraInfo->setStage(Reg, RS_Split2);
  1100. }
  1101. continue;
  1102. }
  1103. // Other intervals are treated as new. This includes local intervals created
  1104. // for blocks with multiple uses, and anything created by DCE.
  1105. }
  1106. if (VerifyEnabled)
  1107. MF->verify(this, "After splitting live range around region");
  1108. }

MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    SmallVectorImpl<Register> &NewVRegs) {
  if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
    return MCRegister::NoRegister;
  unsigned NumCands = 0;
  BlockFrequency SpillCost = calcSpillCost();
  BlockFrequency BestCost;

  // Check if we can split this live range around a compact region.
  bool HasCompact = calcCompactRegion(GlobalCand.front());
  if (HasCompact) {
    // Yes, keep GlobalCand[0] as the compact region candidate.
    NumCands = 1;
    BestCost = BlockFrequency::getMaxFrequency();
  } else {
    // No benefit from the compact region, our fallback will be per-block
    // splitting. Make sure we find a solution that is cheaper than spilling.
    BestCost = SpillCost;
    LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = ";
               MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
  }

  bool CanCauseEvictionChain = false;
  unsigned BestCand =
      calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
                               false /*IgnoreCSR*/, &CanCauseEvictionChain);

  // Split candidates with compact regions can cause a bad eviction sequence.
  // See splitCanCauseEvictionChain for a detailed description of the
  // scenarios. To avoid it, we need to compare the cost with the spill cost
  // and not the current max frequency.
  if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) &&
      CanCauseEvictionChain) {
    return MCRegister::NoRegister;
  }

  // No solutions found, fall back to single block splitting.
  if (!HasCompact && BestCand == NoCand)
    return MCRegister::NoRegister;

  return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}

unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands, bool IgnoreCSR,
                                            bool *CanCauseEvictionChain) {
  unsigned BestCand = NoCand;
  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
      continue;

    // Discard bad candidates before we run out of interference cache cursors.
    // This will only affect register classes with a lot of registers (>32).
    if (NumCands == IntfCache.getMaxCursors()) {
      unsigned WorstCount = ~0u;
      unsigned Worst = 0;
      for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
        if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
          continue;
        unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
        if (Count < WorstCount) {
          Worst = CandIndex;
          WorstCount = Count;
        }
      }
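      // Compact the array: move the last candidate into the discarded slot,
      // and keep BestCand pointing at the entry that moved.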
      --NumCands;
      GlobalCand[Worst] = GlobalCand[NumCands];
      if (BestCand == NumCands)
        BestCand = Worst;
    }

    if (GlobalCand.size() <= NumCands)
      GlobalCand.resize(NumCands + 1);
    GlobalSplitCandidate &Cand = GlobalCand[NumCands];
    Cand.reset(IntfCache, PhysReg);

    SpillPlacer->prepare(Cand.LiveBundles);
    BlockFrequency Cost;
    if (!addSplitConstraints(Cand.Intf, Cost)) {
      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
      continue;
    }
    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
               MBFI->printBlockFreq(dbgs(), Cost));
    if (Cost >= BestCost) {
      LLVM_DEBUG({
        if (BestCand == NoCand)
          dbgs() << " worse than no bundles\n";
        else
          dbgs() << " worse than "
                 << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
      });
      continue;
    }
    if (!growRegion(Cand)) {
      LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
      continue;
    }

    SpillPlacer->finish();

    // No live bundles, defer to splitSingleBlocks().
    if (!Cand.LiveBundles.any()) {
      LLVM_DEBUG(dbgs() << " no bundles.\n");
      continue;
    }

    bool HasEvictionChain = false;
    Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain);
    LLVM_DEBUG({
      dbgs() << ", total = ";
      MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
      for (int I : Cand.LiveBundles.set_bits())
        dbgs() << " EB#" << I;
      dbgs() << ".\n";
    });
    if (Cost < BestCost) {
      BestCand = NumCands;
      BestCost = Cost;
      // See splitCanCauseEvictionChain for a detailed description of the bad
      // eviction chain scenarios.
      if (CanCauseEvictionChain)
        *CanCauseEvictionChain = HasEvictionChain;
    }
    ++NumCands;
  }

  if (CanCauseEvictionChain && BestCand != NoCand) {
    // See splitCanCauseEvictionChain for a detailed description of the bad
    // eviction chain scenarios.
    LLVM_DEBUG(dbgs() << "Best split candidate of vreg "
                      << printReg(VirtReg.reg(), TRI) << " may ");
    if (!(*CanCauseEvictionChain))
      LLVM_DEBUG(dbgs() << "not ");
    LLVM_DEBUG(dbgs() << "cause bad eviction chain\n");
  }

  return BestCand;
}

unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
                                 bool HasCompact,
                                 SmallVectorImpl<Register> &NewVRegs) {
  SmallVector<unsigned, 8> UsedCands;
  // Prepare split editor.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);

  // Assign all edge bundles to the preferred candidate, or NoCand.
  BundleCand.assign(Bundles->getNumBundles(), NoCand);

  // Assign bundles for the best candidate region.
  if (BestCand != NoCand) {
    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
      UsedCands.push_back(BestCand);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
                        << B << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  // Assign bundles for the compact region.
  if (HasCompact) {
    GlobalSplitCandidate &Cand = GlobalCand.front();
    assert(!Cand.PhysReg && "Compact region has no physreg");
    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
      UsedCands.push_back(0);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for compact region in " << B
                        << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  splitAroundRegion(LREdit, UsedCands);
  return 0;
}

//===----------------------------------------------------------------------===//
//                            Per-Block Splitting
//===----------------------------------------------------------------------===//

/// tryBlockSplit - Split a global live range around every block with uses.
/// This creates a lot of local live ranges that will be split by tryLocalSplit
/// if they don't allocate.
unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
  Register Reg = VirtReg.reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
      SE->splitSingleBlock(BI);
  }
  // No blocks were split.
  if (LREdit.empty())
    return 0;

  // We did split for some blocks.
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);

  // Tell LiveDebugVariables about the new ranges.
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  // Sort out the new intervals created by splitting. The remainder interval
  // goes straight to spilling, the new local ranges get to stay RS_New.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
    if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
      ExtraInfo->setStage(LI, RS_Spill);
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around basic blocks");
  return 0;
}

//===----------------------------------------------------------------------===//
//                         Per-Instruction Splitting
//===----------------------------------------------------------------------===//

/// Get the number of allocatable registers that match the constraints of \p Reg
/// on \p MI and that are also in \p SuperRC.
static unsigned getNumAllocatableRegsForConstraints(
    const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
    const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
    const RegisterClassInfo &RCI) {
  assert(SuperRC && "Invalid register class");

  const TargetRegisterClass *ConstrainedRC =
      MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
                                             /* ExploreBundle */ true);
  if (!ConstrainedRC)
    return 0;
  return RCI.getNumAllocatableRegs(ConstrainedRC);
}

/// tryInstructionSplit - Split a live range around individual instructions.
/// This is normally not worthwhile since the spiller is doing essentially the
/// same thing. However, when the live range is in a constrained register
/// class, it may help to insert copies such that parts of the live range can
/// be moved to a larger register class.
///
/// This is similar to spilling to a larger register class.
unsigned
RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                              SmallVectorImpl<Register> &NewVRegs) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
  // There is no point to this if there are no larger sub-classes.
  if (!RegClassInfo.isProperSubClass(CurRC))
    return 0;

  // Always enable split spill mode, since we're effectively spilling to a
  // register.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitEditor::SM_Size);

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 1)
    return 0;

  LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
                    << " individual instrs.\n");

  const TargetRegisterClass *SuperRC =
      TRI->getLargestLegalSuperClass(CurRC, *MF);
  unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
  // Split around every non-copy instruction if this split will relax
  // the constraints on the virtual register.
  // Otherwise, splitting just inserts uncoalescable copies that do not help
  // the allocation.
  for (const SlotIndex Use : Uses) {
    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use))
      if (MI->isFullCopy() ||
          SuperRCNumAllocatableRegs ==
              getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
                                                  TII, TRI, RCI)) {
        LLVM_DEBUG(dbgs() << "    skip:\t" << Use << '\t' << *MI);
        continue;
      }
    SE->openIntv();
    SlotIndex SegStart = SE->enterIntvBefore(Use);
    SlotIndex SegStop = SE->leaveIntvAfter(Use);
    SE->useIntv(SegStart, SegStop);
  }

  if (LREdit.empty()) {
    LLVM_DEBUG(dbgs() << "All uses were copies.\n");
    return 0;
  }

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // Assign all new registers to RS_Spill. This was the last chance.
  ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
  return 0;
}

//===----------------------------------------------------------------------===//
//                             Local Splitting
//===----------------------------------------------------------------------===//

/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
///
void RAGreedy::calcGapWeights(MCRegister PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  const unsigned NumGaps = Uses.size() - 1;

  // Start and end points for the interference check.
  SlotIndex StartIdx =
      BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
  SlotIndex StopIdx =
      BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;

  GapWeight.assign(NumGaps, 0.0f);
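
  // Two passes follow: the first accumulates the spill weight of evictable
  // virtual register interference per gap; the second marks gaps overlapping
  // fixed regunit interference as huge_valf, since fixed interference can
  // never be evicted.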
  // Add interference from each overlapping register.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
             .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
        Matrix->getLiveUnions()[*Units].find(StartIdx);
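    // Both the use slots and the interference segments are sorted, so Gap
    // only ever advances; it deliberately persists across iterations of the
    // segment iterator below.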
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap + 1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight();
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap + 1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    const LiveRange &LR = LIS->getRegUnit(*Units);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap + 1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = huge_valf;
        if (Uses[Gap + 1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}

/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // TODO: the function currently only handles a single UseBlock; it should be
  // possible to generalize.
  if (SA->getUseBlocks().size() != 1)
    return 0;

  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();

  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block - A phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case, we simply assume
  // that the interval is continuous from FirstInstr to LastInstr. We should
  // make sure that we don't do anything illegal to such an interval, though.

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 2)
    return 0;
  const unsigned NumGaps = Uses.size() - 1;

  LLVM_DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (const auto &Use : Uses)
      dbgs() << ' ' << Use;
    dbgs() << '\n';
  });

  // If VirtReg is live across any register mask operands, compute a list of
  // gaps with register masks.
  SmallVector<unsigned, 8> RegMaskGaps;
  if (Matrix->checkRegMaskInterference(VirtReg)) {
    // Get regmask slots for the whole block.
    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
    LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");

    // Constrain to VirtReg's live range.
    unsigned RI =
        llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
    unsigned RE = RMS.size();
    for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
      // Look for Uses[I] <= RMS <= Uses[I + 1].
      assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
      if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
        continue;
      // Skip a regmask on the same instruction as the last use. It doesn't
      // overlap the live range.
      if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
        break;
      LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
                        << Uses[I + 1]);
      RegMaskGaps.push_back(I);
      // Advance RI to the next gap. A regmask on one of the uses counts in
      // both gaps.
      while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
        ++RI;
    }
    LLVM_DEBUG(dbgs() << '\n');
  }

  // Since we allow local split results to be split again, there is a risk of
  // creating infinite loops. It is tempting to require that the new live
  // ranges have fewer instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split 2+3 (including the COPY), and we want to allow that.
  //
  // Instead we use these rules:
  //
  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
  //    noop split, of course).
  // 2. Require progress be made for ranges with getStage() == RS_Split2. All
  //    the new ranges must have fewer instructions than before the split.
  // 3. New ranges with the same number of instructions are marked RS_Split2,
  //    smaller ranges are marked RS_New.
  //
  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
  // excessive splitting and infinite loops.
  //
  bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;

  // Best split candidate.
  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  const float blockFreq =
      SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
      (1.0f / MBFI->getEntryFreq());
  SmallVector<float, 8> GapWeight;

  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
    calcGapWeights(PhysReg, GapWeight);

    // Remove any gaps with regmask clobbers.
    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
      for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I)
        GapWeight[RegMaskGaps[I]] = huge_valf;

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];
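
    // The loop below slides and resizes the window [SplitBefore, SplitAfter]
    // over the use gaps one step at a time, keeping MaxGap up to date, and
    // remembers the window where the estimated new spill weight beats the
    // interference that would have to be evicted by the largest margin.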
    while (true) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
                        << '-' << Uses[SplitAfter] << " I=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        LLVM_DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;

      // How many gaps would the new range have?
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;

      // Legally, without causing looping?
      bool Legal = !ProgressRequired || NewGaps < NumGaps;

      if (Legal && MaxGap < huge_valf) {
        // Estimate the new spill weight. Each instruction reads or writes the
        // register. Conservatively assume there are no read-modify-write
        // instructions.
        //
        // Try to guess the size of the new interval.
        const float EstWeight = normalizeSpillWeight(
            blockFreq * (NewGaps + 1),
            Uses[SplitBefore].distance(Uses[SplitAfter]) +
                (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
            1);
        // Would this split be possible to allocate?
        // Never allocate all gaps, we wouldn't be making progress.
        LLVM_DEBUG(dbgs() << " w=" << EstWeight);
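        // Hysteresis (a factor just below 1.0) biases the comparison so a new
        // window must be noticeably better before it displaces the current
        // best, avoiding thrashing between near-identical split points.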
        if (EstWeight * Hysteresis >= MaxGap) {
          Shrink = false;
          float Diff = EstWeight - MaxGap;
          if (Diff > BestDiff) {
            LLVM_DEBUG(dbgs() << " (best)");
            BestDiff = Hysteresis * Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        if (++SplitBefore < SplitAfter) {
          LLVM_DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
              MaxGap = std::max(MaxGap, GapWeight[I]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        LLVM_DEBUG(dbgs() << " end\n");
        break;
      }

      LLVM_DEBUG(dbgs() << " extend\n");
      MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
    }
  }

  // Didn't find any candidates?
  if (BestBefore == NumGaps)
    return 0;

  LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
                    << Uses[BestAfter] << ", " << BestDiff << ", "
                    << (BestAfter - BestBefore + 1) << " instrs\n");

  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit);

  SE->openIntv();
  SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
  SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
  SE->useIntv(SegStart, SegStop);
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);

  // If the new range has the same number of instructions as before, mark it as
  // RS_Split2 so the next split will be forced to make progress. Otherwise,
  // leave the new intervals as RS_New so they can compete.
  bool LiveBefore = BestBefore != 0 || BI.LiveIn;
  bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
  unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
  if (NewGaps >= NumGaps) {
    LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
    assert(!ProgressRequired && "Didn't make progress when it was required.");
    for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
      if (IntvMap[I] == 1) {
        ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
        LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
      }
    LLVM_DEBUG(dbgs() << '\n');
  }
  ++NumLocalSplits;

  return 0;
}

//===----------------------------------------------------------------------===//
//                          Live Range Splitting
//===----------------------------------------------------------------------===//

/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
                            SmallVectorImpl<Register> &NewVRegs,
                            const SmallVirtRegSet &FixedRegisters) {
  // Ranges must be Split2 or less.
  if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
    return 0;

  // Local intervals are handled separately.
  if (LIS->intervalIsInOneMBB(VirtReg)) {
    NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
                       TimerGroupDescription, TimePassesIsEnabled);
    SA->analyze(&VirtReg);
    Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
    return tryInstructionSplit(VirtReg, Order, NewVRegs);
  }

  NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
                     TimerGroupDescription, TimePassesIsEnabled);

  SA->analyze(&VirtReg);

  // First try to split around a region spanning multiple blocks. RS_Split2
  // ranges already made dubious progress with region splitting, so they go
  // straight to single block splitting.
  if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
    MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
  }

  // Then isolate blocks.
  return tryBlockSplit(VirtReg, Order, NewVRegs);
}

//===----------------------------------------------------------------------===//
//                          Last Chance Recoloring
//===----------------------------------------------------------------------===//

/// Return true if \p reg has any tied def operand.
static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
  for (const MachineOperand &MO : MRI->def_operands(reg))
    if (MO.isTied())
      return true;

  return false;
}

/// mayRecolorAllInterferences - Check if the virtual registers that
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
/// recolored to free \p PhysReg.
/// When true is returned, \p RecoloringCandidates has been augmented with all
/// the live intervals that need to be recolored in order to free \p PhysReg
/// for \p VirtReg.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
bool RAGreedy::mayRecolorAllInterferences(
    MCRegister PhysReg, LiveInterval &VirtReg, SmallLISet &RecoloringCandidates,
    const SmallVirtRegSet &FixedRegisters) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());

  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // If there is LastChanceRecoloringMaxInterference or more interferences,
    // chances are one would not be recolorable.
    if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
            LastChanceRecoloringMaxInterference &&
        !ExhaustiveSearch) {
      LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
      CutOffInfo |= CO_Interf;
      return false;
    }
    for (LiveInterval *Intf : reverse(Q.interferingVRegs())) {
      // If Intf is done and sits on the same register class as VirtReg, it
      // would not be recolorable as it is in the same state as VirtReg.
      // However, if VirtReg has tied defs and Intf doesn't, then
      // there is still a point in examining if it can be recolorable.
      if (((ExtraInfo->getStage(*Intf) == RS_Done &&
            MRI->getRegClass(Intf->reg()) == CurRC) &&
           !(hasTiedDef(MRI, VirtReg.reg()) &&
             !hasTiedDef(MRI, Intf->reg()))) ||
          FixedRegisters.count(Intf->reg())) {
        LLVM_DEBUG(
            dbgs() << "Early abort: the interference is not recolorable.\n");
        return false;
      }
      RecoloringCandidates.insert(Intf);
    }
  }
  return true;
}

/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
/// its interferences.
/// Last chance recoloring chooses a color for \p VirtReg and recolors every
/// virtual register that was using it. The recoloring process may recursively
/// use the last chance recoloring. Therefore, when a virtual register has been
/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
/// be last-chance-recolored again during this recoloring "session".
/// E.g.,
/// Let
/// vA can use {R1, R2    }
/// vB can use {    R2, R3}
/// vC can use {R1        }
/// Where vA, vB, and vC cannot be split anymore (they are reloads for
/// instance) and they all interfere.
///
/// vA is assigned R1
/// vB is assigned R2
/// vC tries to evict vA but vA is already done.
/// Regular register allocation fails.
///
/// Last chance recoloring kicks in:
/// vC does as if vA was evicted => vC uses R1.
/// vC is marked as fixed.
/// vA needs to find a color.
/// None are available.
/// vA cannot evict vC: vC is a fixed virtual register now.
/// vA does as if vB was evicted => vA uses R2.
/// vB needs to find a color.
/// R3 is available.
/// Recoloring => vC = R1, vA = R2, vB = R3
///
/// \p Order defines the preferred allocation order for \p VirtReg.
/// \p NewRegs will contain any new virtual registers that have been created
/// (split, spill) during the process and that must be assigned.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
/// \p Depth gives the current depth of the last chance recoloring.
/// \return a physical register that can be used for VirtReg or ~0u if none
/// exists.
unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
                                           AllocationOrder &Order,
                                           SmallVectorImpl<Register> &NewVRegs,
                                           SmallVirtRegSet &FixedRegisters,
                                           unsigned Depth) {
  if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
    return ~0u;

  LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
  // Ranges must be Done.
  assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
         "Last chance recoloring should really be last chance");
  // Set the max depth to LastChanceRecoloringMaxDepth.
  // We may want to reconsider that if we end up with a too large search space
  // for targets with hundreds of registers.
  // Indeed, in that case we may want to cut the search space earlier.
  if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
    LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
    CutOffInfo |= CO_Depth;
    return ~0u;
  }

  // Set of live intervals that will need to be recolored.
  SmallLISet RecoloringCandidates;
  // Record the original mapping from virtual register to physical register in
  // case the recoloring fails.
  DenseMap<Register, MCRegister> VirtRegToPhysReg;
  // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
  // this recoloring "session".
  assert(!FixedRegisters.count(VirtReg.reg()));
  FixedRegisters.insert(VirtReg.reg());
  SmallVector<Register, 4> CurrentNewVRegs;

  for (MCRegister PhysReg : Order) {
    assert(PhysReg.isValid());
    LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
                      << printReg(PhysReg, TRI) << '\n');
    RecoloringCandidates.clear();
    VirtRegToPhysReg.clear();
    CurrentNewVRegs.clear();

    // It is only possible to recolor virtual register interference.
    if (Matrix->checkInterference(VirtReg, PhysReg) >
        LiveRegMatrix::IK_VirtReg) {
      LLVM_DEBUG(
          dbgs() << "Some interferences are not with virtual registers.\n");

      continue;
    }

    // Early give up on this PhysReg if it is obvious we cannot recolor all
    // the interferences.
    if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
                                    FixedRegisters)) {
      LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
      continue;
    }

    // RecoloringCandidates contains all the virtual registers that interfere
    // with VirtReg on PhysReg (or one of its aliases).
    // Enqueue them for recoloring and perform the actual recoloring.
    PQueue RecoloringQueue;
    for (LiveInterval *RC : RecoloringCandidates) {
      Register ItVirtReg = RC->reg();
      enqueue(RecoloringQueue, RC);
      assert(VRM->hasPhys(ItVirtReg) &&
             "Interferences are supposed to be with allocated variables");

      // Record the current allocation.
      VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
      // Unset the related struct.
      Matrix->unassign(*RC);
    }

    // Do as if VirtReg was assigned to PhysReg so that the underlying
    // recoloring has the right information about the interferences and
    // available colors.
    Matrix->assign(VirtReg, PhysReg);

    // Save the current recoloring state.
    // If we cannot recolor all the interferences, we will have to start again
    // at this point for the next physical register.
    SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
    if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
                                FixedRegisters, Depth)) {
      // Push the queued vregs into the main queue.
      for (Register NewVReg : CurrentNewVRegs)
        NewVRegs.push_back(NewVReg);
      // Do not mess up with the global assignment process.
      // I.e., VirtReg must be unassigned.
      Matrix->unassign(VirtReg);
      return PhysReg;
    }

    LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
                      << printReg(PhysReg, TRI) << '\n');

    // The recoloring attempt failed, undo the changes.
    FixedRegisters = SaveFixedRegisters;
    Matrix->unassign(VirtReg);

    // For a newly created vreg which is also in RecoloringCandidates,
    // don't add it to NewVRegs because its physical register will be restored
    // below. Other vregs in CurrentNewVRegs are created by calling
    // selectOrSplit and should be added into NewVRegs.
    for (Register &R : CurrentNewVRegs) {
      if (RecoloringCandidates.count(&LIS->getInterval(R)))
        continue;
      NewVRegs.push_back(R);
    }

    for (LiveInterval *RC : RecoloringCandidates) {
      Register ItVirtReg = RC->reg();
      if (VRM->hasPhys(ItVirtReg))
        Matrix->unassign(*RC);
      MCRegister ItPhysReg = VirtRegToPhysReg[ItVirtReg];
      Matrix->assign(*RC, ItPhysReg);
    }
  }

  // Last chance recoloring did not work either, give up.
  return ~0u;
}

/// tryRecoloringCandidates - Try to assign a new color to every register
/// in \p RecoloringQueue.
/// \p NewRegs will contain any new virtual register created during the
/// recoloring process.
/// \p FixedRegisters[in/out] contains all the registers that have been
/// recolored.
/// \return true if all virtual registers in RecoloringQueue were successfully
/// recolored, false otherwise.
bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
                                       SmallVectorImpl<Register> &NewVRegs,
                                       SmallVirtRegSet &FixedRegisters,
                                       unsigned Depth) {
  while (!RecoloringQueue.empty()) {
    LiveInterval *LI = dequeue(RecoloringQueue);
    LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
    MCRegister PhysReg =
        selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);

    // When splitting happens, the live-range may actually be empty.
    // In that case, it is okay to continue the recoloring even
    // if we did not find an alternative color for it. Indeed,
    // there will not be anything to color for LI in the end.
    if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
      return false;

    if (!PhysReg) {
      assert(LI->empty() && "Only empty live-ranges do not require a register");
      LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
                        << " succeeded. Empty LI.\n");
      continue;
    }
    LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
                      << " succeeded with: " << printReg(PhysReg, TRI) << '\n');

    Matrix->assign(*LI, PhysReg);
    FixedRegisters.insert(LI->reg());
  }
  return true;
}

//===----------------------------------------------------------------------===//
//                            Main Entry Point
//===----------------------------------------------------------------------===//

MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg,
                                   SmallVectorImpl<Register> &NewVRegs) {
  CutOffInfo = CO_None;
  LLVMContext &Ctx = MF->getFunction().getContext();
  SmallVirtRegSet FixedRegisters;
  MCRegister Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
  if (Reg == ~0U && (CutOffInfo != CO_None)) {
    uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
    if (CutOffEncountered == CO_Depth)
      Ctx.emitError("register allocation failed: maximum depth for recoloring "
                    "reached. Use -fexhaustive-register-search to skip "
                    "cutoffs");
    else if (CutOffEncountered == CO_Interf)
      Ctx.emitError("register allocation failed: maximum interference for "
                    "recoloring reached. Use -fexhaustive-register-search "
                    "to skip cutoffs");
    else if (CutOffEncountered == (CO_Depth | CO_Interf))
      Ctx.emitError("register allocation failed: maximum interference and "
                    "depth for recoloring reached. Use "
                    "-fexhaustive-register-search to skip cutoffs");
  }
  return Reg;
}

/// Using a CSR for the first time has a cost because it causes push|pop
/// to be added to prologue|epilogue. Splitting a cold section of the live
/// range can have lower cost than using the CSR for the first time;
/// Spilling a live range in the cold path can have lower cost than using
/// the CSR for the first time. Returns the physical register if we decide
/// to use the CSR; otherwise returns 0.
MCRegister
RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
                                MCRegister PhysReg, uint8_t &CostPerUseLimit,
                                SmallVectorImpl<Register> &NewVRegs) {
  if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
    // We choose spill over using the CSR for the first time if the spill cost
    // is lower than CSRCost.
    SA->analyze(&VirtReg);
    if (calcSpillCost() >= CSRCost)
      return PhysReg;

    // We are going to spill, set CostPerUseLimit to 1 to make sure that
    // we will not use a callee-saved register in tryEvict.
    CostPerUseLimit = 1;
    return 0;
  }
  if (ExtraInfo->getStage(VirtReg) < RS_Split) {
    // We choose pre-splitting over using the CSR for the first time if
    // the cost of splitting is lower than CSRCost.
    SA->analyze(&VirtReg);
    unsigned NumCands = 0;
    BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
    unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                                 NumCands, true /*IgnoreCSR*/);
    if (BestCand == NoCand)
      // Use the CSR if we can't find a region split below CSRCost.
      return PhysReg;

    // Perform the actual pre-splitting.
    doRegionSplit(VirtReg, BestCand, false /*HasCompact*/, NewVRegs);
    return 0;
  }
  return PhysReg;
}

void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) {
  // Do not keep invalid information around.
  SetOfBrokenHints.remove(&LI);
}

void RAGreedy::initializeCSRCost() {
  // We use the larger one out of the command-line option and the value
  // reported by TRI.
  CSRCost = BlockFrequency(
      std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
  if (!CSRCost.getFrequency())
    return;

  // Raw cost is relative to Entry == 2^14; scale it appropriately.
  uint64_t ActualEntry = MBFI->getEntryFreq();
  if (!ActualEntry) {
    CSRCost = 0;
    return;
  }
  uint64_t FixedEntry = 1 << 14;
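  // BranchProbability can only represent fractions in [0, 1], so scale with a
  // direct multiply when the actual entry frequency is below the fixed
  // reference, and divide by the inverted fraction when it is above.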
  if (ActualEntry < FixedEntry)
    CSRCost *= BranchProbability(ActualEntry, FixedEntry);
  else if (ActualEntry <= UINT32_MAX)
    // Invert the fraction and divide.
    CSRCost /= BranchProbability(FixedEntry, ActualEntry);
  else
    // Can't use BranchProbability in general, since it takes 32-bit numbers.
    CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
}

/// Collect the hint info for \p Reg.
/// The results are stored into \p Out.
/// \p Out is not cleared before being populated.
void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
  for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
    if (!Instr.isFullCopy())
      continue;
    // Look for the other end of the copy.
    Register OtherReg = Instr.getOperand(0).getReg();
    if (OtherReg == Reg) {
      OtherReg = Instr.getOperand(1).getReg();
      if (OtherReg == Reg)
        continue;
    }
    // Get the current assignment.
    MCRegister OtherPhysReg =
        OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
    // Push the collected information.
    Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
                           OtherPhysReg));
  }
}

/// Using the given \p List, compute the cost of the broken hints if
/// \p PhysReg was used.
/// \return The cost of \p List for \p PhysReg.
BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
                                           MCRegister PhysReg) {
  BlockFrequency Cost = 0;
  for (const HintInfo &Info : List) {
    if (Info.PhysReg != PhysReg)
      Cost += Info.Freq;
  }
  return Cost;
}

/// Using the register assigned to \p VirtReg, try to recolor
/// all the live ranges that are copy-related with \p VirtReg.
/// The recoloring is then propagated to all the live-ranges that have
/// been recolored and so on, until no more copies can be coalesced or
/// it is not profitable.
/// For a given live range, profitability is determined by the sum of the
/// frequencies of the non-identity copies it would introduce with the old
/// and new register.
void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
  // We have a broken hint, check if it is possible to fix it by
  // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
  // some register and PhysReg may be available for the other live-ranges.
  SmallSet<Register, 4> Visited;
  SmallVector<unsigned, 2> RecoloringCandidates;
  HintsInfo Info;
  Register Reg = VirtReg.reg();
  MCRegister PhysReg = VRM->getPhys(Reg);
  // Start the recoloring algorithm from the input live-interval, then
  // it will propagate to the ones that are copy-related with it.
  Visited.insert(Reg);
  RecoloringCandidates.push_back(Reg);

  LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
                    << '(' << printReg(PhysReg, TRI) << ")\n");

  do {
    Reg = RecoloringCandidates.pop_back_val();

    // We cannot recolor physical registers.
    if (Register::isPhysicalRegister(Reg))
      continue;

    // This may be a skipped class.
    if (!VRM->hasPhys(Reg)) {
      assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) &&
             "We have an unallocated variable which should have been handled");
      continue;
    }

    // Get the live interval mapped with this virtual register to be able
    // to check for the interference with the new color.
    LiveInterval &LI = LIS->getInterval(Reg);
    MCRegister CurrPhys = VRM->getPhys(Reg);
    // Check that the new color matches the register class constraints and
    // that it is free for this live range.
    if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
                                Matrix->checkInterference(LI, PhysReg)))
      continue;

    LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
                      << ") is recolorable.\n");

    // Gather the hint info.
    Info.clear();
    collectHintInfo(Reg, Info);
    // Check if recoloring the live-range will increase the cost of the
    // non-identity copies.
    if (CurrPhys != PhysReg) {
      LLVM_DEBUG(dbgs() << "Checking profitability:\n");
      BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
      BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
      LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
                        << "\nNew Cost: " << NewCopiesCost.getFrequency()
                        << '\n');
      if (OldCopiesCost < NewCopiesCost) {
        LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
        continue;
      }
      // At this point, the cost is either cheaper or equal. If it is
      // equal, we consider this is profitable because it may expose
      // more recoloring opportunities.
      LLVM_DEBUG(dbgs() << "=> Profitable.\n");
      // Recolor the live-range.
      Matrix->unassign(LI);
      Matrix->assign(LI, PhysReg);
    }
    // Push all copy-related live-ranges to keep reconciling the broken
    // hints.
    for (const HintInfo &HI : Info) {
      if (Visited.insert(HI.Reg).second)
        RecoloringCandidates.push_back(HI.Reg);
    }
  } while (!RecoloringCandidates.empty());
}

/// Try to recolor broken hints.
/// Broken hints may be repaired by recoloring when an evicted variable
/// freed up a register for a larger live-range.
/// Consider the following example:
/// BB1:
///   a =
///   b =
/// BB2:
///   ...
///   = b
///   = a
/// Let us assume b gets split:
/// BB1:
///   a =
///   b =
/// BB2:
///   c = b
///   ...
///   d = c
///   = d
///   = a
/// Because of how the allocation works, b, c, and d may be assigned different
/// colors. Now, if a gets evicted later:
/// BB1:
///   a =
///   st a, SpillSlot
///   b =
/// BB2:
///   c = b
///   ...
///   d = c
///   = d
///   e = ld SpillSlot
///   = e
/// It is likely that we can assign the same register for b, c, and d,
/// getting rid of 2 copies.
void RAGreedy::tryHintsRecoloring() {
  for (LiveInterval *LI : SetOfBrokenHints) {
    assert(Register::isVirtualRegister(LI->reg()) &&
           "Recoloring is possible only for virtual registers");
    // Some dead defs may be around (e.g., because of debug uses).
    // Ignore those.
    if (!VRM->hasPhys(LI->reg()))
      continue;
    tryHintRecoloring(*LI);
  }
}
  2174. MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
  2175. SmallVectorImpl<Register> &NewVRegs,
  2176. SmallVirtRegSet &FixedRegisters,
  2177. unsigned Depth) {
  2178. uint8_t CostPerUseLimit = uint8_t(~0u);
  2179. // First try assigning a free register.
  2180. auto Order =
  2181. AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
  2182. if (MCRegister PhysReg =
  2183. tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
  2184. // If VirtReg got an assignment, the eviction info is no longer relevant.
  2185. LastEvicted.clearEvicteeInfo(VirtReg.reg());
  2186. // When NewVRegs is not empty, we may have made decisions such as evicting
  2187. // a virtual register, go with the earlier decisions and use the physical
  2188. // register.
  2189. if (CSRCost.getFrequency() &&
  2190. EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
  2191. MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
  2192. CostPerUseLimit, NewVRegs);
  2193. if (CSRReg || !NewVRegs.empty())
  2194. // Return now if we decide to use a CSR or create new vregs due to
  2195. // pre-splitting.
  2196. return CSRReg;
  2197. } else
  2198. return PhysReg;
  2199. }
  2200. LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
  2201. LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
  2202. << ExtraInfo->getCascade(VirtReg.reg()) << '\n');
  2203. // Try to evict a less worthy live range, but only for ranges from the primary
  2204. // queue. The RS_Split ranges already failed to do this, and they should not
  2205. // get a second chance until they have been split.
  2206. if (Stage != RS_Split)
  2207. if (Register PhysReg =
  2208. tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
  2209. FixedRegisters)) {
  2210. Register Hint = MRI->getSimpleHint(VirtReg.reg());
  2211. // If VirtReg has a hint and that hint is broken record this
  2212. // virtual register as a recoloring candidate for broken hint.
  2213. // Indeed, since we evicted a variable in its neighborhood it is
  2214. // likely we can at least partially recolor some of the
  2215. // copy-related live-ranges.
  2216. if (Hint && Hint != PhysReg)
  2217. SetOfBrokenHints.insert(&VirtReg);
  2218. // If VirtReg eviction someone, the eviction info for it as an evictee is
  2219. // no longer relevant.
  2220. LastEvicted.clearEvicteeInfo(VirtReg.reg());
  2221. return PhysReg;
  2222. }
  2223. assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
  2224. // The first time we see a live range, don't try to split or spill.
  2225. // Wait until the second time, when all smaller ranges have been allocated.
  2226. // This gives a better picture of the interference to split around.
  2227. if (Stage < RS_Split) {
  2228. ExtraInfo->setStage(VirtReg, RS_Split);
  2229. LLVM_DEBUG(dbgs() << "wait for second round\n");
  2230. NewVRegs.push_back(VirtReg.reg());
  2231. return 0;
  2232. }
  2233. if (Stage < RS_Spill) {
  2234. // Try splitting VirtReg or interferences.
  2235. unsigned NewVRegSizeBefore = NewVRegs.size();
  2236. Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
  2237. if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) {
  2238. // If VirtReg got split, the eviction info is no longer relevant.
  2239. LastEvicted.clearEvicteeInfo(VirtReg.reg());
  2240. return PhysReg;
  2241. }
  2242. }
  2243. // If we couldn't allocate a register from spilling, there is probably some
  2244. // invalid inline assembly. The base class will report it.
  2245. if (Stage >= RS_Done || !VirtReg.isSpillable())
  2246. return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
  2247. Depth);

  // Finally spill VirtReg itself.
  if ((EnableDeferredSpilling ||
       TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
      ExtraInfo->getStage(VirtReg) < RS_Memory) {
    // TODO: This is experimental and in particular, we do not model
    // the live range splitting done by spilling correctly.
    // We would need a deep integration with the spiller to do the
    // right thing here. Anyway, that is still good for early testing.
    ExtraInfo->setStage(VirtReg, RS_Memory);
    LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
    NewVRegs.push_back(VirtReg.reg());
  } else {
    NamedRegionTimer T("spill", "Spiller", TimerGroupName,
                       TimerGroupDescription, TimePassesIsEnabled);
    LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
    spiller().spill(LRE);
    ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);

    // Tell LiveDebugVariables about the new ranges. Ranges not covered by
    // the new regs are kept in LDV (still mapping to the old register) until
    // we rewrite spilled locations in LDV at a later stage.
    DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS);

    if (VerifyEnabled)
      MF->verify(this, "After spilling");
  }

  // The live virtual register requesting allocation was spilled, so tell
  // the caller not to allocate anything during this round.
  return 0;
}
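
// Append the collected spill/reload/copy counts and their block-frequency
// weighted costs to a missed-optimization remark. With illustrative numbers,
// the emitted remark body reads e.g.:
//   2 spills 4.5 total spills cost 3 reloads 6.0 total reloads cost
//   generated in loop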
void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
  using namespace ore;
  if (Spills) {
    R << NV("NumSpills", Spills) << " spills ";
    R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
  }
  if (FoldedSpills) {
    R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
    R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
      << " total folded spills cost ";
  }
  if (Reloads) {
    R << NV("NumReloads", Reloads) << " reloads ";
    R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
  }
  if (FoldedReloads) {
    R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
    R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
      << " total folded reloads cost ";
  }
  if (ZeroCostFoldedReloads)
    R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
      << " zero cost folded reloads ";
  if (Copies) {
    R << NV("NumVRCopies", Copies) << " virtual register copies ";
    R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
  }
}
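
// Count spill- and reload-related instructions in MBB: direct spills to and
// reloads from spill slots, folded spills/reloads (spill-slot memory operands
// attached to other instructions), zero-cost folded reloads on
// patchpoint-like instructions, and virtual-register-to-virtual-register
// copies. The raw counts are weighted by the block's relative frequency
// before being returned.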
RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
  RAGreedyStats Stats;
  const MachineFrameInfo &MFI = MF->getFrameInfo();
  int FI;

  auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
    return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
        A->getPseudoValue())->getFrameIndex());
  };
  auto isPatchpointInstr = [](const MachineInstr &MI) {
    return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
           MI.getOpcode() == TargetOpcode::STACKMAP ||
           MI.getOpcode() == TargetOpcode::STATEPOINT;
  };
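  // Classify each instruction as at most one of: vreg-to-vreg copy, reload,
  // spill, folded reload(s), or folded spill(s).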
  for (MachineInstr &MI : MBB) {
    if (MI.isCopy()) {
      MachineOperand &Dest = MI.getOperand(0);
      MachineOperand &Src = MI.getOperand(1);
      if (Dest.isReg() && Src.isReg() && Dest.getReg().isVirtual() &&
          Src.getReg().isVirtual())
        ++Stats.Copies;
      continue;
    }

    SmallVector<const MachineMemOperand *, 2> Accesses;
    if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
      ++Stats.Reloads;
      continue;
    }
    if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
      ++Stats.Spills;
      continue;
    }
    if (TII->hasLoadFromStackSlot(MI, Accesses) &&
        llvm::any_of(Accesses, isSpillSlotAccess)) {
      if (!isPatchpointInstr(MI)) {
        Stats.FoldedReloads += Accesses.size();
        continue;
      }
      // For statepoints there may be both folded and zero-cost folded stack
      // reloads: operands outside the unfoldable range can reference the
      // stack slot directly and need no reload code.
      std::pair<unsigned, unsigned> NonZeroCostRange =
          TII->getPatchpointUnfoldableRange(MI);
      SmallSet<unsigned, 16> FoldedReloads;
      SmallSet<unsigned, 16> ZeroCostFoldedReloads;
      for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
        MachineOperand &MO = MI.getOperand(Idx);
        if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
          continue;
        if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
          FoldedReloads.insert(MO.getIndex());
        else
          ZeroCostFoldedReloads.insert(MO.getIndex());
      }
      // If a stack slot is also used by a folded reload, it is not zero cost.
      for (unsigned Slot : FoldedReloads)
        ZeroCostFoldedReloads.erase(Slot);
      Stats.FoldedReloads += FoldedReloads.size();
      Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
      continue;
    }
    Accesses.clear();
    if (TII->hasStoreToStackSlot(MI, Accesses) &&
        llvm::any_of(Accesses, isSpillSlotAccess)) {
      Stats.FoldedSpills += Accesses.size();
    }
  }
  // Weight the collected counts by the relative frequency of this basic
  // block to turn them into costs.
  float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
  Stats.ReloadsCost = RelFreq * Stats.Reloads;
  Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
  Stats.SpillsCost = RelFreq * Stats.Spills;
  Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
  Stats.CopiesCost = RelFreq * Stats.Copies;
  return Stats;
}
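
// Accumulate statistics for loop L and all of its subloops, emitting one
// LoopSpillReloadCopies remark per loop that has anything to report.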
RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
  RAGreedyStats Stats;

  // Sum up the spills and reloads in subloops.
  for (MachineLoop *SubLoop : *L)
    Stats.add(reportStats(SubLoop));

  for (MachineBasicBlock *MBB : L->getBlocks())
    // Handle blocks that were not included in subloops.
    if (Loops->getLoopFor(MBB) == L)
      Stats.add(computeStats(*MBB));

  if (!Stats.isEmpty()) {
    using namespace ore;

    ORE->emit([&]() {
      MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
                                        L->getStartLoc(), L->getHeader());
      Stats.report(R);
      R << "generated in loop";
      return R;
    });
  }
  return Stats;
}
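
// Emit a function-level SpillReloadCopies remark covering the whole function.
// This only does work when extra analysis remarks are enabled for DEBUG_TYPE
// (e.g. via -pass-remarks-missed=regalloc).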
void RAGreedy::reportStats() {
  if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
    return;
  RAGreedyStats Stats;
  for (MachineLoop *L : *Loops)
    Stats.add(reportStats(L));
  // Process non-loop blocks.
  for (MachineBasicBlock &MBB : *MF)
    if (!Loops->getLoopFor(&MBB))
      Stats.add(computeStats(MBB));
  if (!Stats.isEmpty()) {
    using namespace ore;

    ORE->emit([&]() {
      DebugLoc Loc;
      if (auto *SP = MF->getFunction().getSubprogram())
        Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
      MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
                                        &MF->front());
      Stats.report(R);
      R << "generated in function";
      return R;
    });
  }
}
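
// Top-level entry point: wire up the required analyses, compute spill weights
// and hints, run the main priority-queue allocation loop, then perform hint
// recoloring and post-allocation optimization before reporting statistics.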
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
  LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
                    << "********** Function: " << mf.getName() << '\n');

  MF = &mf;
  TRI = MF->getSubtarget().getRegisterInfo();
  TII = MF->getSubtarget().getInstrInfo();
  RCI.runOnMachineFunction(mf);

  EnableAdvancedRASplitCost =
      ConsiderLocalIntervalCost.getNumOccurrences()
          ? ConsiderLocalIntervalCost
          : MF->getSubtarget().enableAdvancedRASplitCost();

  if (VerifyEnabled)
    MF->verify(this, "Before greedy register allocator");

  RegAllocBase::init(getAnalysis<VirtRegMap>(),
                     getAnalysis<LiveIntervals>(),
                     getAnalysis<LiveRegMatrix>());
  Indexes = &getAnalysis<SlotIndexes>();
  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  DomTree = &getAnalysis<MachineDominatorTree>();
  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
  Loops = &getAnalysis<MachineLoopInfo>();
  Bundles = &getAnalysis<EdgeBundles>();
  SpillPlacer = &getAnalysis<SpillPlacement>();
  DebugVars = &getAnalysis<LiveDebugVariables>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
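
  // Per-function state: CSR first-use cost, per-register costs, auxiliary
  // live-range information, the eviction advisor, and the spill and split
  // machinery.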
  initializeCSRCost();

  RegCosts = TRI->getRegisterCosts(*MF);

  ExtraInfo.emplace();
  EvictAdvisor =
      getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);

  VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
  SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));

  VRAI->calculateSpillWeightsAndHints();

  LLVM_DEBUG(LIS->dump());

  SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
  SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
  IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
  GlobalCand.resize(32); // This will grow as needed.
  SetOfBrokenHints.clear();
  LastEvicted.clear();

  allocatePhysRegs();
  tryHintsRecoloring();

  if (VerifyEnabled)
    MF->verify(this, "Before post optimization");
  postOptimization();
  reportStats();

  releaseMemory();
  return true;
}