//===- MachineSink.cpp - Sinking for machine instructions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass moves instructions into successor blocks when possible, so that
// they aren't executed on paths where their results aren't needed.
//
// This pass is not intended to be a replacement or a complete alternative
// for an LLVM-IR-level sinking pass. It is only designed to sink simple
// constructs that are not exposed before lowering and instruction selection.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-sink"

static cl::opt<bool>
    SplitEdges("machine-sink-split",
               cl::desc("Split critical edges during machine sinking"),
               cl::init(true), cl::Hidden);

static cl::opt<bool>
    UseBlockFreqInfo("machine-sink-bfi",
                     cl::desc("Use block frequency info to find successors to sink"),
                     cl::init(true), cl::Hidden);

static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
    "machine-sink-split-probability-threshold",
    cl::desc(
        "Percentage threshold for splitting single-instruction critical edge. "
        "If the branch probability is higher than this threshold, we allow "
        "speculative execution of up to 1 instruction to avoid branching to "
        "the split critical edge"),
    cl::init(40), cl::Hidden);

static cl::opt<unsigned> SinkLoadInstsPerBlockThreshold(
    "machine-sink-load-instrs-threshold",
    cl::desc("Do not try to find alias store for a load if there is an in-path "
             "block whose instruction number is higher than this threshold."),
    cl::init(2000), cl::Hidden);

static cl::opt<unsigned> SinkLoadBlocksThreshold(
    "machine-sink-load-blocks-threshold",
    cl::desc("Do not try to find alias store for a load if the number of blocks "
             "on the straight line is higher than this threshold."),
    cl::init(20), cl::Hidden);

static cl::opt<bool>
    SinkInstsIntoCycle("sink-insts-to-avoid-spills",
                       cl::desc("Sink instructions into cycles to avoid "
                                "register spills"),
                       cl::init(false), cl::Hidden);

static cl::opt<unsigned> SinkIntoCycleLimit(
    "machine-sink-cycle-limit",
    cl::desc("The maximum number of instructions considered for cycle sinking."),
    cl::init(50), cl::Hidden);

STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");

namespace {

class MachineSinking : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  MachineRegisterInfo *MRI;      // Machine register information
  MachineDominatorTree *DT;      // Machine dominator tree
  MachinePostDominatorTree *PDT; // Machine post dominator tree
  MachineCycleInfo *CI;
  MachineBlockFrequencyInfo *MBFI;
  const MachineBranchProbabilityInfo *MBPI;
  AliasAnalysis *AA;
  RegisterClassInfo RegClassInfo;

  // Remember which edges have been considered for breaking.
  SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8>
      CEBCandidates;
  // Remember which edges we are about to split. Unlike CEBCandidates, which
  // only records edges that were considered, the edges in ToSplit will
  // actually be split.
  SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;

  DenseSet<Register> RegsToClearKillFlags;

  using AllSuccsCache =
      std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;

  /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
  /// post-dominated by another DBG_VALUE of the same variable location.
  /// This is necessary to detect sequences such as:
  ///     %0 = someinst
  ///     DBG_VALUE %0, !123, !DIExpression()
  ///     %1 = anotherinst
  ///     DBG_VALUE %1, !123, !DIExpression()
  /// Where if %0 were to sink, the DBG_VALUE should not sink with it, as that
  /// would re-order assignments.
  using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;

  /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
  /// debug instructions to sink.
  SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;

  /// Record of debug variables that have had their locations set in the
  /// current block.
  DenseSet<DebugVariable> SeenDbgVars;

  std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
      HasStoreCache;
  std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
           std::vector<MachineInstr *>>
      StoreInstrCache;

  /// Cached BB's register pressure.
  std::map<MachineBasicBlock *, std::vector<unsigned>> CachedRegisterPressure;

public:
  static char ID; // Pass identification

  MachineSinking() : MachineFunctionPass(ID) {
    initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MachineDominatorTree>();
    AU.addRequired<MachinePostDominatorTree>();
    AU.addRequired<MachineCycleInfoWrapperPass>();
    AU.addRequired<MachineBranchProbabilityInfo>();
    AU.addPreserved<MachineCycleInfoWrapperPass>();
    AU.addPreserved<MachineLoopInfo>();
    if (UseBlockFreqInfo)
      AU.addRequired<MachineBlockFrequencyInfo>();
  }

  void releaseMemory() override {
    CEBCandidates.clear();
  }

private:
  bool ProcessBlock(MachineBasicBlock &MBB);
  void ProcessDbgInst(MachineInstr &MI);
  bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                   MachineBasicBlock *To);
  bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
                       MachineInstr &MI);

  /// Postpone the splitting of the given critical
  /// edge (\p From, \p To).
  ///
  /// We do not split the edges on the fly. Indeed, this invalidates
  /// the dominance information and thus triggers a lot of updates
  /// of that information underneath.
  /// Instead, we postpone all the splits after each iteration of
  /// the main loop. That way, the information is at least valid
  /// for the lifetime of an iteration.
  ///
  /// \return True if the edge is marked as toSplit, false otherwise.
  /// False can be returned if, for instance, this is not profitable.
  bool PostponeSplitCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                 MachineBasicBlock *To, bool BreakPHIEdge);
  bool SinkInstruction(MachineInstr &MI, bool &SawStore,
                       AllSuccsCache &AllSuccessors);

  /// If we sink a COPY inst, some debug users of its destination may no
  /// longer be dominated by the COPY, and will eventually be dropped.
  /// This is easily rectified by forwarding the non-dominated debug uses
  /// to the copy source.
  void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
                                     MachineBasicBlock *TargetBlock);
  bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB,
                               MachineBasicBlock *DefMBB, bool &BreakPHIEdge,
                               bool &LocalUse) const;
  MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                      bool &BreakPHIEdge,
                                      AllSuccsCache &AllSuccessors);
  void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
                               SmallVectorImpl<MachineInstr *> &Candidates);
  bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);

  bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                            MachineBasicBlock *MBB,
                            MachineBasicBlock *SuccToSinkTo,
                            AllSuccsCache &AllSuccessors);

  bool PerformTrivialForwardCoalescing(MachineInstr &MI,
                                       MachineBasicBlock *MBB);

  SmallVector<MachineBasicBlock *, 4> &
  GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                         AllSuccsCache &AllSuccessors) const;

  std::vector<unsigned> &getBBRegisterPressure(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char MachineSinking::ID = 0;

char &llvm::MachineSinkingID = MachineSinking::ID;

INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
                      "Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
                    "Machine code sinking", false, false)
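
/// If MI is a trivial COPY between virtual registers of the same register
/// class, where the source has a single non-debug use and is not itself
/// copy-like, forward the source into all uses of the destination and erase
/// the COPY. (Summary comment added here; the behavior is exactly what the
/// code below implements.)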
bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
                                                     MachineBasicBlock *MBB) {
  if (!MI.isCopy())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  Register DstReg = MI.getOperand(0).getReg();
  if (!SrcReg.isVirtual() || !DstReg.isVirtual() ||
      !MRI->hasOneNonDBGUse(SrcReg))
    return false;

  const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
  const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
  if (SRC != DRC)
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
  if (DefMI->isCopyLike())
    return false;
  LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
  LLVM_DEBUG(dbgs() << "*** to: " << MI);
  MRI->replaceRegWith(DstReg, SrcReg);
  MI.eraseFromParent();

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  MRI->clearKillFlags(SrcReg);

  ++NumCoalesces;
  return true;
}

/// AllUsesDominatedByBlock - Return true if all uses of the specified register
/// occur in blocks dominated by the specified block. If any use is in the
/// definition block, then return false, since it is never legal to move a def
/// after its uses.
bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
                                             MachineBasicBlock *MBB,
                                             MachineBasicBlock *DefMBB,
                                             bool &BreakPHIEdge,
                                             bool &LocalUse) const {
  assert(Reg.isVirtual() && "Only makes sense for vregs");

  // Ignore debug uses because debug info doesn't affect the code.
  if (MRI->use_nodbg_empty(Reg))
    return true;

  // BreakPHIEdge is true if all the uses are in the successor MBB being sunk
  // into and they are all PHI nodes. In this case, machine-sink must break
  // the critical edge first. e.g.
  //
  // %bb.1:
  //   Predecessors according to CFG: %bb.0
  //   ...
  //   %def = DEC64_32r %x, implicit-def dead %eflags
  //   ...
  //   JE_4 <%bb.37>, implicit %eflags
  //   Successors according to CFG: %bb.37 %bb.2
  //
  // %bb.2:
  //   %p = PHI %y, %bb.0, %def, %bb.1
  if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) {
        MachineInstr *UseInst = MO.getParent();
        unsigned OpNo = UseInst->getOperandNo(&MO);
        MachineBasicBlock *UseBlock = UseInst->getParent();
        return UseBlock == MBB && UseInst->isPHI() &&
               UseInst->getOperand(OpNo + 1).getMBB() == DefMBB;
      })) {
    BreakPHIEdge = true;
    return true;
  }

  for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
    // Determine the block of the use.
    MachineInstr *UseInst = MO.getParent();
    unsigned OpNo = &MO - &UseInst->getOperand(0);
    MachineBasicBlock *UseBlock = UseInst->getParent();
    if (UseInst->isPHI()) {
      // PHI nodes use the operand in the predecessor block, not the block with
      // the PHI.
      UseBlock = UseInst->getOperand(OpNo + 1).getMBB();
    } else if (UseBlock == DefMBB) {
      LocalUse = true;
      return false;
    }

    // Check that it dominates.
    if (!DT->dominates(MBB, UseBlock))
      return false;
  }

  return true;
}

/// Return true if this machine instruction loads from the global offset table
/// or the constant pool.
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
  assert(MI.mayLoad() && "Expected MI that loads!");

  // If we lost memory operands, conservatively assume that the instruction
  // reads from everything.
  if (MI.memoperands_empty())
    return true;

  for (MachineMemOperand *MemOp : MI.memoperands())
    if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
      if (PSV->isGOT() || PSV->isConstantPool())
        return true;

  return false;
}
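
/// Collect candidate instructions from \p BB (the cycle preheader) that may
/// be sunk into \p Cycle: they must be acceptable to the target, cycle
/// invariant, safe to move across stores, non-convergent, and define exactly
/// one virtual register. Loads are only considered if they read the GOT or
/// the constant pool. (Summary comment added to match the checks below.)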
void MachineSinking::FindCycleSinkCandidates(
    MachineCycle *Cycle, MachineBasicBlock *BB,
    SmallVectorImpl<MachineInstr *> &Candidates) {
  for (auto &MI : *BB) {
    LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
    if (!TII->shouldSink(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
                           "target\n");
      continue;
    }
    if (!isCycleInvariant(Cycle, MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n");
      continue;
    }
    bool DontMoveAcrossStore = true;
    if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n");
      continue;
    }
    if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Don't sink loads not from the GOT "
                           "or constant pool\n");
      continue;
    }
    if (MI.isConvergent())
      continue;

    const MachineOperand &MO = MI.getOperand(0);
    if (!MO.isReg() || !MO.getReg() || !MO.isDef())
      continue;
    if (!MRI->hasOneDef(MO.getReg()))
      continue;

    LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n");
    Candidates.push_back(&MI);
  }
}
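
/// Driver for the pass: repeatedly process all blocks until no more changes
/// are made, splitting any critical edges that were marked along the way,
/// and then optionally sink instructions from cycle preheaders into cycles.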
bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");

  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  DT = &getAnalysis<MachineDominatorTree>();
  PDT = &getAnalysis<MachinePostDominatorTree>();
  CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
  MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  RegClassInfo.runOnMachineFunction(MF);

  bool EverMadeChange = false;

  while (true) {
    bool MadeChange = false;

    // Process all basic blocks.
    CEBCandidates.clear();
    ToSplit.clear();
    for (auto &MBB : MF)
      MadeChange |= ProcessBlock(MBB);

    // If we have anything we marked as toSplit, split it now.
    for (const auto &Pair : ToSplit) {
      auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
      if (NewSucc != nullptr) {
        LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
                          << printMBBReference(*Pair.first) << " -- "
                          << printMBBReference(*NewSucc) << " -- "
                          << printMBBReference(*Pair.second) << '\n');
        if (MBFI)
          MBFI->onEdgeSplit(*Pair.first, *NewSucc, *MBPI);
        MadeChange = true;
        ++NumSplit;
      } else
        LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
    }
    // If this iteration over the code changed anything, keep iterating.
    if (!MadeChange) break;
    EverMadeChange = true;
  }

  if (SinkInstsIntoCycle) {
    SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_begin(),
                                          CI->toplevel_end());
    for (auto *Cycle : Cycles) {
      MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
      if (!Preheader) {
        LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
        continue;
      }
      SmallVector<MachineInstr *, 8> Candidates;
      FindCycleSinkCandidates(Cycle, Preheader, Candidates);

      // Walk the candidates in reverse order so that we start with the use
      // of a def-use chain, if there is any.
      // TODO: Sort the candidates using a cost-model.
      unsigned i = 0;
      for (MachineInstr *I : llvm::reverse(Candidates)) {
        if (i++ == SinkIntoCycleLimit) {
          LLVM_DEBUG(dbgs() << "CycleSink: Reached limit of instructions to "
                               "be analysed.\n");
          break;
        }

        if (!SinkIntoCycle(Cycle, *I))
          break;
        EverMadeChange = true;
        ++NumCycleSunk;
      }
    }
  }

  HasStoreCache.clear();
  StoreInstrCache.clear();

  // Now clear any kill flags for recorded registers.
  for (auto I : RegsToClearKillFlags)
    MRI->clearKillFlags(I);
  RegsToClearKillFlags.clear();

  return EverMadeChange;
}
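
/// Process a single basic block: walk it bottom-up, first attempting trivial
/// forward copy coalescing on each instruction and then attempting to sink it
/// into a successor. Returns true if the block was changed.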
bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
  // Can't sink anything out of a block that has fewer than two successors.
  if (MBB.succ_size() <= 1 || MBB.empty()) return false;

  // Don't bother sinking code out of unreachable blocks. In addition to being
  // unprofitable, it can also lead to infinite looping, because in an
  // unreachable cycle there may be nowhere to stop.
  if (!DT->isReachableFromEntry(&MBB)) return false;

  bool MadeChange = false;

  // Cache all successors, sorted by frequency info and cycle depth.
  AllSuccsCache AllSuccessors;

  // Walk the basic block bottom-up. Remember if we saw a store.
  MachineBasicBlock::iterator I = MBB.end();
  --I;
  bool ProcessedBegin, SawStore = false;
  do {
    MachineInstr &MI = *I; // The instruction to sink.

    // Predecrement I (if it's not begin) so that it isn't invalidated by
    // sinking.
    ProcessedBegin = I == MBB.begin();
    if (!ProcessedBegin)
      --I;

    if (MI.isDebugOrPseudoInstr()) {
      if (MI.isDebugValue())
        ProcessDbgInst(MI);
      continue;
    }

    bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
    if (Joined) {
      MadeChange = true;
      continue;
    }

    if (SinkInstruction(MI, SawStore, AllSuccessors)) {
      ++NumSunk;
      MadeChange = true;
    }

    // If we just processed the first instruction in the block, we're done.
  } while (!ProcessedBegin);

  SeenDbgUsers.clear();
  SeenDbgVars.clear();
  // Invalidate the cached register pressure after processing this block,
  // since sinking into a block changes its pressure.
  CachedRegisterPressure.clear();

  return MadeChange;
}
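
/// Record a DBG_VALUE: remember which vregs it reads (so the right debug
/// users can be moved if a vreg's def sinks) and whether its variable has
/// already been seen in this block (so sinking never re-orders assignments).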
void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
  // When we see DBG_VALUEs for registers, record any vreg it reads, so that
  // we know what to sink if the vreg def sinks.
  assert(MI.isDebugValue() && "Expected DBG_VALUE for processing");

  DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
                    MI.getDebugLoc()->getInlinedAt());
  bool SeenBefore = SeenDbgVars.contains(Var);

  for (MachineOperand &MO : MI.debug_operands()) {
    if (MO.isReg() && MO.getReg().isVirtual())
      SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
  }

  // Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
  SeenDbgVars.insert(Var);
}
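
/// Heuristic for whether it is worth splitting the critical edge
/// (\p From, \p To) in order to sink \p MI: split for edges already
/// considered this iteration, for instructions that are not as cheap as a
/// move, for sufficiently unlikely edges, and for cheap instructions whose
/// operand definitions could then be sunk along with them.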
bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
                                                 MachineBasicBlock *From,
                                                 MachineBasicBlock *To) {
  // FIXME: Need much better heuristics.

  // If the pass has already considered breaking this edge (during this pass
  // through the function), then let's go ahead and break it. This means
  // sinking multiple "cheap" instructions into the same block.
  if (!CEBCandidates.insert(std::make_pair(From, To)).second)
    return true;

  if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
    return true;

  if (From->isSuccessor(To) &&
      MBPI->getEdgeProbability(From, To) <=
          BranchProbability(SplitEdgeProbabilityThreshold, 100))
    return true;

  // MI is cheap, we probably don't want to break the critical edge for it.
  // However, if this would allow some definitions of its source operands
  // to be sunk then it's probably worth it.
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || !MO.isUse())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    // We don't move live definitions of physical registers,
    // so sinking their uses won't enable any opportunities.
    if (Reg.isPhysical())
      continue;

    // If this instruction is the only user of a virtual register,
    // check if breaking the edge will enable sinking
    // both this instruction and the defining instruction.
    if (MRI->hasOneNonDBGUse(Reg)) {
      // If the definition resides in the same MBB,
      // claim it's likely we can sink these together.
      // If the definition resides elsewhere, we aren't
      // blocking it from being sunk, so don't break the edge.
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (DefMI->getParent() == MI.getParent())
        return true;
    }
  }

  return false;
}

bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
                                               MachineBasicBlock *FromBB,
                                               MachineBasicBlock *ToBB,
                                               bool BreakPHIEdge) {
  if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
    return false;

  // Avoid breaking a back edge. From == To means a backedge of a single-block
  // cycle.
  if (!SplitEdges || FromBB == ToBB)
    return false;

  MachineCycle *FromCycle = CI->getCycle(FromBB);
  MachineCycle *ToCycle = CI->getCycle(ToBB);

  // Check for backedges of more "complex" cycles.
  if (FromCycle == ToCycle && FromCycle &&
      (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
    return false;

  // It's not always legal to break critical edges and sink the computation
  // to the edge.
  //
  // %bb.1:
  //   v1024
  //   Beq %bb.3
  //   <fallthrough>
  // %bb.2:
  //   ... no uses of v1024
  //   <fallthrough>
  // %bb.3:
  //   ...
  //   = v1024
  //
  // If the %bb.1 -> %bb.3 edge is broken and the computation of v1024 is
  // inserted:
  //
  // %bb.1:
  //   ...
  //   Bne %bb.2
  // %bb.4:
  //   v1024 =
  //   B %bb.3
  // %bb.2:
  //   ... no uses of v1024
  //   <fallthrough>
  // %bb.3:
  //   ...
  //   = v1024
  //
  // This is incorrect since v1024 is not computed along the
  // %bb.1->%bb.2->%bb.3 flow. We need to ensure the new basic block where the
  // computation is sunk to dominates all the uses.
  // It's only legal to break the critical edge and sink the computation to the
  // new block if all the predecessors of "To", except for "From", are
  // not dominated by "From". Given the SSA property, this means these
  // predecessors are dominated by "To".
  //
  // There is no need to do this check if all the uses are PHI nodes. PHI
  // sources are only defined on the specific predecessor edges.
  if (!BreakPHIEdge) {
    for (MachineBasicBlock *Pred : ToBB->predecessors())
      if (Pred != FromBB && !DT->dominates(ToBB, Pred))
        return false;
  }

  ToSplit.insert(std::make_pair(FromBB, ToBB));

  return true;
}
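
/// Compute the maximum register pressure per pressure set for \p MBB by
/// walking the block bottom-up with a RegPressureTracker. The result is
/// cached until CachedRegisterPressure is invalidated at the end of
/// ProcessBlock.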
std::vector<unsigned> &
MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) {
  // To save compile time, a block's cached register pressure is not updated
  // within one ProcessBlock iteration, even though sinking instructions into
  // the block does change its register pressure.
  // FIXME: need an accurate and cheap register pressure estimation model here.
  auto RP = CachedRegisterPressure.find(&MBB);
  if (RP != CachedRegisterPressure.end())
    return RP->second;

  RegionPressure Pressure;
  RegPressureTracker RPTracker(Pressure);

  // Initialize the register pressure tracker.
  RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(),
                 /*TrackLaneMasks=*/false, /*TrackUntiedDefs=*/true);

  for (MachineBasicBlock::iterator MII = MBB.instr_end(),
                                   MIE = MBB.instr_begin();
       MII != MIE; --MII) {
    MachineInstr &MI = *std::prev(MII);
    if (MI.isDebugInstr() || MI.isPseudoProbe())
      continue;
    RegisterOperands RegOpers;
    RegOpers.collect(MI, *TRI, *MRI, false, false);
    RPTracker.recedeSkipDebugValues();
    assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
    RPTracker.recede(RegOpers);
  }

  RPTracker.closeRegion();
  auto It = CachedRegisterPressure.insert(
      std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
  return It.first->second;
}

/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          MachineBasicBlock *SuccToSinkTo,
                                          AllSuccsCache &AllSuccessors) {
  assert(SuccToSinkTo && "Invalid SinkTo Candidate BB");

  if (MBB == SuccToSinkTo)
    return false;

  // It is profitable if SuccToSinkTo does not post dominate current block.
  if (!PDT->dominates(SuccToSinkTo, MBB))
    return true;

  // It is profitable to sink an instruction from a deeper cycle to a shallower
  // cycle, even if the latter post-dominates the former (PR21115).
  if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo))
    return true;

  // Check if only use in post dominated block is PHI instruction.
  bool NonPHIUse = false;
  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
    MachineBasicBlock *UseBlock = UseInst.getParent();
    if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
      NonPHIUse = true;
  }
  if (!NonPHIUse)
    return true;

  // Even if SuccToSinkTo post-dominates MBB, sinking may still be profitable
  // if MI can be further sunk into another block in a later round.
  bool BreakPHIEdge = false;
  // FIXME - If finding successor is compile time expensive then cache results.
  if (MachineBasicBlock *MBB2 =
          FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
    return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);

  MachineCycle *MCycle = CI->getCycle(MBB);

  // If the instruction is not inside a cycle, it is not profitable to sink MI
  // to a block SuccToSinkTo that post-dominates it.
  if (!MCycle)
    return false;

  auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) {
    unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
    const int *PS = TRI->getRegClassPressureSets(RC);
    // Get register pressure for block SuccToSinkTo.
    std::vector<unsigned> BBRegisterPressure =
        getBBRegisterPressure(*SuccToSinkTo);
    for (; *PS != -1; PS++)
      // Check if any register pressure set exceeds its limit in block
      // SuccToSinkTo after sinking.
      if (Weight + BBRegisterPressure[*PS] >=
          TRI->getRegPressureSetLimit(*MBB->getParent(), *PS))
        return true;
    return false;
  };

  // If this instruction is inside a cycle and sinking it can shorten the live
  // ranges of more registers, it is still profitable.
  for (const MachineOperand &MO : MI.operands()) {
    // Ignore non-register operands.
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isPhysical()) {
      if (MO.isUse() &&
          (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
        continue;

      // Don't handle non-constant and non-ignorable physical registers.
      return false;
    }

    // Users for the defs are all dominated by SuccToSinkTo.
    if (MO.isDef()) {
      // This def register's live range is shortened after sinking.
      bool LocalUse = false;
      if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
                                   LocalUse))
        return false;
    } else {
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (!DefMI)
        continue;
      MachineCycle *Cycle = CI->getCycle(DefMI->getParent());
      // If DefMI is defined outside of the cycle, there should be no live
      // range impact for this operand. "Defined outside of the cycle" means
      // either:
      // 1. the definition is outside of the cycle, or
      // 2. the definition is in this cycle, but it is a PHI in the cycle
      //    header.
      if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
                              Cycle->getHeader() == DefMI->getParent()))
        continue;

      // The DefMI is defined inside the cycle.
      // If sinking this operand makes some register pressure set exceed its
      // limit, it is not profitable.
      if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
        LLVM_DEBUG(dbgs() << "register pressure exceeds limit, "
                             "not profitable.");
        return false;
      }
    }
  }

  // If MI is in a cycle and all its operands are alive across the whole cycle,
  // or if no operand sinking makes a register pressure set exceed its limit,
  // it is profitable to sink MI.
  return true;
}

/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
/// computing it if it was not already cached.
SmallVector<MachineBasicBlock *, 4> &
MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                                       AllSuccsCache &AllSuccessors) const {
  // Do we have the sorted successors in cache?
  auto Succs = AllSuccessors.find(MBB);
  if (Succs != AllSuccessors.end())
    return Succs->second;

  SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->successors());

  // Handle cases where sinking can happen but where the sink point isn't a
  // successor. For example:
  //
  //   x = computation
  //   if () {} else {}
  //   use x
  //
  for (MachineDomTreeNode *DTChild : DT->getNode(MBB)->children()) {
    // DomTree children of MBB that have MBB as immediate dominator are added.
    if (DTChild->getIDom()->getBlock() == MI.getParent() &&
        // Skip MBBs already added to the AllSuccs vector above.
        !MBB->isSuccessor(DTChild->getBlock()))
      AllSuccs.push_back(DTChild->getBlock());
  }

  // Sort successors according to their cycle depth or block frequency info.
  llvm::stable_sort(
      AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
        uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
        uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
        bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
        return HasBlockFreq ? LHSFreq < RHSFreq
                            : CI->getCycleDepth(L) < CI->getCycleDepth(R);
      });

  auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));

  return it.first->second;
}

/// FindSuccToSinkTo - Find a successor to sink this instruction to.
MachineBasicBlock *
MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                 bool &BreakPHIEdge,
                                 AllSuccsCache &AllSuccessors) {
  assert(MBB && "Invalid MachineBasicBlock!");

  // Loop over all the operands of the specified instruction. If there is
  // anything we can't handle, bail out.

  // SuccToSinkTo - This is the successor to sink this instruction to, once we
  // decide.
  MachineBasicBlock *SuccToSinkTo = nullptr;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg()) continue; // Ignore non-register operands.

    Register Reg = MO.getReg();
    if (Reg == 0) continue;

    if (Reg.isPhysical()) {
      if (MO.isUse()) {
        // If the physreg has no defs anywhere, it's just an ambient register
        // and we can freely move its uses. Alternatively, if it's allocatable,
        // it could get allocated to something with a def during allocation.
        if (!MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
          return nullptr;
      } else if (!MO.isDead()) {
        // A def that isn't dead. We can't move it.
        return nullptr;
      }
    } else {
      // Virtual register uses are always safe to sink.
      if (MO.isUse()) continue;

      // If it's not safe to move defs of the register class, then abort.
      if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
        return nullptr;

      // Virtual register defs can only be sunk if all their uses are in blocks
      // dominated by one of the successors.
      if (SuccToSinkTo) {
        // If a previous operand picked a block to sink to, then this operand
        // must be sinkable to the same block.
        bool LocalUse = false;
        if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
                                     BreakPHIEdge, LocalUse))
          return nullptr;

        continue;
      }

      // Otherwise, we should look at all the successors and decide which one
      // we should sink to. If we have reliable block frequency information
      // (frequency != 0) available, give successors with smaller frequencies
      // higher priority, otherwise prioritize smaller cycle depths.
      for (MachineBasicBlock *SuccBlock :
           GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
        bool LocalUse = false;
        if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
                                    BreakPHIEdge, LocalUse)) {
          SuccToSinkTo = SuccBlock;
          break;
        }
        if (LocalUse)
          // Def is used locally, it's never safe to move this def.
          return nullptr;
      }

      // If we couldn't find a block to sink to, ignore this instruction.
      if (!SuccToSinkTo)
        return nullptr;
      if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
        return nullptr;
    }
  }

  // It is not possible to sink an instruction into its own block. This can
  // happen with cycles.
  if (MBB == SuccToSinkTo)
    return nullptr;

  // It's not safe to sink instructions to an EH landing pad. Control flow into
  // a landing pad is implicitly defined.
  if (SuccToSinkTo && SuccToSinkTo->isEHPad())
    return nullptr;

  // It ought to be okay to sink instructions into an INLINEASM_BR target, but
  // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction
  // in the source block (which this code does not yet do). So for now, forbid
  // doing so.
  if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget())
    return nullptr;

  return SuccToSinkTo;
}

/// Return true if MI is likely to be usable as a memory operation by the
/// implicit null check optimization.
///
/// This is a "best effort" heuristic, and should not be relied upon for
/// correctness. This returning true does not guarantee that the implicit null
/// check optimization is legal over MI, and this returning false does not
/// guarantee MI cannot possibly be used to do a null check.
static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
                                             const TargetInstrInfo *TII,
                                             const TargetRegisterInfo *TRI) {
  using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;

  auto *MBB = MI.getParent();
  if (MBB->pred_size() != 1)
    return false;

  auto *PredMBB = *MBB->pred_begin();
  auto *PredBB = PredMBB->getBasicBlock();

  // Frontends that don't use implicit null checks have no reason to emit
  // branches with make.implicit metadata, and this function should always
  // return false for them.
  if (!PredBB ||
      !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
    return false;

  const MachineOperand *BaseOp;
  int64_t Offset;
  bool OffsetIsScalable;
  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
    return false;

  if (!BaseOp->isReg())
    return false;

  if (!(MI.mayLoad() && !MI.isPredicable()))
    return false;

  MachineBranchPredicate MBP;
  if (TII->analyzeBranchPredicate(*PredMBB, MBP, false))
    return false;

  return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
         (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
          MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
         MBP.LHS.getReg() == BaseOp->getReg();
}

/// If the sunk instruction is a copy, try to forward the copy instead of
/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if
/// there's any subregister weirdness involved. Returns true if copy
/// propagation occurred.
static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI,
                                 Register Reg) {
  const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo();
  const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo();

  // Copy DBG_VALUE operand and set the original to undef. We then check to
  // see whether this is something that can be copy-forwarded. If it isn't,
  // continue around the loop.
  const MachineOperand *SrcMO = nullptr, *DstMO = nullptr;
  auto CopyOperands = TII.isCopyInstr(SinkInst);
  if (!CopyOperands)
    return false;
  SrcMO = CopyOperands->Source;
  DstMO = CopyOperands->Destination;

  // Check validity of forwarding this copy.
  bool PostRA = MRI.getNumVirtRegs() == 0;

  // Trying to forward between physical and virtual registers is too hard.
  if (Reg.isVirtual() != SrcMO->getReg().isVirtual())
    return false;

  // Only try virtual register copy-forwarding before regalloc, and physical
  // register copy-forwarding after regalloc.
  bool arePhysRegs = !Reg.isVirtual();
  if (arePhysRegs != PostRA)
    return false;

  // Pre-regalloc, only forward if all subregisters agree (or there are no
  // subregs at all). More analysis might recover some forwardable copies.
  if (!PostRA)
    for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg))
      if (DbgMO.getSubReg() != SrcMO->getSubReg() ||
          DbgMO.getSubReg() != DstMO->getSubReg())
        return false;

  // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register
  // of this copy. Only forward the copy if the DBG_VALUE operand exactly
  // matches the copy destination.
  if (PostRA && Reg != DstMO->getReg())
    return false;

  for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) {
    DbgMO.setReg(SrcMO->getReg());
    DbgMO.setSubReg(SrcMO->getSubReg());
  }
  return true;
}
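
/// A DBG_VALUE paired with the sunk registers it refers to, used by
/// performSink to carry debug users alongside the sunk instruction.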
  931. using MIRegs = std::pair<MachineInstr *, SmallVector<unsigned, 2>>;
  932. /// Sink an instruction and its associated debug instructions.
  933. static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
  934. MachineBasicBlock::iterator InsertPos,
  935. ArrayRef<MIRegs> DbgValuesToSink) {
  936. // If we cannot find a location to use (merge with), then we erase the debug
  937. // location to prevent debug-info driven tools from potentially reporting
  938. // wrong location information.
  939. if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end())
  940. MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
  941. InsertPos->getDebugLoc()));
  942. else
  943. MI.setDebugLoc(DebugLoc());
  944. // Move the instruction.
  945. MachineBasicBlock *ParentBlock = MI.getParent();
  946. SuccToSinkTo.splice(InsertPos, ParentBlock, MI,
  947. ++MachineBasicBlock::iterator(MI));
  948. // Sink a copy of debug users to the insert position. Mark the original
  949. // DBG_VALUE location as 'undef', indicating that any earlier variable
  950. // location should be terminated as we've optimised away the value at this
  951. // point.
  952. for (const auto &DbgValueToSink : DbgValuesToSink) {
  953. MachineInstr *DbgMI = DbgValueToSink.first;
  954. MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI);
  955. SuccToSinkTo.insert(InsertPos, NewDbgMI);
  956. bool PropagatedAllSunkOps = true;
  957. for (unsigned Reg : DbgValueToSink.second) {
  958. if (DbgMI->hasDebugOperandForReg(Reg)) {
  959. if (!attemptDebugCopyProp(MI, *DbgMI, Reg)) {
  960. PropagatedAllSunkOps = false;
  961. break;
  962. }
  963. }
  964. }
  965. if (!PropagatedAllSunkOps)
  966. DbgMI->setDebugValueUndef();
  967. }
  968. }
  969. /// hasStoreBetween - check if there is store betweeen straight line blocks From
  970. /// and To.
  971. bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
  972. MachineBasicBlock *To, MachineInstr &MI) {
  973. // Make sure From and To are in straight line which means From dominates To
  974. // and To post dominates From.
  975. if (!DT->dominates(From, To) || !PDT->dominates(To, From))
  976. return true;
  977. auto BlockPair = std::make_pair(From, To);
  978. // Does these two blocks pair be queried before and have a definite cached
  979. // result?
  980. if (HasStoreCache.find(BlockPair) != HasStoreCache.end())
  981. return HasStoreCache[BlockPair];
  982. if (StoreInstrCache.find(BlockPair) != StoreInstrCache.end())
  983. return llvm::any_of(StoreInstrCache[BlockPair], [&](MachineInstr *I) {
  984. return I->mayAlias(AA, MI, false);
  985. });
  986. bool SawStore = false;
  987. bool HasAliasedStore = false;
  988. DenseSet<MachineBasicBlock *> HandledBlocks;
  989. DenseSet<MachineBasicBlock *> HandledDomBlocks;
  990. // Go through all reachable blocks from From.
  991. for (MachineBasicBlock *BB : depth_first(From)) {
  992. // We insert the instruction at the start of block To, so no need to worry
  993. // about stores inside To.
  994. // Store in block From should be already considered when just enter function
  995. // SinkInstruction.
  996. if (BB == To || BB == From)
  997. continue;
  998. // We already handle this BB in previous iteration.
  999. if (HandledBlocks.count(BB))
  1000. continue;
  1001. HandledBlocks.insert(BB);
  1002. // To post dominates BB, it must be a path from block From.
  1003. if (PDT->dominates(To, BB)) {
  1004. if (!HandledDomBlocks.count(BB))
  1005. HandledDomBlocks.insert(BB);
  1006. // If this BB is too big or the block number in straight line between From
  1007. // and To is too big, stop searching to save compiling time.
  1008. if (BB->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold) ||
  1009. HandledDomBlocks.size() > SinkLoadBlocksThreshold) {
  1010. for (auto *DomBB : HandledDomBlocks) {
  1011. if (DomBB != BB && DT->dominates(DomBB, BB))
  1012. HasStoreCache[std::make_pair(DomBB, To)] = true;
  1013. else if(DomBB != BB && DT->dominates(BB, DomBB))
  1014. HasStoreCache[std::make_pair(From, DomBB)] = true;
  1015. }
  1016. HasStoreCache[BlockPair] = true;
  1017. return true;
  1018. }
  1019. for (MachineInstr &I : *BB) {
  1020. // Treat as alias conservatively for a call or an ordered memory
  1021. // operation.
  1022. if (I.isCall() || I.hasOrderedMemoryRef()) {
  1023. for (auto *DomBB : HandledDomBlocks) {
  1024. if (DomBB != BB && DT->dominates(DomBB, BB))
  1025. HasStoreCache[std::make_pair(DomBB, To)] = true;
  1026. else if(DomBB != BB && DT->dominates(BB, DomBB))
  1027. HasStoreCache[std::make_pair(From, DomBB)] = true;
  1028. }
  1029. HasStoreCache[BlockPair] = true;
  1030. return true;
  1031. }
  1032. if (I.mayStore()) {
  1033. SawStore = true;
  1034. // We still have chance to sink MI if all stores between are not
  1035. // aliased to MI.
  1036. // Cache all store instructions, so that we don't need to go through
  1037. // all From reachable blocks for next load instruction.
  1038. if (I.mayAlias(AA, MI, false))
  1039. HasAliasedStore = true;
  1040. StoreInstrCache[BlockPair].push_back(&I);
  1041. }
  1042. }
  1043. }
  1044. }
  1045. // If there is no store at all, cache the result.
  1046. if (!SawStore)
  1047. HasStoreCache[BlockPair] = false;
  1048. return HasAliasedStore;
  1049. }
  1050. /// Sink instructions into cycles if profitable. This especially tries to
  1051. /// prevent register spills caused by register pressure if there is little to no
  1052. /// overhead moving instructions into cycles.
  1053. bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
  1054. LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
  1055. MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
  1056. assert(Preheader && "Cycle sink needs a preheader block");
  1057. MachineBasicBlock *SinkBlock = nullptr;
  1058. bool CanSink = true;
  1059. const MachineOperand &MO = I.getOperand(0);
  1060. for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
  1061. LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI);
  1062. if (!Cycle->contains(MI.getParent())) {
  1063. LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n");
  1064. CanSink = false;
  1065. break;
  1066. }
  1067. // FIXME: Come up with a proper cost model that estimates whether sinking
  1068. // the instruction (and thus possibly executing it on every cycle
  1069. // iteration) is more expensive than a register.
  1070. // For now assumes that copies are cheap and thus almost always worth it.
    if (!MI.isCopy()) {
      LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n");
      CanSink = false;
      break;
    }

    if (!SinkBlock) {
      SinkBlock = MI.getParent();
      LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: "
                        << printMBBReference(*SinkBlock) << "\n");
      continue;
    }

    SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
    if (!SinkBlock) {
      LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n");
      CanSink = false;
      break;
    }
    LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: "
                      << printMBBReference(*SinkBlock) << "\n");
  }

  if (!CanSink) {
    LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
    return false;
  }
  if (!SinkBlock) {
    LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
    return false;
  }
  if (SinkBlock == Preheader) {
    LLVM_DEBUG(
        dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
    return false;
  }
  if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) {
    LLVM_DEBUG(
        dbgs() << "CycleSink: Not sinking, block too large to analyse.\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
  SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
                    I);

  // Conservatively clear any kill flags on uses of the sunk instruction.
  for (MachineOperand &MO : I.operands()) {
    if (MO.isReg() && MO.readsReg())
      RegsToClearKillFlags.insert(MO.getReg());
  }

  // The instruction is moved from its basic block, so do not retain the
  // debug information.
  assert(!I.isDebugInstr() && "Should not sink debug inst");
  I.setDebugLoc(DebugLoc());
  return true;
}

/// Return true if a target defined block prologue instruction interferes
/// with a sink candidate.
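///
/// For example (illustrative): if a prologue instruction writes a register
/// that the sink candidate \p MI reads, or reads a register that \p MI
/// defines, placing \p MI after the prologue would change the values
/// observed, so the sink must be rejected.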
static bool blockPrologueInterferes(MachineBasicBlock *BB,
                                    MachineBasicBlock::iterator End,
                                    MachineInstr &MI,
                                    const TargetRegisterInfo *TRI,
                                    const TargetInstrInfo *TII,
                                    const MachineRegisterInfo *MRI) {
  if (BB->begin() == End)
    return false; // no prologue
  for (MachineBasicBlock::iterator PI = BB->getFirstNonPHI(); PI != End; ++PI) {
    // Only check target defined prologue instructions
    if (!TII->isBasicBlockPrologue(*PI))
      continue;
    for (auto &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      if (MO.isUse()) {
        if (Reg.isPhysical() &&
            (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
          continue;
        if (PI->modifiesRegister(Reg, TRI))
          return true;
      } else {
        if (PI->readsRegister(Reg, TRI))
          return true;
        // Check for interference with non-dead defs
        auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI);
        if (DefOp && !DefOp->isDead())
          return true;
      }
    }
  }
  return false;
}

/// SinkInstruction - Determine whether it is safe to sink the specified
/// machine instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
                                     AllSuccsCache &AllSuccessors) {
  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(MI))
    return false;

  // Check if it's safe to move the instruction.
  if (!MI.isSafeToMove(AA, SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI.isConvergent())
    return false;

  // Don't break implicit null checks. This is a performance heuristic, and not
  // required for correctness.
  if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
    return false;

  // FIXME: This should include support for sinking instructions within the
  // block they are currently in to shorten the live ranges. We often get
  // instructions sunk into the top of a large block, but it would be better to
  // also sink them down before their first use in the block. This xform has to
  // be careful not to *increase* register pressure though, e.g. sinking
  // "x = y + z" down if it kills y and z would increase the live ranges of y
  // and z and only shrink the live range of x.

  bool BreakPHIEdge = false;
  MachineBasicBlock *ParentBlock = MI.getParent();
  MachineBasicBlock *SuccToSinkTo =
      FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);

  // If there are no outputs, it must have side-effects.
  if (!SuccToSinkTo)
    return false;

  // If the instruction to move defines a dead physical register which is live
  // when leaving the basic block, don't move it because it could turn into a
  // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || MO.isUse())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || !Reg.isPhysical())
      continue;
    if (SuccToSinkTo->isLiveIn(Reg))
      return false;
  }

  LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);

  // If the block has multiple predecessors, this is a critical edge.
  // Decide if we can sink along it or need to break the edge.
  if (SuccToSinkTo->pred_size() > 1) {
    // We cannot sink a load across a critical edge - there may be stores in
    // other code paths.
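    // (For instance, if another predecessor of SuccToSinkTo reaches it through
    // a path containing a store to the loaded address, moving the load below
    // the edge could make it observe that store.)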
    bool TryBreak = false;
    bool Store =
        MI.mayLoad() ? hasStoreBetween(ParentBlock, SuccToSinkTo, MI) : true;
    if (!MI.isSafeToMove(AA, Store)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
      TryBreak = true;
    }

    // We don't want to sink across a critical edge if we don't dominate the
    // successor. We could be introducing calculations to new code paths.
    if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
      TryBreak = true;
    }

    // Don't sink instructions into a cycle.
    if (!TryBreak && CI->getCycle(SuccToSinkTo) &&
        (!CI->getCycle(SuccToSinkTo)->isReducible() ||
         CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n");
      TryBreak = true;
    }

    // Otherwise we are OK with sinking along a critical edge.
    if (!TryBreak)
      LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");
    else {
      // Mark this edge as to be split.
      // If the edge can actually be split, the next iteration of the main loop
      // will sink MI in the newly created block.
      bool Status =
          PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo,
                                    BreakPHIEdge);
      if (!Status)
        LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                             "break critical edge\n");
      // The instruction will not be sunk this time.
      return false;
    }
  }

  if (BreakPHIEdge) {
    // BreakPHIEdge is true if all the uses are in the successor MBB being
    // sunk into, and they are all PHI nodes. In this case, machine-sink must
    // break the critical edge first.
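    // (E.g., when every use is a PHI operand in SuccToSinkTo, the value is
    // only needed on this particular edge, so splitting the edge yields a
    // block into which the definition can safely sink.)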
    bool Status = PostponeSplitCriticalEdge(MI, ParentBlock,
                                            SuccToSinkTo, BreakPHIEdge);
    if (!Status)
      LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                           "break critical edge\n");
    // The instruction will not be sunk this time.
    return false;
  }

  // Determine where to insert into. Skip phi nodes.
  MachineBasicBlock::iterator InsertPos =
      SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
  if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) {
    LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
    return false;
  }

  // Collect debug users of any vreg that this inst defines.
  SmallVector<MIRegs, 4> DbgUsersToSink;
  for (auto &MO : MI.operands()) {
    if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
      continue;
    if (!SeenDbgUsers.count(MO.getReg()))
      continue;

    // Sink any users that don't pass any other DBG_VALUEs for this variable.
    auto &Users = SeenDbgUsers[MO.getReg()];
    for (auto &User : Users) {
      MachineInstr *DbgMI = User.getPointer();
      if (User.getInt()) {
        // This DBG_VALUE would re-order assignments. If we can't copy-propagate
        // it, it can't be recovered. Set it undef.
        if (!attemptDebugCopyProp(MI, *DbgMI, MO.getReg()))
          DbgMI->setDebugValueUndef();
      } else {
        DbgUsersToSink.push_back(
            {DbgMI, SmallVector<unsigned, 2>(1, MO.getReg())});
      }
    }
  }

  // After sinking, some debug users may not be dominated any more. If possible,
  // copy-propagate their operands. As it's expensive, don't do this if there's
  // no debuginfo in the program.
  if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
    SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo);

  performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink);

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  // Note that we have to clear the kill flags for any register this instruction
  // uses as we may sink over another instruction which currently kills the
  // used registers.
  for (MachineOperand &MO : MI.operands()) {
    if (MO.isReg() && MO.isUse())
      RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.
  }

  return true;
}
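
/// After sinking the COPY \p MI into \p TargetBlock, rewrite any of its
/// DBG_VALUE users that are no longer dominated by the sunk copy so that they
/// refer to the copy's source register instead.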
void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
    MachineInstr &MI, MachineBasicBlock *TargetBlock) {
  assert(MI.isCopy());
  assert(MI.getOperand(1).isReg());

  // Enumerate all users of vreg operands that are def'd. Skip those that will
  // be sunk. For the rest, if they are not dominated by the block we will sink
  // MI into, propagate the copy source to them.
  SmallVector<MachineInstr *, 4> DbgDefUsers;
  SmallVector<Register, 4> DbgUseRegs;
  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
  for (auto &MO : MI.operands()) {
    if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
      continue;
    DbgUseRegs.push_back(MO.getReg());
    for (auto &User : MRI.use_instructions(MO.getReg())) {
      if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent()))
        continue;

      // If the user is in the same block, it will either be sunk or be a
      // use-before-def.
      if (User.getParent() == MI.getParent())
        continue;

      assert(User.hasDebugOperandForReg(MO.getReg()) &&
             "DBG_VALUE user of vreg, but has no operand for it?");
      DbgDefUsers.push_back(&User);
    }
  }

  // Point the users of this copy that are no longer dominated at the source
  // of the copy.
  for (auto *User : DbgDefUsers) {
    for (auto &Reg : DbgUseRegs) {
      for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) {
        DbgOp.setReg(MI.getOperand(1).getReg());
        DbgOp.setSubReg(MI.getOperand(1).getSubReg());
      }
    }
  }
}

//===----------------------------------------------------------------------===//
// This pass is not intended to be a replacement or a complete alternative
// for the pre-ra machine sink pass. It is only designed to sink COPY
// instructions which should be handled after RA.
//
// This pass sinks COPY instructions into a successor block, if the COPY is not
// used in the current block and the COPY is live-in to a single successor
// (i.e., doesn't require the COPY to be duplicated). This avoids executing the
// copy on paths where its result isn't needed. This also exposes additional
// opportunities for dead copy elimination and shrink wrapping.
//
// These copies were either not handled by or are inserted after the MachineSink
// pass. As an example of the former case, the MachineSink pass cannot sink
// COPY instructions with allocatable source registers; for AArch64 this type
// of copy instruction is frequently used to move function parameters (physical
// registers) into virtual registers in the entry block.
//
// For the machine IR below, this pass will sink %w19 in the entry into its
// successor (%bb.1) because %w19 is only live-in in %bb.1.
// %bb.0:
//    %wzr = SUBSWri %w1, 1
//    %w19 = COPY %w0
//    Bcc 11, %bb.2
// %bb.1:
//    Live Ins: %w19
//    BL @fun
//    %w0 = ADDWrr %w0, %w19
//    RET %w0
// %bb.2:
//    %w0 = COPY %wzr
//    RET %w0
// As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
// able to see %bb.0 as a candidate.
//===----------------------------------------------------------------------===//

namespace {

class PostRAMachineSinking : public MachineFunctionPass {
public:
  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;
  PostRAMachineSinking() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override { return "PostRA Machine Sink"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  /// Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;

  /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
  /// entry in this map for each unit it touches. The DBG_VALUE's entry
  /// consists of a pointer to the instruction itself, and a vector of
  /// registers referred to by the instruction that overlap the key register
  /// unit.
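  ///
  /// For example (illustrative): a DBG_VALUE reading $w0 is recorded under
  /// each register unit of $w0, so a later def of $w0 or of an aliasing
  /// register (such as $x0 on AArch64) can find it through any overlapping
  /// unit.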
  DenseMap<unsigned, SmallVector<MIRegs, 2>> SeenDbgInstrs;

  /// Sink Copy instructions unused in the same block close to their uses in
  /// successors.
  bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
                     const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);
};
} // namespace

char PostRAMachineSinking::ID = 0;
char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID;

INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink",
                "PostRA Machine Sink", false, false)
static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg,
                                  const TargetRegisterInfo *TRI) {
  LiveRegUnits LiveInRegUnits(*TRI);
  LiveInRegUnits.addLiveIns(MBB);
  return !LiveInRegUnits.available(Reg);
}

static MachineBasicBlock *
getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
                      const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
                      unsigned Reg, const TargetRegisterInfo *TRI) {
  // Try to find a single sinkable successor in which Reg is live-in.
  MachineBasicBlock *BB = nullptr;
  for (auto *SI : SinkableBBs) {
    if (aliasWithRegsInLiveIn(*SI, Reg, TRI)) {
      // If BB is set here, Reg is live-in to at least two sinkable
      // successors, so quit.
      if (BB)
        return nullptr;
      BB = SI;
    }
  }
  // Reg is not live-in to any sinkable successors.
  if (!BB)
    return nullptr;

  // Check if any register aliased with Reg is live-in to other successors.
  for (auto *SI : CurBB.successors()) {
    if (!SinkableBBs.count(SI) && aliasWithRegsInLiveIn(*SI, Reg, TRI))
      return nullptr;
  }
  return BB;
}
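
/// Variant over all registers defined by a COPY: returns the single sinkable
/// successor only if every defined register agrees on the same block.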
static MachineBasicBlock *
getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
                      const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
                      ArrayRef<unsigned> DefedRegsInCopy,
                      const TargetRegisterInfo *TRI) {
  MachineBasicBlock *SingleBB = nullptr;
  for (auto DefReg : DefedRegsInCopy) {
    MachineBasicBlock *BB =
        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI);
    if (!BB || (SingleBB && SingleBB != BB))
      return nullptr;
    SingleBB = BB;
  }
  return SingleBB;
}
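
/// When the COPY sinks below a later kill of its source register in \p CurBB,
/// transfer the kill flag: clear it on the old killing instruction and mark
/// the COPY's use operand as the kill instead.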
static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
                           SmallVectorImpl<unsigned> &UsedOpsInCopy,
                           LiveRegUnits &UsedRegUnits,
                           const TargetRegisterInfo *TRI) {
  for (auto U : UsedOpsInCopy) {
    MachineOperand &MO = MI->getOperand(U);
    Register SrcReg = MO.getReg();
    if (!UsedRegUnits.available(SrcReg)) {
      MachineBasicBlock::iterator NI = std::next(MI->getIterator());
      for (MachineInstr &UI : make_range(NI, CurBB.end())) {
        if (UI.killsRegister(SrcReg, TRI)) {
          UI.clearRegisterKills(SrcReg, TRI);
          MO.setIsKill(true);
          break;
        }
      }
    }
  }
}
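
/// Update the live-in lists after sinking \p MI into \p SuccBB: the registers
/// defined by the COPY stop being live-in (the definition now happens inside
/// the block), while its source registers become live-in, with lane masks
/// accumulated over their register units.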
static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
                         SmallVectorImpl<unsigned> &UsedOpsInCopy,
                         SmallVectorImpl<unsigned> &DefedRegsInCopy) {
  MachineFunction &MF = *SuccBB->getParent();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  for (unsigned DefReg : DefedRegsInCopy)
    for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S)
      SuccBB->removeLiveIn(*S);
  for (auto U : UsedOpsInCopy) {
    Register SrcReg = MI->getOperand(U).getReg();
    LaneBitmask Mask;
    for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S) {
      Mask |= (*S).second;
    }
    SuccBB->addLiveIn(SrcReg, Mask.any() ? Mask : LaneBitmask::getAll());
  }
  SuccBB->sortUniqueLiveIns();
}
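
/// Return true if sinking \p MI would conflict with a register that is
/// modified or used between \p MI and the end of its block (as recorded in
/// \p ModifiedRegUnits / \p UsedRegUnits). When there is no dependency, the
/// COPY's use operand indices and defined registers are collected into
/// \p UsedOpsInCopy and \p DefedRegsInCopy for the caller.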
static bool hasRegisterDependency(MachineInstr *MI,
                                  SmallVectorImpl<unsigned> &UsedOpsInCopy,
                                  SmallVectorImpl<unsigned> &DefedRegsInCopy,
                                  LiveRegUnits &ModifiedRegUnits,
                                  LiveRegUnits &UsedRegUnits) {
  bool HasRegDependency = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (MO.isDef()) {
      if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) {
        HasRegDependency = true;
        break;
      }
      DefedRegsInCopy.push_back(Reg);

      // FIXME: instead of isUse(), readsReg() would be a better fix here. For
      // example, we could then ignore modifications of a register with undef
      // uses. However, it's not perfectly clear whether skipping the internal
      // read is safe on all targets.
    } else if (MO.isUse()) {
      if (!ModifiedRegUnits.available(Reg)) {
        HasRegDependency = true;
        break;
      }
      UsedOpsInCopy.push_back(i);
    }
  }
  return HasRegDependency;
}

bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
                                         MachineFunction &MF,
                                         const TargetRegisterInfo *TRI,
                                         const TargetInstrInfo *TII) {
  SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs;
  // FIXME: For now, we sink only to a successor which has a single predecessor
  // so that we can directly sink COPY instructions to the successor without
  // adding any new block or branch instruction.
  for (MachineBasicBlock *SI : CurBB.successors())
    if (!SI->livein_empty() && SI->pred_size() == 1)
      SinkableBBs.insert(SI);

  if (SinkableBBs.empty())
    return false;

  bool Changed = false;

  // Track which registers have been modified and used between the end of the
  // block and the current instruction.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  SeenDbgInstrs.clear();

  for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(CurBB))) {
    // Track the operand index of each use in the COPY.
    SmallVector<unsigned, 2> UsedOpsInCopy;
    // Track the register numbers defined by the COPY.
    SmallVector<unsigned, 2> DefedRegsInCopy;

    // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
    // for DBG_VALUEs later, record them when they're encountered.
    if (MI.isDebugValue() && !MI.isDebugRef()) {
      SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits;
      bool IsValid = true;
      for (MachineOperand &MO : MI.debug_operands()) {
        if (MO.isReg() && MO.getReg().isPhysical()) {
          // Bail if we can already tell the sink would be rejected, rather
          // than needlessly accumulating lots of DBG_VALUEs.
          if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
                                    ModifiedRegUnits, UsedRegUnits)) {
            IsValid = false;
            break;
          }

          // Record debug use of each reg unit.
          for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid();
               ++RI)
            MIUnits[*RI].push_back(MO.getReg());
        }
      }
      if (IsValid) {
        for (auto &RegOps : MIUnits)
          SeenDbgInstrs[RegOps.first].emplace_back(&MI,
                                                   std::move(RegOps.second));
      }
      continue;
    }

    if (MI.isDebugOrPseudoInstr())
      continue;

    // Do not move any instruction across a function call.
    if (MI.isCall())
      return false;

    if (!MI.isCopy() || !MI.getOperand(0).isRenamable()) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }

    // Don't sink the COPY if it would violate a register dependency.
    if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
                              ModifiedRegUnits, UsedRegUnits)) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
           "Unexpected SrcReg or DefReg");

    MachineBasicBlock *SuccBB =
        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);

    // Don't sink if we cannot find a single sinkable successor in which Reg
    // is live-in.
    if (!SuccBB) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
           "Unexpected predecessor");

    // Collect DBG_VALUEs that must sink with this copy. We've previously
    // recorded which reg units DBG_VALUEs read; if this instruction writes
    // any of those units, the corresponding DBG_VALUEs must sink as well.
    MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
    for (auto &MO : MI.operands()) {
      if (!MO.isReg() || !MO.isDef())
        continue;

      for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid(); ++RI) {
        for (const auto &MIRegs : SeenDbgInstrs.lookup(*RI)) {
          auto &Regs = DbgValsToSinkMap[MIRegs.first];
          for (unsigned Reg : MIRegs.second)
            Regs.push_back(Reg);
        }
      }
    }
    auto DbgValsToSink = DbgValsToSinkMap.takeVector();

    LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);

    MachineBasicBlock::iterator InsertPos =
        SuccBB->SkipPHIsAndLabels(SuccBB->begin());
    if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) {
      LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
      continue;
    }

    // Clear the kill flag if SrcReg is killed between MI and the end of the
    // block.
    clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
    performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
    updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);

    Changed = true;
    ++NumPostRACopySink;
  }
  return Changed;
}

bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  bool Changed = false;
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  for (auto &BB : MF)
    Changed |= tryToSinkCopy(BB, MF, TRI, TII);

  return Changed;
}