MachineTraceMetrics.cpp 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351
  1. //===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/CodeGen/MachineTraceMetrics.h"
  9. #include "llvm/ADT/ArrayRef.h"
  10. #include "llvm/ADT/DenseMap.h"
  11. #include "llvm/ADT/Optional.h"
  12. #include "llvm/ADT/PostOrderIterator.h"
  13. #include "llvm/ADT/SmallPtrSet.h"
  14. #include "llvm/ADT/SmallVector.h"
  15. #include "llvm/ADT/SparseSet.h"
  16. #include "llvm/CodeGen/MachineBasicBlock.h"
  17. #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
  18. #include "llvm/CodeGen/MachineFunction.h"
  19. #include "llvm/CodeGen/MachineInstr.h"
  20. #include "llvm/CodeGen/MachineLoopInfo.h"
  21. #include "llvm/CodeGen/MachineOperand.h"
  22. #include "llvm/CodeGen/MachineRegisterInfo.h"
  23. #include "llvm/CodeGen/TargetRegisterInfo.h"
  24. #include "llvm/CodeGen/TargetSchedule.h"
  25. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  26. #include "llvm/InitializePasses.h"
  27. #include "llvm/MC/MCRegisterInfo.h"
  28. #include "llvm/Pass.h"
  29. #include "llvm/Support/Debug.h"
  30. #include "llvm/Support/ErrorHandling.h"
  31. #include "llvm/Support/Format.h"
  32. #include "llvm/Support/raw_ostream.h"
  33. #include <algorithm>
  34. #include <cassert>
  35. #include <iterator>
  36. #include <tuple>
  37. #include <utility>
  38. using namespace llvm;
  39. #define DEBUG_TYPE "machine-trace-metrics"
// Pass identification. The address of ID is the unique key used by the pass
// registry; its value is never read.
char MachineTraceMetrics::ID = 0;

char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;

// Register the pass and the analyses it depends on. The trailing flags mark
// it as an analysis pass.
INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE,
                      "Machine Trace Metrics", false, true)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE,
                    "Machine Trace Metrics", false, true)
  48. MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) {
  49. std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
  50. }
/// Declare the analyses this pass requires and note that it preserves
/// everything (it only caches metrics, it never transforms the function).
void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<MachineBranchProbabilityInfo>();
  AU.addRequired<MachineLoopInfo>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
/// Cache per-function state (target info, register info, loop analysis,
/// scheduling model) and size the per-block resource tables.
/// Always returns false: the function is never modified.
bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
  MF = &Func;
  const TargetSubtargetInfo &ST = MF->getSubtarget();
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  MRI = &MF->getRegInfo();
  Loops = &getAnalysis<MachineLoopInfo>();
  SchedModel.init(&ST);
  BlockInfo.resize(MF->getNumBlockIDs());
  // One row per block, one column per processor resource kind; see
  // getProcResourceCycles() for the indexing scheme.
  ProcResourceCycles.resize(MF->getNumBlockIDs() *
                            SchedModel.getNumProcResourceKinds());
  return false;
}
/// Drop all cached data, including any lazily-allocated ensembles.
void MachineTraceMetrics::releaseMemory() {
  MF = nullptr;
  BlockInfo.clear();
  for (Ensemble *&E : Ensembles) {
    delete E;
    // Null the slot so getEnsemble() will re-allocate on demand.
    E = nullptr;
  }
}
  78. //===----------------------------------------------------------------------===//
  79. // Fixed block information
  80. //===----------------------------------------------------------------------===//
  81. //
  82. // The number of instructions in a basic block and the CPU resources used by
  83. // those instructions don't depend on any given trace strategy.
/// Compute the resource usage in basic block MBB.
/// Results are cached in BlockInfo, so repeated calls for the same block are
/// cheap. Also fills in the block's row of ProcResourceCycles.
const MachineTraceMetrics::FixedBlockInfo*
MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
  assert(MBB && "No basic block");
  FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
  // Already computed by an earlier call?
  if (FBI->hasResources())
    return FBI;

  // Compute resource usage in the block.
  FBI->HasCalls = false;
  unsigned InstrCount = 0;

  // Add up per-processor resource cycles as well.
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  SmallVector<unsigned, 32> PRCycles(PRKinds);

  for (const auto &MI : *MBB) {
    // Transient instructions are not counted.
    if (MI.isTransient())
      continue;
    ++InstrCount;
    if (MI.isCall())
      FBI->HasCalls = true;

    // Count processor resources used, but only when the target provides a
    // detailed per-instruction scheduling model.
    if (!SchedModel.hasInstrSchedModel())
      continue;
    const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
    if (!SC->isValid())
      continue;

    for (TargetSchedModel::ProcResIter
         PI = SchedModel.getWriteProcResBegin(SC),
         PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
      assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
      PRCycles[PI->ProcResourceIdx] += PI->Cycles;
    }
  }
  FBI->InstrCount = InstrCount;

  // Scale the resource cycles so they are comparable across resource kinds.
  unsigned PROffset = MBB->getNumber() * PRKinds;
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceCycles[PROffset + K] =
        PRCycles[K] * SchedModel.getResourceFactor(K);

  return FBI;
}
  124. ArrayRef<unsigned>
  125. MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
  126. assert(BlockInfo[MBBNum].hasResources() &&
  127. "getResources() must be called before getProcResourceCycles()");
  128. unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  129. assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
  130. return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
  131. }
  132. //===----------------------------------------------------------------------===//
  133. // Ensemble utility functions
  134. //===----------------------------------------------------------------------===//
  135. MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
  136. : MTM(*ct) {
  137. BlockInfo.resize(MTM.BlockInfo.size());
  138. unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  139. ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
  140. ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
  141. }
// Virtual destructor serves as an anchor.
MachineTraceMetrics::Ensemble::~Ensemble() = default;

/// Look up the innermost loop containing MBB via the parent pass's loop info.
const MachineLoop*
MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
  return MTM.Loops->getLoopFor(MBB);
}
// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace above MBB.
void MachineTraceMetrics::Ensemble::
computeDepthResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources from trace above. The top block is simple: it heads
  // its own trace and nothing has executed above it.
  if (!TBI->Pred) {
    TBI->InstrDepth = 0;
    TBI->Head = MBB->getNumber();
    std::fill(ProcResourceDepths.begin() + PROffset,
              ProcResourceDepths.begin() + PROffset + PRKinds, 0);
    return;
  }

  // Compute from the block above. A post-order traversal ensures the
  // predecessor is always computed first.
  unsigned PredNum = TBI->Pred->getNumber();
  TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
  assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
  const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
  TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
  TBI->Head = PredTBI->Head;

  // Compute per-resource depths: depth above MBB = the predecessor's depth
  // plus the cycles consumed in the predecessor block itself.
  ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
  ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
}
// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace below MBB.
void MachineTraceMetrics::Ensemble::
computeHeightResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources for the current block. Unlike depths, heights include
  // the block's own instructions.
  TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
  ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());

  // The trace tail is done: the height is just this block's own usage.
  if (!TBI->Succ) {
    TBI->Tail = MBB->getNumber();
    llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
    return;
  }

  // Compute from the block below. A post-order traversal ensures the
  // successor is always computed first.
  unsigned SuccNum = TBI->Succ->getNumber();
  TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
  TBI->InstrHeight += SuccTBI->InstrHeight;
  TBI->Tail = SuccTBI->Tail;

  // Compute per-resource heights: successor's height plus this block's own
  // resource cycles.
  ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
}
  205. // Check if depth resources for MBB are valid and return the TBI.
  206. // Return NULL if the resources have been invalidated.
  207. const MachineTraceMetrics::TraceBlockInfo*
  208. MachineTraceMetrics::Ensemble::
  209. getDepthResources(const MachineBasicBlock *MBB) const {
  210. const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  211. return TBI->hasValidDepth() ? TBI : nullptr;
  212. }
  213. // Check if height resources for MBB are valid and return the TBI.
  214. // Return NULL if the resources have been invalidated.
  215. const MachineTraceMetrics::TraceBlockInfo*
  216. MachineTraceMetrics::Ensemble::
  217. getHeightResources(const MachineBasicBlock *MBB) const {
  218. const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  219. return TBI->hasValidHeight() ? TBI : nullptr;
  220. }
  221. /// Get an array of processor resource depths for MBB. Indexed by processor
  222. /// resource kind, this array contains the scaled processor resources consumed
  223. /// by all blocks preceding MBB in its trace. It does not include instructions
  224. /// in MBB.
  225. ///
  226. /// Compare TraceBlockInfo::InstrDepth.
  227. ArrayRef<unsigned>
  228. MachineTraceMetrics::Ensemble::
  229. getProcResourceDepths(unsigned MBBNum) const {
  230. unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  231. assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
  232. return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
  233. }
  234. /// Get an array of processor resource heights for MBB. Indexed by processor
  235. /// resource kind, this array contains the scaled processor resources consumed
  236. /// by this block and all blocks following it in its trace.
  237. ///
  238. /// Compare TraceBlockInfo::InstrHeight.
  239. ArrayRef<unsigned>
  240. MachineTraceMetrics::Ensemble::
  241. getProcResourceHeights(unsigned MBBNum) const {
  242. unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  243. assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
  244. return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
  245. }
  246. //===----------------------------------------------------------------------===//
  247. // Trace Selection Strategies
  248. //===----------------------------------------------------------------------===//
  249. //
  250. // A trace selection strategy is implemented as a sub-class of Ensemble. The
  251. // trace through a block B is computed by two DFS traversals of the CFG
  252. // starting from B. One upwards, and one downwards. During the upwards DFS,
  253. // pickTracePred() is called on the post-ordered blocks. During the downwards
  254. // DFS, pickTraceSucc() is called in a post-order.
  255. //
  256. // We never allow traces that leave loops, but we do allow traces to enter
  257. // nested loops. We also never allow traces to contain back-edges.
  258. //
  259. // This means that a loop header can never appear above the center block of a
  260. // trace, except as the trace head. Below the center block, loop exiting edges
  261. // are banned.
  262. //
  263. // Return true if an edge from the From loop to the To loop is leaving a loop.
  264. // Either of To and From can be null.
  265. static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
  266. return From && !From->contains(To);
  267. }
// MinInstrCountEnsemble - Pick the trace that executes the least number of
// instructions.
namespace {

/// Trace selection strategy that minimizes the instruction count of the
/// trace through each block (smallest InstrDepth above, smallest InstrHeight
/// below).
class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
  const char *getName() const override { return "MinInstr"; }
  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;

public:
  MinInstrCountEnsemble(MachineTraceMetrics *mtm)
      : MachineTraceMetrics::Ensemble(mtm) {}
};

} // end anonymous namespace
  280. // Select the preferred predecessor for MBB.
  281. const MachineBasicBlock*
  282. MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
  283. if (MBB->pred_empty())
  284. return nullptr;
  285. const MachineLoop *CurLoop = getLoopFor(MBB);
  286. // Don't leave loops, and never follow back-edges.
  287. if (CurLoop && MBB == CurLoop->getHeader())
  288. return nullptr;
  289. unsigned CurCount = MTM.getResources(MBB)->InstrCount;
  290. const MachineBasicBlock *Best = nullptr;
  291. unsigned BestDepth = 0;
  292. for (const MachineBasicBlock *Pred : MBB->predecessors()) {
  293. const MachineTraceMetrics::TraceBlockInfo *PredTBI =
  294. getDepthResources(Pred);
  295. // Ignore cycles that aren't natural loops.
  296. if (!PredTBI)
  297. continue;
  298. // Pick the predecessor that would give this block the smallest InstrDepth.
  299. unsigned Depth = PredTBI->InstrDepth + CurCount;
  300. if (!Best || Depth < BestDepth) {
  301. Best = Pred;
  302. BestDepth = Depth;
  303. }
  304. }
  305. return Best;
  306. }
  307. // Select the preferred successor for MBB.
  308. const MachineBasicBlock*
  309. MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
  310. if (MBB->pred_empty())
  311. return nullptr;
  312. const MachineLoop *CurLoop = getLoopFor(MBB);
  313. const MachineBasicBlock *Best = nullptr;
  314. unsigned BestHeight = 0;
  315. for (const MachineBasicBlock *Succ : MBB->successors()) {
  316. // Don't consider back-edges.
  317. if (CurLoop && Succ == CurLoop->getHeader())
  318. continue;
  319. // Don't consider successors exiting CurLoop.
  320. if (isExitingLoop(CurLoop, getLoopFor(Succ)))
  321. continue;
  322. const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
  323. getHeightResources(Succ);
  324. // Ignore cycles that aren't natural loops.
  325. if (!SuccTBI)
  326. continue;
  327. // Pick the successor that would give this block the smallest InstrHeight.
  328. unsigned Height = SuccTBI->InstrHeight;
  329. if (!Best || Height < BestHeight) {
  330. Best = Succ;
  331. BestHeight = Height;
  332. }
  333. }
  334. return Best;
  335. }
// Get an Ensemble sub-class for the requested trace strategy.
MachineTraceMetrics::Ensemble *
MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
  assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
  Ensemble *&E = Ensembles[strategy];
  // Reuse a previously allocated ensemble for this strategy.
  if (E)
    return E;

  // Allocate new Ensemble on demand. The assignment to E caches it for
  // subsequent calls (and for releaseMemory() to delete).
  switch (strategy) {
  case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
  default: llvm_unreachable("Invalid trace strategy enum");
  }
}
  349. void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
  350. LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
  351. << '\n');
  352. BlockInfo[MBB->getNumber()].invalidate();
  353. for (Ensemble *E : Ensembles)
  354. if (E)
  355. E->invalidate(MBB);
  356. }
/// Sanity-check the cached state against the current function.
/// A no-op before runOnMachineFunction has been called, and only active in
/// asserts builds.
void MachineTraceMetrics::verifyAnalysis() const {
  if (!MF)
    return;
#ifndef NDEBUG
  assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
  for (Ensemble *E : Ensembles)
    if (E)
      E->verify();
#endif
}
  367. //===----------------------------------------------------------------------===//
  368. // Trace building
  369. //===----------------------------------------------------------------------===//
  370. //
  371. // Traces are built by two CFG traversals. To avoid recomputing too much, use a
  372. // set abstraction that confines the search to the current loop, and doesn't
  373. // revisit blocks.
namespace {

/// Bounds and visited-set state for the loop-constrained CFG traversals that
/// build traces. Shared between the upward and the downward search.
struct LoopBounds {
  // The ensemble's per-block trace info; used to prune blocks whose trace
  // data is already valid.
  MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
  // Blocks seen by the current traversal, guarding against CFG cycles that
  // are not natural loops.
  SmallPtrSet<const MachineBasicBlock*, 8> Visited;
  const MachineLoopInfo *Loops;
  // False while searching upwards for the trace head, true while searching
  // downwards for the trace tail.
  bool Downward = false;

  LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
             const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
};

} // end anonymous namespace
// Specialize po_iterator_storage in order to prune the post-order traversal so
// it is limited to the current loop and doesn't traverse the loop back edges.
namespace llvm {

template<>
class po_iterator_storage<LoopBounds, true> {
  LoopBounds &LB;

public:
  po_iterator_storage(LoopBounds &lb) : LB(lb) {}

  void finishPostorder(const MachineBasicBlock*) {}

  /// Decide whether the traversal should follow the edge From -> To.
  /// Returning false prunes To from the search.
  bool insertEdge(Optional<const MachineBasicBlock *> From,
                  const MachineBasicBlock *To) {
    // Skip already visited To blocks.
    MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
    if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
      return false;

    // From is null once when To is the trace center block.
    if (From) {
      if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
        // Don't follow backedges, don't leave FromLoop when going upwards.
        if ((LB.Downward ? To : *From) == FromLoop->getHeader())
          return false;
        // Don't leave FromLoop.
        if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
          return false;
      }
    }

    // To is a new block. Mark the block as visited in case the CFG has cycles
    // that MachineLoopInfo didn't recognize as a natural loop.
    return LB.Visited.insert(To).second;
  }
};

} // end namespace llvm
/// Compute the trace through MBB: an upwards search picks a preferred
/// predecessor for every block above MBB, then a downwards search picks a
/// preferred successor for every block below it. Depth/height resources are
/// filled in along the way.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
                    << printMBBReference(*MBB) << '\n');
  // Set up loop bounds for the backwards post-order traversal.
  LoopBounds Bounds(BlockInfo, MTM.Loops);

  // Run an upwards post-order search for the trace start.
  Bounds.Downward = false;
  Bounds.Visited.clear();
  for (auto I : inverse_post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << "  pred for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the predecessors have been visited, pick the preferred one.
    TBI.Pred = pickTracePred(I);
    LLVM_DEBUG({
      if (TBI.Pred)
        dbgs() << printMBBReference(*TBI.Pred) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leading to I is now known, compute the depth resources.
    computeDepthResources(I);
  }

  // Run a downwards post-order search for the trace end.
  Bounds.Downward = true;
  Bounds.Visited.clear();
  for (auto I : post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << "  succ for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the successors have been visited, pick the preferred one.
    TBI.Succ = pickTraceSucc(I);
    LLVM_DEBUG({
      if (TBI.Succ)
        dbgs() << printMBBReference(*TBI.Succ) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leaving I is now known, compute the height resources.
    computeHeightResources(I);
  }
}
/// Invalidate traces through BadMBB.
/// Heights of blocks above BadMBB and depths of blocks below it may depend on
/// it, so walk the trace in both directions invalidating only the blocks
/// whose preferred successor/predecessor chain actually passes through BadMBB.
void
MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
  SmallVector<const MachineBasicBlock*, 16> WorkList;
  TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];

  // Invalidate height resources of blocks above MBB.
  if (BadTBI.hasValidHeight()) {
    BadTBI.invalidateHeight();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
                        << getName() << " height.\n");
      // Find any MBB predecessors that have MBB as their preferred successor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Pred : MBB->predecessors()) {
        TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
        if (!TBI.hasValidHeight())
          continue;
        if (TBI.Succ == MBB) {
          TBI.invalidateHeight();
          WorkList.push_back(Pred);
          continue;
        }
        // Verify that TBI.Succ is actually a Pred successor.
        assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Invalidate depth resources of blocks below MBB.
  if (BadTBI.hasValidDepth()) {
    BadTBI.invalidateDepth();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
                        << getName() << " depth.\n");
      // Find any MBB successors that have MBB as their preferred predecessor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Succ : MBB->successors()) {
        TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
        if (!TBI.hasValidDepth())
          continue;
        if (TBI.Pred == MBB) {
          TBI.invalidateDepth();
          WorkList.push_back(Succ);
          continue;
        }
        // Verify that TBI.Pred is actually a Succ predecessor.
        assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Clear any per-instruction data. We only have to do this for BadMBB itself
  // because the instructions in that block may change. Other blocks may be
  // invalidated, but their instructions will stay the same, so there is no
  // need to erase the Cycle entries. They will be overwritten when we
  // recompute.
  for (const auto &I : *BadMBB)
    Cycles.erase(&I);
}
/// Check the ensemble's invariants: the trace pred/succ links must agree
/// with the CFG, every link must point at still-valid trace data, and no
/// trace may contain a loop back-edge. Asserts builds only.
void MachineTraceMetrics::Ensemble::verify() const {
#ifndef NDEBUG
  assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
         "Outdated BlockInfo size");
  for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
    const TraceBlockInfo &TBI = BlockInfo[Num];
    if (TBI.hasValidDepth() && TBI.Pred) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
             "Trace is broken, depth should have been invalidated.");
      // A loop header with a trace predecessor would mean the trace follows
      // a back-edge.
      const MachineLoop *Loop = getLoopFor(MBB);
      assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
    }
    if (TBI.hasValidHeight() && TBI.Succ) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
             "Trace is broken, height should have been invalidated.");
      // Following an edge to the header of the same loop is a back-edge.
      const MachineLoop *Loop = getLoopFor(MBB);
      const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
      assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
             "Trace contains backedge");
    }
  }
#endif
}
  545. //===----------------------------------------------------------------------===//
  546. // Data Dependencies
  547. //===----------------------------------------------------------------------===//
  548. //
  549. // Compute the depth and height of each instruction based on data dependencies
  550. // and instruction latencies. These cycle numbers assume that the CPU can issue
  551. // an infinite number of instructions per cycle as long as their dependencies
  552. // are ready.
  553. // A data dependency is represented as a defining MI and operand numbers on the
  554. // defining and using MI.
namespace {

/// A data dependency: operand DefOp of DefMI produces a value that is
/// consumed by operand UseOp of some using instruction.
struct DataDep {
  const MachineInstr *DefMI;
  unsigned DefOp;
  unsigned UseOp;

  DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
      : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}

  /// Create a DataDep from an SSA form virtual register.
  DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
      : UseOp(UseOp) {
    assert(Register::isVirtualRegister(VirtReg));
    MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
    assert(!DefI.atEnd() && "Register has no defs");
    DefMI = DefI->getParent();
    DefOp = DefI.getOperandNo();
    // SSA form guarantees exactly one def.
    assert((++DefI).atEnd() && "Register has multiple defs");
  }
};

} // end anonymous namespace
  574. // Get the input data dependencies that must be ready before UseMI can issue.
  575. // Return true if UseMI has any physreg operands.
  576. static bool getDataDeps(const MachineInstr &UseMI,
  577. SmallVectorImpl<DataDep> &Deps,
  578. const MachineRegisterInfo *MRI) {
  579. // Debug values should not be included in any calculations.
  580. if (UseMI.isDebugInstr())
  581. return false;
  582. bool HasPhysRegs = false;
  583. for (MachineInstr::const_mop_iterator I = UseMI.operands_begin(),
  584. E = UseMI.operands_end(); I != E; ++I) {
  585. const MachineOperand &MO = *I;
  586. if (!MO.isReg())
  587. continue;
  588. Register Reg = MO.getReg();
  589. if (!Reg)
  590. continue;
  591. if (Register::isPhysicalRegister(Reg)) {
  592. HasPhysRegs = true;
  593. continue;
  594. }
  595. // Collect virtual register reads.
  596. if (MO.readsReg())
  597. Deps.push_back(DataDep(MRI, Reg, UseMI.getOperandNo(I)));
  598. }
  599. return HasPhysRegs;
  600. }
  601. // Get the input data dependencies of a PHI instruction, using Pred as the
  602. // preferred predecessor.
  603. // This will add at most one dependency to Deps.
  604. static void getPHIDeps(const MachineInstr &UseMI,
  605. SmallVectorImpl<DataDep> &Deps,
  606. const MachineBasicBlock *Pred,
  607. const MachineRegisterInfo *MRI) {
  608. // No predecessor at the beginning of a trace. Ignore dependencies.
  609. if (!Pred)
  610. return;
  611. assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
  612. for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
  613. if (UseMI.getOperand(i + 1).getMBB() == Pred) {
  614. Register Reg = UseMI.getOperand(i).getReg();
  615. Deps.push_back(DataDep(MRI, Reg, i));
  616. return;
  617. }
  618. }
  619. }
// Identify physreg dependencies for UseMI, and update the live regunit
// tracking set when scanning instructions downwards.
static void updatePhysDepsDownwards(const MachineInstr *UseMI,
                                    SmallVectorImpl<DataDep> &Deps,
                                    SparseSet<LiveRegUnit> &RegUnits,
                                    const TargetRegisterInfo *TRI) {
  // Physregs whose regunits die at UseMI (dead defs and kill uses).
  SmallVector<MCRegister, 8> Kills;
  // Operand indices of physreg defs that stay live after UseMI.
  SmallVector<unsigned, 8> LiveDefOps;

  for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(),
       ME = UseMI->operands_end(); MI != ME; ++MI) {
    const MachineOperand &MO = *MI;
    if (!MO.isReg() || !MO.getReg().isPhysical())
      continue;
    MCRegister Reg = MO.getReg().asMCReg();
    // Track live defs and kills for updating RegUnits.
    if (MO.isDef()) {
      if (MO.isDead())
        Kills.push_back(Reg);
      else
        LiveDefOps.push_back(UseMI->getOperandNo(MI));
    } else if (MO.isKill())
      Kills.push_back(Reg);
    // Identify dependencies.
    if (!MO.readsReg())
      continue;
    // The first regunit of Reg found in RegUnits names the instruction this
    // read depends on; one unit is enough to establish the dependency.
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
      if (I == RegUnits.end())
        continue;
      Deps.push_back(DataDep(I->MI, I->Op, UseMI->getOperandNo(MI)));
      break;
    }
  }

  // Update RegUnits to reflect live registers after UseMI.
  // First kills.
  for (MCRegister Kill : Kills)
    for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units)
      RegUnits.erase(*Units);

  // Second, live defs. Processing defs after kills matters: a register that
  // is both killed and redefined by UseMI must end up live.
  for (unsigned DefOp : LiveDefOps) {
    for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg().asMCReg(),
                                 TRI);
         Units.isValid(); ++Units) {
      LiveRegUnit &LRU = RegUnits[*Units];
      LRU.MI = UseMI;
      LRU.Op = DefOp;
    }
  }
}
  669. /// The length of the critical path through a trace is the maximum of two path
  670. /// lengths:
  671. ///
  672. /// 1. The maximum height+depth over all instructions in the trace center block.
  673. ///
  674. /// 2. The longest cross-block dependency chain. For small blocks, it is
  675. /// possible that the critical path through the trace doesn't include any
  676. /// instructions in the block.
  677. ///
  678. /// This function computes the second number from the live-in list of the
  679. /// center block.
  680. unsigned MachineTraceMetrics::Ensemble::
  681. computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
  682. assert(TBI.HasValidInstrDepths && "Missing depth info");
  683. assert(TBI.HasValidInstrHeights && "Missing height info");
  684. unsigned MaxLen = 0;
  685. for (const LiveInReg &LIR : TBI.LiveIns) {
  686. if (!LIR.Reg.isVirtual())
  687. continue;
  688. const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
  689. // Ignore dependencies outside the current trace.
  690. const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
  691. if (!DefTBI.isUsefulDominator(TBI))
  692. continue;
  693. unsigned Len = LIR.Height + Cycles[DefMI].Depth;
  694. MaxLen = std::max(MaxLen, Len);
  695. }
  696. return MaxLen;
  697. }
/// Compute the depth (earliest issue cycle) of UseMI from the depths of its
/// data-dependency defs, record it in Cycles, and update TBI's critical path
/// when heights are already known.
void MachineTraceMetrics::Ensemble::
updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI,
            SparseSet<LiveRegUnit> &RegUnits) {
  SmallVector<DataDep, 8> Deps;
  // Collect all data dependencies.
  if (UseMI.isPHI())
    getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
  else if (getDataDeps(UseMI, Deps, MTM.MRI))
    updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);

  // Filter and process dependencies, computing the earliest issue cycle.
  unsigned Cycle = 0;
  for (const DataDep &Dep : Deps) {
    const TraceBlockInfo &DepTBI =
        BlockInfo[Dep.DefMI->getParent()->getNumber()];
    // Ignore dependencies from outside the current trace.
    if (!DepTBI.isUsefulDominator(TBI))
      continue;
    assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
    unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
    // Add latency if DefMI is a real instruction. Transients get latency 0.
    if (!Dep.DefMI->isTransient())
      DepCycle += MTM.SchedModel
        .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
    Cycle = std::max(Cycle, DepCycle);
  }
  // Remember the instruction depth.
  InstrCycles &MICycles = Cycles[&UseMI];
  MICycles.Depth = Cycle;

  if (TBI.HasValidInstrHeights) {
    // Update critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
    LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
  } else {
    LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
  }
}
  734. void MachineTraceMetrics::Ensemble::
  735. updateDepth(const MachineBasicBlock *MBB, const MachineInstr &UseMI,
  736. SparseSet<LiveRegUnit> &RegUnits) {
  737. updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
  738. }
  739. void MachineTraceMetrics::Ensemble::
  740. updateDepths(MachineBasicBlock::iterator Start,
  741. MachineBasicBlock::iterator End,
  742. SparseSet<LiveRegUnit> &RegUnits) {
  743. for (; Start != End; Start++)
  744. updateDepth(Start->getParent(), *Start, RegUnits);
  745. }
/// Compute instruction depths for all instructions above or in MBB in its
/// trace. This assumes that the trace through MBB has already been computed.
void MachineTraceMetrics::Ensemble::
computeInstrDepths(const MachineBasicBlock *MBB) {
  // The top of the trace may already be computed, and HasValidInstrDepths
  // implies Head->HasValidInstrDepths, so we only need to start from the first
  // block in the trace that needs to be recomputed.
  // Walk up the trace collecting the stale blocks; stop at the first block
  // whose instruction depths are still valid.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidDepth() && "Incomplete trace");
    if (TBI.HasValidInstrDepths)
      break;
    Stack.push_back(MBB);
    MBB = TBI.Pred;
  } while (MBB);

  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
  // in the trace. We should track any live-out physregs that were defined in
  // the trace. This is quite rare in SSA form, typically created by CSE
  // hoisting a compare.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // Go through trace blocks in top-down order, stopping after the center block.
  while (!Stack.empty()) {
    MBB = Stack.pop_back_val();
    LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrDepths = true;
    TBI.CriticalPath = 0;

    // Print out resource depths here as well.
    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
      ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
      for (unsigned K = 0; K != PRDepths.size(); ++K)
        if (PRDepths[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Also compute the critical path length through MBB when possible.
    if (TBI.HasValidInstrHeights)
      TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);

    // Compute the depth of every instruction in this block.
    for (const auto &UseMI : *MBB) {
      updateDepth(TBI, UseMI, RegUnits);
    }
  }
}
// Identify physreg dependencies for MI when scanning instructions upwards.
// Return the issue height of MI after considering any live regunits.
// Height is the issue height computed from virtual register dependencies alone.
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
                                      SparseSet<LiveRegUnit> &RegUnits,
                                      const TargetSchedModel &SchedModel,
                                      const TargetInstrInfo *TII,
                                      const TargetRegisterInfo *TRI) {
  // Operand indices of physreg reads; their regunit entries are refreshed
  // below, once MI's final height is known.
  SmallVector<unsigned, 8> ReadOps;

  for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
                                        MOE = MI.operands_end();
       MOI != MOE; ++MOI) {
    const MachineOperand &MO = *MOI;
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Register::isPhysicalRegister(Reg))
      continue;
    if (MO.readsReg())
      ReadOps.push_back(MI.getOperandNo(MOI));
    if (!MO.isDef())
      continue;
    // This is a def of Reg. Remove corresponding entries from RegUnits, and
    // update MI Height to consider the physreg dependencies.
    for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
         ++Units) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
      if (I == RegUnits.end())
        continue;
      unsigned DepHeight = I->Cycle;
      if (!MI.isTransient()) {
        // We may not know the UseMI of this dependency, if it came from the
        // live-in list. SchedModel can handle a NULL UseMI.
        DepHeight += SchedModel.computeOperandLatency(&MI, MI.getOperandNo(MOI),
                                                      I->MI, I->Op);
      }
      Height = std::max(Height, DepHeight);
      // This regunit is dead above MI.
      RegUnits.erase(I);
    }
  }

  // Now we know the height of MI. Update any regunits read.
  for (size_t I = 0, E = ReadOps.size(); I != E; ++I) {
    MCRegister Reg = MI.getOperand(ReadOps[I]).getReg().asMCReg();
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      LiveRegUnit &LRU = RegUnits[*Units];
      // Set the height to the highest reader of the unit.
      if (LRU.Cycle <= Height && LRU.MI != &MI) {
        LRU.Cycle = Height;
        LRU.MI = &MI;
        LRU.Op = ReadOps[I];
      }
    }
  }

  return Height;
}
  851. using MIHeightMap = DenseMap<const MachineInstr *, unsigned>;
  852. // Push the height of DefMI upwards if required to match UseMI.
  853. // Return true if this is the first time DefMI was seen.
  854. static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
  855. unsigned UseHeight, MIHeightMap &Heights,
  856. const TargetSchedModel &SchedModel,
  857. const TargetInstrInfo *TII) {
  858. // Adjust height by Dep.DefMI latency.
  859. if (!Dep.DefMI->isTransient())
  860. UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
  861. Dep.UseOp);
  862. // Update Heights[DefMI] to be the maximum height seen.
  863. MIHeightMap::iterator I;
  864. bool New;
  865. std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
  866. if (New)
  867. return true;
  868. // DefMI has been pushed before. Give it the max height.
  869. if (I->second < UseHeight)
  870. I->second = UseHeight;
  871. return false;
  872. }
  873. /// Assuming that the virtual register defined by DefMI:DefOp was used by
  874. /// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
  875. /// when reaching the block that contains DefMI.
  876. void MachineTraceMetrics::Ensemble::
  877. addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
  878. ArrayRef<const MachineBasicBlock*> Trace) {
  879. assert(!Trace.empty() && "Trace should contain at least one block");
  880. Register Reg = DefMI->getOperand(DefOp).getReg();
  881. assert(Register::isVirtualRegister(Reg));
  882. const MachineBasicBlock *DefMBB = DefMI->getParent();
  883. // Reg is live-in to all blocks in Trace that follow DefMBB.
  884. for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
  885. if (MBB == DefMBB)
  886. return;
  887. TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
  888. // Just add the register. The height will be updated later.
  889. TBI.LiveIns.push_back(Reg);
  890. }
  891. }
/// Compute instruction heights in the trace through MBB. This updates MBB and
/// the blocks below it in the trace. It is assumed that the trace has already
/// been computed.
void MachineTraceMetrics::Ensemble::
computeInstrHeights(const MachineBasicBlock *MBB) {
  // The bottom of the trace may already be computed.
  // Find the blocks that need updating.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidHeight() && "Incomplete trace");
    if (TBI.HasValidInstrHeights)
      break;
    Stack.push_back(MBB);
    // Live-in lists are recomputed from scratch below.
    TBI.LiveIns.clear();
    MBB = TBI.Succ;
  } while (MBB);

  // As we move upwards in the trace, keep track of instructions that are
  // required by deeper trace instructions. Map MI -> height required so far.
  MIHeightMap Heights;

  // For physregs, the def isn't known when we see the use.
  // Instead, keep track of the highest use of each regunit.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // If the bottom of the trace was already precomputed, initialize heights
  // from its live-in list.
  // MBB is the highest precomputed block in the trace.
  if (MBB) {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    for (LiveInReg &LI : TBI.LiveIns) {
      if (LI.Reg.isVirtual()) {
        // For virtual registers, the def latency is included.
        unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
        if (Height < LI.Height)
          Height = LI.Height;
      } else {
        // For register units, the def latency is not included because we don't
        // know the def yet.
        RegUnits[LI.Reg].Cycle = LI.Height;
      }
    }
  }

  // Go through the trace blocks in bottom-up order.
  SmallVector<DataDep, 8> Deps;
  for (;!Stack.empty(); Stack.pop_back()) {
    MBB = Stack.back();
    LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrHeights = true;
    TBI.CriticalPath = 0;

    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
      ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
      for (unsigned K = 0; K != PRHeights.size(); ++K)
        if (PRHeights[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Get dependencies from PHIs in the trace successor.
    const MachineBasicBlock *Succ = TBI.Succ;
    // If MBB is the last block in the trace, and it has a back-edge to the
    // loop header, get loop-carried dependencies from PHIs in the header. For
    // that purpose, pretend that all the loop header PHIs have height 0.
    if (!Succ)
      if (const MachineLoop *Loop = getLoopFor(MBB))
        if (MBB->isSuccessor(Loop->getHeader()))
          Succ = Loop->getHeader();

    if (Succ) {
      for (const auto &PHI : *Succ) {
        if (!PHI.isPHI())
          break;
        Deps.clear();
        getPHIDeps(PHI, Deps, MBB, MTM.MRI);
        if (!Deps.empty()) {
          // Loop header PHI heights are all 0.
          unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
          LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
          if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
                            MTM.TII))
            addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
        }
      }
    }

    // Go through the block backwards.
    for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
         BI != BB;) {
      const MachineInstr &MI = *--BI;

      // Find the MI height as determined by virtual register uses in the
      // trace below.
      unsigned Cycle = 0;
      MIHeightMap::iterator HeightI = Heights.find(&MI);
      if (HeightI != Heights.end()) {
        Cycle = HeightI->second;
        // We won't be seeing any more MI uses.
        Heights.erase(HeightI);
      }

      // Don't process PHI deps. They depend on the specific predecessor, and
      // we'll get them when visiting the predecessor.
      Deps.clear();
      bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);

      // There may also be regunit dependencies to include in the height.
      if (HasPhysRegs)
        Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
                                      MTM.TII, MTM.TRI);

      // Update the required height of any virtual registers read by MI.
      for (const DataDep &Dep : Deps)
        if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
          addLiveIns(Dep.DefMI, Dep.DefOp, Stack);

      InstrCycles &MICycles = Cycles[&MI];
      MICycles.Height = Cycle;
      if (!TBI.HasValidInstrDepths) {
        LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
        continue;
      }
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
      LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
    }

    // Update virtual live-in heights. They were added by addLiveIns() with a 0
    // height because the final height isn't known until now.
    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
    for (LiveInReg &LIR : TBI.LiveIns) {
      const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
      LIR.Height = Heights.lookup(DefMI);
      LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
    }

    // Transfer the live regunits to the live-in list.
    for (SparseSet<LiveRegUnit>::const_iterator
         RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
      TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
      LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RI->RegUnit, MTM.TRI) << '@'
                        << RI->Cycle);
    }
    LLVM_DEBUG(dbgs() << '\n');

    if (!TBI.HasValidInstrDepths)
      continue;
    // Add live-ins to the critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath,
                                computeCrossBlockCriticalPath(TBI));
    LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
  }
}
  1037. MachineTraceMetrics::Trace
  1038. MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
  1039. TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
  1040. if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
  1041. computeTrace(MBB);
  1042. if (!TBI.HasValidInstrDepths)
  1043. computeInstrDepths(MBB);
  1044. if (!TBI.HasValidInstrHeights)
  1045. computeInstrHeights(MBB);
  1046. return Trace(*this, TBI);
  1047. }
  1048. unsigned
  1049. MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
  1050. assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
  1051. "MI must be in the trace center block");
  1052. InstrCycles Cyc = getInstrCycles(MI);
  1053. return getCriticalPath() - (Cyc.Depth + Cyc.Height);
  1054. }
  1055. unsigned
  1056. MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
  1057. const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
  1058. SmallVector<DataDep, 1> Deps;
  1059. getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
  1060. assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
  1061. DataDep &Dep = Deps.front();
  1062. unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
  1063. // Add latency if DefMI is a real instruction. Transients get latency 0.
  1064. if (!Dep.DefMI->isTransient())
  1065. DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
  1066. &PHI, Dep.UseOp);
  1067. return DepCycle;
  1068. }
  1069. /// When bottom is set include instructions in current block in estimate.
  1070. unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
  1071. // Find the limiting processor resource.
  1072. // Numbers have been pre-scaled to be comparable.
  1073. unsigned PRMax = 0;
  1074. ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  1075. if (Bottom) {
  1076. ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
  1077. for (unsigned K = 0; K != PRDepths.size(); ++K)
  1078. PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
  1079. } else {
  1080. for (unsigned PRD : PRDepths)
  1081. PRMax = std::max(PRMax, PRD);
  1082. }
  1083. // Convert to cycle count.
  1084. PRMax = TE.MTM.getCycles(PRMax);
  1085. /// All instructions before current block
  1086. unsigned Instrs = TBI.InstrDepth;
  1087. // plus instructions in current block
  1088. if (Bottom)
  1089. Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
  1090. if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
  1091. Instrs /= IW;
  1092. // Assume issue width 1 without a schedule model.
  1093. return std::max(Instrs, PRMax);
  1094. }
/// Estimate the trace length in cycles over all processor resources,
/// optionally including extra blocks and adding/removing hypothetical
/// instructions (given by scheduling class).
unsigned MachineTraceMetrics::Trace::getResourceLength(
    ArrayRef<const MachineBasicBlock *> Extrablocks,
    ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
    ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
  // Add up resources above and below the center block.
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
  unsigned PRMax = 0;

  // Capture computing cycles from extra instructions: sum the (pre-scaled)
  // cycles the given sched classes spend on resource ResourceIdx.
  auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
                            unsigned ResourceIdx)
      ->unsigned {
    unsigned Cycles = 0;
    for (const MCSchedClassDesc *SC : Instrs) {
      if (!SC->isValid())
        continue;
      for (TargetSchedModel::ProcResIter
               PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
               PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
           PI != PE; ++PI) {
        if (PI->ProcResourceIdx != ResourceIdx)
          continue;
        Cycles +=
            (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
      }
    }
    return Cycles;
  };

  // Find the most-used resource after applying the adjustments.
  for (unsigned K = 0; K != PRDepths.size(); ++K) {
    unsigned PRCycles = PRDepths[K] + PRHeights[K];
    for (const MachineBasicBlock *MBB : Extrablocks)
      PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K];
    PRCycles += extraCycles(ExtraInstrs, K);
    PRCycles -= extraCycles(RemoveInstrs, K);
    PRMax = std::max(PRMax, PRCycles);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  // Instrs: #instructions in current trace outside current block.
  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
  // Add instruction count from the extra blocks.
  for (const MachineBasicBlock *MBB : Extrablocks)
    Instrs += TE.MTM.getResources(MBB)->InstrCount;
  Instrs += ExtraInstrs.size();
  Instrs -= RemoveInstrs.size();
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}
  1145. bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
  1146. const MachineInstr &UseMI) const {
  1147. if (DefMI.getParent() == UseMI.getParent())
  1148. return true;
  1149. const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
  1150. const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
  1151. return DepTBI.isUsefulDominator(TBI);
  1152. }
/// Print a per-block summary of this ensemble, one line per basic block.
void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
  OS << getName() << " ensemble:\n";
  for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
    // BlockInfo is indexed by MBB number, so i is the block number.
    OS << " %bb." << i << '\t';
    BlockInfo[i].print(OS);
    OS << '\n';
  }
}
/// Print the depth, height, and critical-path state of this block's trace
/// info; "invalid" markers indicate layers not yet computed.
void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
  if (hasValidDepth()) {
    OS << "depth=" << InstrDepth;
    if (Pred)
      OS << " pred=" << printMBBReference(*Pred);
    else
      OS << " pred=null";
    OS << " head=%bb." << Head;
    // "+instrs" means per-instruction depths have been computed.
    if (HasValidInstrDepths)
      OS << " +instrs";
  } else
    OS << "depth invalid";
  OS << ", ";
  if (hasValidHeight()) {
    OS << "height=" << InstrHeight;
    if (Succ)
      OS << " succ=" << printMBBReference(*Succ);
    else
      OS << " succ=null";
    OS << " tail=%bb." << Tail;
    // "+instrs" means per-instruction heights have been computed.
    if (HasValidInstrHeights)
      OS << " +instrs";
  } else
    OS << "height invalid";
  // The critical path is only meaningful with both depths and heights.
  if (HasValidInstrDepths && HasValidInstrHeights)
    OS << ", crit=" << CriticalPath;
}
/// Print this trace: head --> center --> tail, instruction count, critical
/// path, and the chains of predecessor/successor blocks.
void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
  // Recover the center block number from TBI's position in the BlockInfo
  // array (BlockInfo is indexed by block number).
  unsigned MBBNum = &TBI - &TE.BlockInfo[0];

  OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
     << " --> %bb." << TBI.Tail << ':';
  if (TBI.hasValidHeight() && TBI.hasValidDepth())
    OS << ' ' << getInstrCount() << " instrs.";
  if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
    OS << ' ' << TBI.CriticalPath << " cycles.";

  // Walk the predecessor chain up to the trace head.
  const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
  OS << "\n%bb." << MBBNum;
  while (Block->hasValidDepth() && Block->Pred) {
    unsigned Num = Block->Pred->getNumber();
    OS << " <- " << printMBBReference(*Block->Pred);
    Block = &TE.BlockInfo[Num];
  }

  // Walk the successor chain down to the trace tail.
  Block = &TBI;
  OS << "\n ";
  while (Block->hasValidHeight() && Block->Succ) {
    unsigned Num = Block->Succ->getNumber();
    OS << " -> " << printMBBReference(*Block->Succ);
    Block = &TE.BlockInfo[Num];
  }
  OS << '\n';
}