  1. //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. /// \file
  9. ///
  10. /// This file implements the functionalities used by the BottleneckAnalysis
  11. /// to report bottleneck info.
  12. ///
  13. //===----------------------------------------------------------------------===//
  14. #include "Views/BottleneckAnalysis.h"
  15. #include "llvm/MC/MCInst.h"
  16. #include "llvm/MCA/Support.h"
  17. #include "llvm/Support/Format.h"
  18. namespace llvm {
  19. namespace mca {
  20. #define DEBUG_TYPE "llvm-mca"
  21. PressureTracker::PressureTracker(const MCSchedModel &Model)
  22. : SM(Model),
  23. ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
  24. ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
  25. ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
  26. ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
  27. computeProcResourceMasks(SM, ProcResID2Mask);
  28. // Ignore the invalid resource at index zero.
  29. unsigned NextResourceUsersIdx = 0;
  30. for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
  31. const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
  32. ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
  33. NextResourceUsersIdx += ProcResource.NumUnits;
  34. uint64_t ResourceMask = ProcResID2Mask[I];
  35. ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
  36. }
  37. ResourceUsers.resize(NextResourceUsersIdx);
  38. std::fill(ResourceUsers.begin(), ResourceUsers.end(),
  39. std::make_pair<unsigned, unsigned>(~0U, 0U));
  40. }
  41. void PressureTracker::getResourceUsers(uint64_t ResourceMask,
  42. SmallVectorImpl<User> &Users) const {
  43. unsigned Index = getResourceStateIndex(ResourceMask);
  44. unsigned ProcResID = ResIdx2ProcResID[Index];
  45. const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
  46. for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
  47. const User U = getResourceUser(ProcResID, I);
  48. if (U.second && IPI.find(U.first) != IPI.end())
  49. Users.emplace_back(U);
  50. }
  51. }
  52. void PressureTracker::onInstructionDispatched(unsigned IID) {
  53. IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
  54. }
  55. void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }
  56. void PressureTracker::handleInstructionIssuedEvent(
  57. const HWInstructionIssuedEvent &Event) {
  58. unsigned IID = Event.IR.getSourceIndex();
  59. for (const ResourceUse &Use : Event.UsedResources) {
  60. const ResourceRef &RR = Use.first;
  61. unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
  62. Index += countTrailingZeros(RR.second);
  63. ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
  64. }
  65. }
  66. void PressureTracker::updateResourcePressureDistribution(
  67. uint64_t CumulativeMask) {
  68. while (CumulativeMask) {
  69. uint64_t Current = CumulativeMask & (-CumulativeMask);
  70. unsigned ResIdx = getResourceStateIndex(Current);
  71. unsigned ProcResID = ResIdx2ProcResID[ResIdx];
  72. uint64_t Mask = ProcResID2Mask[ProcResID];
  73. if (Mask == Current) {
  74. ResourcePressureDistribution[ProcResID]++;
  75. CumulativeMask ^= Current;
  76. continue;
  77. }
  78. Mask ^= Current;
  79. while (Mask) {
  80. uint64_t SubUnit = Mask & (-Mask);
  81. ResIdx = getResourceStateIndex(SubUnit);
  82. ProcResID = ResIdx2ProcResID[ResIdx];
  83. ResourcePressureDistribution[ProcResID]++;
  84. Mask ^= SubUnit;
  85. }
  86. CumulativeMask ^= Current;
  87. }
  88. }
  89. void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
  90. assert(Event.Reason != HWPressureEvent::INVALID &&
  91. "Unexpected invalid event!");
  92. switch (Event.Reason) {
  93. default:
  94. break;
  95. case HWPressureEvent::RESOURCES: {
  96. const uint64_t ResourceMask = Event.ResourceMask;
  97. updateResourcePressureDistribution(Event.ResourceMask);
  98. for (const InstRef &IR : Event.AffectedInstructions) {
  99. const Instruction &IS = *IR.getInstruction();
  100. unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
  101. if (!BusyResources)
  102. continue;
  103. unsigned IID = IR.getSourceIndex();
  104. IPI[IID].ResourcePressureCycles++;
  105. }
  106. break;
  107. }
  108. case HWPressureEvent::REGISTER_DEPS:
  109. for (const InstRef &IR : Event.AffectedInstructions) {
  110. unsigned IID = IR.getSourceIndex();
  111. IPI[IID].RegisterPressureCycles++;
  112. }
  113. break;
  114. case HWPressureEvent::MEMORY_DEPS:
  115. for (const InstRef &IR : Event.AffectedInstructions) {
  116. unsigned IID = IR.getSourceIndex();
  117. IPI[IID].MemoryPressureCycles++;
  118. }
  119. }
  120. }
  121. #ifndef NDEBUG
  122. void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
  123. const DependencyEdge &DepEdge,
  124. MCInstPrinter &MCIP) const {
  125. unsigned FromIID = DepEdge.FromIID;
  126. unsigned ToIID = DepEdge.ToIID;
  127. assert(FromIID < ToIID && "Graph should be acyclic!");
  128. const DependencyEdge::Dependency &DE = DepEdge.Dep;
  129. assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");
  130. OS << " FROM: " << FromIID << " TO: " << ToIID << " ";
  131. if (DE.Type == DependencyEdge::DT_REGISTER) {
  132. OS << " - REGISTER: ";
  133. MCIP.printRegName(OS, DE.ResourceOrRegID);
  134. } else if (DE.Type == DependencyEdge::DT_MEMORY) {
  135. OS << " - MEMORY";
  136. } else {
  137. assert(DE.Type == DependencyEdge::DT_RESOURCE &&
  138. "Unsupported dependency type!");
  139. OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
  140. }
  141. OS << " - COST: " << DE.Cost << '\n';
  142. }
  143. #endif // NDEBUG
  144. void DependencyGraph::pruneEdges(unsigned Iterations) {
  145. for (DGNode &N : Nodes) {
  146. unsigned NumPruned = 0;
  147. const unsigned Size = N.OutgoingEdges.size();
  148. // Use a cut-off threshold to prune edges with a low frequency.
  149. for (unsigned I = 0, E = Size; I < E; ++I) {
  150. DependencyEdge &Edge = N.OutgoingEdges[I];
  151. if (Edge.Frequency == Iterations)
  152. continue;
  153. double Factor = (double)Edge.Frequency / Iterations;
  154. if (0.10 < Factor)
  155. continue;
  156. Nodes[Edge.ToIID].NumPredecessors--;
  157. std::swap(Edge, N.OutgoingEdges[E - 1]);
  158. --E;
  159. ++NumPruned;
  160. }
  161. if (NumPruned)
  162. N.OutgoingEdges.resize(Size - NumPruned);
  163. }
  164. }
  165. void DependencyGraph::initializeRootSet(
  166. SmallVectorImpl<unsigned> &RootSet) const {
  167. for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
  168. const DGNode &N = Nodes[I];
  169. if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
  170. RootSet.emplace_back(I);
  171. }
  172. }
  173. void DependencyGraph::propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet,
  174. unsigned Iterations) {
  175. SmallVector<unsigned, 8> ToVisit;
  176. // A critical sequence is computed as the longest path from a node of the
  177. // RootSet to a leaf node (i.e. a node with no successors). The RootSet is
  178. // composed of nodes with at least one successor, and no predecessors.
  179. //
  180. // Each node of the graph starts with an initial default cost of zero. The
  181. // cost of a node is a measure of criticality: the higher the cost, the bigger
  182. // is the performance impact.
  183. // For register and memory dependencies, the cost is a function of the write
  184. // latency as well as the actual delay (in cycles) caused to users.
  185. // For processor resource dependencies, the cost is a function of the resource
  186. // pressure. Resource interferences with low frequency values are ignored.
  187. //
  188. // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of
  189. // the inner loop selects (i.e. visits) a node N from a set of `unvisited
  190. // nodes`, and then propagates the cost of N to all its neighbors.
  191. //
  192. // The `unvisited nodes` set initially contains all the nodes from the
  193. // RootSet. A node N is added to the `unvisited nodes` if all its
  194. // predecessors have been visited already.
  195. //
  196. // For simplicity, every node tracks the number of unvisited incoming edges in
  197. // field `NumVisitedPredecessors`. When the value of that field drops to
  198. // zero, then the corresponding node is added to a `ToVisit` set.
  199. //
  200. // At the end of every iteration of the outer loop, set `ToVisit` becomes our
  201. // new `unvisited nodes` set.
  202. //
  203. // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
  204. // is empty. This algorithm works under the assumption that the graph is
  205. // acyclic.
  206. do {
  207. for (unsigned IID : RootSet) {
  208. const DGNode &N = Nodes[IID];
  209. for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
  210. unsigned ToIID = DepEdge.ToIID;
  211. DGNode &To = Nodes[ToIID];
  212. uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
  213. // Check if this is the most expensive incoming edge seen so far. In
  214. // case, update the total cost of the destination node (ToIID), as well
  215. // its field `CriticalPredecessor`.
  216. if (Cost > To.Cost) {
  217. To.CriticalPredecessor = DepEdge;
  218. To.Cost = Cost;
  219. To.Depth = N.Depth + 1;
  220. }
  221. To.NumVisitedPredecessors++;
  222. if (To.NumVisitedPredecessors == To.NumPredecessors)
  223. ToVisit.emplace_back(ToIID);
  224. }
  225. }
  226. std::swap(RootSet, ToVisit);
  227. ToVisit.clear();
  228. } while (!RootSet.empty());
  229. }
  230. void DependencyGraph::getCriticalSequence(
  231. SmallVectorImpl<const DependencyEdge *> &Seq) const {
  232. // At this stage, nodes of the graph have been already visited, and costs have
  233. // been propagated through the edges (see method `propagateThroughEdges()`).
  234. // Identify the node N with the highest cost in the graph. By construction,
  235. // that node is the last instruction of our critical sequence.
  236. // Field N.Depth would tell us the total length of the sequence.
  237. //
  238. // To obtain the sequence of critical edges, we simply follow the chain of
  239. // critical predecessors starting from node N (field
  240. // DGNode::CriticalPredecessor).
  241. const auto It = std::max_element(
  242. Nodes.begin(), Nodes.end(),
  243. [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; });
  244. unsigned IID = std::distance(Nodes.begin(), It);
  245. Seq.resize(Nodes[IID].Depth);
  246. for (const DependencyEdge *&DE : llvm::reverse(Seq)) {
  247. const DGNode &N = Nodes[IID];
  248. DE = &N.CriticalPredecessor;
  249. IID = N.CriticalPredecessor.FromIID;
  250. }
  251. }
  252. void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS,
  253. const MCInst &MCI,
  254. bool UseDifferentColor) const {
  255. FOS.PadToColumn(14);
  256. if (UseDifferentColor)
  257. FOS.changeColor(raw_ostream::CYAN, true, false);
  258. FOS << printInstructionString(MCI);
  259. if (UseDifferentColor)
  260. FOS.resetColor();
  261. }
// Renders the critical sequence computed by the dependency graph: the source
// instructions in program order, ASCII art connecting the endpoints of each
// critical edge, and per-edge dependency details in a right-hand column.
void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
  // Early exit if no bottlenecks were found during the simulation.
  if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
    return;
  SmallVector<const DependencyEdge *, 16> Seq;
  DG.getCriticalSequence(Seq);
  if (Seq.empty())
    return;
  OS << "\nCritical sequence based on the simulation:\n\n";
  const DependencyEdge &FirstEdge = *Seq[0];
  ArrayRef<llvm::MCInst> Source = getSource();
  // Edge endpoints index into the expanded multi-iteration graph; reduce
  // them modulo the source size to map back to input instructions.
  unsigned FromIID = FirstEdge.FromIID % Source.size();
  unsigned ToIID = FirstEdge.ToIID % Source.size();
  // An edge whose source does not precede its target in program order spans
  // an iteration boundary (loop carried).
  bool IsLoopCarried = FromIID >= ToIID;
  formatted_raw_ostream FOS(OS);
  FOS.PadToColumn(14);
  FOS << "Instruction";
  FOS.PadToColumn(58);
  FOS << "Dependency Information";
  bool HasColors = FOS.has_colors();
  unsigned CurrentIID = 0;
  if (IsLoopCarried) {
    // First edge wraps around: print only its source, then the wrap marker.
    FOS << "\n +----< " << FromIID << ".";
    printInstruction(FOS, Source[FromIID], HasColors);
    FOS << "\n |\n | < loop carried > \n |";
  } else {
    // Print the (non-highlighted) instructions preceding the first source.
    while (CurrentIID < FromIID) {
      FOS << "\n " << CurrentIID << ".";
      printInstruction(FOS, Source[CurrentIID]);
      CurrentIID++;
    }
    FOS << "\n +----< " << CurrentIID << ".";
    printInstruction(FOS, Source[CurrentIID], HasColors);
    CurrentIID++;
  }
  // Emit one highlighted target (and its dependency info) per edge, printing
  // any intervening instructions along the connector.
  for (const DependencyEdge *&DE : Seq) {
    ToIID = DE->ToIID % Source.size();
    // If the target is behind us, the edge wraps: print to end of sequence.
    unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;
    while (CurrentIID < LastIID) {
      FOS << "\n | " << CurrentIID << ".";
      printInstruction(FOS, Source[CurrentIID]);
      CurrentIID++;
    }
    if (CurrentIID == ToIID) {
      FOS << "\n +----> " << ToIID << ".";
      printInstruction(FOS, Source[CurrentIID], HasColors);
    } else {
      // Loop-carried edge: insert the wrap marker before the target.
      FOS << "\n |\n | < loop carried > \n |"
          << "\n +----> " << ToIID << ".";
      printInstruction(FOS, Source[ToIID], HasColors);
    }
    // Right-hand column: describe the dependency carried by this edge.
    FOS.PadToColumn(58);
    const DependencyEdge::Dependency &Dep = DE->Dep;
    if (HasColors)
      FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
    if (Dep.Type == DependencyEdge::DT_REGISTER) {
      FOS << "## REGISTER dependency: ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID);
    } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
      FOS << "## MEMORY dependency.";
    } else {
      assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
             "Unsupported dependency type!");
      FOS << "## RESOURCE interference: ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
      if (HasColors) {
        FOS.resetColor();
        FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
      }
      // Probability = fraction of iterations on which this interference
      // was observed.
      FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
          << "% ]";
    }
    if (HasColors)
      FOS.resetColor();
    ++CurrentIID;
  }
  // Print the remaining (non-highlighted) instructions of the sequence.
  while (CurrentIID < Source.size()) {
    FOS << "\n " << CurrentIID << ".";
    printInstruction(FOS, Source[CurrentIID]);
    CurrentIID++;
  }
  FOS << '\n';
  FOS.flush();
}
  350. #ifndef NDEBUG
  351. void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
  352. OS << "\nREG DEPS\n";
  353. for (const DGNode &Node : Nodes)
  354. for (const DependencyEdge &DE : Node.OutgoingEdges)
  355. if (DE.Dep.Type == DependencyEdge::DT_REGISTER)
  356. dumpDependencyEdge(OS, DE, MCIP);
  357. OS << "\nMEM DEPS\n";
  358. for (const DGNode &Node : Nodes)
  359. for (const DependencyEdge &DE : Node.OutgoingEdges)
  360. if (DE.Dep.Type == DependencyEdge::DT_MEMORY)
  361. dumpDependencyEdge(OS, DE, MCIP);
  362. OS << "\nRESOURCE DEPS\n";
  363. for (const DGNode &Node : Nodes)
  364. for (const DependencyEdge &DE : Node.OutgoingEdges)
  365. if (DE.Dep.Type == DependencyEdge::DT_RESOURCE)
  366. dumpDependencyEdge(OS, DE, MCIP);
  367. }
  368. #endif // NDEBUG
  369. void DependencyGraph::addDependency(unsigned From, unsigned To,
  370. DependencyEdge::Dependency &&Dep) {
  371. DGNode &NodeFrom = Nodes[From];
  372. DGNode &NodeTo = Nodes[To];
  373. SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;
  374. auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) {
  375. return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
  376. });
  377. if (It != Vec.end()) {
  378. It->Dep.Cost += Dep.Cost;
  379. It->Frequency++;
  380. return;
  381. }
  382. DependencyEdge DE = {Dep, From, To, 1};
  383. Vec.emplace_back(DE);
  384. NodeTo.NumPredecessors++;
  385. }
// The dependency graph is sized 3x the input sequence: it models three
// consecutive copies of the instruction stream so that loop-carried
// dependencies can be represented as forward edges (see addRegisterDep,
// addMemoryDep and addResourceDep below).
BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
                                       MCInstPrinter &Printer,
                                       ArrayRef<MCInst> S, unsigned NumIter)
    : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()),
      DG(S.size() * 3), Iterations(NumIter), TotalCycles(0),
      PressureIncreasedBecauseOfResources(false),
      PressureIncreasedBecauseOfRegisterDependencies(false),
      PressureIncreasedBecauseOfMemoryDependencies(false),
      SeenStallCycles(false), BPI() {}
  395. void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
  396. unsigned RegID, unsigned Cost) {
  397. bool IsLoopCarried = From >= To;
  398. unsigned SourceSize = getSource().size();
  399. if (IsLoopCarried) {
  400. DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
  401. DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
  402. return;
  403. }
  404. DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
  405. }
  406. void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
  407. unsigned Cost) {
  408. bool IsLoopCarried = From >= To;
  409. unsigned SourceSize = getSource().size();
  410. if (IsLoopCarried) {
  411. DG.addMemoryDep(From, To + SourceSize, Cost);
  412. DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
  413. return;
  414. }
  415. DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
  416. }
  417. void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
  418. uint64_t Mask, unsigned Cost) {
  419. bool IsLoopCarried = From >= To;
  420. unsigned SourceSize = getSource().size();
  421. if (IsLoopCarried) {
  422. DG.addResourceDep(From, To + SourceSize, Mask, Cost);
  423. DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
  424. return;
  425. }
  426. DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
  427. }
// Instruction event handler. Dispatch/Executed events only update the
// pressure tracker; Issued events additionally add the instruction's
// critical dependencies (resource, register, memory) to the graph.
void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
  const unsigned IID = Event.IR.getSourceIndex();
  if (Event.Type == HWInstructionEvent::Dispatched) {
    Tracker.onInstructionDispatched(IID);
    return;
  }
  if (Event.Type == HWInstructionEvent::Executed) {
    Tracker.onInstructionExecuted(IID);
    return;
  }
  // Only Issued events are processed beyond this point.
  if (Event.Type != HWInstructionEvent::Issued)
    return;
  ArrayRef<llvm::MCInst> Source = getSource();
  const Instruction &IS = *Event.IR.getInstruction();
  // Map the simulated instruction id back into the input sequence.
  unsigned To = IID % Source.size();
  // Pressure cycles are weighted x2 when computing edge costs.
  unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
  uint64_t ResourceMask = IS.getCriticalResourceMask();
  SmallVector<std::pair<unsigned, unsigned>, 4> Users;
  // For each critical resource (one set bit at a time), add an interference
  // edge from every in-flight user of that resource to this instruction.
  while (ResourceMask) {
    uint64_t Current = ResourceMask & (-ResourceMask);
    Tracker.getResourceUsers(Current, Users);
    for (const std::pair<unsigned, unsigned> &U : Users)
      addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
    Users.clear();
    ResourceMask ^= Current;
  }
  // Critical register dependency, if any (Cycles == 0 means none).
  const CriticalDependency &RegDep = IS.getCriticalRegDep();
  if (RegDep.Cycles) {
    Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
    unsigned From = RegDep.IID % Source.size();
    addRegisterDep(From, To, RegDep.RegID, Cycles);
  }
  // Critical memory dependency, if any.
  const CriticalDependency &MemDep = IS.getCriticalMemDep();
  if (MemDep.Cycles) {
    Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
    unsigned From = MemDep.IID % Source.size();
    addMemoryDep(From, To, Cycles);
  }
  // Record which instruction now occupies each consumed resource unit.
  Tracker.handleInstructionIssuedEvent(
      static_cast<const HWInstructionIssuedEvent &>(Event));
  // Check if this is the last simulated instruction.
  if (IID == ((Iterations * Source.size()) - 1))
    DG.finalizeGraph(Iterations);
}
  472. void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
  473. assert(Event.Reason != HWPressureEvent::INVALID &&
  474. "Unexpected invalid event!");
  475. Tracker.handlePressureEvent(Event);
  476. switch (Event.Reason) {
  477. default:
  478. break;
  479. case HWPressureEvent::RESOURCES:
  480. PressureIncreasedBecauseOfResources = true;
  481. break;
  482. case HWPressureEvent::REGISTER_DEPS:
  483. PressureIncreasedBecauseOfRegisterDependencies = true;
  484. break;
  485. case HWPressureEvent::MEMORY_DEPS:
  486. PressureIncreasedBecauseOfMemoryDependencies = true;
  487. break;
  488. }
  489. }
  490. void BottleneckAnalysis::onCycleEnd() {
  491. ++TotalCycles;
  492. bool PressureIncreasedBecauseOfDataDependencies =
  493. PressureIncreasedBecauseOfRegisterDependencies ||
  494. PressureIncreasedBecauseOfMemoryDependencies;
  495. if (!PressureIncreasedBecauseOfResources &&
  496. !PressureIncreasedBecauseOfDataDependencies)
  497. return;
  498. ++BPI.PressureIncreaseCycles;
  499. if (PressureIncreasedBecauseOfRegisterDependencies)
  500. ++BPI.RegisterDependencyCycles;
  501. if (PressureIncreasedBecauseOfMemoryDependencies)
  502. ++BPI.MemoryDependencyCycles;
  503. if (PressureIncreasedBecauseOfDataDependencies)
  504. ++BPI.DataDependencyCycles;
  505. if (PressureIncreasedBecauseOfResources)
  506. ++BPI.ResourcePressureCycles;
  507. PressureIncreasedBecauseOfResources = false;
  508. PressureIncreasedBecauseOfRegisterDependencies = false;
  509. PressureIncreasedBecauseOfMemoryDependencies = false;
  510. }
  511. void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
  512. if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
  513. OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
  514. return;
  515. }
  516. double PressurePerCycle =
  517. (double)BPI.PressureIncreaseCycles * 100 / TotalCycles;
  518. double ResourcePressurePerCycle =
  519. (double)BPI.ResourcePressureCycles * 100 / TotalCycles;
  520. double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles;
  521. double RegDepPressurePerCycle =
  522. (double)BPI.RegisterDependencyCycles * 100 / TotalCycles;
  523. double MemDepPressurePerCycle =
  524. (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;
  525. OS << "\n\nCycles with backend pressure increase [ "
  526. << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";
  527. OS << "\nThroughput Bottlenecks: "
  528. << "\n Resource Pressure [ "
  529. << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100)
  530. << "% ]";
  531. if (BPI.PressureIncreaseCycles) {
  532. ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
  533. const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
  534. for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
  535. unsigned ResourceCycles = Distribution[I];
  536. if (ResourceCycles) {
  537. double Frequency = (double)ResourceCycles * 100 / TotalCycles;
  538. const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
  539. OS << "\n - " << PRDesc.Name << " [ "
  540. << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
  541. }
  542. }
  543. }
  544. OS << "\n Data Dependencies: [ "
  545. << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
  546. OS << "\n - Register Dependencies [ "
  547. << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
  548. << "% ]";
  549. OS << "\n - Memory Dependencies [ "
  550. << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
  551. << "% ]\n";
  552. }
  553. void BottleneckAnalysis::printView(raw_ostream &OS) const {
  554. std::string Buffer;
  555. raw_string_ostream TempStream(Buffer);
  556. printBottleneckHints(TempStream);
  557. TempStream.flush();
  558. OS << Buffer;
  559. printCriticalSequence(OS);
  560. }
  561. } // namespace mca.
  562. } // namespace llvm