DependenceGraphBuilder.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. //===- DependenceGraphBuilder.cpp ------------------------------------------==//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. // This file implements common steps of the build algorithm for construction
  9. // of dependence graphs such as DDG and PDG.
  10. //===----------------------------------------------------------------------===//
  11. #include "llvm/Analysis/DependenceGraphBuilder.h"
  12. #include "llvm/ADT/DepthFirstIterator.h"
  13. #include "llvm/ADT/EnumeratedArray.h"
  14. #include "llvm/ADT/PostOrderIterator.h"
  15. #include "llvm/ADT/SCCIterator.h"
  16. #include "llvm/ADT/Statistic.h"
  17. #include "llvm/Analysis/DDG.h"
  18. using namespace llvm;
  19. #define DEBUG_TYPE "dgb"
  20. STATISTIC(TotalGraphs, "Number of dependence graphs created.");
  21. STATISTIC(TotalDefUseEdges, "Number of def-use edges created.");
  22. STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created.");
  23. STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created.");
  24. STATISTIC(TotalPiBlockNodes, "Number of pi-block nodes created.");
  25. STATISTIC(TotalConfusedEdges,
  26. "Number of confused memory dependencies between two nodes.");
  27. STATISTIC(TotalEdgeReversals,
  28. "Number of times the source and sink of dependence was reversed to "
  29. "expose cycles in the graph.");
  30. using InstructionListType = SmallVector<Instruction *, 2>;
  31. //===--------------------------------------------------------------------===//
  32. // AbstractDependenceGraphBuilder implementation
  33. //===--------------------------------------------------------------------===//
  34. template <class G>
  35. void AbstractDependenceGraphBuilder<G>::computeInstructionOrdinals() {
  36. // The BBList is expected to be in program order.
  37. size_t NextOrdinal = 1;
  38. for (auto *BB : BBList)
  39. for (auto &I : *BB)
  40. InstOrdinalMap.insert(std::make_pair(&I, NextOrdinal++));
  41. }
  42. template <class G>
  43. void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() {
  44. ++TotalGraphs;
  45. assert(IMap.empty() && "Expected empty instruction map at start");
  46. for (BasicBlock *BB : BBList)
  47. for (Instruction &I : *BB) {
  48. auto &NewNode = createFineGrainedNode(I);
  49. IMap.insert(std::make_pair(&I, &NewNode));
  50. NodeOrdinalMap.insert(std::make_pair(&NewNode, getOrdinal(I)));
  51. ++TotalFineGrainedNodes;
  52. }
  53. }
  54. template <class G>
  55. void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() {
  56. // Create a root node that connects to every connected component of the graph.
  57. // This is done to allow graph iterators to visit all the disjoint components
  58. // of the graph, in a single walk.
  59. //
  60. // This algorithm works by going through each node of the graph and for each
  61. // node N, do a DFS starting from N. A rooted edge is established between the
  62. // root node and N (if N is not yet visited). All the nodes reachable from N
  63. // are marked as visited and are skipped in the DFS of subsequent nodes.
  64. //
  65. // Note: This algorithm tries to limit the number of edges out of the root
  66. // node to some extent, but there may be redundant edges created depending on
  67. // the iteration order. For example for a graph {A -> B}, an edge from the
  68. // root node is added to both nodes if B is visited before A. While it does
  69. // not result in minimal number of edges, this approach saves compile-time
  70. // while keeping the number of edges in check.
  71. auto &RootNode = createRootNode();
  72. df_iterator_default_set<const NodeType *, 4> Visited;
  73. for (auto *N : Graph) {
  74. if (*N == RootNode)
  75. continue;
  76. for (auto I : depth_first_ext(N, Visited))
  77. if (I == N)
  78. createRootedEdge(RootNode, *N);
  79. }
  80. }
  81. template <class G> void AbstractDependenceGraphBuilder<G>::createPiBlocks() {
  82. if (!shouldCreatePiBlocks())
  83. return;
  84. LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n");
  85. // The overall algorithm is as follows:
  86. // 1. Identify SCCs and for each SCC create a pi-block node containing all
  87. // the nodes in that SCC.
  88. // 2. Identify incoming edges incident to the nodes inside of the SCC and
  89. // reconnect them to the pi-block node.
  90. // 3. Identify outgoing edges from the nodes inside of the SCC to nodes
  91. // outside of it and reconnect them so that the edges are coming out of the
  92. // SCC node instead.
  93. // Adding nodes as we iterate through the SCCs cause the SCC
  94. // iterators to get invalidated. To prevent this invalidation, we first
  95. // collect a list of nodes that are part of an SCC, and then iterate over
  96. // those lists to create the pi-block nodes. Each element of the list is a
  97. // list of nodes in an SCC. Note: trivial SCCs containing a single node are
  98. // ignored.
  99. SmallVector<NodeListType, 4> ListOfSCCs;
  100. for (auto &SCC : make_range(scc_begin(&Graph), scc_end(&Graph))) {
  101. if (SCC.size() > 1)
  102. ListOfSCCs.emplace_back(SCC.begin(), SCC.end());
  103. }
  104. for (NodeListType &NL : ListOfSCCs) {
  105. LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL.size()
  106. << " nodes in it.\n");
  107. // SCC iterator may put the nodes in an order that's different from the
  108. // program order. To preserve original program order, we sort the list of
  109. // nodes based on ordinal numbers computed earlier.
  110. llvm::sort(NL, [&](NodeType *LHS, NodeType *RHS) {
  111. return getOrdinal(*LHS) < getOrdinal(*RHS);
  112. });
  113. NodeType &PiNode = createPiBlock(NL);
  114. ++TotalPiBlockNodes;
  115. // Build a set to speed up the lookup for edges whose targets
  116. // are inside the SCC.
  117. SmallPtrSet<NodeType *, 4> NodesInSCC(NL.begin(), NL.end());
  118. // We have the set of nodes in the SCC. We go through the set of nodes
  119. // that are outside of the SCC and look for edges that cross the two sets.
  120. for (NodeType *N : Graph) {
  121. // Skip the SCC node and all the nodes inside of it.
  122. if (*N == PiNode || NodesInSCC.count(N))
  123. continue;
  124. enum Direction {
  125. Incoming, // Incoming edges to the SCC
  126. Outgoing, // Edges going ot of the SCC
  127. DirectionCount // To make the enum usable as an array index.
  128. };
  129. // Use these flags to help us avoid creating redundant edges. If there
  130. // are more than one edges from an outside node to inside nodes, we only
  131. // keep one edge from that node to the pi-block node. Similarly, if
  132. // there are more than one edges from inside nodes to an outside node,
  133. // we only keep one edge from the pi-block node to the outside node.
  134. // There is a flag defined for each direction (incoming vs outgoing) and
  135. // for each type of edge supported, using a two-dimensional boolean
  136. // array.
  137. using EdgeKind = typename EdgeType::EdgeKind;
  138. EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{false,
  139. false};
  140. auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst,
  141. const EdgeKind K) {
  142. switch (K) {
  143. case EdgeKind::RegisterDefUse:
  144. createDefUseEdge(Src, Dst);
  145. break;
  146. case EdgeKind::MemoryDependence:
  147. createMemoryEdge(Src, Dst);
  148. break;
  149. case EdgeKind::Rooted:
  150. createRootedEdge(Src, Dst);
  151. break;
  152. default:
  153. llvm_unreachable("Unsupported type of edge.");
  154. }
  155. };
  156. auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New,
  157. const Direction Dir) {
  158. if (!Src->hasEdgeTo(*Dst))
  159. return;
  160. LLVM_DEBUG(
  161. dbgs() << "reconnecting("
  162. << (Dir == Direction::Incoming ? "incoming)" : "outgoing)")
  163. << ":\nSrc:" << *Src << "\nDst:" << *Dst << "\nNew:" << *New
  164. << "\n");
  165. assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) &&
  166. "Invalid direction.");
  167. SmallVector<EdgeType *, 10> EL;
  168. Src->findEdgesTo(*Dst, EL);
  169. for (EdgeType *OldEdge : EL) {
  170. EdgeKind Kind = OldEdge->getKind();
  171. if (!EdgeAlreadyCreated[Dir][Kind]) {
  172. if (Dir == Direction::Incoming) {
  173. createEdgeOfKind(*Src, *New, Kind);
  174. LLVM_DEBUG(dbgs() << "created edge from Src to New.\n");
  175. } else if (Dir == Direction::Outgoing) {
  176. createEdgeOfKind(*New, *Dst, Kind);
  177. LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n");
  178. }
  179. EdgeAlreadyCreated[Dir][Kind] = true;
  180. }
  181. Src->removeEdge(*OldEdge);
  182. destroyEdge(*OldEdge);
  183. LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n");
  184. }
  185. };
  186. for (NodeType *SCCNode : NL) {
  187. // Process incoming edges incident to the pi-block node.
  188. reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming);
  189. // Process edges that are coming out of the pi-block node.
  190. reconnectEdges(SCCNode, N, &PiNode, Direction::Outgoing);
  191. }
  192. }
  193. }
  194. // Ordinal maps are no longer needed.
  195. InstOrdinalMap.clear();
  196. NodeOrdinalMap.clear();
  197. LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n");
  198. }
  199. template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() {
  200. for (NodeType *N : Graph) {
  201. InstructionListType SrcIList;
  202. N->collectInstructions([](const Instruction *I) { return true; }, SrcIList);
  203. // Use a set to mark the targets that we link to N, so we don't add
  204. // duplicate def-use edges when more than one instruction in a target node
  205. // use results of instructions that are contained in N.
  206. SmallPtrSet<NodeType *, 4> VisitedTargets;
  207. for (Instruction *II : SrcIList) {
  208. for (User *U : II->users()) {
  209. Instruction *UI = dyn_cast<Instruction>(U);
  210. if (!UI)
  211. continue;
  212. NodeType *DstNode = nullptr;
  213. if (IMap.find(UI) != IMap.end())
  214. DstNode = IMap.find(UI)->second;
  215. // In the case of loops, the scope of the subgraph is all the
  216. // basic blocks (and instructions within them) belonging to the loop. We
  217. // simply ignore all the edges coming from (or going into) instructions
  218. // or basic blocks outside of this range.
  219. if (!DstNode) {
  220. LLVM_DEBUG(
  221. dbgs()
  222. << "skipped def-use edge since the sink" << *UI
  223. << " is outside the range of instructions being considered.\n");
  224. continue;
  225. }
  226. // Self dependencies are ignored because they are redundant and
  227. // uninteresting.
  228. if (DstNode == N) {
  229. LLVM_DEBUG(dbgs()
  230. << "skipped def-use edge since the sink and the source ("
  231. << N << ") are the same.\n");
  232. continue;
  233. }
  234. if (VisitedTargets.insert(DstNode).second) {
  235. createDefUseEdge(*N, *DstNode);
  236. ++TotalDefUseEdges;
  237. }
  238. }
  239. }
  240. }
  241. }
  242. template <class G>
  243. void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() {
  244. using DGIterator = typename G::iterator;
  245. auto isMemoryAccess = [](const Instruction *I) {
  246. return I->mayReadOrWriteMemory();
  247. };
  248. for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) {
  249. InstructionListType SrcIList;
  250. (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList);
  251. if (SrcIList.empty())
  252. continue;
  253. for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) {
  254. if (**SrcIt == **DstIt)
  255. continue;
  256. InstructionListType DstIList;
  257. (*DstIt)->collectInstructions(isMemoryAccess, DstIList);
  258. if (DstIList.empty())
  259. continue;
  260. bool ForwardEdgeCreated = false;
  261. bool BackwardEdgeCreated = false;
  262. for (Instruction *ISrc : SrcIList) {
  263. for (Instruction *IDst : DstIList) {
  264. auto D = DI.depends(ISrc, IDst, true);
  265. if (!D)
  266. continue;
  267. // If we have a dependence with its left-most non-'=' direction
  268. // being '>' we need to reverse the direction of the edge, because
  269. // the source of the dependence cannot occur after the sink. For
  270. // confused dependencies, we will create edges in both directions to
  271. // represent the possibility of a cycle.
  272. auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) {
  273. if (!ForwardEdgeCreated) {
  274. createMemoryEdge(Src, Dst);
  275. ++TotalMemoryEdges;
  276. }
  277. if (!BackwardEdgeCreated) {
  278. createMemoryEdge(Dst, Src);
  279. ++TotalMemoryEdges;
  280. }
  281. ForwardEdgeCreated = BackwardEdgeCreated = true;
  282. ++TotalConfusedEdges;
  283. };
  284. auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) {
  285. if (!ForwardEdgeCreated) {
  286. createMemoryEdge(Src, Dst);
  287. ++TotalMemoryEdges;
  288. }
  289. ForwardEdgeCreated = true;
  290. };
  291. auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) {
  292. if (!BackwardEdgeCreated) {
  293. createMemoryEdge(Dst, Src);
  294. ++TotalMemoryEdges;
  295. }
  296. BackwardEdgeCreated = true;
  297. };
  298. if (D->isConfused())
  299. createConfusedEdges(**SrcIt, **DstIt);
  300. else if (D->isOrdered() && !D->isLoopIndependent()) {
  301. bool ReversedEdge = false;
  302. for (unsigned Level = 1; Level <= D->getLevels(); ++Level) {
  303. if (D->getDirection(Level) == Dependence::DVEntry::EQ)
  304. continue;
  305. else if (D->getDirection(Level) == Dependence::DVEntry::GT) {
  306. createBackwardEdge(**SrcIt, **DstIt);
  307. ReversedEdge = true;
  308. ++TotalEdgeReversals;
  309. break;
  310. } else if (D->getDirection(Level) == Dependence::DVEntry::LT)
  311. break;
  312. else {
  313. createConfusedEdges(**SrcIt, **DstIt);
  314. break;
  315. }
  316. }
  317. if (!ReversedEdge)
  318. createForwardEdge(**SrcIt, **DstIt);
  319. } else
  320. createForwardEdge(**SrcIt, **DstIt);
  321. // Avoid creating duplicate edges.
  322. if (ForwardEdgeCreated && BackwardEdgeCreated)
  323. break;
  324. }
  325. // If we've created edges in both directions, there is no more
  326. // unique edge that we can create between these two nodes, so we
  327. // can exit early.
  328. if (ForwardEdgeCreated && BackwardEdgeCreated)
  329. break;
  330. }
  331. }
  332. }
  333. }
  334. template <class G> void AbstractDependenceGraphBuilder<G>::simplify() {
  335. if (!shouldSimplify())
  336. return;
  337. LLVM_DEBUG(dbgs() << "==== Start of Graph Simplification ===\n");
  338. // This algorithm works by first collecting a set of candidate nodes that have
  339. // an out-degree of one (in terms of def-use edges), and then ignoring those
  340. // whose targets have an in-degree more than one. Each node in the resulting
  341. // set can then be merged with its corresponding target and put back into the
  342. // worklist until no further merge candidates are available.
  343. SmallPtrSet<NodeType *, 32> CandidateSourceNodes;
  344. // A mapping between nodes and their in-degree. To save space, this map
  345. // only contains nodes that are targets of nodes in the CandidateSourceNodes.
  346. DenseMap<NodeType *, unsigned> TargetInDegreeMap;
  347. for (NodeType *N : Graph) {
  348. if (N->getEdges().size() != 1)
  349. continue;
  350. EdgeType &Edge = N->back();
  351. if (!Edge.isDefUse())
  352. continue;
  353. CandidateSourceNodes.insert(N);
  354. // Insert an element into the in-degree map and initialize to zero. The
  355. // count will get updated in the next step.
  356. TargetInDegreeMap.insert({&Edge.getTargetNode(), 0});
  357. }
  358. LLVM_DEBUG({
  359. dbgs() << "Size of candidate src node list:" << CandidateSourceNodes.size()
  360. << "\nNode with single outgoing def-use edge:\n";
  361. for (NodeType *N : CandidateSourceNodes) {
  362. dbgs() << N << "\n";
  363. }
  364. });
  365. for (NodeType *N : Graph) {
  366. for (EdgeType *E : *N) {
  367. NodeType *Tgt = &E->getTargetNode();
  368. auto TgtIT = TargetInDegreeMap.find(Tgt);
  369. if (TgtIT != TargetInDegreeMap.end())
  370. ++(TgtIT->second);
  371. }
  372. }
  373. LLVM_DEBUG({
  374. dbgs() << "Size of target in-degree map:" << TargetInDegreeMap.size()
  375. << "\nContent of in-degree map:\n";
  376. for (auto &I : TargetInDegreeMap) {
  377. dbgs() << I.first << " --> " << I.second << "\n";
  378. }
  379. });
  380. SmallVector<NodeType *, 32> Worklist(CandidateSourceNodes.begin(),
  381. CandidateSourceNodes.end());
  382. while (!Worklist.empty()) {
  383. NodeType &Src = *Worklist.pop_back_val();
  384. // As nodes get merged, we need to skip any node that has been removed from
  385. // the candidate set (see below).
  386. if (!CandidateSourceNodes.erase(&Src))
  387. continue;
  388. assert(Src.getEdges().size() == 1 &&
  389. "Expected a single edge from the candidate src node.");
  390. NodeType &Tgt = Src.back().getTargetNode();
  391. assert(TargetInDegreeMap.find(&Tgt) != TargetInDegreeMap.end() &&
  392. "Expected target to be in the in-degree map.");
  393. if (TargetInDegreeMap[&Tgt] != 1)
  394. continue;
  395. if (!areNodesMergeable(Src, Tgt))
  396. continue;
  397. // Do not merge if there is also an edge from target to src (immediate
  398. // cycle).
  399. if (Tgt.hasEdgeTo(Src))
  400. continue;
  401. LLVM_DEBUG(dbgs() << "Merging:" << Src << "\nWith:" << Tgt << "\n");
  402. mergeNodes(Src, Tgt);
  403. // If the target node is in the candidate set itself, we need to put the
  404. // src node back into the worklist again so it gives the target a chance
  405. // to get merged into it. For example if we have:
  406. // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a},
  407. // then after merging (a) and (b) together, we need to put (a,b) back in
  408. // the worklist so that (c) can get merged in as well resulting in
  409. // {(a,b,c) -> d}
  410. // We also need to remove the old target (b), from the worklist. We first
  411. // remove it from the candidate set here, and skip any item from the
  412. // worklist that is not in the set.
  413. if (CandidateSourceNodes.erase(&Tgt)) {
  414. Worklist.push_back(&Src);
  415. CandidateSourceNodes.insert(&Src);
  416. LLVM_DEBUG(dbgs() << "Putting " << &Src << " back in the worklist.\n");
  417. }
  418. }
  419. LLVM_DEBUG(dbgs() << "=== End of Graph Simplification ===\n");
  420. }
  421. template <class G>
  422. void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() {
  423. // If we don't create pi-blocks, then we may not have a DAG.
  424. if (!shouldCreatePiBlocks())
  425. return;
  426. SmallVector<NodeType *, 64> NodesInPO;
  427. using NodeKind = typename NodeType::NodeKind;
  428. for (NodeType *N : post_order(&Graph)) {
  429. if (N->getKind() == NodeKind::PiBlock) {
  430. // Put members of the pi-block right after the pi-block itself, for
  431. // convenience.
  432. const NodeListType &PiBlockMembers = getNodesInPiBlock(*N);
  433. llvm::append_range(NodesInPO, PiBlockMembers);
  434. }
  435. NodesInPO.push_back(N);
  436. }
  437. size_t OldSize = Graph.Nodes.size();
  438. Graph.Nodes.clear();
  439. append_range(Graph.Nodes, reverse(NodesInPO));
  440. if (Graph.Nodes.size() != OldSize)
  441. assert(false &&
  442. "Expected the number of nodes to stay the same after the sort");
  443. }
  444. template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>;
  445. template class llvm::DependenceGraphInfo<DDGNode>;