//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a profile inference algorithm. Given incomplete and
// possibly imprecise block counts, the algorithm reconstructs realistic block
// and edge counts that satisfy flow conservation rules, while minimally
// modifying the input block counts.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <queue>
#include <set>
#include <stack>

using namespace llvm;

#define DEBUG_TYPE "sample-profile-inference"

namespace {

static cl::opt<bool> SampleProfileEvenFlowDistribution(
    "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden,
    cl::desc("Try to evenly distribute flow when there are multiple equally "
             "likely options."));

static cl::opt<bool> SampleProfileRebalanceUnknown(
    "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden,
    cl::desc("Evenly re-distribute flow among unknown subgraphs."));

static cl::opt<bool> SampleProfileJoinIslands(
    "sample-profile-join-islands", cl::init(true), cl::Hidden,
    cl::desc("Join isolated components having positive flow."));

static cl::opt<unsigned> SampleProfileProfiCostBlockInc(
    "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden,
    cl::desc("The cost of increasing a block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockDec(
    "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden,
    cl::desc("The cost of decreasing a block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockEntryInc(
    "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,
    cl::desc("The cost of increasing the entry block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockEntryDec(
    "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,
    cl::desc("The cost of decreasing the entry block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockZeroInc(
    "sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden,
    cl::desc("The cost of increasing the count of a zero-weight block by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc(
    "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,
    cl::desc("The cost of increasing an unknown block's count by one."));
/// A value indicating an infinite flow/capacity/weight of a block/edge.
/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
/// during the execution.
static constexpr int64_t INF = ((int64_t)1) << 50;
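// A quick sanity check on the headroom (illustrative, not used by the code):
// INF == 2^50, so even a sum of a few thousand INF-valued capacities stays
// far below numeric_limits<int64_t>::max() == 2^63 - 1 and cannot overflow.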
/// The minimum-cost maximum flow algorithm.
///
/// The algorithm finds the maximum flow of minimum cost on a given (directed)
/// network using a modified version of the classical Moore-Bellman-Ford
/// approach. The algorithm applies a number of augmentation iterations in which
/// flow is sent along paths of positive capacity from the source to the sink.
/// The worst-case time complexity of the implementation is O(v(f)*m*n), where
/// m is the number of edges, n is the number of vertices, and v(f) is the
/// value of the maximum flow. However, the observed running time on typical
/// instances is sub-quadratic, that is, o(n^2).
///
/// The input is a set of edges with specified costs and capacities, and a pair
/// of nodes (source and sink). The output is the flow along each edge of the
/// minimum total cost respecting the given edge capacities.
class MinCostMaxFlow {
public:
  MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}

  // Initialize the algorithm's data structures for a network of a given size.
  void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
    Source = SourceNode;
    Target = SinkNode;

    Nodes = std::vector<Node>(NodeCount);
    Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
    if (Params.EvenFlowDistribution)
      AugmentingEdges =
          std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
  }

  // Run the algorithm.
  int64_t run() {
    LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");

    // Iteratively find an augmentation path/dag in the network and send the
    // flow along its edges
    size_t AugmentationIters = applyFlowAugmentation();

    // Compute the total flow and its cost
    int64_t TotalCost = 0;
    int64_t TotalFlow = 0;
    for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
      for (auto &Edge : Edges[Src]) {
        if (Edge.Flow > 0) {
          TotalCost += Edge.Cost * Edge.Flow;
          if (Src == Source)
            TotalFlow += Edge.Flow;
        }
      }
    }
    LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
                      << " iterations with " << TotalFlow << " total flow"
                      << " of " << TotalCost << " cost\n");
    (void)TotalFlow;
    (void)AugmentationIters;
    return TotalCost;
  }

  /// Add an edge to the network with a specified capacity and cost.
  /// Multiple edges between a pair of nodes are allowed, but self-edges
  /// are not supported.
  void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
    assert(Capacity > 0 && "adding an edge of zero capacity");
    assert(Src != Dst && "self-edges are not supported");

    Edge SrcEdge;
    SrcEdge.Dst = Dst;
    SrcEdge.Cost = Cost;
    SrcEdge.Capacity = Capacity;
    SrcEdge.Flow = 0;
    SrcEdge.RevEdgeIndex = Edges[Dst].size();

    Edge DstEdge;
    DstEdge.Dst = Src;
    DstEdge.Cost = -Cost;
    DstEdge.Capacity = 0;
    DstEdge.Flow = 0;
    DstEdge.RevEdgeIndex = Edges[Src].size();

    Edges[Src].push_back(SrcEdge);
    Edges[Dst].push_back(DstEdge);
  }
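  // Illustrative note: each addEdge call creates a forward/reverse residual
  // pair. For example, addEdge(0, 1, /*Capacity=*/5, /*Cost=*/2) appends a
  // forward edge 0->1 (capacity 5, cost 2) and a reverse edge 1->0 (capacity
  // 0, cost -2); pushing F units forward drives the reverse edge's flow to
  // -F, which lets a later augmentation cancel the decision at negative cost.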
  /// Add an edge of infinite capacity and a given cost to the network.
  void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
    addEdge(Src, Dst, INF, Cost);
  }

  /// Get the total flow from a given source node.
  /// Returns a list of pairs (target node, amount of flow to the target).
  const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
    std::vector<std::pair<uint64_t, int64_t>> Flow;
    for (const auto &Edge : Edges[Src]) {
      if (Edge.Flow > 0)
        Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
    }
    return Flow;
  }

  /// Get the total flow between a pair of nodes.
  int64_t getFlow(uint64_t Src, uint64_t Dst) const {
    int64_t Flow = 0;
    for (const auto &Edge : Edges[Src]) {
      if (Edge.Dst == Dst) {
        Flow += Edge.Flow;
      }
    }
    return Flow;
  }
private:
  /// Iteratively find an augmentation path/dag in the network and send the
  /// flow along its edges. The method returns the number of applied iterations.
  size_t applyFlowAugmentation() {
    size_t AugmentationIters = 0;
    while (findAugmentingPath()) {
      uint64_t PathCapacity = computeAugmentingPathCapacity();
      while (PathCapacity > 0) {
        bool Progress = false;
        if (Params.EvenFlowDistribution) {
          // Identify node/edge candidates for augmentation
          identifyShortestEdges(PathCapacity);

          // Find an augmenting DAG
          auto AugmentingOrder = findAugmentingDAG();

          // Apply the DAG augmentation
          Progress = augmentFlowAlongDAG(AugmentingOrder);
          PathCapacity = computeAugmentingPathCapacity();
        }

        if (!Progress) {
          augmentFlowAlongPath(PathCapacity);
          PathCapacity = 0;
        }

        AugmentationIters++;
      }
    }
    return AugmentationIters;
  }
  /// Compute the capacity of the canonical augmenting path. If the path is
  /// saturated (that is, no flow can be sent along the path), then return 0.
  uint64_t computeAugmentingPathCapacity() {
    uint64_t PathCapacity = INF;
    uint64_t Now = Target;
    while (Now != Source) {
      uint64_t Pred = Nodes[Now].ParentNode;
      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];

      assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
      uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
      PathCapacity = std::min(PathCapacity, EdgeCapacity);

      Now = Pred;
    }
    return PathCapacity;
  }
  /// Check for the existence of an augmenting path with a positive capacity.
  bool findAugmentingPath() {
    // Initialize data structures
    for (auto &Node : Nodes) {
      Node.Distance = INF;
      Node.ParentNode = uint64_t(-1);
      Node.ParentEdgeIndex = uint64_t(-1);
      Node.Taken = false;
    }

    std::queue<uint64_t> Queue;
    Queue.push(Source);
    Nodes[Source].Distance = 0;
    Nodes[Source].Taken = true;
    while (!Queue.empty()) {
      uint64_t Src = Queue.front();
      Queue.pop();
      Nodes[Src].Taken = false;
      // Although the residual network contains edges with negative costs
      // (in particular, backward edges), it can be shown that there are no
      // negative-weight cycles and the following two invariants are maintained:
      // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
      // where Dist is the length of the shortest path between two nodes. This
      // allows pruning the search-space of the path-finding algorithm using
      // the following early-stop criteria:
      // -- If we find a path with zero-distance from Source to Target, stop the
      //    search, as the path is the shortest since Dist[Source, Target] >= 0;
      // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
      //    process node V, as it is guaranteed _not_ to be on a shortest path
      //    from Source to Target; it follows from inequalities
      //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
      //                         >= Dist[Source, V]
      if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0)
        break;
      if (Nodes[Src].Distance > Nodes[Target].Distance)
        continue;

      // Process adjacent edges
      for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
        auto &Edge = Edges[Src][EdgeIdx];
        if (Edge.Flow < Edge.Capacity) {
          uint64_t Dst = Edge.Dst;
          int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
          if (Nodes[Dst].Distance > NewDistance) {
            // Update the distance and the parent node/edge
            Nodes[Dst].Distance = NewDistance;
            Nodes[Dst].ParentNode = Src;
            Nodes[Dst].ParentEdgeIndex = EdgeIdx;
            // Add the node to the queue, if it is not there yet
            if (!Nodes[Dst].Taken) {
              Queue.push(Dst);
              Nodes[Dst].Taken = true;
            }
          }
        }
      }
    }

    return Nodes[Target].Distance != INF;
  }
  /// Update the current flow along the augmenting path.
  void augmentFlowAlongPath(uint64_t PathCapacity) {
    assert(PathCapacity > 0 && "found an incorrect augmenting path");
    uint64_t Now = Target;
    while (Now != Source) {
      uint64_t Pred = Nodes[Now].ParentNode;
      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
      auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];

      Edge.Flow += PathCapacity;
      RevEdge.Flow -= PathCapacity;

      Now = Pred;
    }
  }
  /// Find an augmenting DAG order using a modified version of DFS in which we
  /// can visit a node multiple times. In the DFS search, when scanning each
  /// edge out of a node, continue the search at the Edge.Dst endpoint if it has
  /// not been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
  /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
  /// It returns an augmenting order (Taken nodes in decreasing Finish time)
  /// that starts with Source and ends with Target.
  std::vector<uint64_t> findAugmentingDAG() {
    // We use a stack-based implementation of DFS to avoid recursion.
    // Defining DFS data structures:
    // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
    //  - we are currently visiting Nodes[NodeIdx] and
    //  - the next edge to scan is Edges[NodeIdx][EdgeIdx]
    typedef std::pair<uint64_t, uint64_t> StackItemType;
    std::stack<StackItemType> Stack;
    std::vector<uint64_t> AugmentingOrder;

    // Phase 0: Initialize Node attributes and Time for the DFS run
    for (auto &Node : Nodes) {
      Node.Discovery = 0;
      Node.Finish = 0;
      Node.NumCalls = 0;
      Node.Taken = false;
    }
    uint64_t Time = 0;
    // Mark Target as Taken
    // Taken attribute will be propagated backwards from Target towards Source
    Nodes[Target].Taken = true;

    // Phase 1: Start DFS traversal from Source
    Stack.emplace(Source, 0);
    Nodes[Source].Discovery = ++Time;
    while (!Stack.empty()) {
      auto NodeIdx = Stack.top().first;
      auto EdgeIdx = Stack.top().second;

      // If we haven't scanned all edges out of NodeIdx, continue scanning
      if (EdgeIdx < Edges[NodeIdx].size()) {
        auto &Edge = Edges[NodeIdx][EdgeIdx];
        auto &Dst = Nodes[Edge.Dst];
        Stack.top().second++;

        if (Edge.OnShortestPath) {
          // If we haven't seen Edge.Dst so far, continue the DFS search there
          if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
            Dst.Discovery = ++Time;
            Stack.emplace(Edge.Dst, 0);
            Dst.NumCalls++;
          } else if (Dst.Taken && Dst.Finish != 0) {
            // Otherwise, if Edge.Dst already has a path to Target, then so
            // does NodeIdx
            Nodes[NodeIdx].Taken = true;
          }
        }
      } else {
        // If we are done scanning all edges out of NodeIdx
        Stack.pop();
        // If we haven't found a path from NodeIdx to Target, forget about it
        if (!Nodes[NodeIdx].Taken) {
          Nodes[NodeIdx].Discovery = 0;
        } else {
          // If we have found a path from NodeIdx to Target, then finish NodeIdx
          // and propagate the Taken flag to the DFS parent, unless at Source
          Nodes[NodeIdx].Finish = ++Time;
          // NodeIdx == Source if and only if the stack is empty
          if (NodeIdx != Source) {
            assert(!Stack.empty() && "empty stack while running dfs");
            Nodes[Stack.top().first].Taken = true;
          }
          AugmentingOrder.push_back(NodeIdx);
        }
      }
    }
    // Nodes are collected in increasing Finish time, so reverse the order to
    // get decreasing Finish time
    std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());

    // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
    for (size_t Src : AugmentingOrder) {
      AugmentingEdges[Src].clear();
      for (auto &Edge : Edges[Src]) {
        uint64_t Dst = Edge.Dst;
        if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
            Nodes[Dst].Finish < Nodes[Src].Finish) {
          AugmentingEdges[Src].push_back(&Edge);
        }
      }
      assert((Src == Target || !AugmentingEdges[Src].empty()) &&
             "incorrectly constructed augmenting edges");
    }

    return AugmentingOrder;
  }
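  // Illustrative trace (made-up indices): in a diamond network with
  // shortest-path edges Source->{A,B} and {A,B}->Target, one valid DFS yields
  // Finish times Target < A < B < Source, so the returned order is
  // [Source, B, A, Target], and AugmentingEdges[Source] holds both out-edges.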
  /// Update the current flow along the given (acyclic) subgraph specified by
  /// the vertex order, AugmentingOrder. The objective is to send as much flow
  /// as possible while evenly distributing flow among successors of each node.
  /// After the update at least one edge is saturated.
  bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
    // Phase 0: Initialization
    for (uint64_t Src : AugmentingOrder) {
      Nodes[Src].FracFlow = 0;
      Nodes[Src].IntFlow = 0;
      for (auto &Edge : AugmentingEdges[Src]) {
        Edge->AugmentedFlow = 0;
      }
    }

    // Phase 1: Send a unit of fractional flow along the DAG
    uint64_t MaxFlowAmount = INF;
    Nodes[Source].FracFlow = 1.0;
    for (uint64_t Src : AugmentingOrder) {
      assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
             "incorrectly computed fractional flow");
      // Distribute flow evenly among successors of Src
      uint64_t Degree = AugmentingEdges[Src].size();
      for (auto &Edge : AugmentingEdges[Src]) {
        double EdgeFlow = Nodes[Src].FracFlow / Degree;
        Nodes[Edge->Dst].FracFlow += EdgeFlow;
        if (Edge->Capacity == INF)
          continue;
        uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
        MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
      }
    }
    // Stop early if we cannot send any (integral) flow from Source to Target
    if (MaxFlowAmount == 0)
      return false;

    // Phase 2: Send an integral flow of MaxFlowAmount
    Nodes[Source].IntFlow = MaxFlowAmount;
    for (uint64_t Src : AugmentingOrder) {
      if (Src == Target)
        break;
      // Distribute flow evenly among successors of Src, rounding up to make
      // sure all flow is sent
      uint64_t Degree = AugmentingEdges[Src].size();
      // We are guaranteed that Node[Src].IntFlow <= SuccFlow * Degree
      uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
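      // Illustrative example: with IntFlow = 5 and Degree = 2, SuccFlow is
      // ceil(5/2) = 3, so (capacity permitting) the two successor edges
      // receive 3 and 2 units, respectively.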
      for (auto &Edge : AugmentingEdges[Src]) {
        uint64_t Dst = Edge->Dst;
        uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
        EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
        Nodes[Dst].IntFlow += EdgeFlow;
        Nodes[Src].IntFlow -= EdgeFlow;
        Edge->AugmentedFlow += EdgeFlow;
      }
    }
    assert(Nodes[Target].IntFlow <= MaxFlowAmount);
    Nodes[Target].IntFlow = 0;

    // Phase 3: Send excess flow back, traversing the nodes backwards.
    // Because of rounding, not all flow can be sent along the edges of Src.
    // Hence, send the remaining flow back to maintain flow conservation.
    for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
      uint64_t Src = AugmentingOrder[Idx - 1];
      // Try to send excess flow back along each edge.
      // Make sure we only send back flow we just augmented (AugmentedFlow).
      for (auto &Edge : AugmentingEdges[Src]) {
        uint64_t Dst = Edge->Dst;
        if (Nodes[Dst].IntFlow == 0)
          continue;
        uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
        Nodes[Dst].IntFlow -= EdgeFlow;
        Nodes[Src].IntFlow += EdgeFlow;
        Edge->AugmentedFlow -= EdgeFlow;
      }
    }

    // Phase 4: Update flow values along all edges
    bool HasSaturatedEdges = false;
    for (uint64_t Src : AugmentingOrder) {
      // Verify that we have sent all the excess flow from the node
      assert(Src == Source || Nodes[Src].IntFlow == 0);
      for (auto &Edge : AugmentingEdges[Src]) {
        assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
        // Update flow values along the edge and its reverse copy
        auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
        Edge->Flow += Edge->AugmentedFlow;
        RevEdge.Flow -= Edge->AugmentedFlow;
        if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
          HasSaturatedEdges = true;
      }
    }

    // The augmentation is successful iff at least one edge becomes saturated
    return HasSaturatedEdges;
  }
  /// Identify candidate (shortest) edges for augmentation.
  void identifyShortestEdges(uint64_t PathCapacity) {
    assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
    // To make sure the augmentation DAG contains only edges with large residual
    // capacity, we prune all edges whose capacity is below a fraction of
    // the capacity of the augmented path.
    // (All edges of the path itself are always in the DAG)
    uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));

    // Decide which edges are on a shortest path from Source to Target
    for (size_t Src = 0; Src < Nodes.size(); Src++) {
      // An edge cannot be augmenting if the endpoint has large distance
      if (Nodes[Src].Distance > Nodes[Target].Distance)
        continue;

      for (auto &Edge : Edges[Src]) {
        uint64_t Dst = Edge.Dst;
        Edge.OnShortestPath =
            Src != Target && Dst != Source &&
            Nodes[Dst].Distance <= Nodes[Target].Distance &&
            Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
            Edge.Capacity > Edge.Flow &&
            uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
      }
    }
  }
  /// Maximum number of DFS iterations for DAG finding.
  static constexpr uint64_t MaxDfsCalls = 10;

  /// A node in a flow network.
  struct Node {
    /// The cost of the cheapest path from the source to the current node.
    int64_t Distance;
    /// The node preceding the current one in the path.
    uint64_t ParentNode;
    /// The index of the edge between ParentNode and the current node.
    uint64_t ParentEdgeIndex;
    /// An indicator of whether the current node is in a queue.
    bool Taken;

    /// Data fields utilized in DAG-augmentation:
    /// Fractional flow.
    double FracFlow;
    /// Integral flow.
    uint64_t IntFlow;
    /// Discovery time.
    uint64_t Discovery;
    /// Finish time.
    uint64_t Finish;
    /// The number of times the node has been visited by the DFS.
    uint64_t NumCalls;
  };

  /// An edge in a flow network.
  struct Edge {
    /// The cost of the edge.
    int64_t Cost;
    /// The capacity of the edge.
    int64_t Capacity;
    /// The current flow on the edge.
    int64_t Flow;
    /// The destination node of the edge.
    uint64_t Dst;
    /// The index of the reverse edge between Dst and the current node.
    uint64_t RevEdgeIndex;

    /// Data fields utilized in DAG-augmentation:
    /// Whether the edge is currently on a shortest path from Source to Target.
    bool OnShortestPath;
    /// Extra flow along the edge.
    uint64_t AugmentedFlow;
  };

  /// The set of network nodes.
  std::vector<Node> Nodes;
  /// The set of network edges.
  std::vector<std::vector<Edge>> Edges;
  /// Source node of the flow.
  uint64_t Source;
  /// Target (sink) node of the flow.
  uint64_t Target;
  /// Augmenting edges.
  std::vector<std::vector<Edge *>> AugmentingEdges;
  /// Params for flow computation.
  const ProfiParams &Params;
};
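// Illustrative usage sketch (made-up node indices, capacities, and costs):
// a three-node network where two parallel 1->2 edges compete on cost.
//
//   MinCostMaxFlow Flow(Params);
//   Flow.initialize(/*NodeCount=*/3, /*SourceNode=*/0, /*SinkNode=*/2);
//   Flow.addEdge(0, 1, /*Capacity=*/10, /*Cost=*/1);
//   Flow.addEdge(1, 2, /*Capacity=*/4, /*Cost=*/0);
//   Flow.addEdge(1, 2, /*Capacity=*/10, /*Cost=*/5);
//   int64_t TotalCost = Flow.run();
//   // The maximum flow is 10: all of it crosses 0->1 (cost 10*1), 4 units
//   // take the free 1->2 edge, and the remaining 6 take the cost-5 edge,
//   // so TotalCost == 10 + 0 + 30 == 40. Flow.getFlow(0) returns {(1, 10)}.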
/// A post-processing adjustment of the control flow. It applies two steps by
/// rerouting some flow and making it more realistic:
///
/// - First, it removes all isolated components ("islands") with a positive flow
///   that are unreachable from the entry block. For every such component, we
///   find the shortest path from the entry to an exit passing through the
///   component, and increase the flow by one unit along the path.
///
/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
///   with no sampled counts. Then it rebalances the flow that goes through such
///   a subgraph so that each branch is taken with probability 50%.
///   An unknown subgraph is defined by a pair of nodes u and v such that:
///     - u dominates v and u is not unknown;
///     - v post-dominates u; and
///     - all inner nodes of all (u,v)-paths are unknown.
///
class FlowAdjuster {
public:
  FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)
      : Params(Params), Func(Func) {}
  /// Apply the post-processing.
  void run() {
    if (Params.JoinIslands) {
      // Adjust the flow to get rid of isolated components
      joinIsolatedComponents();
    }

    if (Params.RebalanceUnknown) {
      // Rebalance the flow inside unknown subgraphs
      rebalanceUnknownSubgraphs();
    }
  }

private:
  void joinIsolatedComponents() {
    // Find blocks that are reachable from the source
    auto Visited = BitVector(NumBlocks(), false);
    findReachable(Func.Entry, Visited);

    // Iterate over all non-reachable blocks and adjust their weights
    for (uint64_t I = 0; I < NumBlocks(); I++) {
      auto &Block = Func.Blocks[I];
      if (Block.Flow > 0 && !Visited[I]) {
        // Find a path from the entry to an exit passing through the block I
        auto Path = findShortestPath(I);
        // Increase the flow along the path
        assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
               "incorrectly computed path adjusting control flow");
        Func.Blocks[Func.Entry].Flow += 1;
        for (auto &Jump : Path) {
          Jump->Flow += 1;
          Func.Blocks[Jump->Target].Flow += 1;
          // Update reachability
          findReachable(Jump->Target, Visited);
        }
      }
    }
  }
  /// Run BFS from a given block along the jumps with a positive flow and mark
  /// all reachable blocks.
  void findReachable(uint64_t Src, BitVector &Visited) {
    if (Visited[Src])
      return;
    std::queue<uint64_t> Queue;
    Queue.push(Src);
    Visited[Src] = true;
    while (!Queue.empty()) {
      Src = Queue.front();
      Queue.pop();
      for (auto *Jump : Func.Blocks[Src].SuccJumps) {
        uint64_t Dst = Jump->Target;
        if (Jump->Flow > 0 && !Visited[Dst]) {
          Queue.push(Dst);
          Visited[Dst] = true;
        }
      }
    }
  }

  /// Find the shortest path from the entry block to an exit block passing
  /// through a given block.
  std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
    // A path from the entry block to BlockIdx
    auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
    // A path from BlockIdx to an exit block
    auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);

    // Concatenate the two paths
    std::vector<FlowJump *> Result;
    Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
    Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
    return Result;
  }

  /// Apply the Dijkstra algorithm to find the shortest path from a given
  /// Source to a given Target block.
  /// If Target == -1, then the path ends at an exit block.
  std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
    // Quit early, if possible
    if (Source == Target)
      return std::vector<FlowJump *>();
    if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
      return std::vector<FlowJump *>();

    // Initialize data structures
    auto Distance = std::vector<int64_t>(NumBlocks(), INF);
    auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
    Distance[Source] = 0;
    std::set<std::pair<uint64_t, uint64_t>> Queue;
    Queue.insert(std::make_pair(Distance[Source], Source));

    // Run the Dijkstra algorithm
    while (!Queue.empty()) {
      uint64_t Src = Queue.begin()->second;
      Queue.erase(Queue.begin());

      // If we found a solution, quit early
      if (Src == Target ||
          (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
        break;

      for (auto *Jump : Func.Blocks[Src].SuccJumps) {
        uint64_t Dst = Jump->Target;
        int64_t JumpDist = jumpDistance(Jump);
        if (Distance[Dst] > Distance[Src] + JumpDist) {
          Queue.erase(std::make_pair(Distance[Dst], Dst));

          Distance[Dst] = Distance[Src] + JumpDist;
          Parent[Dst] = Jump;

          Queue.insert(std::make_pair(Distance[Dst], Dst));
        }
      }
    }
    // If Target is not provided, find the closest exit block
    if (Target == AnyExitBlock) {
      for (uint64_t I = 0; I < NumBlocks(); I++) {
        if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
          if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
            Target = I;
          }
        }
      }
    }
    assert(Parent[Target] != nullptr && "a path does not exist");

    // Extract the constructed path
    std::vector<FlowJump *> Result;
    uint64_t Now = Target;
    while (Now != Source) {
      assert(Now == Parent[Now]->Target && "incorrect parent jump");
      Result.push_back(Parent[Now]);
      Now = Parent[Now]->Source;
    }
    // Reverse the path, since it is extracted from Target to Source
    std::reverse(Result.begin(), Result.end());
    return Result;
  }
  /// A distance of a path for a given jump.
  /// In order to encourage the path to use blocks/jumps with large positive
  /// flow, and to avoid changing branch probabilities of outgoing edges
  /// drastically, set the jump distance so as:
  ///   - to minimize the number of unlikely jumps used and, subject to that,
  ///   - to minimize the number of Flow == 0 jumps used and, subject to that,
  ///   - to minimize the total multiplicative Flow increase for the remaining
  ///     edges.
  /// To capture this objective with integer distances, we round off fractional
  /// parts to a multiple of 1 / BaseDistance.
  int64_t jumpDistance(FlowJump *Jump) const {
    if (Jump->IsUnlikely)
      return Params.CostUnlikely;
    uint64_t BaseDistance =
        std::max(FlowAdjuster::MinBaseDistance,
                 std::min(Func.Blocks[Func.Entry].Flow,
                          Params.CostUnlikely / (2 * (NumBlocks() + 1))));
    if (Jump->Flow > 0)
      return BaseDistance + BaseDistance / Jump->Flow;
    return 2 * BaseDistance * (NumBlocks() + 1);
  }
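  // Worked example with made-up numbers: if BaseDistance resolves to 10000,
  // a jump carrying Flow = 4 gets distance 10000 + 10000/4 = 12500, while a
  // zero-flow jump in a 10-block function gets 2 * 10000 * 11 = 220000, so
  // Dijkstra overwhelmingly prefers routes through jumps that already carry
  // flow.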
  uint64_t NumBlocks() const { return Func.Blocks.size(); }
  /// Rebalance unknown subgraphs so that the flow is split evenly across the
  /// outgoing branches of every block of the subgraph. The method iterates over
  /// blocks with known weight and identifies unknown subgraphs rooted at the
  /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
  void rebalanceUnknownSubgraphs() {
    // Try to find unknown subgraphs from each block
    for (const FlowBlock &SrcBlock : Func.Blocks) {
      // Verify if rebalancing rooted at SrcBlock is feasible
      if (!canRebalanceAtRoot(&SrcBlock))
        continue;

      // Find an unknown subgraph starting at SrcBlock. Along the way,
      // fill in known destinations and intermediate unknown blocks.
      std::vector<FlowBlock *> UnknownBlocks;
      std::vector<FlowBlock *> KnownDstBlocks;
      findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);

      // Verify if rebalancing of the subgraph is feasible. If the search is
      // successful, find the unique destination block (which can be null)
      FlowBlock *DstBlock = nullptr;
      if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
                                DstBlock))
        continue;

      // We cannot rebalance subgraphs containing cycles among unknown blocks
      if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
        continue;

      // Rebalance the flow
      rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
    }
  }
  /// Verify if rebalancing rooted at a given block is possible.
  bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
    // Do not attempt to find unknown subgraphs from an unknown or a
    // zero-flow block
    if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)
      return false;

    // Do not attempt to process subgraphs from a block w/o unknown successors
    bool HasUnknownSuccs = false;
    for (auto *Jump : SrcBlock->SuccJumps) {
      if (Func.Blocks[Jump->Target].HasUnknownWeight) {
        HasUnknownSuccs = true;
        break;
      }
    }
    if (!HasUnknownSuccs)
      return false;

    return true;
  }
  /// Find an unknown subgraph starting at block SrcBlock. The method sets
  /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
  void findUnknownSubgraph(const FlowBlock *SrcBlock,
                           std::vector<FlowBlock *> &KnownDstBlocks,
                           std::vector<FlowBlock *> &UnknownBlocks) {
    // Run BFS from SrcBlock and make sure all paths are going through unknown
    // blocks and end at a known DstBlock
    auto Visited = BitVector(NumBlocks(), false);
    std::queue<uint64_t> Queue;

    Queue.push(SrcBlock->Index);
    Visited[SrcBlock->Index] = true;
    while (!Queue.empty()) {
      auto &Block = Func.Blocks[Queue.front()];
      Queue.pop();

      // Process blocks reachable from Block
      for (auto *Jump : Block.SuccJumps) {
        // If Jump can be ignored, skip it
        if (ignoreJump(SrcBlock, nullptr, Jump))
          continue;

        uint64_t Dst = Jump->Target;
        // If Dst has been visited, skip Jump
        if (Visited[Dst])
          continue;

        // Process block Dst
        Visited[Dst] = true;
        if (!Func.Blocks[Dst].HasUnknownWeight) {
          KnownDstBlocks.push_back(&Func.Blocks[Dst]);
        } else {
          Queue.push(Dst);
          UnknownBlocks.push_back(&Func.Blocks[Dst]);
        }
      }
    }
  }
  /// Verify if rebalancing of the subgraph is feasible. If the checks are
  /// successful, set the unique destination block, DstBlock (can be null).
  bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
                            const std::vector<FlowBlock *> &KnownDstBlocks,
                            const std::vector<FlowBlock *> &UnknownBlocks,
                            FlowBlock *&DstBlock) {
    // If the list of unknown blocks is empty, we don't need rebalancing
    if (UnknownBlocks.empty())
      return false;

    // If there are multiple known sinks, we can't rebalance
    if (KnownDstBlocks.size() > 1)
      return false;
    DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();

    // Verify sinks of the subgraph
    for (auto *Block : UnknownBlocks) {
      if (Block->SuccJumps.empty()) {
        // If there are multiple (known and unknown) sinks, we can't rebalance
        if (DstBlock != nullptr)
          return false;
        continue;
      }
      size_t NumIgnoredJumps = 0;
      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          NumIgnoredJumps++;
      }
      // If there is a non-sink block in UnknownBlocks with all jumps ignored,
      // then we can't rebalance
      if (NumIgnoredJumps == Block->SuccJumps.size())
        return false;
    }

    return true;
  }
  /// Decide whether the Jump is ignored while processing an unknown subgraph
  /// rooted at basic block SrcBlock with the destination block, DstBlock.
  bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                  const FlowJump *Jump) {
    // Ignore unlikely jumps with zero flow
    if (Jump->IsUnlikely && Jump->Flow == 0)
      return true;

    auto JumpSource = &Func.Blocks[Jump->Source];
    auto JumpTarget = &Func.Blocks[Jump->Target];

    // Do not ignore jumps coming into DstBlock
    if (DstBlock != nullptr && JumpTarget == DstBlock)
      return false;

    // Ignore jumps out of SrcBlock to known blocks
    if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
      return true;

    // Ignore jumps to known blocks with zero flow
    if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
      return true;

    return false;
  }
  /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
  /// UnknownBlocks in the topological order (so that all jumps are "forward").
  bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                         std::vector<FlowBlock *> &UnknownBlocks) {
    // Extract local in-degrees in the considered subgraph
    auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
    auto fillInDegree = [&](const FlowBlock *Block) {
      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          continue;
        LocalInDegree[Jump->Target]++;
      }
    };
    fillInDegree(SrcBlock);
    for (auto *Block : UnknownBlocks) {
      fillInDegree(Block);
    }
    // A loop containing SrcBlock
    if (LocalInDegree[SrcBlock->Index] > 0)
      return false;

    std::vector<FlowBlock *> AcyclicOrder;
    std::queue<uint64_t> Queue;
    Queue.push(SrcBlock->Index);
    while (!Queue.empty()) {
      FlowBlock *Block = &Func.Blocks[Queue.front()];
      Queue.pop();
      // Stop propagation once we reach DstBlock, if any
      if (DstBlock != nullptr && Block == DstBlock)
        break;

      // Keep an acyclic order of unknown blocks
      if (Block->HasUnknownWeight && Block != SrcBlock)
        AcyclicOrder.push_back(Block);

      // Add to the queue all successors with zero local in-degree
      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          continue;
        uint64_t Dst = Jump->Target;
        LocalInDegree[Dst]--;
        if (LocalInDegree[Dst] == 0) {
          Queue.push(Dst);
        }
      }
    }

    // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
    // of all blocks
    if (UnknownBlocks.size() != AcyclicOrder.size())
      return false;
    UnknownBlocks = AcyclicOrder;
    return true;
  }
  /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
  /// having UnknownBlocks intermediate blocks.
  void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
                                const FlowBlock *DstBlock,
                                const std::vector<FlowBlock *> &UnknownBlocks) {
    assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");

    // Distribute flow from the source block
    uint64_t BlockFlow = 0;
    // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
    for (auto *Jump : SrcBlock->SuccJumps) {
      if (ignoreJump(SrcBlock, DstBlock, Jump))
        continue;
      BlockFlow += Jump->Flow;
    }
    rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);

    // Distribute flow from the remaining blocks
    for (auto *Block : UnknownBlocks) {
      assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
      uint64_t BlockFlow = 0;
      // Block's flow is the sum of incoming flows
      for (auto *Jump : Block->PredJumps) {
        BlockFlow += Jump->Flow;
      }
      Block->Flow = BlockFlow;
      rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
    }
  }
  /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
  /// and ending at DstBlock.
  void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                      const FlowBlock *Block, uint64_t BlockFlow) {
    // Process all successor jumps and update corresponding flow values
    size_t BlockDegree = 0;
    for (auto *Jump : Block->SuccJumps) {
      if (ignoreJump(SrcBlock, DstBlock, Jump))
        continue;
      BlockDegree++;
    }
    // If all successor jumps of the block are ignored, skip it
    if (DstBlock == nullptr && BlockDegree == 0)
      return;
    assert(BlockDegree > 0 && "all outgoing jumps are ignored");

    // Each of the Block's successors gets the following amount of flow.
    // Rounding the value up so that all flow is propagated
    uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
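    // Illustrative example: with BlockFlow = 5 and BlockDegree = 2, SuccFlow
    // is ceil(5/2) = 3, so the two jumps receive 3 and 2 units and the assert
    // below (all flow propagated) holds.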
    for (auto *Jump : Block->SuccJumps) {
      if (ignoreJump(SrcBlock, DstBlock, Jump))
        continue;
      uint64_t Flow = std::min(SuccFlow, BlockFlow);
      Jump->Flow = Flow;
      BlockFlow -= Flow;
    }
    assert(BlockFlow == 0 && "not all flow is propagated");
  }
  /// A constant indicating an arbitrary exit block of a function.
  static constexpr uint64_t AnyExitBlock = uint64_t(-1);
  /// Minimum BaseDistance for the jump distance values in island joining.
  static constexpr uint64_t MinBaseDistance = 10000;

  /// Params for flow computation.
  const ProfiParams &Params;
  /// The function.
  FlowFunction &Func;
};
std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
                                             const FlowBlock &Block);
std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
                                            const FlowJump &Jump);

/// Initializing the flow network for a given function.
///
/// Every block is split into two nodes that are responsible for (i) an
/// incoming flow, (ii) an outgoing flow; they penalize an increase or a
/// reduction of the block weight.
void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
                       FlowFunction &Func) {
  uint64_t NumBlocks = Func.Blocks.size();
  assert(NumBlocks > 1 && "Too few blocks in a function");
  uint64_t NumJumps = Func.Jumps.size();
  assert(NumJumps > 0 && "Too few jumps in a function");

  // Introducing dummy source/sink pairs to allow flow circulation.
  // The nodes corresponding to blocks of the function have indices in
  // the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the
  // next four values.
  uint64_t S = 2 * NumBlocks;
  uint64_t T = S + 1;
  uint64_t S1 = S + 2;
  uint64_t T1 = S + 3;
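  // Illustrative layout: with NumBlocks = 3, block B is split into nodes
  // Bin = 2*B and Bout = 2*B + 1 (indices 0..5), followed by the dummy nodes
  // S = 6, T = 7, S1 = 8, and T1 = 9.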
  Network.initialize(2 * NumBlocks + 4, S1, T1);

  // Initialize nodes of the flow network
  for (uint64_t B = 0; B < NumBlocks; B++) {
    auto &Block = Func.Blocks[B];

    // Split every block into two auxiliary nodes to allow
    // increase/reduction of the block count.
    uint64_t Bin = 2 * B;
    uint64_t Bout = 2 * B + 1;

    // Edges from S and to T
    if (Block.isEntry()) {
      Network.addEdge(S, Bin, 0);
    } else if (Block.isExit()) {
      Network.addEdge(Bout, T, 0);
    }

    // Assign costs for increasing/decreasing the block counts
    auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);

    // Add the corresponding edges to the network
    Network.addEdge(Bin, Bout, AuxCostInc);
    if (Block.Weight > 0) {
      Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);
      Network.addEdge(S1, Bout, Block.Weight, 0);
      Network.addEdge(Bin, T1, Block.Weight, 0);
    }
  }

  // Initialize edges of the flow network
  for (uint64_t J = 0; J < NumJumps; J++) {
    auto &Jump = Func.Jumps[J];

    // Get the endpoints corresponding to the jump
    uint64_t Jin = 2 * Jump.Source + 1;
    uint64_t Jout = 2 * Jump.Target;

    // Assign costs for increasing/decreasing the jump counts
    auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);

    // Add the corresponding edges to the network
    Network.addEdge(Jin, Jout, AuxCostInc);
    if (Jump.Weight > 0) {
      Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
      Network.addEdge(S1, Jout, Jump.Weight, 0);
      Network.addEdge(Jin, T1, Jump.Weight, 0);
    }
  }

  // Make sure we have a valid flow circulation
  Network.addEdge(T, S, 0);
}
/// Assign costs for increasing/decreasing the block counts.
std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
                                             const FlowBlock &Block) {
  // Modifying the weight of an unlikely block is expensive
  if (Block.IsUnlikely)
    return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);

  // Assign default values for the costs
  int64_t CostInc = Params.CostBlockInc;
  int64_t CostDec = Params.CostBlockDec;
  // Update the costs depending on the block metadata
  if (Block.HasUnknownWeight) {
    CostInc = Params.CostBlockUnknownInc;
    CostDec = 0;
  } else {
    // Increasing the count for "cold" blocks with zero initial count is more
    // expensive than for "hot" ones
    if (Block.Weight == 0)
      CostInc = Params.CostBlockZeroInc;
    // Modifying the count of the entry block is expensive
    if (Block.isEntry()) {
      CostInc = Params.CostBlockEntryInc;
      CostDec = Params.CostBlockEntryDec;
    }
  }
  return std::make_pair(CostInc, CostDec);
}
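// For instance, with the default flag values defined above: a known non-entry
// block costs (10, 20) to increase/decrease, a known zero-weight block
// (11, 20), the entry block (40, 10), and an unknown block (0, 0), making
// unknown counts essentially free to adjust.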
/// Assign costs for increasing/decreasing the jump counts.
std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
                                            const FlowJump &Jump) {
  // Modifying the weight of an unlikely jump is expensive
  if (Jump.IsUnlikely)
    return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);

  // Assign default values for the costs
  int64_t CostInc = Params.CostJumpInc;
  int64_t CostDec = Params.CostJumpDec;
  // Update the costs depending on the jump metadata
  if (Jump.Source + 1 == Jump.Target) {
    // Adjusting the fall-through branch
    CostInc = Params.CostJumpFTInc;
    CostDec = Params.CostJumpFTDec;
  }
  if (Jump.HasUnknownWeight) {
    // The cost is different for fall-through and non-fall-through branches
    if (Jump.Source + 1 == Jump.Target)
      CostInc = Params.CostJumpUnknownFTInc;
    else
      CostInc = Params.CostJumpUnknownInc;
    CostDec = 0;
  } else {
    assert(Jump.Weight > 0 && "found a zero-weight jump with a known weight");
  }
  return std::make_pair(CostInc, CostDec);
}
/// Extract the resulting block and edge counts from the flow network.
void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
                    FlowFunction &Func) {
  uint64_t NumBlocks = Func.Blocks.size();
  uint64_t NumJumps = Func.Jumps.size();

  // Extract the resulting jump counts
  for (uint64_t J = 0; J < NumJumps; J++) {
    auto &Jump = Func.Jumps[J];
    uint64_t SrcOut = 2 * Jump.Source + 1;
    uint64_t DstIn = 2 * Jump.Target;

    int64_t Flow = 0;
    int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
    if (Jump.Source != Jump.Target)
      Flow = int64_t(Jump.Weight) + AuxFlow;
    else
      Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);

    Jump.Flow = Flow;
    assert(Flow >= 0 && "negative jump flow");
  }

  // Extract the resulting block counts
  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
  for (auto &Jump : Func.Jumps) {
    InFlow[Jump.Target] += Jump.Flow;
    OutFlow[Jump.Source] += Jump.Flow;
  }
  for (uint64_t B = 0; B < NumBlocks; B++) {
    auto &Block = Func.Blocks[B];
    Block.Flow = std::max(OutFlow[B], InFlow[B]);
  }
}
#ifndef NDEBUG
/// Verify that the provided block/jump weights are as expected.
void verifyInput(const FlowFunction &Func) {
  // Verify the entry block
  assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
  for (size_t I = 1; I < Func.Blocks.size(); I++) {
    assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
  }
  // Verify CFG jumps
  for (auto &Block : Func.Blocks) {
    assert((!Block.isEntry() || !Block.isExit()) &&
           "a block cannot be an entry and an exit");
  }
  // Verify input block weights
  for (auto &Block : Func.Blocks) {
    assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
           "non-zero weight of a block w/o weight except for an entry");
  }
  // Verify input jump weights
  for (auto &Jump : Func.Jumps) {
    assert((!Jump.HasUnknownWeight || Jump.Weight == 0) &&
           "non-zero weight of a jump w/o weight");
  }
}
/// Verify that the computed flow values satisfy flow conservation rules.
void verifyOutput(const FlowFunction &Func) {
  const uint64_t NumBlocks = Func.Blocks.size();
  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
  for (const auto &Jump : Func.Jumps) {
    InFlow[Jump.Target] += Jump.Flow;
    OutFlow[Jump.Source] += Jump.Flow;
  }

  uint64_t TotalInFlow = 0;
  uint64_t TotalOutFlow = 0;
  for (uint64_t I = 0; I < NumBlocks; I++) {
    auto &Block = Func.Blocks[I];
    if (Block.isEntry()) {
      TotalInFlow += Block.Flow;
      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
    } else if (Block.isExit()) {
      TotalOutFlow += Block.Flow;
      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
    } else {
      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
    }
  }
  assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");

  // Verify that there are no isolated flow components.
  // One could modify FlowFunction to hold edges indexed by the sources, which
  // would avoid creating this object here.
  auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
  for (const auto &Jump : Func.Jumps) {
    if (Jump.Flow > 0) {
      PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
    }
  }

  // Run BFS from the source along edges with positive flow
  std::queue<uint64_t> Queue;
  auto Visited = BitVector(NumBlocks, false);
  Queue.push(Func.Entry);
  Visited[Func.Entry] = true;
  while (!Queue.empty()) {
    uint64_t Src = Queue.front();
    Queue.pop();
    for (uint64_t Dst : PositiveFlowEdges[Src]) {
      if (!Visited[Dst]) {
        Queue.push(Dst);
        Visited[Dst] = true;
      }
    }
  }

  // Verify that every block with a positive flow is reached from the source
  // along edges with a positive flow
  for (uint64_t I = 0; I < NumBlocks; I++) {
    auto &Block = Func.Blocks[I];
    assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
  }
}
#endif
} // end of anonymous namespace

/// Apply the profile inference algorithm for a given function.
void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) {
#ifndef NDEBUG
  // Verify the input data
  verifyInput(Func);
#endif

  // Create and apply an inference network model
  auto InferenceNetwork = MinCostMaxFlow(Params);
  initializeNetwork(Params, InferenceNetwork, Func);
  InferenceNetwork.run();

  // Extract flow values for every block and every edge
  extractWeights(Params, InferenceNetwork, Func);

  // Post-processing adjustments to the flow
  auto Adjuster = FlowAdjuster(Params, Func);
  Adjuster.run();

#ifndef NDEBUG
  // Verify the result
  verifyOutput(Func);
#endif
}
/// Apply the profile inference algorithm for a given flow function.
void llvm::applyFlowInference(FlowFunction &Func) {
  ProfiParams Params;
  // Set the params from the command-line flags.
  Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;
  Params.RebalanceUnknown = SampleProfileRebalanceUnknown;
  Params.JoinIslands = SampleProfileJoinIslands;
  Params.CostBlockInc = SampleProfileProfiCostBlockInc;
  Params.CostBlockDec = SampleProfileProfiCostBlockDec;
  Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc;
  Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec;
  Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc;
  Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc;

  applyFlowInference(Params, Func);
}
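// Illustrative caller-side sketch (assumes the FlowFunction/FlowBlock/FlowJump
// definitions from SampleProfileInference.h; the field names below match their
// uses in this file). A two-block CFG with one sampled count missing; the
// inference fills in consistent flow values:
//
//   FlowFunction Func;
//   Func.Blocks = std::vector<FlowBlock>(2);
//   Func.Blocks[0].Index = 0;               // entry (no preds), sampled 100
//   Func.Blocks[0].Weight = 100;
//   Func.Blocks[1].Index = 1;               // exit (no succs), not sampled
//   Func.Blocks[1].HasUnknownWeight = true;
//   FlowJump Jump;
//   Jump.Source = 0;
//   Jump.Target = 1;
//   Jump.HasUnknownWeight = true;
//   Func.Jumps.push_back(Jump);
//   Func.Blocks[0].SuccJumps.push_back(&Func.Jumps[0]);
//   Func.Blocks[1].PredJumps.push_back(&Func.Jumps[0]);
//   Func.Entry = 0;
//   applyFlowInference(Func);
//   // Flow conservation now holds: Func.Jumps[0].Flow and both blocks' Flow
//   // fields are set consistently (here, all equal to the entry count).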