123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954 |
- //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file implements a profile inference algorithm. Given an incomplete and
- // possibly imprecise block counts, the algorithm reconstructs realistic block
- // and edge counts that satisfy flow conservation rules, while minimally modify
- // input block counts.
- //
- //===----------------------------------------------------------------------===//
- #include "llvm/Transforms/Utils/SampleProfileInference.h"
- #include "llvm/ADT/BitVector.h"
- #include "llvm/Support/Debug.h"
- #include <queue>
- #include <set>
- using namespace llvm;
- #define DEBUG_TYPE "sample-profile-inference"
- namespace {
- /// A value indicating an infinite flow/capacity/weight of a block/edge.
- /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
- /// during the execution.
- static constexpr int64_t INF = ((int64_t)1) << 50;
- /// The minimum-cost maximum flow algorithm.
- ///
- /// The algorithm finds the maximum flow of minimum cost on a given (directed)
- /// network using a modified version of the classical Moore-Bellman-Ford
- /// approach. The algorithm applies a number of augmentation iterations in which
- /// flow is sent along paths of positive capacity from the source to the sink.
- /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
- /// where m is the number of edges, n is the number of vertices, and v(f) is the
- /// value of the maximum flow. However, the observed running time on typical
- /// instances is sub-quadratic, that is, o(n^2).
- ///
- /// The input is a set of edges with specified costs and capacities, and a pair
- /// of nodes (source and sink). The output is the flow along each edge of the
- /// minimum total cost respecting the given edge capacities.
- class MinCostMaxFlow {
- public:
- // Initialize algorithm's data structures for a network of a given size.
- void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
- Source = SourceNode;
- Target = SinkNode;
- Nodes = std::vector<Node>(NodeCount);
- Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
- }
- // Run the algorithm.
- int64_t run() {
- // Find an augmenting path and update the flow along the path
- size_t AugmentationIters = 0;
- while (findAugmentingPath()) {
- augmentFlowAlongPath();
- AugmentationIters++;
- }
- // Compute the total flow and its cost
- int64_t TotalCost = 0;
- int64_t TotalFlow = 0;
- for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
- for (auto &Edge : Edges[Src]) {
- if (Edge.Flow > 0) {
- TotalCost += Edge.Cost * Edge.Flow;
- if (Src == Source)
- TotalFlow += Edge.Flow;
- }
- }
- }
- LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
- << " iterations with " << TotalFlow << " total flow"
- << " of " << TotalCost << " cost\n");
- (void)TotalFlow;
- return TotalCost;
- }
- /// Adding an edge to the network with a specified capacity and a cost.
- /// Multiple edges between a pair of nodes are allowed but self-edges
- /// are not supported.
- void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
- assert(Capacity > 0 && "adding an edge of zero capacity");
- assert(Src != Dst && "loop edge are not supported");
- Edge SrcEdge;
- SrcEdge.Dst = Dst;
- SrcEdge.Cost = Cost;
- SrcEdge.Capacity = Capacity;
- SrcEdge.Flow = 0;
- SrcEdge.RevEdgeIndex = Edges[Dst].size();
- Edge DstEdge;
- DstEdge.Dst = Src;
- DstEdge.Cost = -Cost;
- DstEdge.Capacity = 0;
- DstEdge.Flow = 0;
- DstEdge.RevEdgeIndex = Edges[Src].size();
- Edges[Src].push_back(SrcEdge);
- Edges[Dst].push_back(DstEdge);
- }
- /// Adding an edge to the network of infinite capacity and a given cost.
- void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
- addEdge(Src, Dst, INF, Cost);
- }
- /// Get the total flow from a given source node.
- /// Returns a list of pairs (target node, amount of flow to the target).
- const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
- std::vector<std::pair<uint64_t, int64_t>> Flow;
- for (auto &Edge : Edges[Src]) {
- if (Edge.Flow > 0)
- Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
- }
- return Flow;
- }
- /// Get the total flow between a pair of nodes.
- int64_t getFlow(uint64_t Src, uint64_t Dst) const {
- int64_t Flow = 0;
- for (auto &Edge : Edges[Src]) {
- if (Edge.Dst == Dst) {
- Flow += Edge.Flow;
- }
- }
- return Flow;
- }
- /// A cost of increasing a block's count by one.
- static constexpr int64_t AuxCostInc = 10;
- /// A cost of decreasing a block's count by one.
- static constexpr int64_t AuxCostDec = 20;
- /// A cost of increasing a count of zero-weight block by one.
- static constexpr int64_t AuxCostIncZero = 11;
- /// A cost of increasing the entry block's count by one.
- static constexpr int64_t AuxCostIncEntry = 40;
- /// A cost of decreasing the entry block's count by one.
- static constexpr int64_t AuxCostDecEntry = 10;
- /// A cost of taking an unlikely jump.
- static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
- private:
- /// Check for existence of an augmenting path with a positive capacity.
- bool findAugmentingPath() {
- // Initialize data structures
- for (auto &Node : Nodes) {
- Node.Distance = INF;
- Node.ParentNode = uint64_t(-1);
- Node.ParentEdgeIndex = uint64_t(-1);
- Node.Taken = false;
- }
- std::queue<uint64_t> Queue;
- Queue.push(Source);
- Nodes[Source].Distance = 0;
- Nodes[Source].Taken = true;
- while (!Queue.empty()) {
- uint64_t Src = Queue.front();
- Queue.pop();
- Nodes[Src].Taken = false;
- // Although the residual network contains edges with negative costs
- // (in particular, backward edges), it can be shown that there are no
- // negative-weight cycles and the following two invariants are maintained:
- // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
- // where Dist is the length of the shortest path between two nodes. This
- // allows to prune the search-space of the path-finding algorithm using
- // the following early-stop criteria:
- // -- If we find a path with zero-distance from Source to Target, stop the
- // search, as the path is the shortest since Dist[Source, Target] >= 0;
- // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
- // process node V, as it is guaranteed _not_ to be on a shortest path
- // from Source to Target; it follows from inequalities
- // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
- // >= Dist[Source, V]
- if (Nodes[Target].Distance == 0)
- break;
- if (Nodes[Src].Distance > Nodes[Target].Distance)
- continue;
- // Process adjacent edges
- for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
- auto &Edge = Edges[Src][EdgeIdx];
- if (Edge.Flow < Edge.Capacity) {
- uint64_t Dst = Edge.Dst;
- int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
- if (Nodes[Dst].Distance > NewDistance) {
- // Update the distance and the parent node/edge
- Nodes[Dst].Distance = NewDistance;
- Nodes[Dst].ParentNode = Src;
- Nodes[Dst].ParentEdgeIndex = EdgeIdx;
- // Add the node to the queue, if it is not there yet
- if (!Nodes[Dst].Taken) {
- Queue.push(Dst);
- Nodes[Dst].Taken = true;
- }
- }
- }
- }
- }
- return Nodes[Target].Distance != INF;
- }
- /// Update the current flow along the augmenting path.
- void augmentFlowAlongPath() {
- // Find path capacity
- int64_t PathCapacity = INF;
- uint64_t Now = Target;
- while (Now != Source) {
- uint64_t Pred = Nodes[Now].ParentNode;
- auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
- PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow);
- Now = Pred;
- }
- assert(PathCapacity > 0 && "found an incorrect augmenting path");
- // Update the flow along the path
- Now = Target;
- while (Now != Source) {
- uint64_t Pred = Nodes[Now].ParentNode;
- auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
- auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
- Edge.Flow += PathCapacity;
- RevEdge.Flow -= PathCapacity;
- Now = Pred;
- }
- }
- /// A node in a flow network.
- struct Node {
- /// The cost of the cheapest path from the source to the current node.
- int64_t Distance;
- /// The node preceding the current one in the path.
- uint64_t ParentNode;
- /// The index of the edge between ParentNode and the current node.
- uint64_t ParentEdgeIndex;
- /// An indicator of whether the current node is in a queue.
- bool Taken;
- };
- /// An edge in a flow network.
- struct Edge {
- /// The cost of the edge.
- int64_t Cost;
- /// The capacity of the edge.
- int64_t Capacity;
- /// The current flow on the edge.
- int64_t Flow;
- /// The destination node of the edge.
- uint64_t Dst;
- /// The index of the reverse edge between Dst and the current node.
- uint64_t RevEdgeIndex;
- };
- /// The set of network nodes.
- std::vector<Node> Nodes;
- /// The set of network edges.
- std::vector<std::vector<Edge>> Edges;
- /// Source node of the flow.
- uint64_t Source;
- /// Target (sink) node of the flow.
- uint64_t Target;
- };
- /// A post-processing adjustment of control flow. It applies two steps by
- /// rerouting some flow and making it more realistic:
- ///
- /// - First, it removes all isolated components ("islands") with a positive flow
- /// that are unreachable from the entry block. For every such component, we
- /// find the shortest from the entry to an exit passing through the component,
- /// and increase the flow by one unit along the path.
- ///
- /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
- /// with no sampled counts. Then it rebalnces the flow that goes through such
- /// a subgraph so that each branch is taken with probability 50%.
- /// An unknown subgraph is such that for every two nodes u and v:
- /// - u dominates v and u is not unknown;
- /// - v post-dominates u; and
- /// - all inner-nodes of all (u,v)-paths are unknown.
- ///
- class FlowAdjuster {
- public:
- FlowAdjuster(FlowFunction &Func) : Func(Func) {
- assert(Func.Blocks[Func.Entry].isEntry() &&
- "incorrect index of the entry block");
- }
- // Run the post-processing
- void run() {
- /// Adjust the flow to get rid of isolated components.
- joinIsolatedComponents();
- /// Rebalance the flow inside unknown subgraphs.
- rebalanceUnknownSubgraphs();
- }
- private:
- void joinIsolatedComponents() {
- // Find blocks that are reachable from the source
- auto Visited = BitVector(NumBlocks(), false);
- findReachable(Func.Entry, Visited);
- // Iterate over all non-reachable blocks and adjust their weights
- for (uint64_t I = 0; I < NumBlocks(); I++) {
- auto &Block = Func.Blocks[I];
- if (Block.Flow > 0 && !Visited[I]) {
- // Find a path from the entry to an exit passing through the block I
- auto Path = findShortestPath(I);
- // Increase the flow along the path
- assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
- "incorrectly computed path adjusting control flow");
- Func.Blocks[Func.Entry].Flow += 1;
- for (auto &Jump : Path) {
- Jump->Flow += 1;
- Func.Blocks[Jump->Target].Flow += 1;
- // Update reachability
- findReachable(Jump->Target, Visited);
- }
- }
- }
- }
- /// Run BFS from a given block along the jumps with a positive flow and mark
- /// all reachable blocks.
- void findReachable(uint64_t Src, BitVector &Visited) {
- if (Visited[Src])
- return;
- std::queue<uint64_t> Queue;
- Queue.push(Src);
- Visited[Src] = true;
- while (!Queue.empty()) {
- Src = Queue.front();
- Queue.pop();
- for (auto Jump : Func.Blocks[Src].SuccJumps) {
- uint64_t Dst = Jump->Target;
- if (Jump->Flow > 0 && !Visited[Dst]) {
- Queue.push(Dst);
- Visited[Dst] = true;
- }
- }
- }
- }
- /// Find the shortest path from the entry block to an exit block passing
- /// through a given block.
- std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
- // A path from the entry block to BlockIdx
- auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
- // A path from BlockIdx to an exit block
- auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
- // Concatenate the two paths
- std::vector<FlowJump *> Result;
- Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
- Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
- return Result;
- }
- /// Apply the Dijkstra algorithm to find the shortest path from a given
- /// Source to a given Target block.
- /// If Target == -1, then the path ends at an exit block.
- std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
- // Quit early, if possible
- if (Source == Target)
- return std::vector<FlowJump *>();
- if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
- return std::vector<FlowJump *>();
- // Initialize data structures
- auto Distance = std::vector<int64_t>(NumBlocks(), INF);
- auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
- Distance[Source] = 0;
- std::set<std::pair<uint64_t, uint64_t>> Queue;
- Queue.insert(std::make_pair(Distance[Source], Source));
- // Run the Dijkstra algorithm
- while (!Queue.empty()) {
- uint64_t Src = Queue.begin()->second;
- Queue.erase(Queue.begin());
- // If we found a solution, quit early
- if (Src == Target ||
- (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
- break;
- for (auto Jump : Func.Blocks[Src].SuccJumps) {
- uint64_t Dst = Jump->Target;
- int64_t JumpDist = jumpDistance(Jump);
- if (Distance[Dst] > Distance[Src] + JumpDist) {
- Queue.erase(std::make_pair(Distance[Dst], Dst));
- Distance[Dst] = Distance[Src] + JumpDist;
- Parent[Dst] = Jump;
- Queue.insert(std::make_pair(Distance[Dst], Dst));
- }
- }
- }
- // If Target is not provided, find the closest exit block
- if (Target == AnyExitBlock) {
- for (uint64_t I = 0; I < NumBlocks(); I++) {
- if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
- if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
- Target = I;
- }
- }
- }
- }
- assert(Parent[Target] != nullptr && "a path does not exist");
- // Extract the constructed path
- std::vector<FlowJump *> Result;
- uint64_t Now = Target;
- while (Now != Source) {
- assert(Now == Parent[Now]->Target && "incorrect parent jump");
- Result.push_back(Parent[Now]);
- Now = Parent[Now]->Source;
- }
- // Reverse the path, since it is extracted from Target to Source
- std::reverse(Result.begin(), Result.end());
- return Result;
- }
- /// A distance of a path for a given jump.
- /// In order to incite the path to use blocks/jumps with large positive flow,
- /// and avoid changing branch probability of outgoing edges drastically,
- /// set the distance as follows:
- /// if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0)
- /// if Block.Weight > 0, then distance = 1
- /// otherwise distance >> 1
- int64_t jumpDistance(FlowJump *Jump) const {
- int64_t BaseDistance = 100;
- if (Jump->IsUnlikely)
- return MinCostMaxFlow::AuxCostUnlikely;
- if (Jump->Flow > 0)
- return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0);
- if (Func.Blocks[Jump->Target].Weight > 0)
- return BaseDistance;
- return BaseDistance * (NumBlocks() + 1);
- };
- uint64_t NumBlocks() const { return Func.Blocks.size(); }
- /// Rebalance unknown subgraphs so that the flow is split evenly across the
- /// outgoing branches of every block of the subgraph. The method iterates over
- /// blocks with known weight and identifies unknown subgraphs rooted at the
- /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
- void rebalanceUnknownSubgraphs() {
- // Try to find unknown subgraphs from each block
- for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
- auto SrcBlock = &Func.Blocks[I];
- // Verify if rebalancing rooted at SrcBlock is feasible
- if (!canRebalanceAtRoot(SrcBlock))
- continue;
- // Find an unknown subgraphs starting at SrcBlock. Along the way,
- // fill in known destinations and intermediate unknown blocks.
- std::vector<FlowBlock *> UnknownBlocks;
- std::vector<FlowBlock *> KnownDstBlocks;
- findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
- // Verify if rebalancing of the subgraph is feasible. If the search is
- // successful, find the unique destination block (which can be null)
- FlowBlock *DstBlock = nullptr;
- if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
- DstBlock))
- continue;
- // We cannot rebalance subgraphs containing cycles among unknown blocks
- if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
- continue;
- // Rebalance the flow
- rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
- }
- }
- /// Verify if rebalancing rooted at a given block is possible.
- bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
- // Do not attempt to find unknown subgraphs from an unknown or a
- // zero-flow block
- if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
- return false;
- // Do not attempt to process subgraphs from a block w/o unknown sucessors
- bool HasUnknownSuccs = false;
- for (auto Jump : SrcBlock->SuccJumps) {
- if (Func.Blocks[Jump->Target].UnknownWeight) {
- HasUnknownSuccs = true;
- break;
- }
- }
- if (!HasUnknownSuccs)
- return false;
- return true;
- }
- /// Find an unknown subgraph starting at block SrcBlock. The method sets
- /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
- void findUnknownSubgraph(const FlowBlock *SrcBlock,
- std::vector<FlowBlock *> &KnownDstBlocks,
- std::vector<FlowBlock *> &UnknownBlocks) {
- // Run BFS from SrcBlock and make sure all paths are going through unknown
- // blocks and end at a non-unknown DstBlock
- auto Visited = BitVector(NumBlocks(), false);
- std::queue<uint64_t> Queue;
- Queue.push(SrcBlock->Index);
- Visited[SrcBlock->Index] = true;
- while (!Queue.empty()) {
- auto &Block = Func.Blocks[Queue.front()];
- Queue.pop();
- // Process blocks reachable from Block
- for (auto Jump : Block.SuccJumps) {
- // If Jump can be ignored, skip it
- if (ignoreJump(SrcBlock, nullptr, Jump))
- continue;
- uint64_t Dst = Jump->Target;
- // If Dst has been visited, skip Jump
- if (Visited[Dst])
- continue;
- // Process block Dst
- Visited[Dst] = true;
- if (!Func.Blocks[Dst].UnknownWeight) {
- KnownDstBlocks.push_back(&Func.Blocks[Dst]);
- } else {
- Queue.push(Dst);
- UnknownBlocks.push_back(&Func.Blocks[Dst]);
- }
- }
- }
- }
- /// Verify if rebalancing of the subgraph is feasible. If the checks are
- /// successful, set the unique destination block, DstBlock (can be null).
- bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
- const std::vector<FlowBlock *> &KnownDstBlocks,
- const std::vector<FlowBlock *> &UnknownBlocks,
- FlowBlock *&DstBlock) {
- // If the list of unknown blocks is empty, we don't need rebalancing
- if (UnknownBlocks.empty())
- return false;
- // If there are multiple known sinks, we can't rebalance
- if (KnownDstBlocks.size() > 1)
- return false;
- DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
- // Verify sinks of the subgraph
- for (auto Block : UnknownBlocks) {
- if (Block->SuccJumps.empty()) {
- // If there are multiple (known and unknown) sinks, we can't rebalance
- if (DstBlock != nullptr)
- return false;
- continue;
- }
- size_t NumIgnoredJumps = 0;
- for (auto Jump : Block->SuccJumps) {
- if (ignoreJump(SrcBlock, DstBlock, Jump))
- NumIgnoredJumps++;
- }
- // If there is a non-sink block in UnknownBlocks with all jumps ignored,
- // then we can't rebalance
- if (NumIgnoredJumps == Block->SuccJumps.size())
- return false;
- }
- return true;
- }
- /// Decide whether the Jump is ignored while processing an unknown subgraphs
- /// rooted at basic block SrcBlock with the destination block, DstBlock.
- bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
- const FlowJump *Jump) {
- // Ignore unlikely jumps with zero flow
- if (Jump->IsUnlikely && Jump->Flow == 0)
- return true;
- auto JumpSource = &Func.Blocks[Jump->Source];
- auto JumpTarget = &Func.Blocks[Jump->Target];
- // Do not ignore jumps coming into DstBlock
- if (DstBlock != nullptr && JumpTarget == DstBlock)
- return false;
- // Ignore jumps out of SrcBlock to known blocks
- if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
- return true;
- // Ignore jumps to known blocks with zero flow
- if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
- return true;
- return false;
- }
- /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
- /// UnknownBlocks in the topological order (so that all jumps are "forward").
- bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
- std::vector<FlowBlock *> &UnknownBlocks) {
- // Extract local in-degrees in the considered subgraph
- auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
- auto fillInDegree = [&](const FlowBlock *Block) {
- for (auto Jump : Block->SuccJumps) {
- if (ignoreJump(SrcBlock, DstBlock, Jump))
- continue;
- LocalInDegree[Jump->Target]++;
- }
- };
- fillInDegree(SrcBlock);
- for (auto Block : UnknownBlocks) {
- fillInDegree(Block);
- }
- // A loop containing SrcBlock
- if (LocalInDegree[SrcBlock->Index] > 0)
- return false;
- std::vector<FlowBlock *> AcyclicOrder;
- std::queue<uint64_t> Queue;
- Queue.push(SrcBlock->Index);
- while (!Queue.empty()) {
- FlowBlock *Block = &Func.Blocks[Queue.front()];
- Queue.pop();
- // Stop propagation once we reach DstBlock, if any
- if (DstBlock != nullptr && Block == DstBlock)
- break;
- // Keep an acyclic order of unknown blocks
- if (Block->UnknownWeight && Block != SrcBlock)
- AcyclicOrder.push_back(Block);
- // Add to the queue all successors with zero local in-degree
- for (auto Jump : Block->SuccJumps) {
- if (ignoreJump(SrcBlock, DstBlock, Jump))
- continue;
- uint64_t Dst = Jump->Target;
- LocalInDegree[Dst]--;
- if (LocalInDegree[Dst] == 0) {
- Queue.push(Dst);
- }
- }
- }
- // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
- // of all blocks
- if (UnknownBlocks.size() != AcyclicOrder.size())
- return false;
- UnknownBlocks = AcyclicOrder;
- return true;
- }
- /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
- /// having UnknownBlocks intermediate blocks.
- void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
- const FlowBlock *DstBlock,
- const std::vector<FlowBlock *> &UnknownBlocks) {
- assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
- // Ditribute flow from the source block
- uint64_t BlockFlow = 0;
- // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
- for (auto Jump : SrcBlock->SuccJumps) {
- if (ignoreJump(SrcBlock, DstBlock, Jump))
- continue;
- BlockFlow += Jump->Flow;
- }
- rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
- // Ditribute flow from the remaining blocks
- for (auto Block : UnknownBlocks) {
- assert(Block->UnknownWeight && "incorrect unknown subgraph");
- uint64_t BlockFlow = 0;
- // Block's flow is the sum of incoming flows
- for (auto Jump : Block->PredJumps) {
- BlockFlow += Jump->Flow;
- }
- Block->Flow = BlockFlow;
- rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
- }
- }
- /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
- /// and ending at DstBlock.
- void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
- const FlowBlock *Block, uint64_t BlockFlow) {
- // Process all successor jumps and update corresponding flow values
- size_t BlockDegree = 0;
- for (auto Jump : Block->SuccJumps) {
- if (ignoreJump(SrcBlock, DstBlock, Jump))
- continue;
- BlockDegree++;
- }
- // If all successor jumps of the block are ignored, skip it
- if (DstBlock == nullptr && BlockDegree == 0)
- return;
- assert(BlockDegree > 0 && "all outgoing jumps are ignored");
- // Each of the Block's successors gets the following amount of flow.
- // Rounding the value up so that all flow is propagated
- uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
- for (auto Jump : Block->SuccJumps) {
- if (ignoreJump(SrcBlock, DstBlock, Jump))
- continue;
- uint64_t Flow = std::min(SuccFlow, BlockFlow);
- Jump->Flow = Flow;
- BlockFlow -= Flow;
- }
- assert(BlockFlow == 0 && "not all flow is propagated");
- }
- /// A constant indicating an arbitrary exit block of a function.
- static constexpr uint64_t AnyExitBlock = uint64_t(-1);
- /// The function.
- FlowFunction &Func;
- };
- /// Initializing flow network for a given function.
- ///
- /// Every block is split into three nodes that are responsible for (i) an
- /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
- /// reduction of the block weight.
- void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
- uint64_t NumBlocks = Func.Blocks.size();
- assert(NumBlocks > 1 && "Too few blocks in a function");
- LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
- // Pre-process data: make sure the entry weight is at least 1
- if (Func.Blocks[Func.Entry].Weight == 0) {
- Func.Blocks[Func.Entry].Weight = 1;
- }
- // Introducing dummy source/sink pairs to allow flow circulation.
- // The nodes corresponding to blocks of Func have indicies in the range
- // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
- uint64_t S = 3 * NumBlocks;
- uint64_t T = S + 1;
- uint64_t S1 = S + 2;
- uint64_t T1 = S + 3;
- Network.initialize(3 * NumBlocks + 4, S1, T1);
- // Create three nodes for every block of the function
- for (uint64_t B = 0; B < NumBlocks; B++) {
- auto &Block = Func.Blocks[B];
- assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
- "non-zero weight of a block w/o weight except for an entry");
- // Split every block into two nodes
- uint64_t Bin = 3 * B;
- uint64_t Bout = 3 * B + 1;
- uint64_t Baux = 3 * B + 2;
- if (Block.Weight > 0) {
- Network.addEdge(S1, Bout, Block.Weight, 0);
- Network.addEdge(Bin, T1, Block.Weight, 0);
- }
- // Edges from S and to T
- assert((!Block.isEntry() || !Block.isExit()) &&
- "a block cannot be an entry and an exit");
- if (Block.isEntry()) {
- Network.addEdge(S, Bin, 0);
- } else if (Block.isExit()) {
- Network.addEdge(Bout, T, 0);
- }
- // An auxiliary node to allow increase/reduction of block counts:
- // We assume that decreasing block counts is more expensive than increasing,
- // and thus, setting separate costs here. In the future we may want to tune
- // the relative costs so as to maximize the quality of generated profiles.
- int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
- int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
- if (Block.UnknownWeight) {
- // Do not penalize changing weights of blocks w/o known profile count
- AuxCostInc = 0;
- AuxCostDec = 0;
- } else {
- // Increasing the count for "cold" blocks with zero initial count is more
- // expensive than for "hot" ones
- if (Block.Weight == 0) {
- AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
- }
- // Modifying the count of the entry block is expensive
- if (Block.isEntry()) {
- AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
- AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
- }
- }
- // For blocks with self-edges, do not penalize a reduction of the count,
- // as all of the increase can be attributed to the self-edge
- if (Block.HasSelfEdge) {
- AuxCostDec = 0;
- }
- Network.addEdge(Bin, Baux, AuxCostInc);
- Network.addEdge(Baux, Bout, AuxCostInc);
- if (Block.Weight > 0) {
- Network.addEdge(Bout, Baux, AuxCostDec);
- Network.addEdge(Baux, Bin, AuxCostDec);
- }
- }
- // Creating edges for every jump
- for (auto &Jump : Func.Jumps) {
- uint64_t Src = Jump.Source;
- uint64_t Dst = Jump.Target;
- if (Src != Dst) {
- uint64_t SrcOut = 3 * Src + 1;
- uint64_t DstIn = 3 * Dst;
- uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
- Network.addEdge(SrcOut, DstIn, Cost);
- }
- }
- // Make sure we have a valid flow circulation
- Network.addEdge(T, S, 0);
- }
- /// Extract resulting block and edge counts from the flow network.
- void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
- uint64_t NumBlocks = Func.Blocks.size();
- // Extract resulting block counts
- for (uint64_t Src = 0; Src < NumBlocks; Src++) {
- auto &Block = Func.Blocks[Src];
- uint64_t SrcOut = 3 * Src + 1;
- int64_t Flow = 0;
- for (auto &Adj : Network.getFlow(SrcOut)) {
- uint64_t DstIn = Adj.first;
- int64_t DstFlow = Adj.second;
- bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
- if (!IsAuxNode || Block.HasSelfEdge) {
- Flow += DstFlow;
- }
- }
- Block.Flow = Flow;
- assert(Flow >= 0 && "negative block flow");
- }
- // Extract resulting jump counts
- for (auto &Jump : Func.Jumps) {
- uint64_t Src = Jump.Source;
- uint64_t Dst = Jump.Target;
- int64_t Flow = 0;
- if (Src != Dst) {
- uint64_t SrcOut = 3 * Src + 1;
- uint64_t DstIn = 3 * Dst;
- Flow = Network.getFlow(SrcOut, DstIn);
- } else {
- uint64_t SrcOut = 3 * Src + 1;
- uint64_t SrcAux = 3 * Src + 2;
- int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
- if (AuxFlow > 0)
- Flow = AuxFlow;
- }
- Jump.Flow = Flow;
- assert(Flow >= 0 && "negative jump flow");
- }
- }
- #ifndef NDEBUG
- /// Verify that the computed flow values satisfy flow conservation rules
- void verifyWeights(const FlowFunction &Func) {
- const uint64_t NumBlocks = Func.Blocks.size();
- auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
- auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
- for (auto &Jump : Func.Jumps) {
- InFlow[Jump.Target] += Jump.Flow;
- OutFlow[Jump.Source] += Jump.Flow;
- }
- uint64_t TotalInFlow = 0;
- uint64_t TotalOutFlow = 0;
- for (uint64_t I = 0; I < NumBlocks; I++) {
- auto &Block = Func.Blocks[I];
- if (Block.isEntry()) {
- TotalInFlow += Block.Flow;
- assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
- } else if (Block.isExit()) {
- TotalOutFlow += Block.Flow;
- assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
- } else {
- assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
- assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
- }
- }
- assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
- // Verify that there are no isolated flow components
- // One could modify FlowFunction to hold edges indexed by the sources, which
- // will avoid a creation of the object
- auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
- for (auto &Jump : Func.Jumps) {
- if (Jump.Flow > 0) {
- PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
- }
- }
- // Run BFS from the source along edges with positive flow
- std::queue<uint64_t> Queue;
- auto Visited = BitVector(NumBlocks, false);
- Queue.push(Func.Entry);
- Visited[Func.Entry] = true;
- while (!Queue.empty()) {
- uint64_t Src = Queue.front();
- Queue.pop();
- for (uint64_t Dst : PositiveFlowEdges[Src]) {
- if (!Visited[Dst]) {
- Queue.push(Dst);
- Visited[Dst] = true;
- }
- }
- }
- // Verify that every block that has a positive flow is reached from the source
- // along edges with a positive flow
- for (uint64_t I = 0; I < NumBlocks; I++) {
- auto &Block = Func.Blocks[I];
- assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
- }
- }
- #endif
- } // end of anonymous namespace
- /// Apply the profile inference algorithm for a given flow function
- void llvm::applyFlowInference(FlowFunction &Func) {
- // Create and apply an inference network model
- auto InferenceNetwork = MinCostMaxFlow();
- initializeNetwork(InferenceNetwork, Func);
- InferenceNetwork.run();
- // Extract flow values for every block and every edge
- extractWeights(InferenceNetwork, Func);
- // Post-processing adjustments to the flow
- auto Adjuster = FlowAdjuster(Func);
- Adjuster.run();
- #ifndef NDEBUG
- // Verify the result
- verifyWeights(Func);
- #endif
- }
|