MergedLoadStoreMotion.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
  1. //===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. //! \file
  10. //! This pass performs merges of loads and stores on both sides of a
  11. // diamond (hammock). It hoists the loads and sinks the stores.
  12. //
  13. // The algorithm iteratively hoists two loads to the same address out of a
  14. // diamond (hammock) and merges them into a single load in the header. Similar
  15. // it sinks and merges two stores to the tail block (footer). The algorithm
  16. // iterates over the instructions of one side of the diamond and attempts to
  17. // find a matching load/store on the other side. New tail/footer block may be
  18. // insterted if the tail/footer block has more predecessors (not only the two
  19. // predecessors that are forming the diamond). It hoists / sinks when it thinks
  20. // it safe to do so. This optimization helps with eg. hiding load latencies,
  21. // triggering if-conversion, and reducing static code size.
  22. //
  23. // NOTE: This code no longer performs load hoisting, it is subsumed by GVNHoist.
  24. //
  25. //===----------------------------------------------------------------------===//
  26. //
  27. //
  28. // Example:
  29. // Diamond shaped code before merge:
  30. //
  31. // header:
  32. // br %cond, label %if.then, label %if.else
  33. // + +
  34. // + +
  35. // + +
  36. // if.then: if.else:
  37. // %lt = load %addr_l %le = load %addr_l
  38. // <use %lt> <use %le>
  39. // <...> <...>
  40. // store %st, %addr_s store %se, %addr_s
  41. // br label %if.end br label %if.end
  42. // + +
  43. // + +
  44. // + +
  45. // if.end ("footer"):
  46. // <...>
  47. //
  48. // Diamond shaped code after merge:
  49. //
  50. // header:
  51. // %l = load %addr_l
  52. // br %cond, label %if.then, label %if.else
  53. // + +
  54. // + +
  55. // + +
  56. // if.then: if.else:
  57. // <use %l> <use %l>
  58. // <...> <...>
  59. // br label %if.end br label %if.end
  60. // + +
  61. // + +
  62. // + +
  63. // if.end ("footer"):
  64. // %s.sink = phi [%st, if.then], [%se, if.else]
  65. // <...>
  66. // store %s.sink, %addr_s
  67. // <...>
  68. //
  69. //
  70. //===----------------------- TODO -----------------------------------------===//
  71. //
  72. // 1) Generalize to regions other than diamonds
  73. // 2) Be more aggressive merging memory operations
  74. // Note that both changes require register pressure control
  75. //
  76. //===----------------------------------------------------------------------===//
  77. #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
  78. #include "llvm/ADT/Statistic.h"
  79. #include "llvm/Analysis/AliasAnalysis.h"
  80. #include "llvm/Analysis/CFG.h"
  81. #include "llvm/Analysis/GlobalsModRef.h"
  82. #include "llvm/Analysis/Loads.h"
  83. #include "llvm/Analysis/ValueTracking.h"
  84. #include "llvm/IR/Metadata.h"
  85. #include "llvm/InitializePasses.h"
  86. #include "llvm/Support/Debug.h"
  87. #include "llvm/Support/raw_ostream.h"
  88. #include "llvm/Transforms/Scalar.h"
  89. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  90. using namespace llvm;
  91. #define DEBUG_TYPE "mldst-motion"
  92. namespace {
  93. //===----------------------------------------------------------------------===//
  94. // MergedLoadStoreMotion Pass
  95. //===----------------------------------------------------------------------===//
  96. class MergedLoadStoreMotion {
  97. AliasAnalysis *AA = nullptr;
  98. // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
  99. // where Size0 and Size1 are the #instructions on the two sides of
  100. // the diamond. The constant chosen here is arbitrary. Compiler Time
  101. // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
  102. const int MagicCompileTimeControl = 250;
  103. const bool SplitFooterBB;
  104. public:
  105. MergedLoadStoreMotion(bool SplitFooterBB) : SplitFooterBB(SplitFooterBB) {}
  106. bool run(Function &F, AliasAnalysis &AA);
  107. private:
  108. BasicBlock *getDiamondTail(BasicBlock *BB);
  109. bool isDiamondHead(BasicBlock *BB);
  110. // Routines for sinking stores
  111. StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
  112. PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
  113. bool isStoreSinkBarrierInRange(const Instruction &Start,
  114. const Instruction &End, MemoryLocation Loc);
  115. bool canSinkStoresAndGEPs(StoreInst *S0, StoreInst *S1) const;
  116. void sinkStoresAndGEPs(BasicBlock *BB, StoreInst *SinkCand,
  117. StoreInst *ElseInst);
  118. bool mergeStores(BasicBlock *BB);
  119. };
  120. } // end anonymous namespace
  121. ///
  122. /// Return tail block of a diamond.
  123. ///
  124. BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) {
  125. assert(isDiamondHead(BB) && "Basic block is not head of a diamond");
  126. return BB->getTerminator()->getSuccessor(0)->getSingleSuccessor();
  127. }
  128. ///
  129. /// True when BB is the head of a diamond (hammock)
  130. ///
  131. bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
  132. if (!BB)
  133. return false;
  134. auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
  135. if (!BI || !BI->isConditional())
  136. return false;
  137. BasicBlock *Succ0 = BI->getSuccessor(0);
  138. BasicBlock *Succ1 = BI->getSuccessor(1);
  139. if (!Succ0->getSinglePredecessor())
  140. return false;
  141. if (!Succ1->getSinglePredecessor())
  142. return false;
  143. BasicBlock *Succ0Succ = Succ0->getSingleSuccessor();
  144. BasicBlock *Succ1Succ = Succ1->getSingleSuccessor();
  145. // Ignore triangles.
  146. if (!Succ0Succ || !Succ1Succ || Succ0Succ != Succ1Succ)
  147. return false;
  148. return true;
  149. }
  150. ///
  151. /// True when instruction is a sink barrier for a store
  152. /// located in Loc
  153. ///
  154. /// Whenever an instruction could possibly read or modify the
  155. /// value being stored or protect against the store from
  156. /// happening it is considered a sink barrier.
  157. ///
  158. bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
  159. const Instruction &End,
  160. MemoryLocation Loc) {
  161. for (const Instruction &Inst :
  162. make_range(Start.getIterator(), End.getIterator()))
  163. if (Inst.mayThrow())
  164. return true;
  165. return AA->canInstructionRangeModRef(Start, End, Loc, ModRefInfo::ModRef);
  166. }
  167. ///
  168. /// Check if \p BB contains a store to the same address as \p SI
  169. ///
  170. /// \return The store in \p when it is safe to sink. Otherwise return Null.
  171. ///
  172. StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
  173. StoreInst *Store0) {
  174. LLVM_DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n");
  175. BasicBlock *BB0 = Store0->getParent();
  176. for (Instruction &Inst : reverse(*BB1)) {
  177. auto *Store1 = dyn_cast<StoreInst>(&Inst);
  178. if (!Store1)
  179. continue;
  180. MemoryLocation Loc0 = MemoryLocation::get(Store0);
  181. MemoryLocation Loc1 = MemoryLocation::get(Store1);
  182. if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
  183. !isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->back(), Loc1) &&
  184. !isStoreSinkBarrierInRange(*Store0->getNextNode(), BB0->back(), Loc0)) {
  185. return Store1;
  186. }
  187. }
  188. return nullptr;
  189. }
  190. ///
  191. /// Create a PHI node in BB for the operands of S0 and S1
  192. ///
  193. PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
  194. StoreInst *S1) {
  195. // Create a phi if the values mismatch.
  196. Value *Opd1 = S0->getValueOperand();
  197. Value *Opd2 = S1->getValueOperand();
  198. if (Opd1 == Opd2)
  199. return nullptr;
  200. auto *NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
  201. &BB->front());
  202. NewPN->applyMergedLocation(S0->getDebugLoc(), S1->getDebugLoc());
  203. NewPN->addIncoming(Opd1, S0->getParent());
  204. NewPN->addIncoming(Opd2, S1->getParent());
  205. return NewPN;
  206. }
  207. ///
  208. /// Check if 2 stores can be sunk together with corresponding GEPs
  209. ///
  210. bool MergedLoadStoreMotion::canSinkStoresAndGEPs(StoreInst *S0,
  211. StoreInst *S1) const {
  212. auto *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
  213. auto *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
  214. return A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() &&
  215. (A0->getParent() == S0->getParent()) && A1->hasOneUse() &&
  216. (A1->getParent() == S1->getParent()) && isa<GetElementPtrInst>(A0);
  217. }
  218. ///
  219. /// Merge two stores to same address and sink into \p BB
  220. ///
  221. /// Also sinks GEP instruction computing the store address
  222. ///
  223. void MergedLoadStoreMotion::sinkStoresAndGEPs(BasicBlock *BB, StoreInst *S0,
  224. StoreInst *S1) {
  225. // Only one definition?
  226. auto *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
  227. auto *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
  228. LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump();
  229. dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n";
  230. dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n");
  231. // Hoist the instruction.
  232. BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
  233. // Intersect optional metadata.
  234. S0->andIRFlags(S1);
  235. S0->dropUnknownNonDebugMetadata();
  236. // Create the new store to be inserted at the join point.
  237. StoreInst *SNew = cast<StoreInst>(S0->clone());
  238. Instruction *ANew = A0->clone();
  239. SNew->insertBefore(&*InsertPt);
  240. ANew->insertBefore(SNew);
  241. assert(S0->getParent() == A0->getParent());
  242. assert(S1->getParent() == A1->getParent());
  243. // New PHI operand? Use it.
  244. if (PHINode *NewPN = getPHIOperand(BB, S0, S1))
  245. SNew->setOperand(0, NewPN);
  246. S0->eraseFromParent();
  247. S1->eraseFromParent();
  248. A0->replaceAllUsesWith(ANew);
  249. A0->eraseFromParent();
  250. A1->replaceAllUsesWith(ANew);
  251. A1->eraseFromParent();
  252. }
  253. ///
  254. /// True when two stores are equivalent and can sink into the footer
  255. ///
  256. /// Starting from a diamond head block, iterate over the instructions in one
  257. /// successor block and try to match a store in the second successor.
  258. ///
  259. bool MergedLoadStoreMotion::mergeStores(BasicBlock *HeadBB) {
  260. bool MergedStores = false;
  261. BasicBlock *TailBB = getDiamondTail(HeadBB);
  262. BasicBlock *SinkBB = TailBB;
  263. assert(SinkBB && "Footer of a diamond cannot be empty");
  264. succ_iterator SI = succ_begin(HeadBB);
  265. assert(SI != succ_end(HeadBB) && "Diamond head cannot have zero successors");
  266. BasicBlock *Pred0 = *SI;
  267. ++SI;
  268. assert(SI != succ_end(HeadBB) && "Diamond head cannot have single successor");
  269. BasicBlock *Pred1 = *SI;
  270. // tail block of a diamond/hammock?
  271. if (Pred0 == Pred1)
  272. return false; // No.
  273. // bail out early if we can not merge into the footer BB
  274. if (!SplitFooterBB && TailBB->hasNPredecessorsOrMore(3))
  275. return false;
  276. // #Instructions in Pred1 for Compile Time Control
  277. auto InstsNoDbg = Pred1->instructionsWithoutDebug();
  278. int Size1 = std::distance(InstsNoDbg.begin(), InstsNoDbg.end());
  279. int NStores = 0;
  280. for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend();
  281. RBI != RBE;) {
  282. Instruction *I = &*RBI;
  283. ++RBI;
  284. // Don't sink non-simple (atomic, volatile) stores.
  285. auto *S0 = dyn_cast<StoreInst>(I);
  286. if (!S0 || !S0->isSimple())
  287. continue;
  288. ++NStores;
  289. if (NStores * Size1 >= MagicCompileTimeControl)
  290. break;
  291. if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) {
  292. if (!canSinkStoresAndGEPs(S0, S1))
  293. // Don't attempt to sink below stores that had to stick around
  294. // But after removal of a store and some of its feeding
  295. // instruction search again from the beginning since the iterator
  296. // is likely stale at this point.
  297. break;
  298. if (SinkBB == TailBB && TailBB->hasNPredecessorsOrMore(3)) {
  299. // We have more than 2 predecessors. Insert a new block
  300. // postdominating 2 predecessors we're going to sink from.
  301. SinkBB = SplitBlockPredecessors(TailBB, {Pred0, Pred1}, ".sink.split");
  302. if (!SinkBB)
  303. break;
  304. }
  305. MergedStores = true;
  306. sinkStoresAndGEPs(SinkBB, S0, S1);
  307. RBI = Pred0->rbegin();
  308. RBE = Pred0->rend();
  309. LLVM_DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump());
  310. }
  311. }
  312. return MergedStores;
  313. }
  314. bool MergedLoadStoreMotion::run(Function &F, AliasAnalysis &AA) {
  315. this->AA = &AA;
  316. bool Changed = false;
  317. LLVM_DEBUG(dbgs() << "Instruction Merger\n");
  318. // Merge unconditional branches, allowing PRE to catch more
  319. // optimization opportunities.
  320. // This loop doesn't care about newly inserted/split blocks
  321. // since they never will be diamond heads.
  322. for (BasicBlock &BB : make_early_inc_range(F))
  323. // Hoist equivalent loads and sink stores
  324. // outside diamonds when possible
  325. if (isDiamondHead(&BB))
  326. Changed |= mergeStores(&BB);
  327. return Changed;
  328. }
  329. namespace {
  330. class MergedLoadStoreMotionLegacyPass : public FunctionPass {
  331. const bool SplitFooterBB;
  332. public:
  333. static char ID; // Pass identification, replacement for typeid
  334. MergedLoadStoreMotionLegacyPass(bool SplitFooterBB = false)
  335. : FunctionPass(ID), SplitFooterBB(SplitFooterBB) {
  336. initializeMergedLoadStoreMotionLegacyPassPass(
  337. *PassRegistry::getPassRegistry());
  338. }
  339. ///
  340. /// Run the transformation for each function
  341. ///
  342. bool runOnFunction(Function &F) override {
  343. if (skipFunction(F))
  344. return false;
  345. MergedLoadStoreMotion Impl(SplitFooterBB);
  346. return Impl.run(F, getAnalysis<AAResultsWrapperPass>().getAAResults());
  347. }
  348. private:
  349. void getAnalysisUsage(AnalysisUsage &AU) const override {
  350. if (!SplitFooterBB)
  351. AU.setPreservesCFG();
  352. AU.addRequired<AAResultsWrapperPass>();
  353. AU.addPreserved<GlobalsAAWrapperPass>();
  354. }
  355. };
  356. char MergedLoadStoreMotionLegacyPass::ID = 0;
  357. } // anonymous namespace
  358. ///
  359. /// createMergedLoadStoreMotionPass - The public interface to this file.
  360. ///
  361. FunctionPass *llvm::createMergedLoadStoreMotionPass(bool SplitFooterBB) {
  362. return new MergedLoadStoreMotionLegacyPass(SplitFooterBB);
  363. }
  364. INITIALIZE_PASS_BEGIN(MergedLoadStoreMotionLegacyPass, "mldst-motion",
  365. "MergedLoadStoreMotion", false, false)
  366. INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  367. INITIALIZE_PASS_END(MergedLoadStoreMotionLegacyPass, "mldst-motion",
  368. "MergedLoadStoreMotion", false, false)
  369. PreservedAnalyses
  370. MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) {
  371. MergedLoadStoreMotion Impl(Options.SplitFooterBB);
  372. auto &AA = AM.getResult<AAManager>(F);
  373. if (!Impl.run(F, AA))
  374. return PreservedAnalyses::all();
  375. PreservedAnalyses PA;
  376. if (!Options.SplitFooterBB)
  377. PA.preserveSet<CFGAnalyses>();
  378. return PA;
  379. }
  380. void MergedLoadStoreMotionPass::printPipeline(
  381. raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  382. static_cast<PassInfoMixin<MergedLoadStoreMotionPass> *>(this)->printPipeline(
  383. OS, MapClassName2PassName);
  384. OS << "<";
  385. OS << (Options.SplitFooterBB ? "" : "no-") << "split-footer-bb";
  386. OS << ">";
  387. }