MergedLoadStoreMotion.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. //===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. //! \file
  10. //! This pass performs merges of loads and stores on both sides of a
  11. // diamond (hammock). It hoists the loads and sinks the stores.
  12. //
  13. // The algorithm iteratively hoists two loads to the same address out of a
  14. // diamond (hammock) and merges them into a single load in the header. Similar
  15. // it sinks and merges two stores to the tail block (footer). The algorithm
  16. // iterates over the instructions of one side of the diamond and attempts to
  17. // find a matching load/store on the other side. New tail/footer block may be
  18. // insterted if the tail/footer block has more predecessors (not only the two
  19. // predecessors that are forming the diamond). It hoists / sinks when it thinks
  20. // it safe to do so. This optimization helps with eg. hiding load latencies,
  21. // triggering if-conversion, and reducing static code size.
  22. //
  23. // NOTE: This code no longer performs load hoisting, it is subsumed by GVNHoist.
  24. //
  25. //===----------------------------------------------------------------------===//
  26. //
  27. //
  28. // Example:
  29. // Diamond shaped code before merge:
  30. //
  31. // header:
  32. // br %cond, label %if.then, label %if.else
  33. // + +
  34. // + +
  35. // + +
  36. // if.then: if.else:
  37. // %lt = load %addr_l %le = load %addr_l
  38. // <use %lt> <use %le>
  39. // <...> <...>
  40. // store %st, %addr_s store %se, %addr_s
  41. // br label %if.end br label %if.end
  42. // + +
  43. // + +
  44. // + +
  45. // if.end ("footer"):
  46. // <...>
  47. //
  48. // Diamond shaped code after merge:
  49. //
  50. // header:
  51. // %l = load %addr_l
  52. // br %cond, label %if.then, label %if.else
  53. // + +
  54. // + +
  55. // + +
  56. // if.then: if.else:
  57. // <use %l> <use %l>
  58. // <...> <...>
  59. // br label %if.end br label %if.end
  60. // + +
  61. // + +
  62. // + +
  63. // if.end ("footer"):
  64. // %s.sink = phi [%st, if.then], [%se, if.else]
  65. // <...>
  66. // store %s.sink, %addr_s
  67. // <...>
  68. //
  69. //
  70. //===----------------------- TODO -----------------------------------------===//
  71. //
  72. // 1) Generalize to regions other than diamonds
  73. // 2) Be more aggressive merging memory operations
  74. // Note that both changes require register pressure control
  75. //
  76. //===----------------------------------------------------------------------===//
  77. #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
  78. #include "llvm/Analysis/AliasAnalysis.h"
  79. #include "llvm/Analysis/GlobalsModRef.h"
  80. #include "llvm/IR/Instructions.h"
  81. #include "llvm/InitializePasses.h"
  82. #include "llvm/Support/Debug.h"
  83. #include "llvm/Support/raw_ostream.h"
  84. #include "llvm/Transforms/Scalar.h"
  85. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  86. using namespace llvm;
  87. #define DEBUG_TYPE "mldst-motion"
  88. namespace {
  89. //===----------------------------------------------------------------------===//
  90. // MergedLoadStoreMotion Pass
  91. //===----------------------------------------------------------------------===//
  92. class MergedLoadStoreMotion {
  93. AliasAnalysis *AA = nullptr;
  94. // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
  95. // where Size0 and Size1 are the #instructions on the two sides of
  96. // the diamond. The constant chosen here is arbitrary. Compiler Time
  97. // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
  98. const int MagicCompileTimeControl = 250;
  99. const bool SplitFooterBB;
  100. public:
  101. MergedLoadStoreMotion(bool SplitFooterBB) : SplitFooterBB(SplitFooterBB) {}
  102. bool run(Function &F, AliasAnalysis &AA);
  103. private:
  104. BasicBlock *getDiamondTail(BasicBlock *BB);
  105. bool isDiamondHead(BasicBlock *BB);
  106. // Routines for sinking stores
  107. StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
  108. PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
  109. bool isStoreSinkBarrierInRange(const Instruction &Start,
  110. const Instruction &End, MemoryLocation Loc);
  111. bool canSinkStoresAndGEPs(StoreInst *S0, StoreInst *S1) const;
  112. void sinkStoresAndGEPs(BasicBlock *BB, StoreInst *SinkCand,
  113. StoreInst *ElseInst);
  114. bool mergeStores(BasicBlock *BB);
  115. };
  116. } // end anonymous namespace
  117. ///
  118. /// Return tail block of a diamond.
  119. ///
  120. BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) {
  121. assert(isDiamondHead(BB) && "Basic block is not head of a diamond");
  122. return BB->getTerminator()->getSuccessor(0)->getSingleSuccessor();
  123. }
  124. ///
  125. /// True when BB is the head of a diamond (hammock)
  126. ///
  127. bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
  128. if (!BB)
  129. return false;
  130. auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
  131. if (!BI || !BI->isConditional())
  132. return false;
  133. BasicBlock *Succ0 = BI->getSuccessor(0);
  134. BasicBlock *Succ1 = BI->getSuccessor(1);
  135. if (!Succ0->getSinglePredecessor())
  136. return false;
  137. if (!Succ1->getSinglePredecessor())
  138. return false;
  139. BasicBlock *Succ0Succ = Succ0->getSingleSuccessor();
  140. BasicBlock *Succ1Succ = Succ1->getSingleSuccessor();
  141. // Ignore triangles.
  142. if (!Succ0Succ || !Succ1Succ || Succ0Succ != Succ1Succ)
  143. return false;
  144. return true;
  145. }
  146. ///
  147. /// True when instruction is a sink barrier for a store
  148. /// located in Loc
  149. ///
  150. /// Whenever an instruction could possibly read or modify the
  151. /// value being stored or protect against the store from
  152. /// happening it is considered a sink barrier.
  153. ///
  154. bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
  155. const Instruction &End,
  156. MemoryLocation Loc) {
  157. for (const Instruction &Inst :
  158. make_range(Start.getIterator(), End.getIterator()))
  159. if (Inst.mayThrow())
  160. return true;
  161. return AA->canInstructionRangeModRef(Start, End, Loc, ModRefInfo::ModRef);
  162. }
  163. ///
  164. /// Check if \p BB contains a store to the same address as \p SI
  165. ///
  166. /// \return The store in \p when it is safe to sink. Otherwise return Null.
  167. ///
  168. StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
  169. StoreInst *Store0) {
  170. LLVM_DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n");
  171. BasicBlock *BB0 = Store0->getParent();
  172. for (Instruction &Inst : reverse(*BB1)) {
  173. auto *Store1 = dyn_cast<StoreInst>(&Inst);
  174. if (!Store1)
  175. continue;
  176. MemoryLocation Loc0 = MemoryLocation::get(Store0);
  177. MemoryLocation Loc1 = MemoryLocation::get(Store1);
  178. if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
  179. !isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->back(), Loc1) &&
  180. !isStoreSinkBarrierInRange(*Store0->getNextNode(), BB0->back(), Loc0)) {
  181. return Store1;
  182. }
  183. }
  184. return nullptr;
  185. }
  186. ///
  187. /// Create a PHI node in BB for the operands of S0 and S1
  188. ///
  189. PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
  190. StoreInst *S1) {
  191. // Create a phi if the values mismatch.
  192. Value *Opd1 = S0->getValueOperand();
  193. Value *Opd2 = S1->getValueOperand();
  194. if (Opd1 == Opd2)
  195. return nullptr;
  196. auto *NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
  197. &BB->front());
  198. NewPN->applyMergedLocation(S0->getDebugLoc(), S1->getDebugLoc());
  199. NewPN->addIncoming(Opd1, S0->getParent());
  200. NewPN->addIncoming(Opd2, S1->getParent());
  201. return NewPN;
  202. }
  203. ///
  204. /// Check if 2 stores can be sunk, optionally together with corresponding GEPs.
  205. ///
  206. bool MergedLoadStoreMotion::canSinkStoresAndGEPs(StoreInst *S0,
  207. StoreInst *S1) const {
  208. if (S0->getPointerOperand() == S1->getPointerOperand())
  209. return true;
  210. auto *GEP0 = dyn_cast<GetElementPtrInst>(S0->getPointerOperand());
  211. auto *GEP1 = dyn_cast<GetElementPtrInst>(S1->getPointerOperand());
  212. return GEP0 && GEP1 && GEP0->isIdenticalTo(GEP1) && GEP0->hasOneUse() &&
  213. (GEP0->getParent() == S0->getParent()) && GEP1->hasOneUse() &&
  214. (GEP1->getParent() == S1->getParent());
  215. }
  216. ///
  217. /// Merge two stores to same address and sink into \p BB
  218. ///
  219. /// Optionally also sinks GEP instruction computing the store address
  220. ///
  221. void MergedLoadStoreMotion::sinkStoresAndGEPs(BasicBlock *BB, StoreInst *S0,
  222. StoreInst *S1) {
  223. Value *Ptr0 = S0->getPointerOperand();
  224. Value *Ptr1 = S1->getPointerOperand();
  225. // Only one definition?
  226. LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump();
  227. dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n";
  228. dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n");
  229. // Hoist the instruction.
  230. BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
  231. // Intersect optional metadata.
  232. S0->andIRFlags(S1);
  233. S0->dropUnknownNonDebugMetadata();
  234. S0->applyMergedLocation(S0->getDebugLoc(), S1->getDebugLoc());
  235. S0->mergeDIAssignID(S1);
  236. // Create the new store to be inserted at the join point.
  237. StoreInst *SNew = cast<StoreInst>(S0->clone());
  238. SNew->insertBefore(&*InsertPt);
  239. // New PHI operand? Use it.
  240. if (PHINode *NewPN = getPHIOperand(BB, S0, S1))
  241. SNew->setOperand(0, NewPN);
  242. S0->eraseFromParent();
  243. S1->eraseFromParent();
  244. if (Ptr0 != Ptr1) {
  245. auto *GEP0 = cast<GetElementPtrInst>(Ptr0);
  246. auto *GEP1 = cast<GetElementPtrInst>(Ptr1);
  247. Instruction *GEPNew = GEP0->clone();
  248. GEPNew->insertBefore(SNew);
  249. GEPNew->applyMergedLocation(GEP0->getDebugLoc(), GEP1->getDebugLoc());
  250. SNew->setOperand(1, GEPNew);
  251. GEP0->replaceAllUsesWith(GEPNew);
  252. GEP0->eraseFromParent();
  253. GEP1->replaceAllUsesWith(GEPNew);
  254. GEP1->eraseFromParent();
  255. }
  256. }
  257. ///
  258. /// True when two stores are equivalent and can sink into the footer
  259. ///
  260. /// Starting from a diamond head block, iterate over the instructions in one
  261. /// successor block and try to match a store in the second successor.
  262. ///
  263. bool MergedLoadStoreMotion::mergeStores(BasicBlock *HeadBB) {
  264. bool MergedStores = false;
  265. BasicBlock *TailBB = getDiamondTail(HeadBB);
  266. BasicBlock *SinkBB = TailBB;
  267. assert(SinkBB && "Footer of a diamond cannot be empty");
  268. succ_iterator SI = succ_begin(HeadBB);
  269. assert(SI != succ_end(HeadBB) && "Diamond head cannot have zero successors");
  270. BasicBlock *Pred0 = *SI;
  271. ++SI;
  272. assert(SI != succ_end(HeadBB) && "Diamond head cannot have single successor");
  273. BasicBlock *Pred1 = *SI;
  274. // tail block of a diamond/hammock?
  275. if (Pred0 == Pred1)
  276. return false; // No.
  277. // bail out early if we can not merge into the footer BB
  278. if (!SplitFooterBB && TailBB->hasNPredecessorsOrMore(3))
  279. return false;
  280. // #Instructions in Pred1 for Compile Time Control
  281. auto InstsNoDbg = Pred1->instructionsWithoutDebug();
  282. int Size1 = std::distance(InstsNoDbg.begin(), InstsNoDbg.end());
  283. int NStores = 0;
  284. for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend();
  285. RBI != RBE;) {
  286. Instruction *I = &*RBI;
  287. ++RBI;
  288. // Don't sink non-simple (atomic, volatile) stores.
  289. auto *S0 = dyn_cast<StoreInst>(I);
  290. if (!S0 || !S0->isSimple())
  291. continue;
  292. ++NStores;
  293. if (NStores * Size1 >= MagicCompileTimeControl)
  294. break;
  295. if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) {
  296. if (!canSinkStoresAndGEPs(S0, S1))
  297. // Don't attempt to sink below stores that had to stick around
  298. // But after removal of a store and some of its feeding
  299. // instruction search again from the beginning since the iterator
  300. // is likely stale at this point.
  301. break;
  302. if (SinkBB == TailBB && TailBB->hasNPredecessorsOrMore(3)) {
  303. // We have more than 2 predecessors. Insert a new block
  304. // postdominating 2 predecessors we're going to sink from.
  305. SinkBB = SplitBlockPredecessors(TailBB, {Pred0, Pred1}, ".sink.split");
  306. if (!SinkBB)
  307. break;
  308. }
  309. MergedStores = true;
  310. sinkStoresAndGEPs(SinkBB, S0, S1);
  311. RBI = Pred0->rbegin();
  312. RBE = Pred0->rend();
  313. LLVM_DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump());
  314. }
  315. }
  316. return MergedStores;
  317. }
  318. bool MergedLoadStoreMotion::run(Function &F, AliasAnalysis &AA) {
  319. this->AA = &AA;
  320. bool Changed = false;
  321. LLVM_DEBUG(dbgs() << "Instruction Merger\n");
  322. // Merge unconditional branches, allowing PRE to catch more
  323. // optimization opportunities.
  324. // This loop doesn't care about newly inserted/split blocks
  325. // since they never will be diamond heads.
  326. for (BasicBlock &BB : make_early_inc_range(F))
  327. // Hoist equivalent loads and sink stores
  328. // outside diamonds when possible
  329. if (isDiamondHead(&BB))
  330. Changed |= mergeStores(&BB);
  331. return Changed;
  332. }
  333. namespace {
  334. class MergedLoadStoreMotionLegacyPass : public FunctionPass {
  335. const bool SplitFooterBB;
  336. public:
  337. static char ID; // Pass identification, replacement for typeid
  338. MergedLoadStoreMotionLegacyPass(bool SplitFooterBB = false)
  339. : FunctionPass(ID), SplitFooterBB(SplitFooterBB) {
  340. initializeMergedLoadStoreMotionLegacyPassPass(
  341. *PassRegistry::getPassRegistry());
  342. }
  343. ///
  344. /// Run the transformation for each function
  345. ///
  346. bool runOnFunction(Function &F) override {
  347. if (skipFunction(F))
  348. return false;
  349. MergedLoadStoreMotion Impl(SplitFooterBB);
  350. return Impl.run(F, getAnalysis<AAResultsWrapperPass>().getAAResults());
  351. }
  352. private:
  353. void getAnalysisUsage(AnalysisUsage &AU) const override {
  354. if (!SplitFooterBB)
  355. AU.setPreservesCFG();
  356. AU.addRequired<AAResultsWrapperPass>();
  357. AU.addPreserved<GlobalsAAWrapperPass>();
  358. }
  359. };
  360. char MergedLoadStoreMotionLegacyPass::ID = 0;
  361. } // anonymous namespace
  362. ///
  363. /// createMergedLoadStoreMotionPass - The public interface to this file.
  364. ///
  365. FunctionPass *llvm::createMergedLoadStoreMotionPass(bool SplitFooterBB) {
  366. return new MergedLoadStoreMotionLegacyPass(SplitFooterBB);
  367. }
  368. INITIALIZE_PASS_BEGIN(MergedLoadStoreMotionLegacyPass, "mldst-motion",
  369. "MergedLoadStoreMotion", false, false)
  370. INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  371. INITIALIZE_PASS_END(MergedLoadStoreMotionLegacyPass, "mldst-motion",
  372. "MergedLoadStoreMotion", false, false)
  373. PreservedAnalyses
  374. MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) {
  375. MergedLoadStoreMotion Impl(Options.SplitFooterBB);
  376. auto &AA = AM.getResult<AAManager>(F);
  377. if (!Impl.run(F, AA))
  378. return PreservedAnalyses::all();
  379. PreservedAnalyses PA;
  380. if (!Options.SplitFooterBB)
  381. PA.preserveSet<CFGAnalyses>();
  382. return PA;
  383. }
  384. void MergedLoadStoreMotionPass::printPipeline(
  385. raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  386. static_cast<PassInfoMixin<MergedLoadStoreMotionPass> *>(this)->printPipeline(
  387. OS, MapClassName2PassName);
  388. OS << "<";
  389. OS << (Options.SplitFooterBB ? "" : "no-") << "split-footer-bb";
  390. OS << ">";
  391. }