//===- GVNSink.cpp - sink expressions into successors ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file GVNSink.cpp
/// This pass attempts to sink instructions into successors, reducing static
/// instruction count and enabling if-conversion.
///
/// We use a variant of global value numbering to decide what can be sunk.
/// Consider:
///
/// [ %a1 = add i32 %b, 1 ]   [ %c1 = add i32 %d, 1 ]
/// [ %a2 = xor i32 %a1, 1 ]  [ %c2 = xor i32 %c1, 1 ]
///                  \           /
///            [ %e = phi i32 %a2, %c2 ]
///            [ add i32 %e, 4         ]
///
/// GVN would number %a1 and %c1 differently because they compute different
/// results - the VN of an instruction is a function of its opcode and the
/// transitive closure of its operands. This is the key property for hoisting
/// and CSE.
///
/// What we want when sinking however is for a numbering that is a function of
/// the *uses* of an instruction, which allows us to answer the question "if I
/// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The PostValueTable class in GVN provides this
/// mapping.
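///
/// On the example above, a successful sink leaves roughly:
///
///   [ ... %b ... ]          [ ... %d ... ]
///               \             /
///         [ %e0 = phi i32 %b, %d ]
///         [ %a1 = add i32 %e0, 1 ]
///         [ %a2 = xor i32 %a1, 1 ]
///         [ add i32 %a2, 4       ]
///
/// One PHI merges the differing operands (%b and %d), the two instruction
/// pairs are sunk once each, and the original phi of %a2 and %c2 folds away
/// because both of its incoming values become the single sunk instruction.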
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "gvn-sink"

STATISTIC(NumRemoved, "Number of instructions removed");

namespace llvm {
namespace GVNExpression {

LLVM_DUMP_METHOD void Expression::dump() const {
  print(dbgs());
  dbgs() << "\n";
}

} // end namespace GVNExpression
} // end namespace llvm

namespace {

static bool isMemoryInst(const Instruction *I) {
  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
         (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
         (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
}

/// Iterates through instructions in a set of blocks in reverse order from the
/// first non-terminator. For example (assume all blocks have size n):
///   LockstepReverseIterator I([B1, B2, B3]);
///   *I-- = [B1[n], B2[n], B3[n]];
///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
///   ...
///
/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
/// to determine which blocks are still going and the order they appear in the
/// list returned by operator*.
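///
/// Typical use, mirroring \c sinkBB below:
///   LockstepReverseIterator LRI(Preds);
///   while (LRI.isValid()) {
///     ArrayRef<Instruction *> Insts = *LRI; // one instruction per block
///     // ... analyze Insts ...
///     --LRI;
///   }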
class LockstepReverseIterator {
  ArrayRef<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 4> ActiveBlocks;
  SmallVector<Instruction *, 4> Insts;
  bool Fail;

public:
  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
    reset();
  }

  void reset() {
    Fail = false;
    ActiveBlocks.clear();
    for (BasicBlock *BB : Blocks)
      ActiveBlocks.insert(BB);
    Insts.clear();
    for (BasicBlock *BB : Blocks) {
      if (BB->size() <= 1) {
        // Block wasn't big enough - only contained a terminator.
        ActiveBlocks.remove(BB);
        continue;
      }
      Insts.push_back(BB->getTerminator()->getPrevNode());
    }
    if (Insts.empty())
      Fail = true;
  }

  bool isValid() const { return !Fail; }
  ArrayRef<Instruction *> operator*() const { return Insts; }

  // Note: This needs to return a SmallSetVector as the elements of
  // ActiveBlocks will be later copied to Blocks using std::copy. The
  // resultant order of elements in Blocks needs to be deterministic.
  // Using SmallPtrSet instead causes non-deterministic order while
  // copying. And we cannot simply sort Blocks as they need to match the
  // corresponding Values.
  SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }

  void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
    for (auto II = Insts.begin(); II != Insts.end();) {
      if (!llvm::is_contained(Blocks, (*II)->getParent())) {
        ActiveBlocks.remove((*II)->getParent());
        II = Insts.erase(II);
      } else {
        ++II;
      }
    }
  }

  void operator--() {
    if (Fail)
      return;
    SmallVector<Instruction *, 4> NewInsts;
    for (auto *Inst : Insts) {
      if (Inst == &Inst->getParent()->front())
        ActiveBlocks.remove(Inst->getParent());
      else
        NewInsts.push_back(Inst->getPrevNode());
    }
    if (NewInsts.empty()) {
      Fail = true;
      return;
    }
    Insts = NewInsts;
  }
};

//===----------------------------------------------------------------------===//

/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from and the number of PHIs
/// required.
struct SinkingInstructionCandidate {
  unsigned NumBlocks;
  unsigned NumInstructions;
  unsigned NumPHIs;
  unsigned NumMemoryInsts;
  int Cost = -1;
  SmallVector<BasicBlock *, 4> Blocks;
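
  // The formula below rewards sinking many instructions from many blocks
  // (NumInstructions * (NumBlocks - 1) instructions disappear), charges
  // quadratically for every PHI the transform would have to create, and adds
  // a fixed charge of 2 when an edge must be split. For example, sinking
  // three instructions from two blocks at the price of one extra PHI and no
  // edge split scores 3 * 1 - 1 * 1 - 0 = 2.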
  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
    Cost = (NumInstructions * (NumBlocks - 1)) -
           (NumExtraPHIs *
            NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
           - SplitEdgeCost;
  }

  bool operator>(const SinkingInstructionCandidate &Other) const {
    return Cost > Other.Cost;
  }
};

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS,
                        const SinkingInstructionCandidate &C) {
  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
  return OS;
}
#endif

//===----------------------------------------------------------------------===//

/// Describes a PHI node that may or may not exist. These track the PHIs
/// that must be created if we sunk a sequence of instructions. It provides
/// a hash function for efficient equality comparisons.
class ModelledPHI {
  SmallVector<Value *, 4> Values;
  SmallVector<BasicBlock *, 4> Blocks;

public:
  ModelledPHI() = default;

  ModelledPHI(const PHINode *PN) {
    // BasicBlock comes first so we sort by basic block pointer order,
    // then by value pointer order.
    SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
      Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
    llvm::sort(Ops);
    for (auto &P : Ops) {
      Blocks.push_back(P.first);
      Values.push_back(P.second);
    }
  }

  /// Create a dummy ModelledPHI that will compare unequal to any other
  /// ModelledPHI without the same ID.
  /// \note This is specifically for DenseMapInfo - do not use this!
  static ModelledPHI createDummy(size_t ID) {
    ModelledPHI M;
    M.Values.push_back(reinterpret_cast<Value *>(ID));
    return M;
  }

  /// Create a PHI from an array of incoming values and incoming blocks.
  template <typename VArray, typename BArray>
  ModelledPHI(const VArray &V, const BArray &B) {
    llvm::copy(V, std::back_inserter(Values));
    llvm::copy(B, std::back_inserter(Blocks));
  }

  /// Create a PHI from [I[OpNum] for I in Insts].
  template <typename BArray>
  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
    llvm::copy(B, std::back_inserter(Blocks));
    for (auto *I : Insts)
      Values.push_back(I->getOperand(OpNum));
  }

  /// Restrict the PHI's contents down to only \c NewBlocks.
  /// \c NewBlocks must be a subset of \c this->Blocks.
  void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
    auto BI = Blocks.begin();
    auto VI = Values.begin();
    while (BI != Blocks.end()) {
      assert(VI != Values.end());
      if (!llvm::is_contained(NewBlocks, *BI)) {
        BI = Blocks.erase(BI);
        VI = Values.erase(VI);
      } else {
        ++BI;
        ++VI;
      }
    }
    assert(Blocks.size() == NewBlocks.size());
  }

  ArrayRef<Value *> getValues() const { return Values; }

  bool areAllIncomingValuesSame() const { return llvm::all_equal(Values); }

  bool areAllIncomingValuesSameType() const {
    return llvm::all_of(
        Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
  }

  bool areAnyIncomingValuesConstant() const {
    return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
  }

  // Hash functor
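  // Note that only Values participate in the hash while operator== also
  // compares Blocks. That still satisfies the DenseMap contract: equal keys
  // always have equal Values and therefore equal hashes; unequal keys may
  // merely collide.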
  unsigned hash() const {
    return (unsigned)hash_combine_range(Values.begin(), Values.end());
  }

  bool operator==(const ModelledPHI &Other) const {
    return Values == Other.Values && Blocks == Other.Blocks;
  }
};

template <typename ModelledPHI> struct DenseMapInfo {
  static inline ModelledPHI &getEmptyKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
    return Dummy;
  }

  static inline ModelledPHI &getTombstoneKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
    return Dummy;
  }

  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }

  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
    return LHS == RHS;
  }
};

using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;

//===----------------------------------------------------------------------===//
//                                 ValueTable
//===----------------------------------------------------------------------===//
// This is a value number table where the value number is a function of the
// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
// that the program would be equivalent if we replaced A with PHI(A, B).
//===----------------------------------------------------------------------===//
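//
// For example, in the diagram in the file header %a1 and %c1 receive the same
// post-value number: their only users (%a2 and %c2) number equally, and that
// in turn holds because both of those feed the same phi. The numbering is
// inductive over uses rather than over operands.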

/// A GVN expression describing how an instruction is used. The operands
/// field of BasicExpression is used to store uses, not operands.
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with side effects.
class InstructionUseExpr : public GVNExpression::BasicExpression {
  unsigned MemoryUseOrder = -1;
  bool Volatile = false;
  ArrayRef<int> ShuffleMask;

public:
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : GVNExpression::BasicExpression(I->getNumUses()) {
    allocateOperands(R, A);
    setOpcode(I->getOpcode());
    setType(I->getType());
    if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
      ShuffleMask = SVI->getShuffleMask().copy(A);
    for (auto &U : I->uses())
      op_push_back(U.getUser());
    llvm::sort(op_begin(), op_end());
  }

  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }

  hash_code getHashValue() const override {
    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
                        MemoryUseOrder, Volatile, ShuffleMask);
  }

  template <typename Function> hash_code getHashValue(Function MapFn) {
    hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
                               ShuffleMask);
    for (auto *V : operands())
      H = hash_combine(H, MapFn(V));
    return H;
  }
};

using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;

class ValueTable {
  DenseMap<Value *, uint32_t> ValueNumbering;
  DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
  DenseMap<size_t, uint32_t> HashNumbering;
  BumpPtrAllocator Allocator;
  ArrayRecycler<Value *> Recycler;
  uint32_t nextValueNumber = 1;
  BasicBlocksSet ReachableBBs;

  /// Create an expression for I based on its opcode and its uses. If I
  /// touches or reads memory, the expression is also based upon its memory
  /// order - see \c getMemoryUseOrder().
  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));
    if (CmpInst *C = dyn_cast<CmpInst>(I)) {
      CmpInst::Predicate Predicate = C->getPredicate();
      E->setOpcode((C->getOpcode() << 8) | Predicate);
    }
    return E;
  }

  /// Helper to compute the value number for a memory instruction
  /// (LoadInst/StoreInst), including checking the memory ordering and
  /// volatility.
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
    if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
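      // No expression is created for ordered or atomic accesses; lookupOrAdd
      // then hands out a fresh, unshared value number, so such instructions
      // are never merged with anything.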
      return nullptr;
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }

public:
  ValueTable() = default;

  /// Set basic blocks reachable from entry block.
  void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
    this->ReachableBBs = ReachableBBs;
  }

  /// Returns the value number for the specified value, assigning
  /// it a new number if it did not have one before.
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    Instruction *I = cast<Instruction>(V);
    if (!ReachableBBs.contains(I->getParent()))
      return ~0U;

    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      break;
    }

    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto I = HashNumbering.find(H);
      if (I != HashNumbering.end()) {
        e = I->second;
      } else {
        e = nextValueNumber++;
        HashNumbering[H] = e;
        ExpressionNumbering[exp] = e;
      }
    }
    ValueNumbering[V] = e;
    return e;
  }

  /// Returns the value number of the specified value. Fails if the value has
  /// not yet been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }

  /// Removes all value numberings and resets the value table.
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    Recycler.clear(Allocator);
    nextValueNumber = 1;
  }

  /// \c Inst uses or touches memory. Return an ID describing the memory state
  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
  /// the exact same memory operations happen after I1 and I2.
  ///
  /// This is a very hard problem in general, so we use domain-specific
  /// knowledge that we only ever check for equivalence between blocks sharing a
  /// single immediate successor that is common, and when determining if I1 ==
  /// I2 we will have already determined that next(I1) == next(I2). This
  /// inductive property allows us to simply return the value number of the next
  /// instruction that defines memory.
  uint32_t getMemoryUseOrder(Instruction *Inst) {
    auto *BB = Inst->getParent();
    for (auto I = std::next(Inst->getIterator()), E = BB->end();
         I != E && !I->isTerminator(); ++I) {
      if (!isMemoryInst(&*I))
        continue;
      if (isa<LoadInst>(&*I))
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*I);
      if (CI && CI->onlyReadsMemory())
        continue;
      InvokeInst *II = dyn_cast<InvokeInst>(&*I);
      if (II && II->onlyReadsMemory())
        continue;
      return lookupOrAdd(&*I);
    }
    return 0;
  }
};

//===----------------------------------------------------------------------===//

class GVNSink {
public:
  GVNSink() = default;

  bool run(Function &F) {
    LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
                      << "\n");

    unsigned NumSunk = 0;
    ReversePostOrderTraversal<Function *> RPOT(&F);
    VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
    for (auto *N : RPOT)
      NumSunk += sinkBB(N);

    return NumSunk > 0;
  }

private:
  ValueTable VN;

  bool shouldAvoidSinkingInstruction(Instruction *I) {
    // These instructions may change or break semantics if moved.
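    // PHIs are modelled separately, EH pads must stay first in their block,
    // allocas carry block-placement semantics, and token values cannot be
    // merged through a PHI.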
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return true;
    return false;
  }

  /// The main heuristic function. Analyze the set of instructions pointed to
  /// by LRI and return a candidate solution if these instructions can be sunk,
  /// or std::nullopt otherwise.
  std::optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
      LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
      ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);

  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    for (PHINode &PN : BB->phis()) {
      auto MPHI = ModelledPHI(&PN);
      PHIs.insert(MPHI);
      for (auto *V : MPHI.getValues())
        PHIContents.insert(V);
    }
  }

  /// The main instruction sinking driver. Set up state and try and sink
  /// instructions into BBEnd from its predecessors.
  unsigned sinkBB(BasicBlock *BBEnd);

  /// Perform the actual mechanics of sinking an instruction from Blocks into
  /// BBEnd, which is their only successor.
  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);

  /// Remove PHIs that all have the same incoming value.
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
            return V == PN->getIncomingValue(0);
          }))
        continue;
      if (PN->getIncomingValue(0) != PN)
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
      else
        PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
      PN->eraseFromParent();
    }
  }
};
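
// In outline, analyzeInstructionForSinking:
//   1. numbers the current instruction of every active predecessor,
//   2. restricts the active blocks to those whose instruction received the
//      most common value number (a single occurrence means nothing sinks),
//   3. models the PHIs the sink would need, bailing out on anything
//      unrepresentable (mismatched operand counts or types, operands that
//      cannot be replaced by a variable, would-be indirect calls),
//   4. and returns the accumulated candidate for the caller to cost.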
std::optional<SinkingInstructionCandidate>
GVNSink::analyzeInstructionForSinking(LockstepReverseIterator &LRI,
                                      unsigned &InstNum,
                                      unsigned &MemoryInstNum,
                                      ModelledPHISet &NeededPHIs,
                                      SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n";
             for (auto *I : Insts) I->dump();
             dbgs() << " ]\n";);

  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
    if (N == ~0U)
      return std::nullopt;
    VNums[N]++;
  }
  unsigned VNumToSink =
      std::max_element(VNums.begin(), VNums.end(), llvm::less_second())->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return std::nullopt;

  // Now restrict the number of incoming blocks down to only those with
  // VNumToSink.
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.remove(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (shouldAvoidSinkingInstruction(I))
      return std::nullopt;

  // If we've restricted the incoming blocks, restrict all needed PHIs also
  // to that set.
  bool RecomputePHIContents = false;
  if (ActivePreds.size() != InitialActivePredSize) {
    ModelledPHISet NewNeededPHIs;
    for (auto P : NeededPHIs) {
      P.restrictToBlocks(ActivePreds);
      NewNeededPHIs.insert(P);
    }
    NeededPHIs = NewNeededPHIs;
    LRI.restrictToBlocks(ActivePreds);
    RecomputePHIContents = true;
  }

  // The sunk instruction's results.
  ModelledPHI NewPHI(NewInsts, ActivePreds);

  // Does sinking this instruction render previous PHIs redundant?
  if (NeededPHIs.erase(NewPHI))
    RecomputePHIContents = true;

  if (RecomputePHIContents) {
    // The needed PHIs have changed, so recompute the set of all needed
    // values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  // Is this instruction required by a later PHI that doesn't match this PHI?
  // If so, we can't sink this instruction.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      // V exists in this PHI, but the whole PHI is different to NewPHI
      // (else it would have been removed earlier). We cannot continue
      // because this isn't representable.
      return std::nullopt;

  // Which operands need PHIs?
  // FIXME: If any of these fail, we should partition up the candidates to
  // try and continue making progress.
  Instruction *I0 = NewInsts[0];

  // If all instructions that are going to participate don't have the same
  // number of operands, we can't do any useful PHI analysis for all operands.
  auto hasDifferentNumOperands = [&I0](Instruction *I) {
    return I->getNumOperands() != I0->getNumOperands();
  };
  if (any_of(NewInsts, hasDifferentNumOperands))
    return std::nullopt;

  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    if (!canReplaceOperandWithVariable(I0, OpNum))
      // We can't create a PHI from this instruction!
      return std::nullopt;
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return std::nullopt;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return std::nullopt;

    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  append_range(Cand.Blocks, ActivePreds);

  return Cand;
}
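
// sinkBB builds one candidate per lockstep step: the candidate at step N
// models sinking the last N instructions of each remaining predecessor, so
// instruction and PHI counts accumulate as the iterator walks backwards.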
unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
             BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
  SmallVector<BasicBlock *, 4> Preds;
  for (auto *B : predecessors(BBEnd)) {
    auto *T = B->getTerminator();
    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
      Preds.push_back(B);
    else
      return 0;
  }
  if (Preds.size() < 2)
    return 0;
  llvm::sort(Preds);

  unsigned NumOrigPreds = Preds.size();
  // We can only sink instructions through unconditional branches.
  llvm::erase_if(Preds, [](BasicBlock *BB) {
    return BB->getTerminator()->getNumSuccessors() != 1;
  });

  LockstepReverseIterator LRI(Preds);
  SmallVector<SinkingInstructionCandidate, 4> Candidates;
  unsigned InstNum = 0, MemoryInstNum = 0;
  ModelledPHISet NeededPHIs;
  SmallPtrSet<Value *, 4> PHIContents;
  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
  unsigned NumOrigPHIs = NeededPHIs.size();

  while (LRI.isValid()) {
    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
                                             NeededPHIs, PHIContents);
    if (!Cand)
      break;
    Cand->calculateCost(NumOrigPHIs, Preds.size());
    Candidates.emplace_back(*Cand);
    --LRI;
  }

  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n";
             for (auto &C : Candidates) dbgs() << " " << C << "\n";);

  // Pick the top candidate, as long as it is positive!
  if (Candidates.empty() || Candidates.front().Cost <= 0)
    return 0;

  auto C = Candidates.front();
  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
  BasicBlock *InsertBB = BBEnd;
  if (C.Blocks.size() < NumOrigPreds) {
    LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
               BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
    if (!InsertBB) {
      LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
      // Edge couldn't be split.
      return 0;
    }
  }

  for (unsigned I = 0; I < C.NumInstructions; ++I)
    sinkLastInstruction(C.Blocks, InsertBB);

  return C.NumInstructions;
}
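
// Example of the mechanics below: sinking "%x = add i32 %p, 1" and
// "%y = add i32 %q, 1" creates "%p.sink = phi i32 [ %p, ... ], [ %q, ... ]"
// in BBEnd, rewrites %x (the arbitrarily chosen survivor) to use %p.sink,
// moves it into BBEnd, and replaces all uses of %y with it.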
void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNode());
  Instruction *I0 = Insts.front();

  SmallVector<Value *, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN = PHINode::Create(Op->getType(), Insts.size(),
                               Op->getName() + ".sink", &BBEnd->front());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags.
  for (auto *I : Insts)
    if (I != I0) {
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  for (auto *I : Insts)
    if (I != I0)
      I->replaceAllUsesWith(I0);
  foldPointlessPHINodes(BBEnd);

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}

////////////////////////////////////////////////////////////////////////////////
// Pass machinery / boilerplate

class GVNSinkLegacyPass : public FunctionPass {
public:
  static char ID;

  GVNSinkLegacyPass() : FunctionPass(ID) {
    initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    GVNSink G;
    return G.run(F);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<GlobalsAAWrapperPass>();
  }
};

} // end anonymous namespace

PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
  GVNSink G;
  if (!G.run(F))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}

char GVNSinkLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
                      "Early GVN sinking of Expressions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
                    "Early GVN sinking of Expressions", false, false)

FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }