Simplify.cpp 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852
  1. //===------ Simplify.cpp ----------------------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Simplify a SCoP by removing unnecessary statements and accesses.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "polly/Simplify.h"
  13. #include "polly/ScopInfo.h"
  14. #include "polly/ScopPass.h"
  15. #include "polly/Support/GICHelper.h"
  16. #include "polly/Support/ISLOStream.h"
  17. #include "polly/Support/ISLTools.h"
  18. #include "polly/Support/VirtualInstruction.h"
  19. #include "llvm/ADT/Statistic.h"
  20. #include "llvm/InitializePasses.h"
  21. #include "llvm/Support/Debug.h"
  22. #define DEBUG_TYPE "polly-simplify"
  23. using namespace llvm;
  24. using namespace polly;
  25. namespace {
  26. #define TWO_STATISTICS(VARNAME, DESC) \
  27. static llvm::Statistic VARNAME[2] = { \
  28. {DEBUG_TYPE, #VARNAME "0", DESC " (first)"}, \
  29. {DEBUG_TYPE, #VARNAME "1", DESC " (second)"}}
  30. /// Number of max disjuncts we allow in removeOverwrites(). This is to avoid
  31. /// that the analysis of accesses in a statement is becoming too complex. Chosen
  32. /// to be relatively small because all the common cases should access only few
  33. /// array elements per statement.
  34. static unsigned const SimplifyMaxDisjuncts = 4;
  35. TWO_STATISTICS(ScopsProcessed, "Number of SCoPs processed");
  36. TWO_STATISTICS(ScopsModified, "Number of SCoPs simplified");
  37. TWO_STATISTICS(TotalEmptyDomainsRemoved,
  38. "Number of statement with empty domains removed in any SCoP");
  39. TWO_STATISTICS(TotalOverwritesRemoved, "Number of removed overwritten writes");
  40. TWO_STATISTICS(TotalWritesCoalesced, "Number of writes coalesced with another");
  41. TWO_STATISTICS(TotalRedundantWritesRemoved,
  42. "Number of writes of same value removed in any SCoP");
  43. TWO_STATISTICS(TotalEmptyPartialAccessesRemoved,
  44. "Number of empty partial accesses removed");
  45. TWO_STATISTICS(TotalDeadAccessesRemoved, "Number of dead accesses removed");
  46. TWO_STATISTICS(TotalDeadInstructionsRemoved,
  47. "Number of unused instructions removed");
  48. TWO_STATISTICS(TotalStmtsRemoved, "Number of statements removed in any SCoP");
  49. TWO_STATISTICS(NumValueWrites, "Number of scalar value writes after Simplify");
  50. TWO_STATISTICS(
  51. NumValueWritesInLoops,
  52. "Number of scalar value writes nested in affine loops after Simplify");
  53. TWO_STATISTICS(NumPHIWrites,
  54. "Number of scalar phi writes after the first simplification");
  55. TWO_STATISTICS(
  56. NumPHIWritesInLoops,
  57. "Number of scalar phi writes nested in affine loops after Simplify");
  58. TWO_STATISTICS(NumSingletonWrites, "Number of singleton writes after Simplify");
  59. TWO_STATISTICS(
  60. NumSingletonWritesInLoops,
  61. "Number of singleton writes nested in affine loops after Simplify");
  62. static bool isImplicitRead(MemoryAccess *MA) {
  63. return MA->isRead() && MA->isOriginalScalarKind();
  64. }
  65. static bool isExplicitAccess(MemoryAccess *MA) {
  66. return MA->isOriginalArrayKind();
  67. }
  68. static bool isImplicitWrite(MemoryAccess *MA) {
  69. return MA->isWrite() && MA->isOriginalScalarKind();
  70. }
  71. /// Like isl::union_map::unite, but may also return an underapproximated
  72. /// result if getting too complex.
  73. ///
  74. /// This is implemented by adding disjuncts to the results until the limit is
  75. /// reached.
  76. static isl::union_map underapproximatedAddMap(isl::union_map UMap,
  77. isl::map Map) {
  78. if (UMap.is_null() || Map.is_null())
  79. return {};
  80. isl::map PrevMap = UMap.extract_map(Map.get_space());
  81. // Fast path: If known that we cannot exceed the disjunct limit, just add
  82. // them.
  83. if (unsignedFromIslSize(PrevMap.n_basic_map()) +
  84. unsignedFromIslSize(Map.n_basic_map()) <=
  85. SimplifyMaxDisjuncts)
  86. return UMap.unite(Map);
  87. isl::map Result = isl::map::empty(PrevMap.get_space());
  88. for (isl::basic_map BMap : PrevMap.get_basic_map_list()) {
  89. if (unsignedFromIslSize(Result.n_basic_map()) > SimplifyMaxDisjuncts)
  90. break;
  91. Result = Result.unite(BMap);
  92. }
  93. for (isl::basic_map BMap : Map.get_basic_map_list()) {
  94. if (unsignedFromIslSize(Result.n_basic_map()) > SimplifyMaxDisjuncts)
  95. break;
  96. Result = Result.unite(BMap);
  97. }
  98. isl::union_map UResult =
  99. UMap.subtract(isl::map::universe(PrevMap.get_space()));
  100. UResult.unite(Result);
  101. return UResult;
  102. }
  103. class SimplifyImpl {
  104. private:
  105. /// The invocation id (if there are multiple instances in the pass manager's
  106. /// pipeline) to determine which statistics to update.
  107. int CallNo;
  108. /// The last/current SCoP that is/has been processed.
  109. Scop *S = nullptr;
  110. /// Number of statements with empty domains removed from the SCoP.
  111. int EmptyDomainsRemoved = 0;
  112. /// Number of writes that are overwritten anyway.
  113. int OverwritesRemoved = 0;
  114. /// Number of combined writes.
  115. int WritesCoalesced = 0;
  116. /// Number of redundant writes removed from this SCoP.
  117. int RedundantWritesRemoved = 0;
  118. /// Number of writes with empty access domain removed.
  119. int EmptyPartialAccessesRemoved = 0;
  120. /// Number of unused accesses removed from this SCoP.
  121. int DeadAccessesRemoved = 0;
  122. /// Number of unused instructions removed from this SCoP.
  123. int DeadInstructionsRemoved = 0;
  124. /// Number of unnecessary statements removed from the SCoP.
  125. int StmtsRemoved = 0;
  126. /// Remove statements that are never executed due to their domains being
  127. /// empty.
  128. ///
  129. /// In contrast to Scop::simplifySCoP, this removes based on the SCoP's
  130. /// effective domain, i.e. including the SCoP's context as used by some other
  131. /// simplification methods in this pass. This is necessary because the
  132. /// analysis on empty domains is unreliable, e.g. remove a scalar value
  133. /// definition MemoryAccesses, but not its use.
  134. void removeEmptyDomainStmts();
  135. /// Remove writes that are overwritten unconditionally later in the same
  136. /// statement.
  137. ///
  138. /// There must be no read of the same value between the write (that is to be
  139. /// removed) and the overwrite.
  140. void removeOverwrites();
  141. /// Combine writes that write the same value if possible.
  142. ///
  143. /// This function is able to combine:
  144. /// - Partial writes with disjoint domain.
  145. /// - Writes that write to the same array element.
  146. ///
  147. /// In all cases, both writes must write the same values.
  148. void coalesceWrites();
  149. /// Remove writes that just write the same value already stored in the
  150. /// element.
  151. void removeRedundantWrites();
  152. /// Remove statements without side effects.
  153. void removeUnnecessaryStmts();
  154. /// Remove accesses that have an empty domain.
  155. void removeEmptyPartialAccesses();
  156. /// Mark all reachable instructions and access, and sweep those that are not
  157. /// reachable.
  158. void markAndSweep(LoopInfo *LI);
  159. /// Print simplification statistics to @p OS.
  160. void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const;
  161. /// Print the current state of all MemoryAccesses to @p OS.
  162. void printAccesses(llvm::raw_ostream &OS, int Indent = 0) const;
  163. public:
  164. explicit SimplifyImpl(int CallNo = 0) : CallNo(CallNo) {}
  165. void run(Scop &S, LoopInfo *LI);
  166. void printScop(raw_ostream &OS, Scop &S) const;
  167. /// Return whether at least one simplification has been applied.
  168. bool isModified() const;
  169. };
  170. /// Return whether at least one simplification has been applied.
  171. bool SimplifyImpl::isModified() const {
  172. return EmptyDomainsRemoved > 0 || OverwritesRemoved > 0 ||
  173. WritesCoalesced > 0 || RedundantWritesRemoved > 0 ||
  174. EmptyPartialAccessesRemoved > 0 || DeadAccessesRemoved > 0 ||
  175. DeadInstructionsRemoved > 0 || StmtsRemoved > 0;
  176. }
  177. /// Remove statements that are never executed due to their domains being
  178. /// empty.
  179. ///
  180. /// In contrast to Scop::simplifySCoP, this removes based on the SCoP's
  181. /// effective domain, i.e. including the SCoP's context as used by some other
  182. /// simplification methods in this pass. This is necessary because the
  183. /// analysis on empty domains is unreliable, e.g. remove a scalar value
  184. /// definition MemoryAccesses, but not its use.
  185. void SimplifyImpl::removeEmptyDomainStmts() {
  186. size_t NumStmtsBefore = S->getSize();
  187. S->removeStmts([](ScopStmt &Stmt) -> bool {
  188. auto EffectiveDomain =
  189. Stmt.getDomain().intersect_params(Stmt.getParent()->getContext());
  190. return EffectiveDomain.is_empty();
  191. });
  192. assert(NumStmtsBefore >= S->getSize());
  193. EmptyDomainsRemoved = NumStmtsBefore - S->getSize();
  194. LLVM_DEBUG(dbgs() << "Removed " << EmptyDomainsRemoved << " (of "
  195. << NumStmtsBefore << ") statements with empty domains \n");
  196. TotalEmptyDomainsRemoved[CallNo] += EmptyDomainsRemoved;
  197. }
  198. /// Remove writes that are overwritten unconditionally later in the same
  199. /// statement.
  200. ///
  201. /// There must be no read of the same value between the write (that is to be
  202. /// removed) and the overwrite.
  203. void SimplifyImpl::removeOverwrites() {
  204. for (auto &Stmt : *S) {
  205. isl::set Domain = Stmt.getDomain();
  206. isl::union_map WillBeOverwritten = isl::union_map::empty(S->getIslCtx());
  207. SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt));
  208. // Iterate in reverse order, so the overwrite comes before the write that
  209. // is to be removed.
  210. for (auto *MA : reverse(Accesses)) {
  211. // In region statements, the explicit accesses can be in blocks that are
  212. // can be executed in any order. We therefore process only the implicit
  213. // writes and stop after that.
  214. if (Stmt.isRegionStmt() && isExplicitAccess(MA))
  215. break;
  216. auto AccRel = MA->getAccessRelation();
  217. AccRel = AccRel.intersect_domain(Domain);
  218. AccRel = AccRel.intersect_params(S->getContext());
  219. // If a value is read in-between, do not consider it as overwritten.
  220. if (MA->isRead()) {
  221. // Invalidate all overwrites for the array it accesses to avoid too
  222. // complex isl sets.
  223. isl::map AccRelUniv = isl::map::universe(AccRel.get_space());
  224. WillBeOverwritten = WillBeOverwritten.subtract(AccRelUniv);
  225. continue;
  226. }
  227. // If all of a write's elements are overwritten, remove it.
  228. isl::union_map AccRelUnion = AccRel;
  229. if (AccRelUnion.is_subset(WillBeOverwritten)) {
  230. LLVM_DEBUG(dbgs() << "Removing " << MA
  231. << " which will be overwritten anyway\n");
  232. Stmt.removeSingleMemoryAccess(MA);
  233. OverwritesRemoved++;
  234. TotalOverwritesRemoved[CallNo]++;
  235. }
  236. // Unconditional writes overwrite other values.
  237. if (MA->isMustWrite()) {
  238. // Avoid too complex isl sets. If necessary, throw away some of the
  239. // knowledge.
  240. WillBeOverwritten = underapproximatedAddMap(WillBeOverwritten, AccRel);
  241. }
  242. }
  243. }
  244. }
  245. /// Combine writes that write the same value if possible.
  246. ///
  247. /// This function is able to combine:
  248. /// - Partial writes with disjoint domain.
  249. /// - Writes that write to the same array element.
  250. ///
  251. /// In all cases, both writes must write the same values.
  252. void SimplifyImpl::coalesceWrites() {
  253. for (auto &Stmt : *S) {
  254. isl::set Domain = Stmt.getDomain().intersect_params(S->getContext());
  255. // We let isl do the lookup for the same-value condition. For this, we
  256. // wrap llvm::Value into an isl::set such that isl can do the lookup in
  257. // its hashtable implementation. llvm::Values are only compared within a
  258. // ScopStmt, so the map can be local to this scope. TODO: Refactor with
  259. // ZoneAlgorithm::makeValueSet()
  260. SmallDenseMap<Value *, isl::set> ValueSets;
  261. auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set {
  262. assert(V);
  263. isl::set &Result = ValueSets[V];
  264. if (Result.is_null()) {
  265. isl::ctx Ctx = S->getIslCtx();
  266. std::string Name = getIslCompatibleName(
  267. "Val", V, ValueSets.size() - 1, std::string(), UseInstructionNames);
  268. isl::id Id = isl::id::alloc(Ctx, Name, V);
  269. Result = isl::set::universe(
  270. isl::space(Ctx, 0, 0).set_tuple_id(isl::dim::set, Id));
  271. }
  272. return Result;
  273. };
  274. // List of all eligible (for coalescing) writes of the future.
  275. // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] }
  276. isl::union_map FutureWrites = isl::union_map::empty(S->getIslCtx());
  277. // Iterate over accesses from the last to the first.
  278. SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt));
  279. for (MemoryAccess *MA : reverse(Accesses)) {
  280. // In region statements, the explicit accesses can be in blocks that can
  281. // be executed in any order. We therefore process only the implicit
  282. // writes and stop after that.
  283. if (Stmt.isRegionStmt() && isExplicitAccess(MA))
  284. break;
  285. // { Domain[] -> Element[] }
  286. isl::map AccRel = MA->getLatestAccessRelation().intersect_domain(Domain);
  287. // { [Domain[] -> Element[]] }
  288. isl::set AccRelWrapped = AccRel.wrap();
  289. // { Value[] }
  290. isl::set ValSet;
  291. if (MA->isMustWrite() && (MA->isOriginalScalarKind() ||
  292. isa<StoreInst>(MA->getAccessInstruction()))) {
  293. // Normally, tryGetValueStored() should be used to determine which
  294. // element is written, but it can return nullptr; For PHI accesses,
  295. // getAccessValue() returns the PHI instead of the PHI's incoming
  296. // value. In this case, where we only compare values of a single
  297. // statement, this is fine, because within a statement, a PHI in a
  298. // successor block has always the same value as the incoming write. We
  299. // still preferably use the incoming value directly so we also catch
  300. // direct uses of that.
  301. Value *StoredVal = MA->tryGetValueStored();
  302. if (!StoredVal)
  303. StoredVal = MA->getAccessValue();
  304. ValSet = makeValueSet(StoredVal);
  305. // { Domain[] }
  306. isl::set AccDomain = AccRel.domain();
  307. // Parts of the statement's domain that is not written by this access.
  308. isl::set UndefDomain = Domain.subtract(AccDomain);
  309. // { Element[] }
  310. isl::set ElementUniverse =
  311. isl::set::universe(AccRel.get_space().range());
  312. // { Domain[] -> Element[] }
  313. isl::map UndefAnything =
  314. isl::map::from_domain_and_range(UndefDomain, ElementUniverse);
  315. // We are looking a compatible write access. The other write can
  316. // access these elements...
  317. isl::map AllowedAccesses = AccRel.unite(UndefAnything);
  318. // ... and must write the same value.
  319. // { [Domain[] -> Element[]] -> Value[] }
  320. isl::map Filter =
  321. isl::map::from_domain_and_range(AllowedAccesses.wrap(), ValSet);
  322. // Lookup future write that fulfills these conditions.
  323. // { [[Domain[] -> Element[]] -> Value[]] -> MemoryAccess[] }
  324. isl::union_map Filtered =
  325. FutureWrites.uncurry().intersect_domain(Filter.wrap());
  326. // Iterate through the candidates.
  327. for (isl::map Map : Filtered.get_map_list()) {
  328. MemoryAccess *OtherMA = (MemoryAccess *)Map.get_space()
  329. .get_tuple_id(isl::dim::out)
  330. .get_user();
  331. isl::map OtherAccRel =
  332. OtherMA->getLatestAccessRelation().intersect_domain(Domain);
  333. // The filter only guaranteed that some of OtherMA's accessed
  334. // elements are allowed. Verify that it only accesses allowed
  335. // elements. Otherwise, continue with the next candidate.
  336. if (!OtherAccRel.is_subset(AllowedAccesses).is_true())
  337. continue;
  338. // The combined access relation.
  339. // { Domain[] -> Element[] }
  340. isl::map NewAccRel = AccRel.unite(OtherAccRel);
  341. simplify(NewAccRel);
  342. // Carry out the coalescing.
  343. Stmt.removeSingleMemoryAccess(MA);
  344. OtherMA->setNewAccessRelation(NewAccRel);
  345. // We removed MA, OtherMA takes its role.
  346. MA = OtherMA;
  347. TotalWritesCoalesced[CallNo]++;
  348. WritesCoalesced++;
  349. // Don't look for more candidates.
  350. break;
  351. }
  352. }
  353. // Two writes cannot be coalesced if there is another access (to some of
  354. // the written elements) between them. Remove all visited write accesses
  355. // from the list of eligible writes. Don't just remove the accessed
  356. // elements, but any MemoryAccess that touches any of the invalidated
  357. // elements.
  358. SmallPtrSet<MemoryAccess *, 2> TouchedAccesses;
  359. for (isl::map Map :
  360. FutureWrites.intersect_domain(AccRelWrapped).get_map_list()) {
  361. MemoryAccess *MA = (MemoryAccess *)Map.get_space()
  362. .range()
  363. .unwrap()
  364. .get_tuple_id(isl::dim::out)
  365. .get_user();
  366. TouchedAccesses.insert(MA);
  367. }
  368. isl::union_map NewFutureWrites =
  369. isl::union_map::empty(FutureWrites.ctx());
  370. for (isl::map FutureWrite : FutureWrites.get_map_list()) {
  371. MemoryAccess *MA = (MemoryAccess *)FutureWrite.get_space()
  372. .range()
  373. .unwrap()
  374. .get_tuple_id(isl::dim::out)
  375. .get_user();
  376. if (!TouchedAccesses.count(MA))
  377. NewFutureWrites = NewFutureWrites.unite(FutureWrite);
  378. }
  379. FutureWrites = NewFutureWrites;
  380. if (MA->isMustWrite() && !ValSet.is_null()) {
  381. // { MemoryAccess[] }
  382. auto AccSet =
  383. isl::set::universe(isl::space(S->getIslCtx(), 0, 0)
  384. .set_tuple_id(isl::dim::set, MA->getId()));
  385. // { Val[] -> MemoryAccess[] }
  386. isl::map ValAccSet = isl::map::from_domain_and_range(ValSet, AccSet);
  387. // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] }
  388. isl::map AccRelValAcc =
  389. isl::map::from_domain_and_range(AccRelWrapped, ValAccSet.wrap());
  390. FutureWrites = FutureWrites.unite(AccRelValAcc);
  391. }
  392. }
  393. }
  394. }
  395. /// Remove writes that just write the same value already stored in the
  396. /// element.
  397. void SimplifyImpl::removeRedundantWrites() {
  398. for (auto &Stmt : *S) {
  399. SmallDenseMap<Value *, isl::set> ValueSets;
  400. auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set {
  401. assert(V);
  402. isl::set &Result = ValueSets[V];
  403. if (Result.is_null()) {
  404. isl_ctx *Ctx = S->getIslCtx().get();
  405. std::string Name = getIslCompatibleName(
  406. "Val", V, ValueSets.size() - 1, std::string(), UseInstructionNames);
  407. isl::id Id = isl::manage(isl_id_alloc(Ctx, Name.c_str(), V));
  408. Result = isl::set::universe(
  409. isl::space(Ctx, 0, 0).set_tuple_id(isl::dim::set, Id));
  410. }
  411. return Result;
  412. };
  413. isl::set Domain = Stmt.getDomain();
  414. Domain = Domain.intersect_params(S->getContext());
  415. // List of element reads that still have the same value while iterating
  416. // through the MemoryAccesses.
  417. // { [Domain[] -> Element[]] -> Val[] }
  418. isl::union_map Known = isl::union_map::empty(S->getIslCtx());
  419. SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt));
  420. for (MemoryAccess *MA : Accesses) {
  421. // Is the memory access in a defined order relative to the other
  422. // accesses? In region statements, only the first and the last accesses
  423. // have defined order. Execution of those in the middle may depend on
  424. // runtime conditions an therefore cannot be modified.
  425. bool IsOrdered =
  426. Stmt.isBlockStmt() || MA->isOriginalScalarKind() ||
  427. (!S->getBoxedLoops().size() && MA->getAccessInstruction() &&
  428. Stmt.getEntryBlock() == MA->getAccessInstruction()->getParent());
  429. isl::map AccRel = MA->getAccessRelation();
  430. AccRel = AccRel.intersect_domain(Domain);
  431. isl::set AccRelWrapped = AccRel.wrap();
  432. // Determine whether a write is redundant (stores only values that are
  433. // already present in the written array elements) and remove it if this
  434. // is the case.
  435. if (IsOrdered && MA->isMustWrite() &&
  436. (isa<StoreInst>(MA->getAccessInstruction()) ||
  437. MA->isOriginalScalarKind())) {
  438. Value *StoredVal = MA->tryGetValueStored();
  439. if (!StoredVal)
  440. StoredVal = MA->getAccessValue();
  441. if (StoredVal) {
  442. // Lookup in the set of known values.
  443. isl::map AccRelStoredVal = isl::map::from_domain_and_range(
  444. AccRelWrapped, makeValueSet(StoredVal));
  445. if (isl::union_map(AccRelStoredVal).is_subset(Known)) {
  446. LLVM_DEBUG(dbgs() << "Cleanup of " << MA << ":\n");
  447. LLVM_DEBUG(dbgs() << " Scalar: " << *StoredVal << "\n");
  448. LLVM_DEBUG(dbgs() << " AccRel: " << AccRel << "\n");
  449. Stmt.removeSingleMemoryAccess(MA);
  450. RedundantWritesRemoved++;
  451. TotalRedundantWritesRemoved[CallNo]++;
  452. }
  453. }
  454. }
  455. // Update the know values set.
  456. if (MA->isRead()) {
  457. // Loaded values are the currently known values of the array element
  458. // it was loaded from.
  459. Value *LoadedVal = MA->getAccessValue();
  460. if (LoadedVal && IsOrdered) {
  461. isl::map AccRelVal = isl::map::from_domain_and_range(
  462. AccRelWrapped, makeValueSet(LoadedVal));
  463. Known = Known.unite(AccRelVal);
  464. }
  465. } else if (MA->isWrite()) {
  466. // Remove (possibly) overwritten values from the known elements set.
  467. // We remove all elements of the accessed array to avoid too complex
  468. // isl sets.
  469. isl::set AccRelUniv = isl::set::universe(AccRelWrapped.get_space());
  470. Known = Known.subtract_domain(AccRelUniv);
  471. // At this point, we could add the written value of must-writes.
  472. // However, writing same values is already handled by
  473. // coalesceWrites().
  474. }
  475. }
  476. }
  477. }
  478. /// Remove statements without side effects.
  479. void SimplifyImpl::removeUnnecessaryStmts() {
  480. auto NumStmtsBefore = S->getSize();
  481. S->simplifySCoP(true);
  482. assert(NumStmtsBefore >= S->getSize());
  483. StmtsRemoved = NumStmtsBefore - S->getSize();
  484. LLVM_DEBUG(dbgs() << "Removed " << StmtsRemoved << " (of " << NumStmtsBefore
  485. << ") statements\n");
  486. TotalStmtsRemoved[CallNo] += StmtsRemoved;
  487. }
  488. /// Remove accesses that have an empty domain.
  489. void SimplifyImpl::removeEmptyPartialAccesses() {
  490. for (ScopStmt &Stmt : *S) {
  491. // Defer the actual removal to not invalidate iterators.
  492. SmallVector<MemoryAccess *, 8> DeferredRemove;
  493. for (MemoryAccess *MA : Stmt) {
  494. if (!MA->isWrite())
  495. continue;
  496. isl::map AccRel = MA->getAccessRelation();
  497. if (!AccRel.is_empty().is_true())
  498. continue;
  499. LLVM_DEBUG(
  500. dbgs() << "Removing " << MA
  501. << " because it's a partial access that never occurs\n");
  502. DeferredRemove.push_back(MA);
  503. }
  504. for (MemoryAccess *MA : DeferredRemove) {
  505. Stmt.removeSingleMemoryAccess(MA);
  506. EmptyPartialAccessesRemoved++;
  507. TotalEmptyPartialAccessesRemoved[CallNo]++;
  508. }
  509. }
  510. }
  511. /// Mark all reachable instructions and access, and sweep those that are not
  512. /// reachable.
  513. void SimplifyImpl::markAndSweep(LoopInfo *LI) {
  514. DenseSet<MemoryAccess *> UsedMA;
  515. DenseSet<VirtualInstruction> UsedInsts;
  516. // Get all reachable instructions and accesses.
  517. markReachable(S, LI, UsedInsts, UsedMA);
  518. // Remove all non-reachable accesses.
  519. // We need get all MemoryAccesses first, in order to not invalidate the
  520. // iterators when removing them.
  521. SmallVector<MemoryAccess *, 64> AllMAs;
  522. for (ScopStmt &Stmt : *S)
  523. AllMAs.append(Stmt.begin(), Stmt.end());
  524. for (MemoryAccess *MA : AllMAs) {
  525. if (UsedMA.count(MA))
  526. continue;
  527. LLVM_DEBUG(dbgs() << "Removing " << MA
  528. << " because its value is not used\n");
  529. ScopStmt *Stmt = MA->getStatement();
  530. Stmt->removeSingleMemoryAccess(MA);
  531. DeadAccessesRemoved++;
  532. TotalDeadAccessesRemoved[CallNo]++;
  533. }
  534. // Remove all non-reachable instructions.
  535. for (ScopStmt &Stmt : *S) {
  536. // Note that for region statements, we can only remove the non-terminator
  537. // instructions of the entry block. All other instructions are not in the
  538. // instructions list, but implicitly always part of the statement.
  539. SmallVector<Instruction *, 32> AllInsts(Stmt.insts_begin(),
  540. Stmt.insts_end());
  541. SmallVector<Instruction *, 32> RemainInsts;
  542. for (Instruction *Inst : AllInsts) {
  543. auto It = UsedInsts.find({&Stmt, Inst});
  544. if (It == UsedInsts.end()) {
  545. LLVM_DEBUG(dbgs() << "Removing "; Inst->print(dbgs());
  546. dbgs() << " because it is not used\n");
  547. DeadInstructionsRemoved++;
  548. TotalDeadInstructionsRemoved[CallNo]++;
  549. continue;
  550. }
  551. RemainInsts.push_back(Inst);
  552. // If instructions appear multiple times, keep only the first.
  553. UsedInsts.erase(It);
  554. }
  555. // Set the new instruction list to be only those we did not remove.
  556. Stmt.setInstructions(RemainInsts);
  557. }
  558. }
  559. /// Print simplification statistics to @p OS.
  560. void SimplifyImpl::printStatistics(llvm::raw_ostream &OS, int Indent) const {
  561. OS.indent(Indent) << "Statistics {\n";
  562. OS.indent(Indent + 4) << "Empty domains removed: " << EmptyDomainsRemoved
  563. << '\n';
  564. OS.indent(Indent + 4) << "Overwrites removed: " << OverwritesRemoved << '\n';
  565. OS.indent(Indent + 4) << "Partial writes coalesced: " << WritesCoalesced
  566. << "\n";
  567. OS.indent(Indent + 4) << "Redundant writes removed: "
  568. << RedundantWritesRemoved << "\n";
  569. OS.indent(Indent + 4) << "Accesses with empty domains removed: "
  570. << EmptyPartialAccessesRemoved << "\n";
  571. OS.indent(Indent + 4) << "Dead accesses removed: " << DeadAccessesRemoved
  572. << '\n';
  573. OS.indent(Indent + 4) << "Dead instructions removed: "
  574. << DeadInstructionsRemoved << '\n';
  575. OS.indent(Indent + 4) << "Stmts removed: " << StmtsRemoved << "\n";
  576. OS.indent(Indent) << "}\n";
  577. }
  578. /// Print the current state of all MemoryAccesses to @p OS.
  579. void SimplifyImpl::printAccesses(llvm::raw_ostream &OS, int Indent) const {
  580. OS.indent(Indent) << "After accesses {\n";
  581. for (auto &Stmt : *S) {
  582. OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
  583. for (auto *MA : Stmt)
  584. MA->print(OS);
  585. }
  586. OS.indent(Indent) << "}\n";
  587. }
  588. void SimplifyImpl::run(Scop &S, LoopInfo *LI) {
  589. // Must not have run before.
  590. assert(!this->S);
  591. assert(!isModified());
  592. // Prepare processing of this SCoP.
  593. this->S = &S;
  594. ScopsProcessed[CallNo]++;
  595. LLVM_DEBUG(dbgs() << "Removing statements that are never executed...\n");
  596. removeEmptyDomainStmts();
  597. LLVM_DEBUG(dbgs() << "Removing partial writes that never happen...\n");
  598. removeEmptyPartialAccesses();
  599. LLVM_DEBUG(dbgs() << "Removing overwrites...\n");
  600. removeOverwrites();
  601. LLVM_DEBUG(dbgs() << "Coalesce partial writes...\n");
  602. coalesceWrites();
  603. LLVM_DEBUG(dbgs() << "Removing redundant writes...\n");
  604. removeRedundantWrites();
  605. LLVM_DEBUG(dbgs() << "Cleanup unused accesses...\n");
  606. markAndSweep(LI);
  607. LLVM_DEBUG(dbgs() << "Removing statements without side effects...\n");
  608. removeUnnecessaryStmts();
  609. if (isModified())
  610. ScopsModified[CallNo]++;
  611. LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
  612. LLVM_DEBUG(dbgs() << S);
  613. auto ScopStats = S.getStatistics();
  614. NumValueWrites[CallNo] += ScopStats.NumValueWrites;
  615. NumValueWritesInLoops[CallNo] += ScopStats.NumValueWritesInLoops;
  616. NumPHIWrites[CallNo] += ScopStats.NumPHIWrites;
  617. NumPHIWritesInLoops[CallNo] += ScopStats.NumPHIWritesInLoops;
  618. NumSingletonWrites[CallNo] += ScopStats.NumSingletonWrites;
  619. NumSingletonWritesInLoops[CallNo] += ScopStats.NumSingletonWritesInLoops;
  620. }
  621. void SimplifyImpl::printScop(raw_ostream &OS, Scop &S) const {
  622. assert(&S == this->S &&
  623. "Can only print analysis for the last processed SCoP");
  624. printStatistics(OS);
  625. if (!isModified()) {
  626. OS << "SCoP could not be simplified\n";
  627. return;
  628. }
  629. printAccesses(OS);
  630. }
  631. class SimplifyWrapperPass : public ScopPass {
  632. public:
  633. static char ID;
  634. int CallNo;
  635. Optional<SimplifyImpl> Impl;
  636. explicit SimplifyWrapperPass(int CallNo = 0) : ScopPass(ID), CallNo(CallNo) {}
  637. virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
  638. AU.addRequiredTransitive<ScopInfoRegionPass>();
  639. AU.addRequired<LoopInfoWrapperPass>();
  640. AU.setPreservesAll();
  641. }
  642. virtual bool runOnScop(Scop &S) override {
  643. LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  644. Impl.emplace(CallNo);
  645. Impl->run(S, LI);
  646. return false;
  647. }
  648. virtual void printScop(raw_ostream &OS, Scop &S) const override {
  649. if (Impl)
  650. Impl->printScop(OS, S);
  651. }
  652. virtual void releaseMemory() override { Impl.reset(); }
  653. };
  654. char SimplifyWrapperPass::ID;
  655. static llvm::PreservedAnalyses
  656. runSimplifyUsingNPM(Scop &S, ScopAnalysisManager &SAM,
  657. ScopStandardAnalysisResults &SAR, SPMUpdater &U, int CallNo,
  658. raw_ostream *OS) {
  659. SimplifyImpl Impl(CallNo);
  660. Impl.run(S, &SAR.LI);
  661. if (OS) {
  662. *OS << "Printing analysis 'Polly - Simplify' for region: '" << S.getName()
  663. << "' in function '" << S.getFunction().getName() << "':\n";
  664. Impl.printScop(*OS, S);
  665. }
  666. if (!Impl.isModified())
  667. return llvm::PreservedAnalyses::all();
  668. PreservedAnalyses PA;
  669. PA.preserveSet<AllAnalysesOn<Module>>();
  670. PA.preserveSet<AllAnalysesOn<Function>>();
  671. PA.preserveSet<AllAnalysesOn<Loop>>();
  672. return PA;
  673. }
  674. } // anonymous namespace
  675. llvm::PreservedAnalyses SimplifyPass::run(Scop &S, ScopAnalysisManager &SAM,
  676. ScopStandardAnalysisResults &SAR,
  677. SPMUpdater &U) {
  678. return runSimplifyUsingNPM(S, SAM, SAR, U, CallNo, nullptr);
  679. }
  680. llvm::PreservedAnalyses
  681. SimplifyPrinterPass::run(Scop &S, ScopAnalysisManager &SAM,
  682. ScopStandardAnalysisResults &SAR, SPMUpdater &U) {
  683. return runSimplifyUsingNPM(S, SAM, SAR, U, CallNo, &OS);
  684. }
  685. SmallVector<MemoryAccess *, 32> polly::getAccessesInOrder(ScopStmt &Stmt) {
  686. SmallVector<MemoryAccess *, 32> Accesses;
  687. for (MemoryAccess *MemAcc : Stmt)
  688. if (isImplicitRead(MemAcc))
  689. Accesses.push_back(MemAcc);
  690. for (MemoryAccess *MemAcc : Stmt)
  691. if (isExplicitAccess(MemAcc))
  692. Accesses.push_back(MemAcc);
  693. for (MemoryAccess *MemAcc : Stmt)
  694. if (isImplicitWrite(MemAcc))
  695. Accesses.push_back(MemAcc);
  696. return Accesses;
  697. }
  698. Pass *polly::createSimplifyWrapperPass(int CallNo) {
  699. return new SimplifyWrapperPass(CallNo);
  700. }
  701. INITIALIZE_PASS_BEGIN(SimplifyWrapperPass, "polly-simplify", "Polly - Simplify",
  702. false, false)
  703. INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  704. INITIALIZE_PASS_END(SimplifyWrapperPass, "polly-simplify", "Polly - Simplify",
  705. false, false)