DeLICM.cpp 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496
  1. //===------ DeLICM.cpp -----------------------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Undo the effect of Loop Invariant Code Motion (LICM) and
  10. // GVN Partial Redundancy Elimination (PRE) on SCoP-level.
  11. //
  12. // Namely, remove register/scalar dependencies by mapping them back to array
  13. // elements.
  14. //
  15. //===----------------------------------------------------------------------===//
  16. #include "polly/DeLICM.h"
  17. #include "polly/LinkAllPasses.h"
  18. #include "polly/Options.h"
  19. #include "polly/ScopInfo.h"
  20. #include "polly/ScopPass.h"
  21. #include "polly/Support/GICHelper.h"
  22. #include "polly/Support/ISLOStream.h"
  23. #include "polly/Support/ISLTools.h"
  24. #include "polly/ZoneAlgo.h"
  25. #include "llvm/ADT/Statistic.h"
  26. #include "llvm/InitializePasses.h"
  27. #define DEBUG_TYPE "polly-delicm"
  28. using namespace polly;
  29. using namespace llvm;
  30. namespace {
  31. cl::opt<int>
  32. DelicmMaxOps("polly-delicm-max-ops",
  33. cl::desc("Maximum number of isl operations to invest for "
  34. "lifetime analysis; 0=no limit"),
  35. cl::init(1000000), cl::cat(PollyCategory));
  36. cl::opt<bool> DelicmOverapproximateWrites(
  37. "polly-delicm-overapproximate-writes",
  38. cl::desc(
  39. "Do more PHI writes than necessary in order to avoid partial accesses"),
  40. cl::init(false), cl::Hidden, cl::cat(PollyCategory));
  41. cl::opt<bool> DelicmPartialWrites("polly-delicm-partial-writes",
  42. cl::desc("Allow partial writes"),
  43. cl::init(true), cl::Hidden,
  44. cl::cat(PollyCategory));
  45. cl::opt<bool>
  46. DelicmComputeKnown("polly-delicm-compute-known",
  47. cl::desc("Compute known content of array elements"),
  48. cl::init(true), cl::Hidden, cl::cat(PollyCategory));
  49. STATISTIC(DeLICMAnalyzed, "Number of successfully analyzed SCoPs");
  50. STATISTIC(DeLICMOutOfQuota,
  51. "Analyses aborted because max_operations was reached");
  52. STATISTIC(MappedValueScalars, "Number of mapped Value scalars");
  53. STATISTIC(MappedPHIScalars, "Number of mapped PHI scalars");
  54. STATISTIC(TargetsMapped, "Number of stores used for at least one mapping");
  55. STATISTIC(DeLICMScopsModified, "Number of SCoPs optimized");
  56. STATISTIC(NumValueWrites, "Number of scalar value writes after DeLICM");
  57. STATISTIC(NumValueWritesInLoops,
  58. "Number of scalar value writes nested in affine loops after DeLICM");
  59. STATISTIC(NumPHIWrites, "Number of scalar phi writes after DeLICM");
  60. STATISTIC(NumPHIWritesInLoops,
  61. "Number of scalar phi writes nested in affine loops after DeLICM");
  62. STATISTIC(NumSingletonWrites, "Number of singleton writes after DeLICM");
  63. STATISTIC(NumSingletonWritesInLoops,
  64. "Number of singleton writes nested in affine loops after DeLICM");
  65. isl::union_map computeReachingOverwrite(isl::union_map Schedule,
  66. isl::union_map Writes,
  67. bool InclPrevWrite,
  68. bool InclOverwrite) {
  69. return computeReachingWrite(Schedule, Writes, true, InclPrevWrite,
  70. InclOverwrite);
  71. }
  72. /// Compute the next overwrite for a scalar.
  73. ///
  74. /// @param Schedule { DomainWrite[] -> Scatter[] }
  75. /// Schedule of (at least) all writes. Instances not in @p
  76. /// Writes are ignored.
  77. /// @param Writes { DomainWrite[] }
  78. /// The element instances that write to the scalar.
  79. /// @param InclPrevWrite Whether to extend the timepoints to include
  80. /// the timepoint where the previous write happens.
  81. /// @param InclOverwrite Whether the reaching overwrite includes the timepoint
  82. /// of the overwrite itself.
  83. ///
  84. /// @return { Scatter[] -> DomainDef[] }
  85. isl::union_map computeScalarReachingOverwrite(isl::union_map Schedule,
  86. isl::union_set Writes,
  87. bool InclPrevWrite,
  88. bool InclOverwrite) {
  89. // { DomainWrite[] }
  90. auto WritesMap = isl::union_map::from_domain(Writes);
  91. // { [Element[] -> Scatter[]] -> DomainWrite[] }
  92. auto Result = computeReachingOverwrite(
  93. std::move(Schedule), std::move(WritesMap), InclPrevWrite, InclOverwrite);
  94. return Result.domain_factor_range();
  95. }
  96. /// Overload of computeScalarReachingOverwrite, with only one writing statement.
  97. /// Consequently, the result consists of only one map space.
  98. ///
  99. /// @param Schedule { DomainWrite[] -> Scatter[] }
  100. /// @param Writes { DomainWrite[] }
  101. /// @param InclPrevWrite Include the previous write to result.
  102. /// @param InclOverwrite Include the overwrite to the result.
  103. ///
  104. /// @return { Scatter[] -> DomainWrite[] }
  105. isl::map computeScalarReachingOverwrite(isl::union_map Schedule,
  106. isl::set Writes, bool InclPrevWrite,
  107. bool InclOverwrite) {
  108. isl::space ScatterSpace = getScatterSpace(Schedule);
  109. isl::space DomSpace = Writes.get_space();
  110. isl::union_map ReachOverwrite = computeScalarReachingOverwrite(
  111. Schedule, isl::union_set(Writes), InclPrevWrite, InclOverwrite);
  112. isl::space ResultSpace = ScatterSpace.map_from_domain_and_range(DomSpace);
  113. return singleton(std::move(ReachOverwrite), ResultSpace);
  114. }
  115. /// Try to find a 'natural' extension of a mapped to elements outside its
  116. /// domain.
  117. ///
  118. /// @param Relevant The map with mapping that may not be modified.
  119. /// @param Universe The domain to which @p Relevant needs to be extended.
  120. ///
  121. /// @return A map with that associates the domain elements of @p Relevant to the
  122. /// same elements and in addition the elements of @p Universe to some
  123. /// undefined elements. The function prefers to return simple maps.
  124. isl::union_map expandMapping(isl::union_map Relevant, isl::union_set Universe) {
  125. Relevant = Relevant.coalesce();
  126. isl::union_set RelevantDomain = Relevant.domain();
  127. isl::union_map Simplified = Relevant.gist_domain(RelevantDomain);
  128. Simplified = Simplified.coalesce();
  129. return Simplified.intersect_domain(Universe);
  130. }
  131. /// Represent the knowledge of the contents of any array elements in any zone or
  132. /// the knowledge we would add when mapping a scalar to an array element.
  133. ///
  134. /// Every array element at every zone unit has one of two states:
  135. ///
  136. /// - Unused: Not occupied by any value so a transformation can change it to
  137. /// other values.
  138. ///
  139. /// - Occupied: The element contains a value that is still needed.
  140. ///
  141. /// The union of Unused and Unknown zones forms the universe, the set of all
  142. /// elements at every timepoint. The universe can easily be derived from the
  143. /// array elements that are accessed someway. Arrays that are never accessed
  144. /// also never play a role in any computation and can hence be ignored. With a
  145. /// given universe, only one of the sets needs to stored implicitly. Computing
  146. /// the complement is also an expensive operation, hence this class has been
  147. /// designed that only one of sets is needed while the other is assumed to be
  148. /// implicit. It can still be given, but is mostly ignored.
  149. ///
  150. /// There are two use cases for the Knowledge class:
  151. ///
  152. /// 1) To represent the knowledge of the current state of ScopInfo. The unused
  153. /// state means that an element is currently unused: there is no read of it
  154. /// before the next overwrite. Also called 'Existing'.
  155. ///
  156. /// 2) To represent the requirements for mapping a scalar to array elements. The
  157. /// unused state means that there is no change/requirement. Also called
  158. /// 'Proposed'.
  159. ///
  160. /// In addition to these states at unit zones, Knowledge needs to know when
  161. /// values are written. This is because written values may have no lifetime (one
  162. /// reason is that the value is never read). Such writes would therefore never
  163. /// conflict, but overwrite values that might still be required. Another source
  164. /// of problems are multiple writes to the same element at the same timepoint,
  165. /// because their order is undefined.
  166. class Knowledge {
  167. private:
  168. /// { [Element[] -> Zone[]] }
  169. /// Set of array elements and when they are alive.
  170. /// Can contain a nullptr; in this case the set is implicitly defined as the
  171. /// complement of #Unused.
  172. ///
  173. /// The set of alive array elements is represented as zone, as the set of live
  174. /// values can differ depending on how the elements are interpreted.
  175. /// Assuming a value X is written at timestep [0] and read at timestep [1]
  176. /// without being used at any later point, then the value is alive in the
  177. /// interval ]0,1[. This interval cannot be represented by an integer set, as
  178. /// it does not contain any integer point. Zones allow us to represent this
  179. /// interval and can be converted to sets of timepoints when needed (e.g., in
  180. /// isConflicting when comparing to the write sets).
  181. /// @see convertZoneToTimepoints and this file's comment for more details.
  182. isl::union_set Occupied;
  183. /// { [Element[] -> Zone[]] }
  184. /// Set of array elements when they are not alive, i.e. their memory can be
  185. /// used for other purposed. Can contain a nullptr; in this case the set is
  186. /// implicitly defined as the complement of #Occupied.
  187. isl::union_set Unused;
  188. /// { [Element[] -> Zone[]] -> ValInst[] }
  189. /// Maps to the known content for each array element at any interval.
  190. ///
  191. /// Any element/interval can map to multiple known elements. This is due to
  192. /// multiple llvm::Value referring to the same content. Examples are
  193. ///
  194. /// - A value stored and loaded again. The LoadInst represents the same value
  195. /// as the StoreInst's value operand.
  196. ///
  197. /// - A PHINode is equal to any one of the incoming values. In case of
  198. /// LCSSA-form, it is always equal to its single incoming value.
  199. ///
  200. /// Two Knowledges are considered not conflicting if at least one of the known
  201. /// values match. Not known values are not stored as an unnamed tuple (as
  202. /// #Written does), but maps to nothing.
  203. ///
  204. /// Known values are usually just defined for #Occupied elements. Knowing
  205. /// #Unused contents has no advantage as it can be overwritten.
  206. isl::union_map Known;
  207. /// { [Element[] -> Scatter[]] -> ValInst[] }
  208. /// The write actions currently in the scop or that would be added when
  209. /// mapping a scalar. Maps to the value that is written.
  210. ///
  211. /// Written values that cannot be identified are represented by an unknown
  212. /// ValInst[] (an unnamed tuple of 0 dimension). It conflicts with itself.
  213. isl::union_map Written;
  214. /// Check whether this Knowledge object is well-formed.
  215. void checkConsistency() const {
  216. #ifndef NDEBUG
  217. // Default-initialized object
  218. if (Occupied.is_null() && Unused.is_null() && Known.is_null() &&
  219. Written.is_null())
  220. return;
  221. assert(!Occupied.is_null() || !Unused.is_null());
  222. assert(!Known.is_null());
  223. assert(!Written.is_null());
  224. // If not all fields are defined, we cannot derived the universe.
  225. if (Occupied.is_null() || Unused.is_null())
  226. return;
  227. assert(Occupied.is_disjoint(Unused));
  228. auto Universe = Occupied.unite(Unused);
  229. assert(!Known.domain().is_subset(Universe).is_false());
  230. assert(!Written.domain().is_subset(Universe).is_false());
  231. #endif
  232. }
  233. public:
  234. /// Initialize a nullptr-Knowledge. This is only provided for convenience; do
  235. /// not use such an object.
  236. Knowledge() {}
  237. /// Create a new object with the given members.
  238. Knowledge(isl::union_set Occupied, isl::union_set Unused,
  239. isl::union_map Known, isl::union_map Written)
  240. : Occupied(std::move(Occupied)), Unused(std::move(Unused)),
  241. Known(std::move(Known)), Written(std::move(Written)) {
  242. checkConsistency();
  243. }
  244. /// Return whether this object was not default-constructed.
  245. bool isUsable() const {
  246. return (Occupied.is_null() || Unused.is_null()) && !Known.is_null() &&
  247. !Written.is_null();
  248. }
  249. /// Print the content of this object to @p OS.
  250. void print(llvm::raw_ostream &OS, unsigned Indent = 0) const {
  251. if (isUsable()) {
  252. if (!Occupied.is_null())
  253. OS.indent(Indent) << "Occupied: " << Occupied << "\n";
  254. else
  255. OS.indent(Indent) << "Occupied: <Everything else not in Unused>\n";
  256. if (!Unused.is_null())
  257. OS.indent(Indent) << "Unused: " << Unused << "\n";
  258. else
  259. OS.indent(Indent) << "Unused: <Everything else not in Occupied>\n";
  260. OS.indent(Indent) << "Known: " << Known << "\n";
  261. OS.indent(Indent) << "Written : " << Written << '\n';
  262. } else {
  263. OS.indent(Indent) << "Invalid knowledge\n";
  264. }
  265. }
  266. /// Combine two knowledges, this and @p That.
  267. void learnFrom(Knowledge That) {
  268. assert(!isConflicting(*this, That));
  269. assert(!Unused.is_null() && !That.Occupied.is_null());
  270. assert(
  271. That.Unused.is_null() &&
  272. "This function is only prepared to learn occupied elements from That");
  273. assert(Occupied.is_null() && "This function does not implement "
  274. "`this->Occupied = "
  275. "this->Occupied.unite(That.Occupied);`");
  276. Unused = Unused.subtract(That.Occupied);
  277. Known = Known.unite(That.Known);
  278. Written = Written.unite(That.Written);
  279. checkConsistency();
  280. }
  281. /// Determine whether two Knowledges conflict with each other.
  282. ///
  283. /// In theory @p Existing and @p Proposed are symmetric, but the
  284. /// implementation is constrained by the implicit interpretation. That is, @p
  285. /// Existing must have #Unused defined (use case 1) and @p Proposed must have
  286. /// #Occupied defined (use case 1).
  287. ///
  288. /// A conflict is defined as non-preserved semantics when they are merged. For
  289. /// instance, when for the same array and zone they assume different
  290. /// llvm::Values.
  291. ///
  292. /// @param Existing One of the knowledges with #Unused defined.
  293. /// @param Proposed One of the knowledges with #Occupied defined.
  294. /// @param OS Dump the conflict reason to this output stream; use
  295. /// nullptr to not output anything.
  296. /// @param Indent Indention for the conflict reason.
  297. ///
  298. /// @return True, iff the two knowledges are conflicting.
  299. static bool isConflicting(const Knowledge &Existing,
  300. const Knowledge &Proposed,
  301. llvm::raw_ostream *OS = nullptr,
  302. unsigned Indent = 0) {
  303. assert(!Existing.Unused.is_null());
  304. assert(!Proposed.Occupied.is_null());
  305. #ifndef NDEBUG
  306. if (!Existing.Occupied.is_null() && !Proposed.Unused.is_null()) {
  307. auto ExistingUniverse = Existing.Occupied.unite(Existing.Unused);
  308. auto ProposedUniverse = Proposed.Occupied.unite(Proposed.Unused);
  309. assert(ExistingUniverse.is_equal(ProposedUniverse) &&
  310. "Both inputs' Knowledges must be over the same universe");
  311. }
  312. #endif
  313. // Do the Existing and Proposed lifetimes conflict?
  314. //
  315. // Lifetimes are described as the cross-product of array elements and zone
  316. // intervals in which they are alive (the space { [Element[] -> Zone[]] }).
  317. // In the following we call this "element/lifetime interval".
  318. //
  319. // In order to not conflict, one of the following conditions must apply for
  320. // each element/lifetime interval:
  321. //
  322. // 1. If occupied in one of the knowledges, it is unused in the other.
  323. //
  324. // - or -
  325. //
  326. // 2. Both contain the same value.
  327. //
  328. // Instead of partitioning the element/lifetime intervals into a part that
  329. // both Knowledges occupy (which requires an expensive subtraction) and for
  330. // these to check whether they are known to be the same value, we check only
  331. // the second condition and ensure that it also applies when then first
  332. // condition is true. This is done by adding a wildcard value to
  333. // Proposed.Known and Existing.Unused such that they match as a common known
  334. // value. We use the "unknown ValInst" for this purpose. Every
  335. // Existing.Unused may match with an unknown Proposed.Occupied because these
  336. // never are in conflict with each other.
  337. auto ProposedOccupiedAnyVal = makeUnknownForDomain(Proposed.Occupied);
  338. auto ProposedValues = Proposed.Known.unite(ProposedOccupiedAnyVal);
  339. auto ExistingUnusedAnyVal = makeUnknownForDomain(Existing.Unused);
  340. auto ExistingValues = Existing.Known.unite(ExistingUnusedAnyVal);
  341. auto MatchingVals = ExistingValues.intersect(ProposedValues);
  342. auto Matches = MatchingVals.domain();
  343. // Any Proposed.Occupied must either have a match between the known values
  344. // of Existing and Occupied, or be in Existing.Unused. In the latter case,
  345. // the previously added "AnyVal" will match each other.
  346. if (!Proposed.Occupied.is_subset(Matches)) {
  347. if (OS) {
  348. auto Conflicting = Proposed.Occupied.subtract(Matches);
  349. auto ExistingConflictingKnown =
  350. Existing.Known.intersect_domain(Conflicting);
  351. auto ProposedConflictingKnown =
  352. Proposed.Known.intersect_domain(Conflicting);
  353. OS->indent(Indent) << "Proposed lifetime conflicting with Existing's\n";
  354. OS->indent(Indent) << "Conflicting occupied: " << Conflicting << "\n";
  355. if (!ExistingConflictingKnown.is_empty())
  356. OS->indent(Indent)
  357. << "Existing Known: " << ExistingConflictingKnown << "\n";
  358. if (!ProposedConflictingKnown.is_empty())
  359. OS->indent(Indent)
  360. << "Proposed Known: " << ProposedConflictingKnown << "\n";
  361. }
  362. return true;
  363. }
  364. // Do the writes in Existing conflict with occupied values in Proposed?
  365. //
  366. // In order to not conflict, it must either write to unused lifetime or
  367. // write the same value. To check, we remove the writes that write into
  368. // Proposed.Unused (they never conflict) and then see whether the written
  369. // value is already in Proposed.Known. If there are multiple known values
  370. // and a written value is known under different names, it is enough when one
  371. // of the written values (assuming that they are the same value under
  372. // different names, e.g. a PHINode and one of the incoming values) matches
  373. // one of the known names.
  374. //
  375. // We convert here the set of lifetimes to actual timepoints. A lifetime is
  376. // in conflict with a set of write timepoints, if either a live timepoint is
  377. // clearly within the lifetime or if a write happens at the beginning of the
  378. // lifetime (where it would conflict with the value that actually writes the
  379. // value alive). There is no conflict at the end of a lifetime, as the alive
  380. // value will always be read, before it is overwritten again. The last
  381. // property holds in Polly for all scalar values and we expect all users of
  382. // Knowledge to check this property also for accesses to MemoryKind::Array.
  383. auto ProposedFixedDefs =
  384. convertZoneToTimepoints(Proposed.Occupied, true, false);
  385. auto ProposedFixedKnown =
  386. convertZoneToTimepoints(Proposed.Known, isl::dim::in, true, false);
  387. auto ExistingConflictingWrites =
  388. Existing.Written.intersect_domain(ProposedFixedDefs);
  389. auto ExistingConflictingWritesDomain = ExistingConflictingWrites.domain();
  390. auto CommonWrittenVal =
  391. ProposedFixedKnown.intersect(ExistingConflictingWrites);
  392. auto CommonWrittenValDomain = CommonWrittenVal.domain();
  393. if (!ExistingConflictingWritesDomain.is_subset(CommonWrittenValDomain)) {
  394. if (OS) {
  395. auto ExistingConflictingWritten =
  396. ExistingConflictingWrites.subtract_domain(CommonWrittenValDomain);
  397. auto ProposedConflictingKnown = ProposedFixedKnown.subtract_domain(
  398. ExistingConflictingWritten.domain());
  399. OS->indent(Indent)
  400. << "Proposed a lifetime where there is an Existing write into it\n";
  401. OS->indent(Indent) << "Existing conflicting writes: "
  402. << ExistingConflictingWritten << "\n";
  403. if (!ProposedConflictingKnown.is_empty())
  404. OS->indent(Indent)
  405. << "Proposed conflicting known: " << ProposedConflictingKnown
  406. << "\n";
  407. }
  408. return true;
  409. }
  410. // Do the writes in Proposed conflict with occupied values in Existing?
  411. auto ExistingAvailableDefs =
  412. convertZoneToTimepoints(Existing.Unused, true, false);
  413. auto ExistingKnownDefs =
  414. convertZoneToTimepoints(Existing.Known, isl::dim::in, true, false);
  415. auto ProposedWrittenDomain = Proposed.Written.domain();
  416. auto KnownIdentical = ExistingKnownDefs.intersect(Proposed.Written);
  417. auto IdenticalOrUnused =
  418. ExistingAvailableDefs.unite(KnownIdentical.domain());
  419. if (!ProposedWrittenDomain.is_subset(IdenticalOrUnused)) {
  420. if (OS) {
  421. auto Conflicting = ProposedWrittenDomain.subtract(IdenticalOrUnused);
  422. auto ExistingConflictingKnown =
  423. ExistingKnownDefs.intersect_domain(Conflicting);
  424. auto ProposedConflictingWritten =
  425. Proposed.Written.intersect_domain(Conflicting);
  426. OS->indent(Indent) << "Proposed writes into range used by Existing\n";
  427. OS->indent(Indent) << "Proposed conflicting writes: "
  428. << ProposedConflictingWritten << "\n";
  429. if (!ExistingConflictingKnown.is_empty())
  430. OS->indent(Indent)
  431. << "Existing conflicting known: " << ExistingConflictingKnown
  432. << "\n";
  433. }
  434. return true;
  435. }
  436. // Does Proposed write at the same time as Existing already does (order of
  437. // writes is undefined)? Writing the same value is permitted.
  438. auto ExistingWrittenDomain = Existing.Written.domain();
  439. auto BothWritten =
  440. Existing.Written.domain().intersect(Proposed.Written.domain());
  441. auto ExistingKnownWritten = filterKnownValInst(Existing.Written);
  442. auto ProposedKnownWritten = filterKnownValInst(Proposed.Written);
  443. auto CommonWritten =
  444. ExistingKnownWritten.intersect(ProposedKnownWritten).domain();
  445. if (!BothWritten.is_subset(CommonWritten)) {
  446. if (OS) {
  447. auto Conflicting = BothWritten.subtract(CommonWritten);
  448. auto ExistingConflictingWritten =
  449. Existing.Written.intersect_domain(Conflicting);
  450. auto ProposedConflictingWritten =
  451. Proposed.Written.intersect_domain(Conflicting);
  452. OS->indent(Indent) << "Proposed writes at the same time as an already "
  453. "Existing write\n";
  454. OS->indent(Indent) << "Conflicting writes: " << Conflicting << "\n";
  455. if (!ExistingConflictingWritten.is_empty())
  456. OS->indent(Indent)
  457. << "Exiting write: " << ExistingConflictingWritten << "\n";
  458. if (!ProposedConflictingWritten.is_empty())
  459. OS->indent(Indent)
  460. << "Proposed write: " << ProposedConflictingWritten << "\n";
  461. }
  462. return true;
  463. }
  464. return false;
  465. }
  466. };
  467. /// Implementation of the DeLICM/DePRE transformation.
  468. class DeLICMImpl : public ZoneAlgorithm {
  469. private:
  470. /// Knowledge before any transformation took place.
  471. Knowledge OriginalZone;
  472. /// Current knowledge of the SCoP including all already applied
  473. /// transformations.
  474. Knowledge Zone;
  475. /// Number of StoreInsts something can be mapped to.
  476. int NumberOfCompatibleTargets = 0;
  477. /// The number of StoreInsts to which at least one value or PHI has been
  478. /// mapped to.
  479. int NumberOfTargetsMapped = 0;
  480. /// The number of llvm::Value mapped to some array element.
  481. int NumberOfMappedValueScalars = 0;
  482. /// The number of PHIs mapped to some array element.
  483. int NumberOfMappedPHIScalars = 0;
  484. /// Determine whether two knowledges are conflicting with each other.
  485. ///
  486. /// @see Knowledge::isConflicting
  487. bool isConflicting(const Knowledge &Proposed) {
  488. raw_ostream *OS = nullptr;
  489. LLVM_DEBUG(OS = &llvm::dbgs());
  490. return Knowledge::isConflicting(Zone, Proposed, OS, 4);
  491. }
  492. /// Determine whether @p SAI is a scalar that can be mapped to an array
  493. /// element.
  494. bool isMappable(const ScopArrayInfo *SAI) {
  495. assert(SAI);
  496. if (SAI->isValueKind()) {
  497. auto *MA = S->getValueDef(SAI);
  498. if (!MA) {
  499. LLVM_DEBUG(
  500. dbgs()
  501. << " Reject because value is read-only within the scop\n");
  502. return false;
  503. }
  504. // Mapping if value is used after scop is not supported. The code
  505. // generator would need to reload the scalar after the scop, but it
  506. // does not have the information to where it is mapped to. Only the
  507. // MemoryAccesses have that information, not the ScopArrayInfo.
  508. auto Inst = MA->getAccessInstruction();
  509. for (auto User : Inst->users()) {
  510. if (!isa<Instruction>(User))
  511. return false;
  512. auto UserInst = cast<Instruction>(User);
  513. if (!S->contains(UserInst)) {
  514. LLVM_DEBUG(dbgs() << " Reject because value is escaping\n");
  515. return false;
  516. }
  517. }
  518. return true;
  519. }
  520. if (SAI->isPHIKind()) {
  521. auto *MA = S->getPHIRead(SAI);
  522. assert(MA);
  523. // Mapping of an incoming block from before the SCoP is not supported by
  524. // the code generator.
  525. auto PHI = cast<PHINode>(MA->getAccessInstruction());
  526. for (auto Incoming : PHI->blocks()) {
  527. if (!S->contains(Incoming)) {
  528. LLVM_DEBUG(dbgs()
  529. << " Reject because at least one incoming block is "
  530. "not in the scop region\n");
  531. return false;
  532. }
  533. }
  534. return true;
  535. }
  536. LLVM_DEBUG(dbgs() << " Reject ExitPHI or other non-value\n");
  537. return false;
  538. }
  539. /// Compute the uses of a MemoryKind::Value and its lifetime (from its
  540. /// definition to the last use).
  541. ///
  542. /// @param SAI The ScopArrayInfo representing the value's storage.
  543. ///
  544. /// @return { DomainDef[] -> DomainUse[] }, { DomainDef[] -> Zone[] }
  545. /// First element is the set of uses for each definition.
  546. /// The second is the lifetime of each definition.
  547. std::tuple<isl::union_map, isl::map>
  548. computeValueUses(const ScopArrayInfo *SAI) {
  549. assert(SAI->isValueKind());
  550. // { DomainRead[] }
  551. auto Reads = makeEmptyUnionSet();
  552. // Find all uses.
  553. for (auto *MA : S->getValueUses(SAI))
  554. Reads = Reads.unite(getDomainFor(MA));
  555. // { DomainRead[] -> Scatter[] }
  556. auto ReadSchedule = getScatterFor(Reads);
  557. auto *DefMA = S->getValueDef(SAI);
  558. assert(DefMA);
  559. // { DomainDef[] }
  560. auto Writes = getDomainFor(DefMA);
  561. // { DomainDef[] -> Scatter[] }
  562. auto WriteScatter = getScatterFor(Writes);
  563. // { Scatter[] -> DomainDef[] }
  564. auto ReachDef = getScalarReachingDefinition(DefMA->getStatement());
  565. // { [DomainDef[] -> Scatter[]] -> DomainUse[] }
  566. auto Uses = isl::union_map(ReachDef.reverse().range_map())
  567. .apply_range(ReadSchedule.reverse());
  568. // { DomainDef[] -> Scatter[] }
  569. auto UseScatter =
  570. singleton(Uses.domain().unwrap(),
  571. Writes.get_space().map_from_domain_and_range(ScatterSpace));
  572. // { DomainDef[] -> Zone[] }
  573. auto Lifetime = betweenScatter(WriteScatter, UseScatter, false, true);
  574. // { DomainDef[] -> DomainRead[] }
  575. auto DefUses = Uses.domain_factor_domain();
  576. return std::make_pair(DefUses, Lifetime);
  577. }
  578. /// Try to map a MemoryKind::Value to a given array element.
  579. ///
  580. /// @param SAI Representation of the scalar's memory to map.
  581. /// @param TargetElt { Scatter[] -> Element[] }
  582. /// Suggestion where to map a scalar to when at a timepoint.
  583. ///
  584. /// @return true if the scalar was successfully mapped.
  585. bool tryMapValue(const ScopArrayInfo *SAI, isl::map TargetElt) {
  586. assert(SAI->isValueKind());
  587. auto *DefMA = S->getValueDef(SAI);
  588. assert(DefMA->isValueKind());
  589. assert(DefMA->isMustWrite());
  590. auto *V = DefMA->getAccessValue();
  591. auto *DefInst = DefMA->getAccessInstruction();
  592. // Stop if the scalar has already been mapped.
  593. if (!DefMA->getLatestScopArrayInfo()->isValueKind())
  594. return false;
  595. // { DomainDef[] -> Scatter[] }
  596. auto DefSched = getScatterFor(DefMA);
  597. // Where each write is mapped to, according to the suggestion.
  598. // { DomainDef[] -> Element[] }
  599. auto DefTarget = TargetElt.apply_domain(DefSched.reverse());
  600. simplify(DefTarget);
  601. LLVM_DEBUG(dbgs() << " Def Mapping: " << DefTarget << '\n');
  602. auto OrigDomain = getDomainFor(DefMA);
  603. auto MappedDomain = DefTarget.domain();
  604. if (!OrigDomain.is_subset(MappedDomain)) {
  605. LLVM_DEBUG(
  606. dbgs()
  607. << " Reject because mapping does not encompass all instances\n");
  608. return false;
  609. }
  610. // { DomainDef[] -> Zone[] }
  611. isl::map Lifetime;
  612. // { DomainDef[] -> DomainUse[] }
  613. isl::union_map DefUses;
  614. std::tie(DefUses, Lifetime) = computeValueUses(SAI);
  615. LLVM_DEBUG(dbgs() << " Lifetime: " << Lifetime << '\n');
  616. /// { [Element[] -> Zone[]] }
  617. auto EltZone = Lifetime.apply_domain(DefTarget).wrap();
  618. simplify(EltZone);
  619. // When known knowledge is disabled, just return the unknown value. It will
  620. // either get filtered out or conflict with itself.
  621. // { DomainDef[] -> ValInst[] }
  622. isl::map ValInst;
  623. if (DelicmComputeKnown)
  624. ValInst = makeValInst(V, DefMA->getStatement(),
  625. LI->getLoopFor(DefInst->getParent()));
  626. else
  627. ValInst = makeUnknownForDomain(DefMA->getStatement());
  628. // { DomainDef[] -> [Element[] -> Zone[]] }
  629. auto EltKnownTranslator = DefTarget.range_product(Lifetime);
  630. // { [Element[] -> Zone[]] -> ValInst[] }
  631. auto EltKnown = ValInst.apply_domain(EltKnownTranslator);
  632. simplify(EltKnown);
  633. // { DomainDef[] -> [Element[] -> Scatter[]] }
  634. auto WrittenTranslator = DefTarget.range_product(DefSched);
  635. // { [Element[] -> Scatter[]] -> ValInst[] }
  636. auto DefEltSched = ValInst.apply_domain(WrittenTranslator);
  637. simplify(DefEltSched);
  638. Knowledge Proposed(EltZone, {}, filterKnownValInst(EltKnown), DefEltSched);
  639. if (isConflicting(Proposed))
  640. return false;
  641. // { DomainUse[] -> Element[] }
  642. auto UseTarget = DefUses.reverse().apply_range(DefTarget);
  643. mapValue(SAI, std::move(DefTarget), std::move(UseTarget),
  644. std::move(Lifetime), std::move(Proposed));
  645. return true;
  646. }
  647. /// After a scalar has been mapped, update the global knowledge.
  648. void applyLifetime(Knowledge Proposed) {
  649. Zone.learnFrom(std::move(Proposed));
  650. }
  651. /// Map a MemoryKind::Value scalar to an array element.
  652. ///
  653. /// Callers must have ensured that the mapping is valid and not conflicting.
  654. ///
  655. /// @param SAI The ScopArrayInfo representing the scalar's memory to
  656. /// map.
  657. /// @param DefTarget { DomainDef[] -> Element[] }
  658. /// The array element to map the scalar to.
  659. /// @param UseTarget { DomainUse[] -> Element[] }
  660. /// The array elements the uses are mapped to.
  661. /// @param Lifetime { DomainDef[] -> Zone[] }
  662. /// The lifetime of each llvm::Value definition for
  663. /// reporting.
  664. /// @param Proposed Mapping constraints for reporting.
  665. void mapValue(const ScopArrayInfo *SAI, isl::map DefTarget,
  666. isl::union_map UseTarget, isl::map Lifetime,
  667. Knowledge Proposed) {
  668. // Redirect the read accesses.
  669. for (auto *MA : S->getValueUses(SAI)) {
  670. // { DomainUse[] }
  671. auto Domain = getDomainFor(MA);
  672. // { DomainUse[] -> Element[] }
  673. auto NewAccRel = UseTarget.intersect_domain(Domain);
  674. simplify(NewAccRel);
  675. assert(isl_union_map_n_map(NewAccRel.get()) == 1);
  676. MA->setNewAccessRelation(isl::map::from_union_map(NewAccRel));
  677. }
  678. auto *WA = S->getValueDef(SAI);
  679. WA->setNewAccessRelation(DefTarget);
  680. applyLifetime(Proposed);
  681. MappedValueScalars++;
  682. NumberOfMappedValueScalars += 1;
  683. }
  684. isl::map makeValInst(Value *Val, ScopStmt *UserStmt, Loop *Scope,
  685. bool IsCertain = true) {
  686. // When known knowledge is disabled, just return the unknown value. It will
  687. // either get filtered out or conflict with itself.
  688. if (!DelicmComputeKnown)
  689. return makeUnknownForDomain(UserStmt);
  690. return ZoneAlgorithm::makeValInst(Val, UserStmt, Scope, IsCertain);
  691. }
  692. /// Express the incoming values of a PHI for each incoming statement in an
  693. /// isl::union_map.
  694. ///
  695. /// @param SAI The PHI scalar represented by a ScopArrayInfo.
  696. ///
  697. /// @return { PHIWriteDomain[] -> ValInst[] }
  698. isl::union_map determinePHIWrittenValues(const ScopArrayInfo *SAI) {
  699. auto Result = makeEmptyUnionMap();
  700. // Collect the incoming values.
  701. for (auto *MA : S->getPHIIncomings(SAI)) {
  702. // { DomainWrite[] -> ValInst[] }
  703. isl::union_map ValInst;
  704. auto *WriteStmt = MA->getStatement();
  705. auto Incoming = MA->getIncoming();
  706. assert(!Incoming.empty());
  707. if (Incoming.size() == 1) {
  708. ValInst = makeValInst(Incoming[0].second, WriteStmt,
  709. LI->getLoopFor(Incoming[0].first));
  710. } else {
  711. // If the PHI is in a subregion's exit node it can have multiple
  712. // incoming values (+ maybe another incoming edge from an unrelated
  713. // block). We cannot directly represent it as a single llvm::Value.
  714. // We currently model it as unknown value, but modeling as the PHIInst
  715. // itself could be OK, too.
  716. ValInst = makeUnknownForDomain(WriteStmt);
  717. }
  718. Result = Result.unite(ValInst);
  719. }
  720. assert(Result.is_single_valued() &&
  721. "Cannot have multiple incoming values for same incoming statement");
  722. return Result;
  723. }
  724. /// Try to map a MemoryKind::PHI scalar to a given array element.
  725. ///
  726. /// @param SAI Representation of the scalar's memory to map.
  727. /// @param TargetElt { Scatter[] -> Element[] }
  728. /// Suggestion where to map the scalar to when at a
  729. /// timepoint.
  730. ///
  731. /// @return true if the PHI scalar has been mapped.
  732. bool tryMapPHI(const ScopArrayInfo *SAI, isl::map TargetElt) {
  733. auto *PHIRead = S->getPHIRead(SAI);
  734. assert(PHIRead->isPHIKind());
  735. assert(PHIRead->isRead());
  736. // Skip if already been mapped.
  737. if (!PHIRead->getLatestScopArrayInfo()->isPHIKind())
  738. return false;
  739. // { DomainRead[] -> Scatter[] }
  740. auto PHISched = getScatterFor(PHIRead);
  741. // { DomainRead[] -> Element[] }
  742. auto PHITarget = PHISched.apply_range(TargetElt);
  743. simplify(PHITarget);
  744. LLVM_DEBUG(dbgs() << " Mapping: " << PHITarget << '\n');
  745. auto OrigDomain = getDomainFor(PHIRead);
  746. auto MappedDomain = PHITarget.domain();
  747. if (!OrigDomain.is_subset(MappedDomain)) {
  748. LLVM_DEBUG(
  749. dbgs()
  750. << " Reject because mapping does not encompass all instances\n");
  751. return false;
  752. }
  753. // { DomainRead[] -> DomainWrite[] }
  754. auto PerPHIWrites = computePerPHI(SAI);
  755. if (PerPHIWrites.is_null()) {
  756. LLVM_DEBUG(
  757. dbgs() << " Reject because cannot determine incoming values\n");
  758. return false;
  759. }
  760. // { DomainWrite[] -> Element[] }
  761. auto WritesTarget = PerPHIWrites.apply_domain(PHITarget).reverse();
  762. simplify(WritesTarget);
  763. // { DomainWrite[] }
  764. auto UniverseWritesDom = isl::union_set::empty(ParamSpace.ctx());
  765. for (auto *MA : S->getPHIIncomings(SAI))
  766. UniverseWritesDom = UniverseWritesDom.unite(getDomainFor(MA));
  767. auto RelevantWritesTarget = WritesTarget;
  768. if (DelicmOverapproximateWrites)
  769. WritesTarget = expandMapping(WritesTarget, UniverseWritesDom);
  770. auto ExpandedWritesDom = WritesTarget.domain();
  771. if (!DelicmPartialWrites &&
  772. !UniverseWritesDom.is_subset(ExpandedWritesDom)) {
  773. LLVM_DEBUG(
  774. dbgs() << " Reject because did not find PHI write mapping for "
  775. "all instances\n");
  776. if (DelicmOverapproximateWrites)
  777. LLVM_DEBUG(dbgs() << " Relevant Mapping: "
  778. << RelevantWritesTarget << '\n');
  779. LLVM_DEBUG(dbgs() << " Deduced Mapping: " << WritesTarget
  780. << '\n');
  781. LLVM_DEBUG(dbgs() << " Missing instances: "
  782. << UniverseWritesDom.subtract(ExpandedWritesDom)
  783. << '\n');
  784. return false;
  785. }
  786. // { DomainRead[] -> Scatter[] }
  787. isl::union_map PerPHIWriteScatterUmap = PerPHIWrites.apply_range(Schedule);
  788. isl::map PerPHIWriteScatter =
  789. singleton(PerPHIWriteScatterUmap, PHISched.get_space());
  790. // { DomainRead[] -> Zone[] }
  791. auto Lifetime = betweenScatter(PerPHIWriteScatter, PHISched, false, true);
  792. simplify(Lifetime);
  793. LLVM_DEBUG(dbgs() << " Lifetime: " << Lifetime << "\n");
  794. // { DomainWrite[] -> Zone[] }
  795. auto WriteLifetime = isl::union_map(Lifetime).apply_domain(PerPHIWrites);
  796. // { DomainWrite[] -> ValInst[] }
  797. auto WrittenValue = determinePHIWrittenValues(SAI);
  798. // { DomainWrite[] -> [Element[] -> Scatter[]] }
  799. auto WrittenTranslator = WritesTarget.range_product(Schedule);
  800. // { [Element[] -> Scatter[]] -> ValInst[] }
  801. auto Written = WrittenValue.apply_domain(WrittenTranslator);
  802. simplify(Written);
  803. // { DomainWrite[] -> [Element[] -> Zone[]] }
  804. auto LifetimeTranslator = WritesTarget.range_product(WriteLifetime);
  805. // { DomainWrite[] -> ValInst[] }
  806. auto WrittenKnownValue = filterKnownValInst(WrittenValue);
  807. // { [Element[] -> Zone[]] -> ValInst[] }
  808. auto EltLifetimeInst = WrittenKnownValue.apply_domain(LifetimeTranslator);
  809. simplify(EltLifetimeInst);
  810. // { [Element[] -> Zone[] }
  811. auto Occupied = LifetimeTranslator.range();
  812. simplify(Occupied);
  813. Knowledge Proposed(Occupied, {}, EltLifetimeInst, Written);
  814. if (isConflicting(Proposed))
  815. return false;
  816. mapPHI(SAI, std::move(PHITarget), std::move(WritesTarget),
  817. std::move(Lifetime), std::move(Proposed));
  818. return true;
  819. }
  820. /// Map a MemoryKind::PHI scalar to an array element.
  821. ///
  822. /// Callers must have ensured that the mapping is valid and not conflicting
  823. /// with the common knowledge.
  824. ///
  825. /// @param SAI The ScopArrayInfo representing the scalar's memory to
  826. /// map.
  827. /// @param ReadTarget { DomainRead[] -> Element[] }
  828. /// The array element to map the scalar to.
  829. /// @param WriteTarget { DomainWrite[] -> Element[] }
  830. /// New access target for each PHI incoming write.
  831. /// @param Lifetime { DomainRead[] -> Zone[] }
  832. /// The lifetime of each PHI for reporting.
  833. /// @param Proposed Mapping constraints for reporting.
  834. void mapPHI(const ScopArrayInfo *SAI, isl::map ReadTarget,
  835. isl::union_map WriteTarget, isl::map Lifetime,
  836. Knowledge Proposed) {
  837. // { Element[] }
  838. isl::space ElementSpace = ReadTarget.get_space().range();
  839. // Redirect the PHI incoming writes.
  840. for (auto *MA : S->getPHIIncomings(SAI)) {
  841. // { DomainWrite[] }
  842. auto Domain = getDomainFor(MA);
  843. // { DomainWrite[] -> Element[] }
  844. auto NewAccRel = WriteTarget.intersect_domain(Domain);
  845. simplify(NewAccRel);
  846. isl::space NewAccRelSpace =
  847. Domain.get_space().map_from_domain_and_range(ElementSpace);
  848. isl::map NewAccRelMap = singleton(NewAccRel, NewAccRelSpace);
  849. MA->setNewAccessRelation(NewAccRelMap);
  850. }
  851. // Redirect the PHI read.
  852. auto *PHIRead = S->getPHIRead(SAI);
  853. PHIRead->setNewAccessRelation(ReadTarget);
  854. applyLifetime(Proposed);
  855. MappedPHIScalars++;
  856. NumberOfMappedPHIScalars++;
  857. }
  858. /// Search and map scalars to memory overwritten by @p TargetStoreMA.
  859. ///
  860. /// Start trying to map scalars that are used in the same statement as the
  861. /// store. For every successful mapping, try to also map scalars of the
  862. /// statements where those are written. Repeat, until no more mapping
  863. /// opportunity is found.
  864. ///
  865. /// There is currently no preference in which order scalars are tried.
  866. /// Ideally, we would direct it towards a load instruction of the same array
  867. /// element.
  868. bool collapseScalarsToStore(MemoryAccess *TargetStoreMA) {
  869. assert(TargetStoreMA->isLatestArrayKind());
  870. assert(TargetStoreMA->isMustWrite());
  871. auto TargetStmt = TargetStoreMA->getStatement();
  872. // { DomTarget[] }
  873. auto TargetDom = getDomainFor(TargetStmt);
  874. // { DomTarget[] -> Element[] }
  875. auto TargetAccRel = getAccessRelationFor(TargetStoreMA);
  876. // { Zone[] -> DomTarget[] }
  877. // For each point in time, find the next target store instance.
  878. auto Target =
  879. computeScalarReachingOverwrite(Schedule, TargetDom, false, true);
  880. // { Zone[] -> Element[] }
  881. // Use the target store's write location as a suggestion to map scalars to.
  882. auto EltTarget = Target.apply_range(TargetAccRel);
  883. simplify(EltTarget);
  884. LLVM_DEBUG(dbgs() << " Target mapping is " << EltTarget << '\n');
  885. // Stack of elements not yet processed.
  886. SmallVector<MemoryAccess *, 16> Worklist;
  887. // Set of scalars already tested.
  888. SmallPtrSet<const ScopArrayInfo *, 16> Closed;
  889. // Lambda to add all scalar reads to the work list.
  890. auto ProcessAllIncoming = [&](ScopStmt *Stmt) {
  891. for (auto *MA : *Stmt) {
  892. if (!MA->isLatestScalarKind())
  893. continue;
  894. if (!MA->isRead())
  895. continue;
  896. Worklist.push_back(MA);
  897. }
  898. };
  899. auto *WrittenVal = TargetStoreMA->getAccessInstruction()->getOperand(0);
  900. if (auto *WrittenValInputMA = TargetStmt->lookupInputAccessOf(WrittenVal))
  901. Worklist.push_back(WrittenValInputMA);
  902. else
  903. ProcessAllIncoming(TargetStmt);
  904. auto AnyMapped = false;
  905. auto &DL = S->getRegion().getEntry()->getModule()->getDataLayout();
  906. auto StoreSize =
  907. DL.getTypeAllocSize(TargetStoreMA->getAccessValue()->getType());
  908. while (!Worklist.empty()) {
  909. auto *MA = Worklist.pop_back_val();
  910. auto *SAI = MA->getScopArrayInfo();
  911. if (Closed.count(SAI))
  912. continue;
  913. Closed.insert(SAI);
  914. LLVM_DEBUG(dbgs() << "\n Trying to map " << MA << " (SAI: " << SAI
  915. << ")\n");
  916. // Skip non-mappable scalars.
  917. if (!isMappable(SAI))
  918. continue;
  919. auto MASize = DL.getTypeAllocSize(MA->getAccessValue()->getType());
  920. if (MASize > StoreSize) {
  921. LLVM_DEBUG(
  922. dbgs() << " Reject because storage size is insufficient\n");
  923. continue;
  924. }
  925. // Try to map MemoryKind::Value scalars.
  926. if (SAI->isValueKind()) {
  927. if (!tryMapValue(SAI, EltTarget))
  928. continue;
  929. auto *DefAcc = S->getValueDef(SAI);
  930. ProcessAllIncoming(DefAcc->getStatement());
  931. AnyMapped = true;
  932. continue;
  933. }
  934. // Try to map MemoryKind::PHI scalars.
  935. if (SAI->isPHIKind()) {
  936. if (!tryMapPHI(SAI, EltTarget))
  937. continue;
  938. // Add inputs of all incoming statements to the worklist. Prefer the
  939. // input accesses of the incoming blocks.
  940. for (auto *PHIWrite : S->getPHIIncomings(SAI)) {
  941. auto *PHIWriteStmt = PHIWrite->getStatement();
  942. bool FoundAny = false;
  943. for (auto Incoming : PHIWrite->getIncoming()) {
  944. auto *IncomingInputMA =
  945. PHIWriteStmt->lookupInputAccessOf(Incoming.second);
  946. if (!IncomingInputMA)
  947. continue;
  948. Worklist.push_back(IncomingInputMA);
  949. FoundAny = true;
  950. }
  951. if (!FoundAny)
  952. ProcessAllIncoming(PHIWrite->getStatement());
  953. }
  954. AnyMapped = true;
  955. continue;
  956. }
  957. }
  958. if (AnyMapped) {
  959. TargetsMapped++;
  960. NumberOfTargetsMapped++;
  961. }
  962. return AnyMapped;
  963. }
  964. /// Compute when an array element is unused.
  965. ///
  966. /// @return { [Element[] -> Zone[]] }
  967. isl::union_set computeLifetime() const {
  968. // { Element[] -> Zone[] }
  969. auto ArrayUnused = computeArrayUnused(Schedule, AllMustWrites, AllReads,
  970. false, false, true);
  971. auto Result = ArrayUnused.wrap();
  972. simplify(Result);
  973. return Result;
  974. }
  975. /// Determine when an array element is written to, and which value instance is
  976. /// written.
  977. ///
  978. /// @return { [Element[] -> Scatter[]] -> ValInst[] }
  979. isl::union_map computeWritten() const {
  980. // { [Element[] -> Scatter[]] -> ValInst[] }
  981. auto EltWritten = applyDomainRange(AllWriteValInst, Schedule);
  982. simplify(EltWritten);
  983. return EltWritten;
  984. }
  985. /// Determine whether an access touches at most one element.
  986. ///
  987. /// The accessed element could be a scalar or accessing an array with constant
  988. /// subscript, such that all instances access only that element.
  989. ///
  990. /// @param MA The access to test.
  991. ///
  992. /// @return True, if zero or one elements are accessed; False if at least two
  993. /// different elements are accessed.
  994. bool isScalarAccess(MemoryAccess *MA) {
  995. auto Map = getAccessRelationFor(MA);
  996. auto Set = Map.range();
  997. return Set.is_singleton();
  998. }
  999. /// Print mapping statistics to @p OS.
  1000. void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const {
  1001. OS.indent(Indent) << "Statistics {\n";
  1002. OS.indent(Indent + 4) << "Compatible overwrites: "
  1003. << NumberOfCompatibleTargets << "\n";
  1004. OS.indent(Indent + 4) << "Overwrites mapped to: " << NumberOfTargetsMapped
  1005. << '\n';
  1006. OS.indent(Indent + 4) << "Value scalars mapped: "
  1007. << NumberOfMappedValueScalars << '\n';
  1008. OS.indent(Indent + 4) << "PHI scalars mapped: "
  1009. << NumberOfMappedPHIScalars << '\n';
  1010. OS.indent(Indent) << "}\n";
  1011. }
  1012. public:
  1013. DeLICMImpl(Scop *S, LoopInfo *LI) : ZoneAlgorithm("polly-delicm", S, LI) {}
  1014. /// Calculate the lifetime (definition to last use) of every array element.
  1015. ///
  1016. /// @return True if the computed lifetimes (#Zone) is usable.
  1017. bool computeZone() {
  1018. // Check that nothing strange occurs.
  1019. collectCompatibleElts();
  1020. isl::union_set EltUnused;
  1021. isl::union_map EltKnown, EltWritten;
  1022. {
  1023. IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), DelicmMaxOps);
  1024. computeCommon();
  1025. EltUnused = computeLifetime();
  1026. EltKnown = computeKnown(true, false);
  1027. EltWritten = computeWritten();
  1028. }
  1029. DeLICMAnalyzed++;
  1030. if (EltUnused.is_null() || EltKnown.is_null() || EltWritten.is_null()) {
  1031. assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota &&
  1032. "The only reason that these things have not been computed should "
  1033. "be if the max-operations limit hit");
  1034. DeLICMOutOfQuota++;
  1035. LLVM_DEBUG(dbgs() << "DeLICM analysis exceeded max_operations\n");
  1036. DebugLoc Begin, End;
  1037. getDebugLocations(getBBPairForRegion(&S->getRegion()), Begin, End);
  1038. OptimizationRemarkAnalysis R(DEBUG_TYPE, "OutOfQuota", Begin,
  1039. S->getEntry());
  1040. R << "maximal number of operations exceeded during zone analysis";
  1041. S->getFunction().getContext().diagnose(R);
  1042. return false;
  1043. }
  1044. Zone = OriginalZone = Knowledge({}, EltUnused, EltKnown, EltWritten);
  1045. LLVM_DEBUG(dbgs() << "Computed Zone:\n"; OriginalZone.print(dbgs(), 4));
  1046. assert(Zone.isUsable() && OriginalZone.isUsable());
  1047. return true;
  1048. }
  1049. /// Try to map as many scalars to unused array elements as possible.
  1050. ///
  1051. /// Multiple scalars might be mappable to intersecting unused array element
  1052. /// zones, but we can only chose one. This is a greedy algorithm, therefore
  1053. /// the first processed element claims it.
  1054. void greedyCollapse() {
  1055. bool Modified = false;
  1056. for (auto &Stmt : *S) {
  1057. for (auto *MA : Stmt) {
  1058. if (!MA->isLatestArrayKind())
  1059. continue;
  1060. if (!MA->isWrite())
  1061. continue;
  1062. if (MA->isMayWrite()) {
  1063. LLVM_DEBUG(dbgs() << "Access " << MA
  1064. << " pruned because it is a MAY_WRITE\n");
  1065. OptimizationRemarkMissed R(DEBUG_TYPE, "TargetMayWrite",
  1066. MA->getAccessInstruction());
  1067. R << "Skipped possible mapping target because it is not an "
  1068. "unconditional overwrite";
  1069. S->getFunction().getContext().diagnose(R);
  1070. continue;
  1071. }
  1072. if (Stmt.getNumIterators() == 0) {
  1073. LLVM_DEBUG(dbgs() << "Access " << MA
  1074. << " pruned because it is not in a loop\n");
  1075. OptimizationRemarkMissed R(DEBUG_TYPE, "WriteNotInLoop",
  1076. MA->getAccessInstruction());
  1077. R << "skipped possible mapping target because it is not in a loop";
  1078. S->getFunction().getContext().diagnose(R);
  1079. continue;
  1080. }
  1081. if (isScalarAccess(MA)) {
  1082. LLVM_DEBUG(dbgs()
  1083. << "Access " << MA
  1084. << " pruned because it writes only a single element\n");
  1085. OptimizationRemarkMissed R(DEBUG_TYPE, "ScalarWrite",
  1086. MA->getAccessInstruction());
  1087. R << "skipped possible mapping target because the memory location "
  1088. "written to does not depend on its outer loop";
  1089. S->getFunction().getContext().diagnose(R);
  1090. continue;
  1091. }
  1092. if (!isa<StoreInst>(MA->getAccessInstruction())) {
  1093. LLVM_DEBUG(dbgs() << "Access " << MA
  1094. << " pruned because it is not a StoreInst\n");
  1095. OptimizationRemarkMissed R(DEBUG_TYPE, "NotAStore",
  1096. MA->getAccessInstruction());
  1097. R << "skipped possible mapping target because non-store instructions "
  1098. "are not supported";
  1099. S->getFunction().getContext().diagnose(R);
  1100. continue;
  1101. }
  1102. // Check for more than one element acces per statement instance.
  1103. // Currently we expect write accesses to be functional, eg. disallow
  1104. //
  1105. // { Stmt[0] -> [i] : 0 <= i < 2 }
  1106. //
  1107. // This may occur when some accesses to the element write/read only
  1108. // parts of the element, eg. a single byte. Polly then divides each
  1109. // element into subelements of the smallest access length, normal access
  1110. // then touch multiple of such subelements. It is very common when the
  1111. // array is accesses with memset, memcpy or memmove which take i8*
  1112. // arguments.
  1113. isl::union_map AccRel = MA->getLatestAccessRelation();
  1114. if (!AccRel.is_single_valued().is_true()) {
  1115. LLVM_DEBUG(dbgs() << "Access " << MA
  1116. << " is incompatible because it writes multiple "
  1117. "elements per instance\n");
  1118. OptimizationRemarkMissed R(DEBUG_TYPE, "NonFunctionalAccRel",
  1119. MA->getAccessInstruction());
  1120. R << "skipped possible mapping target because it writes more than "
  1121. "one element";
  1122. S->getFunction().getContext().diagnose(R);
  1123. continue;
  1124. }
  1125. isl::union_set TouchedElts = AccRel.range();
  1126. if (!TouchedElts.is_subset(CompatibleElts)) {
  1127. LLVM_DEBUG(
  1128. dbgs()
  1129. << "Access " << MA
  1130. << " is incompatible because it touches incompatible elements\n");
  1131. OptimizationRemarkMissed R(DEBUG_TYPE, "IncompatibleElts",
  1132. MA->getAccessInstruction());
  1133. R << "skipped possible mapping target because a target location "
  1134. "cannot be reliably analyzed";
  1135. S->getFunction().getContext().diagnose(R);
  1136. continue;
  1137. }
  1138. assert(isCompatibleAccess(MA));
  1139. NumberOfCompatibleTargets++;
  1140. LLVM_DEBUG(dbgs() << "Analyzing target access " << MA << "\n");
  1141. if (collapseScalarsToStore(MA))
  1142. Modified = true;
  1143. }
  1144. }
  1145. if (Modified)
  1146. DeLICMScopsModified++;
  1147. }
  1148. /// Dump the internal information about a performed DeLICM to @p OS.
  1149. void print(llvm::raw_ostream &OS, int Indent = 0) {
  1150. if (!Zone.isUsable()) {
  1151. OS.indent(Indent) << "Zone not computed\n";
  1152. return;
  1153. }
  1154. printStatistics(OS, Indent);
  1155. if (!isModified()) {
  1156. OS.indent(Indent) << "No modification has been made\n";
  1157. return;
  1158. }
  1159. printAccesses(OS, Indent);
  1160. }
  1161. /// Return whether at least one transformation been applied.
  1162. bool isModified() const { return NumberOfTargetsMapped > 0; }
  1163. };
  1164. static std::unique_ptr<DeLICMImpl> collapseToUnused(Scop &S, LoopInfo &LI) {
  1165. std::unique_ptr<DeLICMImpl> Impl = std::make_unique<DeLICMImpl>(&S, &LI);
  1166. if (!Impl->computeZone()) {
  1167. LLVM_DEBUG(dbgs() << "Abort because cannot reliably compute lifetimes\n");
  1168. return Impl;
  1169. }
  1170. LLVM_DEBUG(dbgs() << "Collapsing scalars to unused array elements...\n");
  1171. Impl->greedyCollapse();
  1172. LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
  1173. LLVM_DEBUG(dbgs() << S);
  1174. return Impl;
  1175. }
  1176. static std::unique_ptr<DeLICMImpl> runDeLICM(Scop &S, LoopInfo &LI) {
  1177. std::unique_ptr<DeLICMImpl> Impl = collapseToUnused(S, LI);
  1178. Scop::ScopStatistics ScopStats = S.getStatistics();
  1179. NumValueWrites += ScopStats.NumValueWrites;
  1180. NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
  1181. NumPHIWrites += ScopStats.NumPHIWrites;
  1182. NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
  1183. NumSingletonWrites += ScopStats.NumSingletonWrites;
  1184. NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
  1185. return Impl;
  1186. }
  1187. static PreservedAnalyses runDeLICMUsingNPM(Scop &S, ScopAnalysisManager &SAM,
  1188. ScopStandardAnalysisResults &SAR,
  1189. SPMUpdater &U, raw_ostream *OS) {
  1190. LoopInfo &LI = SAR.LI;
  1191. std::unique_ptr<DeLICMImpl> Impl = runDeLICM(S, LI);
  1192. if (OS) {
  1193. *OS << "Printing analysis 'Polly - DeLICM/DePRE' for region: '"
  1194. << S.getName() << "' in function '" << S.getFunction().getName()
  1195. << "':\n";
  1196. if (Impl) {
  1197. assert(Impl->getScop() == &S);
  1198. *OS << "DeLICM result:\n";
  1199. Impl->print(*OS);
  1200. }
  1201. }
  1202. if (!Impl->isModified())
  1203. return PreservedAnalyses::all();
  1204. PreservedAnalyses PA;
  1205. PA.preserveSet<AllAnalysesOn<Module>>();
  1206. PA.preserveSet<AllAnalysesOn<Function>>();
  1207. PA.preserveSet<AllAnalysesOn<Loop>>();
  1208. return PA;
  1209. }
  1210. class DeLICMWrapperPass : public ScopPass {
  1211. private:
  1212. DeLICMWrapperPass(const DeLICMWrapperPass &) = delete;
  1213. const DeLICMWrapperPass &operator=(const DeLICMWrapperPass &) = delete;
  1214. /// The pass implementation, also holding per-scop data.
  1215. std::unique_ptr<DeLICMImpl> Impl;
  1216. public:
  1217. static char ID;
  1218. explicit DeLICMWrapperPass() : ScopPass(ID) {}
  1219. virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
  1220. AU.addRequiredTransitive<ScopInfoRegionPass>();
  1221. AU.addRequired<LoopInfoWrapperPass>();
  1222. AU.setPreservesAll();
  1223. }
  1224. virtual bool runOnScop(Scop &S) override {
  1225. // Free resources for previous scop's computation, if not yet done.
  1226. releaseMemory();
  1227. auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  1228. Impl = runDeLICM(S, LI);
  1229. return Impl->isModified();
  1230. }
  1231. virtual void printScop(raw_ostream &OS, Scop &S) const override {
  1232. if (!Impl)
  1233. return;
  1234. assert(Impl->getScop() == &S);
  1235. OS << "DeLICM result:\n";
  1236. Impl->print(OS);
  1237. }
  1238. virtual void releaseMemory() override { Impl.reset(); }
  1239. };
  1240. char DeLICMWrapperPass::ID;
  1241. } // anonymous namespace
  1242. Pass *polly::createDeLICMWrapperPass() { return new DeLICMWrapperPass(); }
  1243. INITIALIZE_PASS_BEGIN(DeLICMWrapperPass, "polly-delicm", "Polly - DeLICM/DePRE",
  1244. false, false)
  1245. INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass)
  1246. INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  1247. INITIALIZE_PASS_END(DeLICMWrapperPass, "polly-delicm", "Polly - DeLICM/DePRE",
  1248. false, false)
  1249. llvm::PreservedAnalyses DeLICMPass::run(Scop &S, ScopAnalysisManager &SAM,
  1250. ScopStandardAnalysisResults &SAR,
  1251. SPMUpdater &U) {
  1252. return runDeLICMUsingNPM(S, SAM, SAR, U, nullptr);
  1253. }
  1254. llvm::PreservedAnalyses DeLICMPrinterPass::run(Scop &S,
  1255. ScopAnalysisManager &SAM,
  1256. ScopStandardAnalysisResults &SAR,
  1257. SPMUpdater &U) {
  1258. return runDeLICMUsingNPM(S, SAM, SAR, U, &OS);
  1259. }
  1260. bool polly::isConflicting(
  1261. isl::union_set ExistingOccupied, isl::union_set ExistingUnused,
  1262. isl::union_map ExistingKnown, isl::union_map ExistingWrites,
  1263. isl::union_set ProposedOccupied, isl::union_set ProposedUnused,
  1264. isl::union_map ProposedKnown, isl::union_map ProposedWrites,
  1265. llvm::raw_ostream *OS, unsigned Indent) {
  1266. Knowledge Existing(std::move(ExistingOccupied), std::move(ExistingUnused),
  1267. std::move(ExistingKnown), std::move(ExistingWrites));
  1268. Knowledge Proposed(std::move(ProposedOccupied), std::move(ProposedUnused),
  1269. std::move(ProposedKnown), std::move(ProposedWrites));
  1270. return Knowledge::isConflicting(Existing, Proposed, OS, Indent);
  1271. }