  1. //===- ScopBuilder.cpp ----------------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Create a polyhedral description for a static control flow region.
  10. //
  11. // The pass creates a polyhedral description of the Scops detected by the SCoP
  12. // detection derived from their LLVM-IR code.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #include "polly/ScopBuilder.h"
  16. #include "polly/Options.h"
  17. #include "polly/ScopDetection.h"
  18. #include "polly/ScopInfo.h"
  19. #include "polly/Support/GICHelper.h"
  20. #include "polly/Support/ISLTools.h"
  21. #include "polly/Support/SCEVValidator.h"
  22. #include "polly/Support/ScopHelper.h"
  23. #include "polly/Support/VirtualInstruction.h"
  24. #include "llvm/ADT/ArrayRef.h"
  25. #include "llvm/ADT/EquivalenceClasses.h"
  26. #include "llvm/ADT/PostOrderIterator.h"
  27. #include "llvm/ADT/Sequence.h"
  28. #include "llvm/ADT/SmallSet.h"
  29. #include "llvm/ADT/Statistic.h"
  30. #include "llvm/Analysis/AliasAnalysis.h"
  31. #include "llvm/Analysis/AssumptionCache.h"
  32. #include "llvm/Analysis/Delinearization.h"
  33. #include "llvm/Analysis/Loads.h"
  34. #include "llvm/Analysis/LoopInfo.h"
  35. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  36. #include "llvm/Analysis/RegionInfo.h"
  37. #include "llvm/Analysis/RegionIterator.h"
  38. #include "llvm/Analysis/ScalarEvolution.h"
  39. #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  40. #include "llvm/IR/BasicBlock.h"
  41. #include "llvm/IR/DataLayout.h"
  42. #include "llvm/IR/DebugLoc.h"
  43. #include "llvm/IR/DerivedTypes.h"
  44. #include "llvm/IR/Dominators.h"
  45. #include "llvm/IR/Function.h"
  46. #include "llvm/IR/InstrTypes.h"
  47. #include "llvm/IR/Instruction.h"
  48. #include "llvm/IR/Instructions.h"
  49. #include "llvm/IR/Type.h"
  50. #include "llvm/IR/Use.h"
  51. #include "llvm/IR/Value.h"
  52. #include "llvm/Support/CommandLine.h"
  53. #include "llvm/Support/Compiler.h"
  54. #include "llvm/Support/Debug.h"
  55. #include "llvm/Support/ErrorHandling.h"
  56. #include "llvm/Support/raw_ostream.h"
  57. #include <cassert>
  58. using namespace llvm;
  59. using namespace polly;
  60. #define DEBUG_TYPE "polly-scops"
  61. STATISTIC(ScopFound, "Number of valid Scops");
  62. STATISTIC(RichScopFound, "Number of Scops containing a loop");
  63. STATISTIC(InfeasibleScops,
  64. "Number of SCoPs with statically infeasible context.");
  65. bool polly::ModelReadOnlyScalars;
  66. // The maximal number of dimensions we allow during invariant load construction.
  67. // More complex access ranges will result in very high compile time and are also
  68. // unlikely to result in good code. This value is very high and should only
  69. // trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
  70. static unsigned const MaxDimensionsInAccessRange = 9;
  71. static cl::opt<bool, true> XModelReadOnlyScalars(
  72. "polly-analyze-read-only-scalars",
  73. cl::desc("Model read-only scalar values in the scop description"),
  74. cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
  75. cl::init(true), cl::cat(PollyCategory));
  76. static cl::opt<int>
  77. OptComputeOut("polly-analysis-computeout",
  78. cl::desc("Bound the scop analysis by a maximal amount of "
  79. "computational steps (0 means no bound)"),
  80. cl::Hidden, cl::init(800000), cl::ZeroOrMore,
  81. cl::cat(PollyCategory));
  82. static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
  83. "polly-allow-dereference-of-all-function-parameters",
  84. cl::desc(
  85. "Treat all parameters to functions that are pointers as dereferencible."
  86. " This is useful for invariant load hoisting, since we can generate"
  87. " less runtime checks. This is only valid if all pointers to functions"
  88. " are always initialized, so that Polly can choose to hoist"
  89. " their loads. "),
  90. cl::Hidden, cl::init(false), cl::cat(PollyCategory));
  91. static cl::opt<bool>
  92. PollyIgnoreInbounds("polly-ignore-inbounds",
  93. cl::desc("Do not take inbounds assumptions at all"),
  94. cl::Hidden, cl::init(false), cl::cat(PollyCategory));
  95. static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
  96. "polly-rtc-max-arrays-per-group",
  97. cl::desc("The maximal number of arrays to compare in each alias group."),
  98. cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));
  99. static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts(
  100. "polly-rtc-max-array-disjuncts",
  101. cl::desc("The maximal number of disjunts allowed in memory accesses to "
  102. "to build RTCs."),
  103. cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
  104. static cl::opt<unsigned> RunTimeChecksMaxParameters(
  105. "polly-rtc-max-parameters",
  106. cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
  107. cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
  108. static cl::opt<bool> UnprofitableScalarAccs(
  109. "polly-unprofitable-scalar-accs",
  110. cl::desc("Count statements with scalar accesses as not optimizable"),
  111. cl::Hidden, cl::init(false), cl::cat(PollyCategory));
  112. static cl::opt<std::string> UserContextStr(
  113. "polly-context", cl::value_desc("isl parameter set"),
  114. cl::desc("Provide additional constraints on the context parameters"),
  115. cl::init(""), cl::cat(PollyCategory));
  116. static cl::opt<bool> DetectReductions("polly-detect-reductions",
  117. cl::desc("Detect and exploit reductions"),
  118. cl::Hidden, cl::ZeroOrMore,
  119. cl::init(true), cl::cat(PollyCategory));
120. // Multiplicative reductions can be disabled separately as these kinds of
121. // operations can overflow easily. Additive reductions and bit operations
122. // are, in contrast, pretty stable.
  123. static cl::opt<bool> DisableMultiplicativeReductions(
  124. "polly-disable-multiplicative-reductions",
  125. cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
  126. cl::init(false), cl::cat(PollyCategory));
  127. enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };
  128. static cl::opt<GranularityChoice> StmtGranularity(
  129. "polly-stmt-granularity",
  130. cl::desc(
  131. "Algorithm to use for splitting basic blocks into multiple statements"),
  132. cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
  133. "One statement per basic block"),
  134. clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
  135. "Scalar independence heuristic"),
  136. clEnumValN(GranularityChoice::Stores, "store",
  137. "Store-level granularity")),
  138. cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));
  139. /// Helper to treat non-affine regions and basic blocks the same.
  140. ///
  141. ///{
  142. /// Return the block that is the representing block for @p RN.
  143. static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  144. return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
  145. : RN->getNodeAs<BasicBlock>();
  146. }
  147. /// Return the @p idx'th block that is executed after @p RN.
  148. static inline BasicBlock *
  149. getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
  150. if (RN->isSubRegion()) {
  151. assert(idx == 0);
  152. return RN->getNodeAs<Region>()->getExit();
  153. }
  154. return TI->getSuccessor(idx);
  155. }
  156. static bool containsErrorBlock(RegionNode *RN, const Region &R,
  157. ScopDetection *SD) {
  158. if (!RN->isSubRegion())
  159. return SD->isErrorBlock(*RN->getNodeAs<BasicBlock>(), R);
  160. for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
  161. if (SD->isErrorBlock(*BB, R))
  162. return true;
  163. return false;
  164. }
  165. ///}
  166. /// Create a map to map from a given iteration to a subsequent iteration.
  167. ///
168. /// This map maps from SetSpace -> SetSpace where the dimension @p Dim
169. /// is incremented by one and all other dimensions are kept equal, e.g.,
  170. /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
  171. ///
  172. /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
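///
/// The resulting map is used by addLoopBoundsToHeaderDomain below to relate
/// every loop header iteration to the directly following iteration of that
/// loop.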
  173. static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
  174. isl::space MapSpace = SetSpace.map_from_set();
  175. isl::map NextIterationMap = isl::map::universe(MapSpace);
  176. for (unsigned u : rangeIslSize(0, NextIterationMap.domain_tuple_dim()))
  177. if (u != Dim)
  178. NextIterationMap =
  179. NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
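// The equality constraint built next encodes 1 + in[Dim] - out[Dim] == 0,
// i.e., out[Dim] = in[Dim] + 1, which increments dimension Dim by one.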
  180. isl::constraint C =
  181. isl::constraint::alloc_equality(isl::local_space(MapSpace));
  182. C = C.set_constant_si(1);
  183. C = C.set_coefficient_si(isl::dim::in, Dim, 1);
  184. C = C.set_coefficient_si(isl::dim::out, Dim, -1);
  185. NextIterationMap = NextIterationMap.add_constraint(C);
  186. return NextIterationMap;
  187. }
188. /// Collect the bounded parts of @p S, i.e., the union of its bounded basic sets.
  189. static isl::set collectBoundedParts(isl::set S) {
  190. isl::set BoundedParts = isl::set::empty(S.get_space());
  191. for (isl::basic_set BSet : S.get_basic_set_list())
  192. if (BSet.is_bounded())
  193. BoundedParts = BoundedParts.unite(isl::set(BSet));
  194. return BoundedParts;
  195. }
196. /// Compute the (un)bounded parts of @p S with respect to dimension @p Dim.
197. ///
198. /// @returns A separation of @p S into first an unbounded and then a bounded
199. /// subset, both with respect to the dimension @p Dim.
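///
/// For illustration: for S = { [i] : i >= 0 } and Dim = 0 the result is
/// (S, empty) because i has no upper bound, whereas for
/// S = { [i] : 0 <= i <= 7 } the result is (empty, S).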
  200. static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
  201. unsigned Dim) {
  202. for (unsigned u : rangeIslSize(0, S.tuple_dim()))
  203. S = S.lower_bound_si(isl::dim::set, u, 0);
  204. unsigned NumDimsS = unsignedFromIslSize(S.tuple_dim());
  205. isl::set OnlyDimS = S;
  206. // Remove dimensions that are greater than Dim as they are not interesting.
  207. assert(NumDimsS >= Dim + 1);
  208. OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);
  209. // Create artificial parametric upper bounds for dimensions smaller than Dim
  210. // as we are not interested in them.
  211. OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);
  212. for (unsigned u = 0; u < Dim; u++) {
  213. isl::constraint C = isl::constraint::alloc_inequality(
  214. isl::local_space(OnlyDimS.get_space()));
  215. C = C.set_coefficient_si(isl::dim::param, u, 1);
  216. C = C.set_coefficient_si(isl::dim::set, u, -1);
  217. OnlyDimS = OnlyDimS.add_constraint(C);
  218. }
  219. // Collect all bounded parts of OnlyDimS.
  220. isl::set BoundedParts = collectBoundedParts(OnlyDimS);
  221. // Create the dimensions greater than Dim again.
  222. BoundedParts =
  223. BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);
  224. // Remove the artificial upper bound parameters again.
  225. BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);
  226. isl::set UnboundedParts = S.subtract(BoundedParts);
  227. return std::make_pair(UnboundedParts, BoundedParts);
  228. }
  229. /// Create the conditions under which @p L @p Pred @p R is true.
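///
/// Unsigned predicates are mapped to the same set relations as their signed
/// counterparts; this is only meaningful when the operands can be assumed
/// non-negative (cf. the NonNegative flag of getPwAff and the dedicated
/// handling in buildUnsignedConditionSets below).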
  230. static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
  231. isl::pw_aff R) {
  232. switch (Pred) {
  233. case ICmpInst::ICMP_EQ:
  234. return L.eq_set(R);
  235. case ICmpInst::ICMP_NE:
  236. return L.ne_set(R);
  237. case ICmpInst::ICMP_SLT:
  238. return L.lt_set(R);
  239. case ICmpInst::ICMP_SLE:
  240. return L.le_set(R);
  241. case ICmpInst::ICMP_SGT:
  242. return L.gt_set(R);
  243. case ICmpInst::ICMP_SGE:
  244. return L.ge_set(R);
  245. case ICmpInst::ICMP_ULT:
  246. return L.lt_set(R);
  247. case ICmpInst::ICMP_UGT:
  248. return L.gt_set(R);
  249. case ICmpInst::ICMP_ULE:
  250. return L.le_set(R);
  251. case ICmpInst::ICMP_UGE:
  252. return L.ge_set(R);
  253. default:
  254. llvm_unreachable("Non integer predicate not supported");
  255. }
  256. }
  257. isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL,
  258. Loop *NewL) {
  259. // If the loops are the same there is nothing to do.
  260. if (NewL == OldL)
  261. return Dom;
  262. int OldDepth = scop->getRelativeLoopDepth(OldL);
  263. int NewDepth = scop->getRelativeLoopDepth(NewL);
  264. // If both loops are non-affine loops there is nothing to do.
  265. if (OldDepth == -1 && NewDepth == -1)
  266. return Dom;
  267. // Distinguish three cases:
  268. // 1) The depth is the same but the loops are not.
269. // => One loop was left, one was entered.
  270. // 2) The depth increased from OldL to NewL.
  271. // => One loop was entered, none was left.
  272. // 3) The depth decreased from OldL to NewL.
273. // => Loops were left; the difference of the depths defines how many.
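// For illustration: going from a loop at relative depth 1 to one at depth 0
// (case 3) projects out the innermost dimension of Dom, while going from
// depth 0 to depth 1 (case 2) appends a new, unconstrained dimension.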
  274. if (OldDepth == NewDepth) {
  275. assert(OldL->getParentLoop() == NewL->getParentLoop());
  276. Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
  277. Dom = Dom.add_dims(isl::dim::set, 1);
  278. } else if (OldDepth < NewDepth) {
  279. assert(OldDepth + 1 == NewDepth);
  280. auto &R = scop->getRegion();
  281. (void)R;
  282. assert(NewL->getParentLoop() == OldL ||
  283. ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
  284. Dom = Dom.add_dims(isl::dim::set, 1);
  285. } else {
  286. assert(OldDepth > NewDepth);
  287. unsigned Diff = OldDepth - NewDepth;
  288. unsigned NumDim = unsignedFromIslSize(Dom.tuple_dim());
  289. assert(NumDim >= Diff);
  290. Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
  291. }
  292. return Dom;
  293. }
  294. /// Compute the isl representation for the SCEV @p E in this BB.
  295. ///
  296. /// @param BB The BB for which isl representation is to be
  297. /// computed.
  298. /// @param InvalidDomainMap A map of BB to their invalid domains.
  299. /// @param E The SCEV that should be translated.
300. /// @param NonNegative Flag to indicate that @p E has to be non-negative.
  301. ///
  302. /// Note that this function will also adjust the invalid context accordingly.
  303. __isl_give isl_pw_aff *
  304. ScopBuilder::getPwAff(BasicBlock *BB,
  305. DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
  306. const SCEV *E, bool NonNegative) {
  307. PWACtx PWAC = scop->getPwAff(E, BB, NonNegative, &RecordedAssumptions);
  308. InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
  309. return PWAC.first.release();
  310. }
  311. /// Build condition sets for unsigned ICmpInst(s).
312. /// Special handling is required for unsigned operands to ensure that an
313. /// operand of an unsigned ICmpInst whose MSB (aka the sign bit) is set
314. /// wraps around as expected.
315. ///
316. /// @param IsStrictUpperBound holds information on the predicate relation
317. /// between TestVal and UpperBound, i.e.,
318. /// TestVal < UpperBound OR TestVal <= UpperBound
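///
/// For illustration: with IsStrictUpperBound == true the built set is
/// { 0 <= TestVal and TestVal < UpperBound }, otherwise the second conjunct
/// becomes TestVal <= UpperBound.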
  319. __isl_give isl_set *ScopBuilder::buildUnsignedConditionSets(
  320. BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
  321. const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
  322. DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
  323. bool IsStrictUpperBound) {
  324. // Do not take NonNeg assumption on TestVal
  325. // as it might have MSB (Sign bit) set.
  326. isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false);
  327. // Take NonNeg assumption on UpperBound.
  328. isl_pw_aff *UpperBound =
  329. getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true);
  330. // 0 <= TestVal
  331. isl_set *First =
  332. isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
  333. isl_pw_aff_get_domain_space(TestVal))),
  334. isl_pw_aff_copy(TestVal));
  335. isl_set *Second;
  336. if (IsStrictUpperBound)
  337. // TestVal < UpperBound
  338. Second = isl_pw_aff_lt_set(TestVal, UpperBound);
  339. else
  340. // TestVal <= UpperBound
  341. Second = isl_pw_aff_le_set(TestVal, UpperBound);
  342. isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
  343. return ConsequenceCondSet;
  344. }
  345. bool ScopBuilder::buildConditionSets(
  346. BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain,
  347. DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
  348. SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  349. Value *Condition = getConditionFromTerminator(SI);
  350. assert(Condition && "No condition for switch");
  351. isl_pw_aff *LHS, *RHS;
  352. LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));
  353. unsigned NumSuccessors = SI->getNumSuccessors();
  354. ConditionSets.resize(NumSuccessors);
  355. for (auto &Case : SI->cases()) {
  356. unsigned Idx = Case.getSuccessorIndex();
  357. ConstantInt *CaseValue = Case.getCaseValue();
  358. RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue));
  359. isl_set *CaseConditionSet =
  360. buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
  361. isl::manage(RHS))
  362. .release();
  363. ConditionSets[Idx] = isl_set_coalesce(
  364. isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  365. }
  366. assert(ConditionSets[0] == nullptr && "Default condition set was set");
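// The default successor (index 0) receives whatever remains of the domain
// once the condition sets of all explicit cases have been subtracted.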
  367. isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  368. for (unsigned u = 2; u < NumSuccessors; u++)
  369. ConditionSetUnion =
  370. isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  371. ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);
  372. isl_pw_aff_free(LHS);
  373. return true;
  374. }
  375. bool ScopBuilder::buildConditionSets(
  376. BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L,
  377. __isl_keep isl_set *Domain,
  378. DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
  379. SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  380. isl_set *ConsequenceCondSet = nullptr;
  381. if (auto Load = dyn_cast<LoadInst>(Condition)) {
  382. const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
  383. const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
  384. bool NonNeg = false;
  385. isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg);
  386. isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg);
  387. ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
  388. isl::manage(RHS))
  389. .release();
  390. } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
  391. auto *Unique = dyn_cast<ConstantInt>(
  392. getUniqueNonErrorValue(PHI, &scop->getRegion(), &SD));
  393. assert(Unique &&
  394. "A PHINode condition should only be accepted by ScopDetection if "
  395. "getUniqueNonErrorValue returns non-NULL");
  396. if (Unique->isZero())
  397. ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
  398. else
  399. ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  400. } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
  401. if (CCond->isZero())
  402. ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
  403. else
  404. ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  405. } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
  406. auto Opcode = BinOp->getOpcode();
  407. assert(Opcode == Instruction::And || Opcode == Instruction::Or);
  408. bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain,
  409. InvalidDomainMap, ConditionSets) &&
  410. buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain,
  411. InvalidDomainMap, ConditionSets);
  412. if (!Valid) {
  413. while (!ConditionSets.empty())
  414. isl_set_free(ConditionSets.pop_back_val());
  415. return false;
  416. }
  417. isl_set_free(ConditionSets.pop_back_val());
  418. isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
  419. isl_set_free(ConditionSets.pop_back_val());
  420. isl_set *ConsCondPart1 = ConditionSets.pop_back_val();
  421. if (Opcode == Instruction::And)
  422. ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
  423. else
  424. ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
  425. } else {
  426. auto *ICond = dyn_cast<ICmpInst>(Condition);
  427. assert(ICond &&
  428. "Condition of exiting branch was neither constant nor ICmp!");
  429. Region &R = scop->getRegion();
  430. isl_pw_aff *LHS, *RHS;
431. // For unsigned comparisons we assume the sign bit of neither operand
432. // to be set. The comparison is equal to a signed comparison under this
433. // assumption.
  434. bool NonNeg = ICond->isUnsigned();
  435. const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
  436. *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);
  437. LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, &SD);
  438. RightOperand = tryForwardThroughPHI(RightOperand, R, SE, &SD);
  439. switch (ICond->getPredicate()) {
  440. case ICmpInst::ICMP_ULT:
  441. ConsequenceCondSet =
  442. buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
  443. RightOperand, InvalidDomainMap, true);
  444. break;
  445. case ICmpInst::ICMP_ULE:
  446. ConsequenceCondSet =
  447. buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
  448. RightOperand, InvalidDomainMap, false);
  449. break;
  450. case ICmpInst::ICMP_UGT:
  451. ConsequenceCondSet =
  452. buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
  453. LeftOperand, InvalidDomainMap, true);
  454. break;
  455. case ICmpInst::ICMP_UGE:
  456. ConsequenceCondSet =
  457. buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
  458. LeftOperand, InvalidDomainMap, false);
  459. break;
  460. default:
  461. LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
  462. RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
  463. ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
  464. isl::manage(LHS), isl::manage(RHS))
  465. .release();
  466. break;
  467. }
  468. }
  469. // If no terminator was given we are only looking for parameter constraints
  470. // under which @p Condition is true/false.
  471. if (!TI)
  472. ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
  473. assert(ConsequenceCondSet);
  474. ConsequenceCondSet = isl_set_coalesce(
  475. isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));
  476. isl_set *AlternativeCondSet = nullptr;
  477. bool TooComplex =
  478. isl_set_n_basic_set(ConsequenceCondSet) >= (int)MaxDisjunctsInDomain;
  479. if (!TooComplex) {
  480. AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
  481. isl_set_copy(ConsequenceCondSet));
  482. TooComplex =
  483. isl_set_n_basic_set(AlternativeCondSet) >= (int)MaxDisjunctsInDomain;
  484. }
  485. if (TooComplex) {
  486. scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
  487. TI ? TI->getParent() : nullptr /* BasicBlock */);
  488. isl_set_free(AlternativeCondSet);
  489. isl_set_free(ConsequenceCondSet);
  490. return false;
  491. }
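// On success exactly two sets are appended: first the set under which the
// branch condition holds (intersected with Domain), then its complement
// within Domain.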
  492. ConditionSets.push_back(ConsequenceCondSet);
  493. ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));
  494. return true;
  495. }
  496. bool ScopBuilder::buildConditionSets(
  497. BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
  498. DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
  499. SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  500. if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
  501. return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
  502. ConditionSets);
  503. assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");
  504. if (TI->getNumSuccessors() == 1) {
  505. ConditionSets.push_back(isl_set_copy(Domain));
  506. return true;
  507. }
  508. Value *Condition = getConditionFromTerminator(TI);
  509. assert(Condition && "No condition for Terminator");
  510. return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
  511. ConditionSets);
  512. }
  513. bool ScopBuilder::propagateDomainConstraints(
  514. Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
515. // Iterate over the region R and propagate the domain constraints from the
  516. // predecessors to the current node. In contrast to the
  517. // buildDomainsWithBranchConstraints function, this one will pull the domain
  518. // information from the predecessors instead of pushing it to the successors.
  519. // Additionally, we assume the domains to be already present in the domain
  520. // map here. However, we iterate again in reverse post order so we know all
  521. // predecessors have been visited before a block or non-affine subregion is
  522. // visited.
  523. ReversePostOrderTraversal<Region *> RTraversal(R);
  524. for (auto *RN : RTraversal) {
  525. // Recurse for affine subregions but go on for basic blocks and non-affine
  526. // subregions.
  527. if (RN->isSubRegion()) {
  528. Region *SubRegion = RN->getNodeAs<Region>();
  529. if (!scop->isNonAffineSubRegion(SubRegion)) {
  530. if (!propagateDomainConstraints(SubRegion, InvalidDomainMap))
  531. return false;
  532. continue;
  533. }
  534. }
  535. BasicBlock *BB = getRegionNodeBasicBlock(RN);
  536. isl::set &Domain = scop->getOrInitEmptyDomain(BB);
  537. assert(!Domain.is_null());
  538. // Under the union of all predecessor conditions we can reach this block.
  539. isl::set PredDom = getPredecessorDomainConstraints(BB, Domain);
  540. Domain = Domain.intersect(PredDom).coalesce();
  541. Domain = Domain.align_params(scop->getParamSpace());
  542. Loop *BBLoop = getRegionNodeLoop(RN, LI);
  543. if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop))
  544. if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap))
  545. return false;
  546. }
  547. return true;
  548. }
  549. void ScopBuilder::propagateDomainConstraintsToRegionExit(
  550. BasicBlock *BB, Loop *BBLoop,
  551. SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
  552. DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
553. // Check if the block @p BB is the entry of a region. If so we propagate its
554. // domain to the exit block of the region. Otherwise we are done.
  555. auto *RI = scop->getRegion().getRegionInfo();
  556. auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
  557. auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
  558. if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB))
  559. return;
  560. // Do not propagate the domain if there is a loop backedge inside the region
  561. // that would prevent the exit block from being executed.
  562. auto *L = BBLoop;
  563. while (L && scop->contains(L)) {
  564. SmallVector<BasicBlock *, 4> LatchBBs;
  565. BBLoop->getLoopLatches(LatchBBs);
  566. for (auto *LatchBB : LatchBBs)
  567. if (BB != LatchBB && BBReg->contains(LatchBB))
  568. return;
  569. L = L->getParentLoop();
  570. }
  571. isl::set Domain = scop->getOrInitEmptyDomain(BB);
  572. assert(!Domain.is_null() && "Cannot propagate a nullptr");
  573. Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops());
  574. // Since the dimensions of @p BB and @p ExitBB might be different we have to
  575. // adjust the domain before we can propagate it.
  576. isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop);
  577. isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB);
578. // If the exit domain is not yet created we set it, otherwise we "add" the
579. // current domain.
  580. ExitDomain =
  581. !ExitDomain.is_null() ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;
  582. // Initialize the invalid domain.
  583. InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());
  584. FinishedExitBlocks.insert(ExitBB);
  585. }
  586. isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
  587. isl::set Domain) {
588. // If @p BB is the ScopEntry we are done.
  589. if (scop->getRegion().getEntry() == BB)
  590. return isl::set::universe(Domain.get_space());
  591. // The region info of this function.
  592. auto &RI = *scop->getRegion().getRegionInfo();
  593. Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops());
  594. // A domain to collect all predecessor domains, thus all conditions under
  595. // which the block is executed. To this end we start with the empty domain.
  596. isl::set PredDom = isl::set::empty(Domain.get_space());
597. // Set of regions of which the entry block domain has been propagated to BB.
598. // All predecessors inside any of these regions can be skipped.
  599. SmallSet<Region *, 8> PropagatedRegions;
  600. for (auto *PredBB : predecessors(BB)) {
  601. // Skip backedges.
  602. if (DT.dominates(BB, PredBB))
  603. continue;
  604. // If the predecessor is in a region we used for propagation we can skip it.
  605. auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
  606. if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(),
  607. PredBBInRegion)) {
  608. continue;
  609. }
  610. // Check if there is a valid region we can use for propagation, thus look
  611. // for a region that contains the predecessor and has @p BB as exit block.
612. // FIXME: This was a side-effect-free (and possibly infinite) loop when
613. // committed and seems not to be needed.
  614. auto *PredR = RI.getRegionFor(PredBB);
  615. while (PredR->getExit() != BB && !PredR->contains(BB))
  616. PredR = PredR->getParent();
617. // If a valid region for propagation was found use the entry of that region
618. // for propagation, otherwise use PredBB directly.
  619. if (PredR->getExit() == BB) {
  620. PredBB = PredR->getEntry();
  621. PropagatedRegions.insert(PredR);
  622. }
  623. isl::set PredBBDom = scop->getDomainConditions(PredBB);
  624. Loop *PredBBLoop =
  625. getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops());
  626. PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop);
  627. PredDom = PredDom.unite(PredBBDom);
  628. }
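// PredDom now describes all conditions under which BB can be reached from
// within the SCoP when loop backedges are ignored; the caller intersects it
// with the domain of BB.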
  629. return PredDom;
  630. }
  631. bool ScopBuilder::addLoopBoundsToHeaderDomain(
  632. Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  633. int LoopDepth = scop->getRelativeLoopDepth(L);
  634. assert(LoopDepth >= 0 && "Loop in region should have at least depth one");
  635. BasicBlock *HeaderBB = L->getHeader();
  636. assert(scop->isDomainDefined(HeaderBB));
  637. isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB);
  638. isl::map NextIterationMap =
  639. createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);
  640. isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());
  641. SmallVector<BasicBlock *, 4> LatchBlocks;
  642. L->getLoopLatches(LatchBlocks);
  643. for (BasicBlock *LatchBB : LatchBlocks) {
  644. // If the latch is only reachable via error statements we skip it.
  645. if (!scop->isDomainDefined(LatchBB))
  646. continue;
  647. isl::set LatchBBDom = scop->getDomainConditions(LatchBB);
  648. isl::set BackedgeCondition;
  649. Instruction *TI = LatchBB->getTerminator();
  650. BranchInst *BI = dyn_cast<BranchInst>(TI);
  651. assert(BI && "Only branch instructions allowed in loop latches");
  652. if (BI->isUnconditional())
  653. BackedgeCondition = LatchBBDom;
  654. else {
  655. SmallVector<isl_set *, 8> ConditionSets;
  656. int idx = BI->getSuccessor(0) != HeaderBB;
  657. if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(),
  658. InvalidDomainMap, ConditionSets))
  659. return false;
660. // Free the non-backedge condition set as we do not need it.
  661. isl_set_free(ConditionSets[1 - idx]);
  662. BackedgeCondition = isl::manage(ConditionSets[idx]);
  663. }
  664. int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB));
  665. assert(LatchLoopDepth >= LoopDepth);
  666. BackedgeCondition = BackedgeCondition.project_out(
  667. isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
  668. UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
  669. }
670. isl::map ForwardMap = isl::map::lex_le(HeaderBBDom.get_space());
  671. for (int i = 0; i < LoopDepth; i++)
  672. ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);
  673. isl::set UnionBackedgeConditionComplement =
  674. UnionBackedgeCondition.complement();
  675. UnionBackedgeConditionComplement =
  676. UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
  677. 0);
  678. UnionBackedgeConditionComplement =
  679. UnionBackedgeConditionComplement.apply(ForwardMap);
  680. HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
  681. HeaderBBDom = HeaderBBDom.apply(NextIterationMap);
  682. auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
  683. HeaderBBDom = Parts.second;
  684. // Check if there is a <nsw> tagged AddRec for this loop and if so do not
  685. // require a runtime check. The assumption is already implied by the <nsw>
  686. // tag.
  687. bool RequiresRTC = !scop->hasNSWAddRecForLoop(L);
  688. isl::set UnboundedCtx = Parts.first.params();
  689. recordAssumption(&RecordedAssumptions, INFINITELOOP, UnboundedCtx,
  690. HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION,
  691. nullptr, RequiresRTC);
  692. return true;
  693. }
  694. void ScopBuilder::buildInvariantEquivalenceClasses() {
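// Group the required invariant loads by (pointer SCEV, type): loads with the
// same key share one equivalence class, and the first load encountered
// becomes the class representative (ClassRep).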
  695. DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
  696. const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  697. for (LoadInst *LInst : RIL) {
  698. const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
  699. Type *Ty = LInst->getType();
  700. LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
  701. if (ClassRep) {
  702. scop->addInvariantLoadMapping(LInst, ClassRep);
  703. continue;
  704. }
  705. ClassRep = LInst;
  706. scop->addInvariantEquivClass(
  707. InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), {}, Ty});
  708. }
  709. }
  710. bool ScopBuilder::buildDomains(
  711. Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  712. bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R);
  713. auto *EntryBB = R->getEntry();
  714. auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
  715. int LD = scop->getRelativeLoopDepth(L);
  716. auto *S =
  717. isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1));
  718. InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
  719. isl::set Domain = isl::manage(S);
  720. scop->setDomain(EntryBB, Domain);
  721. if (IsOnlyNonAffineRegion)
  722. return !containsErrorBlock(R->getNode(), *R, &SD);
  723. if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
  724. return false;
  725. if (!propagateDomainConstraints(R, InvalidDomainMap))
  726. return false;
  727. // Error blocks and blocks dominated by them have been assumed to never be
  728. // executed. Representing them in the Scop does not add any value. In fact,
  729. // it is likely to cause issues during construction of the ScopStmts. The
  730. // contents of error blocks have not been verified to be expressible and
  731. // will cause problems when building up a ScopStmt for them.
  732. // Furthermore, basic blocks dominated by error blocks may reference
  733. // instructions in the error block which, if the error block is not modeled,
  734. // can themselves not be constructed properly. To this end we will replace
  735. // the domains of error blocks and those only reachable via error blocks
  736. // with an empty set. Additionally, we will record for each block under which
  737. // parameter combination it would be reached via an error block in its
  738. // InvalidDomain. This information is needed during load hoisting.
  739. if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
  740. return false;
  741. return true;
  742. }
  743. bool ScopBuilder::buildDomainsWithBranchConstraints(
  744. Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  745. // To create the domain for each block in R we iterate over all blocks and
  746. // subregions in R and propagate the conditions under which the current region
  747. // element is executed. To this end we iterate in reverse post order over R as
  748. // it ensures that we first visit all predecessors of a region node (either a
  749. // basic block or a subregion) before we visit the region node itself.
  750. // Initially, only the domain for the SCoP region entry block is set and from
751. // there we propagate the current domain to all successors; however, we add the
  752. // condition that the successor is actually executed next.
  753. // As we are only interested in non-loop carried constraints here we can
  754. // simply skip loop back edges.
  755. SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
  756. ReversePostOrderTraversal<Region *> RTraversal(R);
  757. for (auto *RN : RTraversal) {
  758. // Recurse for affine subregions but go on for basic blocks and non-affine
  759. // subregions.
  760. if (RN->isSubRegion()) {
  761. Region *SubRegion = RN->getNodeAs<Region>();
  762. if (!scop->isNonAffineSubRegion(SubRegion)) {
  763. if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap))
  764. return false;
  765. continue;
  766. }
  767. }
768. if (containsErrorBlock(RN, scop->getRegion(), &SD))
769. scop->notifyErrorBlock();
  771. BasicBlock *BB = getRegionNodeBasicBlock(RN);
  772. Instruction *TI = BB->getTerminator();
  773. if (isa<UnreachableInst>(TI))
  774. continue;
  775. if (!scop->isDomainDefined(BB))
  776. continue;
  777. isl::set Domain = scop->getDomainConditions(BB);
  778. scop->updateMaxLoopDepth(unsignedFromIslSize(Domain.tuple_dim()));
  779. auto *BBLoop = getRegionNodeLoop(RN, LI);
  780. // Propagate the domain from BB directly to blocks that have a superset
  781. // domain, at the moment only region exit nodes of regions that start in BB.
  782. propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
  783. InvalidDomainMap);
784. // If all successors of BB have been assigned a domain through the
785. // propagation above, we do not need to build condition sets but can just
786. // skip this block. However, it is important to note that this is a local
787. // property with regards to the region @p R. To this end FinishedExitBlocks
788. // is a local variable.
  789. auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
  790. return FinishedExitBlocks.count(SuccBB);
  791. };
  792. if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
  793. continue;
  794. // Build the condition sets for the successor nodes of the current region
  795. // node. If it is a non-affine subregion we will always execute the single
  796. // exit node, hence the single entry node domain is the condition set. For
  797. // basic blocks we use the helper function buildConditionSets.
  798. SmallVector<isl_set *, 8> ConditionSets;
  799. if (RN->isSubRegion())
  800. ConditionSets.push_back(Domain.copy());
  801. else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap,
  802. ConditionSets))
  803. return false;
  804. // Now iterate over the successors and set their initial domain based on
  805. // their condition set. We skip back edges here and have to be careful when
  806. // we leave a loop not to keep constraints over a dimension that doesn't
  807. // exist anymore.
  808. assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
  809. for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
  810. isl::set CondSet = isl::manage(ConditionSets[u]);
  811. BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);
  812. // Skip blocks outside the region.
  813. if (!scop->contains(SuccBB))
  814. continue;
  815. // If we propagate the domain of some block to "SuccBB" we do not have to
  816. // adjust the domain.
  817. if (FinishedExitBlocks.count(SuccBB))
  818. continue;
  819. // Skip back edges.
  820. if (DT.dominates(SuccBB, BB))
  821. continue;
  822. Loop *SuccBBLoop =
  823. getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());
  824. CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop);
  825. // Set the domain for the successor or merge it with an existing domain in
  826. // case there are multiple paths (without loop back edges) to the
  827. // successor block.
  828. isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB);
  829. if (!SuccDomain.is_null()) {
  830. SuccDomain = SuccDomain.unite(CondSet).coalesce();
  831. } else {
  832. // Initialize the invalid domain.
  833. InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
  834. SuccDomain = CondSet;
  835. }
  836. SuccDomain = SuccDomain.detect_equalities();
  837. // Check if the maximal number of domain disjunctions was reached.
  838. // In case this happens we will clean up and bail.
  839. if (unsignedFromIslSize(SuccDomain.n_basic_set()) < MaxDisjunctsInDomain)
  840. continue;
  841. scop->invalidate(COMPLEXITY, DebugLoc());
  842. while (++u < ConditionSets.size())
  843. isl_set_free(ConditionSets[u]);
  844. return false;
  845. }
  846. }
  847. return true;
  848. }
  849. bool ScopBuilder::propagateInvalidStmtDomains(
  850. Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  851. ReversePostOrderTraversal<Region *> RTraversal(R);
  852. for (auto *RN : RTraversal) {
  853. // Recurse for affine subregions but go on for basic blocks and non-affine
  854. // subregions.
  855. if (RN->isSubRegion()) {
  856. Region *SubRegion = RN->getNodeAs<Region>();
  857. if (!scop->isNonAffineSubRegion(SubRegion)) {
  858. propagateInvalidStmtDomains(SubRegion, InvalidDomainMap);
  859. continue;
  860. }
  861. }
  862. bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), &SD);
  863. BasicBlock *BB = getRegionNodeBasicBlock(RN);
  864. isl::set &Domain = scop->getOrInitEmptyDomain(BB);
  865. assert(!Domain.is_null() && "Cannot propagate a nullptr");
  866. isl::set InvalidDomain = InvalidDomainMap[BB];
  867. bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);
  868. if (!IsInvalidBlock) {
  869. InvalidDomain = InvalidDomain.intersect(Domain);
  870. } else {
  871. InvalidDomain = Domain;
  872. isl::set DomPar = Domain.params();
  873. recordAssumption(&RecordedAssumptions, ERRORBLOCK, DomPar,
  874. BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
  875. Domain = isl::set::empty(Domain.get_space());
  876. }
  877. if (InvalidDomain.is_empty()) {
  878. InvalidDomainMap[BB] = InvalidDomain;
  879. continue;
  880. }
  881. auto *BBLoop = getRegionNodeLoop(RN, LI);
  882. auto *TI = BB->getTerminator();
  883. unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
  884. for (unsigned u = 0; u < NumSuccs; u++) {
  885. auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);
  886. // Skip successors outside the SCoP.
  887. if (!scop->contains(SuccBB))
  888. continue;
  889. // Skip backedges.
  890. if (DT.dominates(SuccBB, BB))
  891. continue;
  892. Loop *SuccBBLoop =
  893. getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());
  894. auto AdjustedInvalidDomain =
  895. adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop);
  896. isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
  897. SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
  898. SuccInvalidDomain = SuccInvalidDomain.coalesce();
  899. InvalidDomainMap[SuccBB] = SuccInvalidDomain;
  900. // Check if the maximal number of domain disjunctions was reached.
  901. // In case this happens we will bail.
  902. if (unsignedFromIslSize(SuccInvalidDomain.n_basic_set()) <
  903. MaxDisjunctsInDomain)
  904. continue;
  905. InvalidDomainMap.erase(BB);
  906. scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
  907. return false;
  908. }
  909. InvalidDomainMap[BB] = InvalidDomain;
  910. }
  911. return true;
  912. }
  913. void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
  914. Region *NonAffineSubRegion,
  915. bool IsExitBlock) {
// PHI nodes in the exit block of the region (i.e., when IsExitBlock is true)
// are not modeled as ordinary PHI nodes because they are not part of the
// region. However, we model the operands in the predecessor blocks that are
// part of the region as regular scalar accesses.
  920. // If we can synthesize a PHI we can skip it, however only if it is in
  921. // the region. If it is not it can only be in the exit block of the region.
  922. // In this case we model the operands but not the PHI itself.
  923. auto *Scope = LI.getLoopFor(PHI->getParent());
  924. if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
  925. return;
  926. // PHI nodes are modeled as if they had been demoted prior to the SCoP
  927. // detection. Hence, the PHI is a load of a new memory location in which the
  928. // incoming value was written at the end of the incoming basic block.
  929. bool OnlyNonAffineSubRegionOperands = true;
  930. for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
  931. Value *Op = PHI->getIncomingValue(u);
  932. BasicBlock *OpBB = PHI->getIncomingBlock(u);
  933. ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));
  934. // Do not build PHI dependences inside a non-affine subregion, but make
  935. // sure that the necessary scalar values are still made available.
  936. if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
  937. auto *OpInst = dyn_cast<Instruction>(Op);
  938. if (!OpInst || !NonAffineSubRegion->contains(OpInst))
  939. ensureValueRead(Op, OpStmt);
  940. continue;
  941. }
  942. OnlyNonAffineSubRegionOperands = false;
  943. ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  944. }
  945. if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
  946. addPHIReadAccess(PHIStmt, PHI);
  947. }
  948. }
  949. void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
  950. Instruction *Inst) {
  951. assert(!isa<PHINode>(Inst));
  952. // Pull-in required operands.
  953. for (Use &Op : Inst->operands())
  954. ensureValueRead(Op.get(), UserStmt);
  955. }
  956. // Create a sequence of two schedules. Either argument may be null and is
  957. // interpreted as the empty schedule. Can also return null if both schedules are
  958. // empty.
  959. static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
  960. if (Prev.is_null())
  961. return Succ;
  962. if (Succ.is_null())
  963. return Prev;
  964. return Prev.sequence(Succ);
  965. }
  966. // Create an isl_multi_union_aff that defines an identity mapping from the
  967. // elements of USet to their N-th dimension.
  968. //
  969. // # Example:
  970. //
  971. // Domain: { A[i,j]; B[i,j,k] }
  972. // N: 1
  973. //
// Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
  975. //
  976. // @param USet A union set describing the elements for which to generate a
  977. // mapping.
  978. // @param N The dimension to map to.
  979. // @returns A mapping from USet to its N-th dimension.
  980. static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N) {
  981. assert(!USet.is_null());
  982. assert(!USet.is_empty());
  983. auto Result = isl::union_pw_multi_aff::empty(USet.get_space());
  984. for (isl::set S : USet.get_set_list()) {
  985. unsigned Dim = unsignedFromIslSize(S.tuple_dim());
  986. assert(Dim >= N);
  987. auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
  988. N, Dim - N);
  989. if (N > 1)
  990. PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);
  991. Result = Result.add_pw_multi_aff(PMA);
  992. }
  993. return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
  994. }
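// Entry point for schedule construction: seed the loop stack with the loop
// surrounding the SCoP, traverse the SCoP region, and install the resulting
// schedule tree.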
  995. void ScopBuilder::buildSchedule() {
  996. Loop *L = getLoopSurroundingScop(*scop, LI);
  997. LoopStackTy LoopStack({LoopStackElementTy(L, {}, 0)});
  998. buildSchedule(scop->getRegion().getNode(), LoopStack);
  999. assert(LoopStack.size() == 1 && LoopStack.back().L == L);
  1000. scop->setScheduleTree(LoopStack[0].Schedule);
  1001. }
  1002. /// To generate a schedule for the elements in a Region we traverse the Region
  1003. /// in reverse-post-order and add the contained RegionNodes in traversal order
  1004. /// to the schedule of the loop that is currently at the top of the LoopStack.
  1005. /// For loop-free codes, this results in a correct sequential ordering.
  1006. ///
  1007. /// Example:
///          bb1(0)
///         /     \.
///     bb2(1)   bb3(2)
///         \     /  \.
///         bb4(3)  bb5(4)
///            \     /
///            bb6(5)
  1015. ///
  1016. /// Including loops requires additional processing. Whenever a loop header is
  1017. /// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this
/// loop-nodes-first-processing, the reverse post-order traversal is
/// insufficient. Hence, we additionally check if the traversal yields
/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
/// These region-nodes are then queued and only traversed after all nodes
/// within the current loop have been processed.
  1026. void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
  1027. Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);
  1028. ReversePostOrderTraversal<Region *> RTraversal(R);
  1029. std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  1030. std::deque<RegionNode *> DelayList;
  1031. bool LastRNWaiting = false;
  1032. // Iterate over the region @p R in reverse post-order but queue
  1033. // sub-regions/blocks iff they are not part of the last encountered but not
  1034. // completely traversed loop. The variable LastRNWaiting is a flag to indicate
  1035. // that we queued the last sub-region/block from the reverse post-order
  1036. // iterator. If it is set we have to explore the next sub-region/block from
  1037. // the iterator (if any) to guarantee progress. If it is not set we first try
  1038. // the next queued sub-region/blocks.
  1039. while (!WorkList.empty() || !DelayList.empty()) {
  1040. RegionNode *RN;
  1041. if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
  1042. RN = WorkList.front();
  1043. WorkList.pop_front();
  1044. LastRNWaiting = false;
  1045. } else {
  1046. RN = DelayList.front();
  1047. DelayList.pop_front();
  1048. }
  1049. Loop *L = getRegionNodeLoop(RN, LI);
  1050. if (!scop->contains(L))
  1051. L = OuterScopLoop;
  1052. Loop *LastLoop = LoopStack.back().L;
  1053. if (LastLoop != L) {
  1054. if (LastLoop && !LastLoop->contains(L)) {
  1055. LastRNWaiting = true;
  1056. DelayList.push_back(RN);
  1057. continue;
  1058. }
  1059. LoopStack.push_back({L, {}, 0});
  1060. }
  1061. buildSchedule(RN, LoopStack);
  1062. }
  1063. }
  1064. void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
  1065. if (RN->isSubRegion()) {
  1066. auto *LocalRegion = RN->getNodeAs<Region>();
  1067. if (!scop->isNonAffineSubRegion(LocalRegion)) {
  1068. buildSchedule(LocalRegion, LoopStack);
  1069. return;
  1070. }
  1071. }
  1072. assert(LoopStack.rbegin() != LoopStack.rend());
  1073. auto LoopData = LoopStack.rbegin();
  1074. LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);
  1075. for (auto *Stmt : scop->getStmtListFor(RN)) {
  1076. isl::union_set UDomain{Stmt->getDomain()};
  1077. auto StmtSchedule = isl::schedule::from_domain(UDomain);
  1078. LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
  1079. }
  1080. // Check if we just processed the last node in this loop. If we did, finalize
  1081. // the loop by:
  1082. //
  1083. // - adding new schedule dimensions
  1084. // - folding the resulting schedule into the parent loop schedule
  1085. // - dropping the loop schedule from the LoopStack.
  1086. //
  1087. // Then continue to check surrounding loops, which might also have been
  1088. // completed by this node.
  1089. size_t Dimension = LoopStack.size();
  1090. while (LoopData->L &&
  1091. LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
  1092. isl::schedule Schedule = LoopData->Schedule;
  1093. auto NumBlocksProcessed = LoopData->NumBlocksProcessed;
  1094. assert(std::next(LoopData) != LoopStack.rend());
  1095. Loop *L = LoopData->L;
  1096. ++LoopData;
  1097. --Dimension;
  1098. if (!Schedule.is_null()) {
  1099. isl::union_set Domain = Schedule.get_domain();
  1100. isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
  1101. Schedule = Schedule.insert_partial_schedule(MUPA);
  1102. if (hasDisableAllTransformsHint(L)) {
  1103. /// If any of the loops has a disable_nonforced heuristic, mark the
  1104. /// entire SCoP as such. The ISL rescheduler can only reschedule the
  1105. /// SCoP in its entirety.
/// TODO: ScopDetection could avoid including such loops or wrap them as
/// boxed loops. It still needs to pass through loops with user-defined
/// metadata.
  1109. scop->markDisableHeuristics();
  1110. }
// It is easier to insert the marks here than to do it retroactively.
  1112. isl::id IslLoopId = createIslLoopAttr(scop->getIslCtx(), L);
  1113. if (!IslLoopId.is_null())
  1114. Schedule =
  1115. Schedule.get_root().child(0).insert_mark(IslLoopId).get_schedule();
  1116. LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
  1117. }
  1118. LoopData->NumBlocksProcessed += NumBlocksProcessed;
  1119. }
// Now pop all loops processed above from the LoopStack.
  1121. LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
  1122. }
  1123. void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  1124. // Check for uses of this instruction outside the scop. Because we do not
  1125. // iterate over such instructions and therefore did not "ensure" the existence
  1126. // of a write, we must determine such use here.
  1127. if (scop->isEscaping(Inst))
  1128. ensureValueWrite(Inst);
  1129. }
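// Replay the assumptions recorded while building the SCoP and add them to the
// SCoP. Assumptions tied to a basic block are simplified with that block's
// domain and registered as restrictions.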
  1130. void ScopBuilder::addRecordedAssumptions() {
  1131. for (auto &AS : llvm::reverse(RecordedAssumptions)) {
  1132. if (!AS.BB) {
  1133. scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
  1134. nullptr /* BasicBlock */, AS.RequiresRTC);
  1135. continue;
  1136. }
  1137. // If the domain was deleted the assumptions are void.
  1138. isl_set *Dom = scop->getDomainConditions(AS.BB).release();
  1139. if (!Dom)
  1140. continue;
  1141. // If a basic block was given use its domain to simplify the assumption.
  1142. // In case of restrictions we know they only have to hold on the domain,
  1143. // thus we can intersect them with the domain of the block. However, for
  1144. // assumptions the domain has to imply them, thus:
//
//   Dom => S   <==>   !Dom v S   <==>   !(Dom - S)
//
// To avoid computing the complement we register (Dom - S) as a restriction
// rather than as an assumption.
  1150. isl_set *S = AS.Set.copy();
  1151. if (AS.Sign == AS_RESTRICTION)
  1152. S = isl_set_params(isl_set_intersect(S, Dom));
  1153. else /* (AS.Sign == AS_ASSUMPTION) */
  1154. S = isl_set_params(isl_set_subtract(Dom, S));
  1155. scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB,
  1156. AS.RequiresRTC);
  1157. }
  1158. }
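// Incorporate llvm.assume calls that are contained in or dominate the SCoP:
// if the assumed condition is an affine constraint, it is added to the SCoP's
// context; otherwise the user assumption is reported as ignored.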
  1159. void ScopBuilder::addUserAssumptions(
  1160. AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  1161. for (auto &Assumption : AC.assumptions()) {
  1162. auto *CI = dyn_cast_or_null<CallInst>(Assumption);
  1163. if (!CI || CI->arg_size() != 1)
  1164. continue;
  1165. bool InScop = scop->contains(CI);
  1166. if (!InScop && !scop->isDominatedBy(DT, CI->getParent()))
  1167. continue;
  1168. auto *L = LI.getLoopFor(CI->getParent());
  1169. auto *Val = CI->getArgOperand(0);
  1170. ParameterSetTy DetectedParams;
  1171. auto &R = scop->getRegion();
  1172. if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) {
  1173. ORE.emit(
  1174. OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
  1175. << "Non-affine user assumption ignored.");
  1176. continue;
  1177. }
  1178. // Collect all newly introduced parameters.
  1179. ParameterSetTy NewParams;
  1180. for (auto *Param : DetectedParams) {
  1181. Param = extractConstantFactor(Param, SE).second;
  1182. Param = scop->getRepresentingInvariantLoadSCEV(Param);
  1183. if (scop->isParam(Param))
  1184. continue;
  1185. NewParams.insert(Param);
  1186. }
  1187. SmallVector<isl_set *, 2> ConditionSets;
  1188. auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
  1189. BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
  1190. auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get())
  1191. : isl_set_copy(scop->getContext().get());
  1192. assert(Dom && "Cannot propagate a nullptr.");
  1193. bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap,
  1194. ConditionSets);
  1195. isl_set_free(Dom);
  1196. if (!Valid)
  1197. continue;
  1198. isl_set *AssumptionCtx = nullptr;
  1199. if (InScop) {
  1200. AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
  1201. isl_set_free(ConditionSets[0]);
  1202. } else {
  1203. AssumptionCtx = isl_set_complement(ConditionSets[1]);
  1204. AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
  1205. }
  1206. // Project out newly introduced parameters as they are not otherwise useful.
  1207. if (!NewParams.empty()) {
  1208. for (isl_size u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
  1209. auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
  1210. auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
  1211. isl_id_free(Id);
  1212. if (!NewParams.count(Param))
  1213. continue;
  1214. AssumptionCtx =
  1215. isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
  1216. }
  1217. }
  1218. ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
  1219. << "Use user assumption: "
  1220. << stringFromIslObj(AssumptionCtx, "null"));
  1221. isl::set newContext =
  1222. scop->getContext().intersect(isl::manage(AssumptionCtx));
  1223. scop->setContext(newContext);
  1224. }
  1225. }
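// Try to model the access as a multi-dimensional array access with fixed
// (constant) dimension sizes derived from the GEP that computes the address.
// Returns false if this pattern does not apply and another strategy must be
// used.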
  1226. bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
  1227. Value *Val = Inst.getValueOperand();
  1228. Type *ElementType = Val->getType();
  1229. Value *Address = Inst.getPointerOperand();
  1230. const SCEV *AccessFunction =
  1231. SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  1232. const SCEVUnknown *BasePointer =
  1233. dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
  1234. enum MemoryAccess::AccessType AccType =
  1235. isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
  1236. if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
  1237. auto *Src = BitCast->getOperand(0);
  1238. auto *SrcTy = Src->getType();
  1239. auto *DstTy = BitCast->getType();
  1240. // Do not try to delinearize non-sized (opaque) pointers.
  1241. if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
  1242. (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
  1243. return false;
  1244. }
  1245. if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
  1246. DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
  1247. DL.getTypeAllocSize(DstTy->getPointerElementType()))
  1248. Address = Src;
  1249. }
  1250. auto *GEP = dyn_cast<GetElementPtrInst>(Address);
  1251. if (!GEP)
  1252. return false;
  1253. SmallVector<const SCEV *, 4> Subscripts;
  1254. SmallVector<int, 4> Sizes;
  1255. getIndexExpressionsFromGEP(SE, GEP, Subscripts, Sizes);
  1256. auto *BasePtr = GEP->getOperand(0);
  1257. if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
  1258. BasePtr = BasePtrCast->getOperand(0);
  1259. // Check for identical base pointers to ensure that we do not miss index
  1260. // offsets that have been added before this GEP is applied.
  1261. if (BasePtr != BasePointer->getValue())
  1262. return false;
  1263. std::vector<const SCEV *> SizesSCEV;
  1264. const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
  1265. Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  1266. for (auto *Subscript : Subscripts) {
  1267. InvariantLoadsSetTy AccessILS;
  1268. if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
  1269. &AccessILS))
  1270. return false;
  1271. for (LoadInst *LInst : AccessILS)
  1272. if (!ScopRIL.count(LInst))
  1273. return false;
  1274. }
  1275. if (Sizes.empty())
  1276. return false;
  1277. SizesSCEV.push_back(nullptr);
  1278. for (auto V : Sizes)
  1279. SizesSCEV.push_back(SE.getSCEV(
  1280. ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
  1281. addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
  1282. true, Subscripts, SizesSCEV, Val);
  1283. return true;
  1284. }
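// Try to model the access as a multi-dimensional array access with parametric
// dimension sizes, based on the delinearization computed during SCoP
// detection. Returns false if delinearization is disabled or no shape
// information is available for this instruction.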
  1285. bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
  1286. if (!PollyDelinearize)
  1287. return false;
  1288. Value *Address = Inst.getPointerOperand();
  1289. Value *Val = Inst.getValueOperand();
  1290. Type *ElementType = Val->getType();
  1291. unsigned ElementSize = DL.getTypeAllocSize(ElementType);
  1292. enum MemoryAccess::AccessType AccType =
  1293. isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
  1294. const SCEV *AccessFunction =
  1295. SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  1296. const SCEVUnknown *BasePointer =
  1297. dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
  1298. assert(BasePointer && "Could not find base pointer");
  1299. auto &InsnToMemAcc = scop->getInsnToMemAccMap();
  1300. auto AccItr = InsnToMemAcc.find(Inst);
  1301. if (AccItr == InsnToMemAcc.end())
  1302. return false;
  1303. std::vector<const SCEV *> Sizes = {nullptr};
  1304. Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
  1305. AccItr->second.Shape->DelinearizedSizes.end());
  1306. // In case only the element size is contained in the 'Sizes' array, the
  1307. // access does not access a real multi-dimensional array. Hence, we allow
  1308. // the normal single-dimensional access construction to handle this.
  1309. if (Sizes.size() == 1)
  1310. return false;
  1311. // Remove the element size. This information is already provided by the
  1312. // ElementSize parameter. In case the element size of this access and the
// element size used for delinearization differ, the delinearization is
  1314. // incorrect. Hence, we invalidate the scop.
  1315. //
  1316. // TODO: Handle delinearization with differing element sizes.
  1317. auto DelinearizedSize =
  1318. cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
  1319. Sizes.pop_back();
  1320. if (ElementSize != DelinearizedSize)
  1321. scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
  1322. addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
  1323. true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
  1324. return true;
  1325. }
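// Model memset/memcpy/memmove intrinsics as byte-wise accesses: a MUST_WRITE
// to the destination and, for memory transfers, a READ from the source. The
// access length is only kept if it is affine. Returns false if the
// instruction is not a memory intrinsic.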
  1326. bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
  1327. auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
  1328. if (MemIntr == nullptr)
  1329. return false;
  1330. auto *L = LI.getLoopFor(Inst->getParent());
  1331. auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
  1332. assert(LengthVal);
  1333. // Check if the length val is actually affine or if we overapproximate it
  1334. InvariantLoadsSetTy AccessILS;
  1335. const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
  1336. Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  1337. bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
  1338. LengthVal, SE, &AccessILS);
  1339. for (LoadInst *LInst : AccessILS)
  1340. if (!ScopRIL.count(LInst))
  1341. LengthIsAffine = false;
  1342. if (!LengthIsAffine)
  1343. LengthVal = nullptr;
  1344. auto *DestPtrVal = MemIntr->getDest();
  1345. assert(DestPtrVal);
  1346. auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
  1347. assert(DestAccFunc);
  1348. // Ignore accesses to "NULL".
  1349. // TODO: We could use this to optimize the region further, e.g., intersect
  1350. // the context with
  1351. // isl_set_complement(isl_set_params(getDomain()))
  1352. // as we know it would be undefined to execute this instruction anyway.
  1353. if (DestAccFunc->isZero())
  1354. return true;
  1355. if (auto *U = dyn_cast<SCEVUnknown>(DestAccFunc)) {
  1356. if (isa<ConstantPointerNull>(U->getValue()))
  1357. return true;
  1358. }
  1359. auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
  1360. assert(DestPtrSCEV);
  1361. DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
  1362. addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
  1363. IntegerType::getInt8Ty(DestPtrVal->getContext()),
  1364. LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
  1365. Inst.getValueOperand());
  1366. auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
  1367. if (!MemTrans)
  1368. return true;
  1369. auto *SrcPtrVal = MemTrans->getSource();
  1370. assert(SrcPtrVal);
  1371. auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
  1372. assert(SrcAccFunc);
  1373. // Ignore accesses to "NULL".
  1374. // TODO: See above TODO
  1375. if (SrcAccFunc->isZero())
  1376. return true;
  1377. auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
  1378. assert(SrcPtrSCEV);
  1379. SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
  1380. addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
  1381. IntegerType::getInt8Ty(SrcPtrVal->getContext()),
  1382. LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
  1383. Inst.getValueOperand());
  1384. return true;
  1385. }
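// Model the memory effects of a call based on its mod/ref behavior: calls
// that only read (possibly arbitrary) memory are collected in GlobalReads,
// and calls that only access their argument pointees get a (may-)access per
// pointer argument. Returns false for calls whose side effects cannot be
// modeled here.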
  1386. bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
  1387. auto *CI = dyn_cast_or_null<CallInst>(Inst);
  1388. if (CI == nullptr)
  1389. return false;
  1390. if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
  1391. return true;
  1392. bool ReadOnly = false;
  1393. auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
  1394. auto *CalledFunction = CI->getCalledFunction();
  1395. switch (AA.getModRefBehavior(CalledFunction)) {
  1396. case FMRB_UnknownModRefBehavior:
  1397. llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
  1398. case FMRB_DoesNotAccessMemory:
  1399. return true;
  1400. case FMRB_OnlyWritesMemory:
  1401. case FMRB_OnlyWritesInaccessibleMem:
  1402. case FMRB_OnlyWritesInaccessibleOrArgMem:
  1403. case FMRB_OnlyAccessesInaccessibleMem:
  1404. case FMRB_OnlyAccessesInaccessibleOrArgMem:
  1405. return false;
  1406. case FMRB_OnlyReadsMemory:
  1407. case FMRB_OnlyReadsInaccessibleMem:
  1408. case FMRB_OnlyReadsInaccessibleOrArgMem:
  1409. GlobalReads.emplace_back(Stmt, CI);
  1410. return true;
  1411. case FMRB_OnlyReadsArgumentPointees:
  1412. ReadOnly = true;
  1413. LLVM_FALLTHROUGH;
  1414. case FMRB_OnlyWritesArgumentPointees:
  1415. case FMRB_OnlyAccessesArgumentPointees: {
  1416. auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
  1417. Loop *L = LI.getLoopFor(Inst->getParent());
  1418. for (const auto &Arg : CI->args()) {
  1419. if (!Arg->getType()->isPointerTy())
  1420. continue;
  1421. auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
  1422. if (ArgSCEV->isZero())
  1423. continue;
  1424. if (auto *U = dyn_cast<SCEVUnknown>(ArgSCEV)) {
  1425. if (isa<ConstantPointerNull>(U->getValue()))
  1426. return true;
  1427. }
  1428. auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
  1429. addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
  1430. ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
  1431. }
  1432. return true;
  1433. }
  1434. }
  1435. return true;
  1436. }
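// Fall-back: model the access as a single-dimensional access relative to the
// base pointer. If the access function is not affine, writes are demoted to
// MAY_WRITE.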
  1437. void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
  1438. Value *Address = Inst.getPointerOperand();
  1439. Value *Val = Inst.getValueOperand();
  1440. Type *ElementType = Val->getType();
  1441. enum MemoryAccess::AccessType AccType =
  1442. isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
  1443. const SCEV *AccessFunction =
  1444. SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  1445. const SCEVUnknown *BasePointer =
  1446. dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
  1447. assert(BasePointer && "Could not find base pointer");
  1448. AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
  1449. // Check if the access depends on a loop contained in a non-affine subregion.
  1450. bool isVariantInNonAffineLoop = false;
  1451. SetVector<const Loop *> Loops;
  1452. findLoops(AccessFunction, Loops);
  1453. for (const Loop *L : Loops)
  1454. if (Stmt->contains(L)) {
  1455. isVariantInNonAffineLoop = true;
  1456. break;
  1457. }
  1458. InvariantLoadsSetTy AccessILS;
  1459. Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  1460. bool IsAffine = !isVariantInNonAffineLoop &&
  1461. isAffineExpr(&scop->getRegion(), SurroundingLoop,
  1462. AccessFunction, SE, &AccessILS);
  1463. const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
  1464. for (LoadInst *LInst : AccessILS)
  1465. if (!ScopRIL.count(LInst))
  1466. IsAffine = false;
  1467. if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
  1468. AccType = MemoryAccess::MAY_WRITE;
  1469. addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
  1470. IsAffine, {AccessFunction}, {nullptr}, Val);
  1471. }
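// Dispatch to the specialized access builders, from most to least precise;
// buildAccessSingleDim serves as the catch-all fall-back.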
  1472. void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
  1473. if (buildAccessMemIntrinsic(Inst, Stmt))
  1474. return;
  1475. if (buildAccessCallInst(Inst, Stmt))
  1476. return;
  1477. if (buildAccessMultiDimFixed(Inst, Stmt))
  1478. return;
  1479. if (buildAccessMultiDimParam(Inst, Stmt))
  1480. return;
  1481. buildAccessSingleDim(Inst, Stmt);
  1482. }
  1483. void ScopBuilder::buildAccessFunctions() {
  1484. for (auto &Stmt : *scop) {
  1485. if (Stmt.isBlockStmt()) {
  1486. buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
  1487. continue;
  1488. }
  1489. Region *R = Stmt.getRegion();
  1490. for (BasicBlock *BB : R->blocks())
  1491. buildAccessFunctions(&Stmt, *BB, R);
  1492. }
  1493. // Build write accesses for values that are used after the SCoP.
  1494. // The instructions defining them might be synthesizable and therefore not
  1495. // contained in any statement, hence we iterate over the original instructions
  1496. // to identify all escaping values.
  1497. for (BasicBlock *BB : scop->getRegion().blocks()) {
  1498. for (Instruction &Inst : *BB)
  1499. buildEscapingDependences(&Inst);
  1500. }
  1501. }
  1502. bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
  1503. return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
  1504. !canSynthesize(Inst, *scop, &SE, L);
  1505. }
  1506. /// Generate a name for a statement.
  1507. ///
  1508. /// @param BB The basic block the statement will represent.
  1509. /// @param BBIdx The index of the @p BB relative to other BBs/regions.
  1510. /// @param Count The index of the created statement in @p BB.
/// @param IsMain Whether this is the main statement for @p BB. If true,
  1512. /// no suffix will be added.
  1513. /// @param IsLast Uses a special indicator for the last statement of a BB.
  1514. static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
  1515. bool IsMain, bool IsLast = false) {
  1516. std::string Suffix;
  1517. if (!IsMain) {
  1518. if (UseInstructionNames)
  1519. Suffix = '_';
  1520. if (IsLast)
  1521. Suffix += "last";
  1522. else if (Count < 26)
  1523. Suffix += 'a' + Count;
  1524. else
  1525. Suffix += std::to_string(Count);
  1526. }
  1527. return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
  1528. }
  1529. /// Generate a name for a statement that represents a non-affine subregion.
  1530. ///
  1531. /// @param R The region the statement will represent.
  1532. /// @param RIdx The index of the @p R relative to other BBs/regions.
  1533. static std::string makeStmtName(Region *R, long RIdx) {
  1534. return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
  1535. UseInstructionNames);
  1536. }
  1537. void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
  1538. Loop *SurroundingLoop = LI.getLoopFor(BB);
  1539. int Count = 0;
  1540. long BBIdx = scop->getNextStmtIdx();
  1541. std::vector<Instruction *> Instructions;
  1542. for (Instruction &Inst : *BB) {
  1543. if (shouldModelInst(&Inst, SurroundingLoop))
  1544. Instructions.push_back(&Inst);
  1545. if (Inst.getMetadata("polly_split_after") ||
  1546. (SplitOnStore && isa<StoreInst>(Inst))) {
  1547. std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
  1548. scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
  1549. Count++;
  1550. Instructions.clear();
  1551. }
  1552. }
  1553. std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
  1554. scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
  1555. }
  1556. /// Is @p Inst an ordered instruction?
  1557. ///
  1558. /// An unordered instruction is an instruction, such that a sequence of
  1559. /// unordered instructions can be permuted without changing semantics. Any
  1560. /// instruction for which this is not always the case is ordered.
  1561. static bool isOrderedInstruction(Instruction *Inst) {
  1562. return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
  1563. }
  1564. /// Join instructions to the same statement if one uses the scalar result of the
  1565. /// other.
  1566. static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
  1567. ArrayRef<Instruction *> ModeledInsts) {
  1568. for (Instruction *Inst : ModeledInsts) {
  1569. if (isa<PHINode>(Inst))
  1570. continue;
  1571. for (Use &Op : Inst->operands()) {
  1572. Instruction *OpInst = dyn_cast<Instruction>(Op.get());
  1573. if (!OpInst)
  1574. continue;
  1575. // Check if OpInst is in the BB and is a modeled instruction.
  1576. auto OpVal = UnionFind.findValue(OpInst);
  1577. if (OpVal == UnionFind.end())
  1578. continue;
  1579. UnionFind.unionSets(Inst, OpInst);
  1580. }
  1581. }
  1582. }
  1583. /// Ensure that the order of ordered instructions does not change.
  1584. ///
  1585. /// If we encounter an ordered instruction enclosed in instructions belonging to
  1586. /// a different statement (which might as well contain ordered instructions, but
  1587. /// this is not tested here), join them.
  1588. static void
  1589. joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
  1590. ArrayRef<Instruction *> ModeledInsts) {
  1591. SetVector<Instruction *> SeenLeaders;
  1592. for (Instruction *Inst : ModeledInsts) {
  1593. if (!isOrderedInstruction(Inst))
  1594. continue;
  1595. Instruction *Leader = UnionFind.getLeaderValue(Inst);
  1596. // Since previous iterations might have merged sets, some items in
// SeenLeaders are not leaders anymore. However, the new leader of
  1598. // previously merged instructions must be one of the former leaders of
  1599. // these merged instructions.
  1600. bool Inserted = SeenLeaders.insert(Leader);
  1601. if (Inserted)
  1602. continue;
  1603. // Merge statements to close holes. Say, we have already seen statements A
  1604. // and B, in this order. Then we see an instruction of A again and we would
// see the pattern "A B A". This loop joins all statements that were seen
// after the earlier occurrence of A into A.
  1607. for (Instruction *Prev : reverse(SeenLeaders)) {
  1608. // We are backtracking from the last element until we see Inst's leader
  1609. // in SeenLeaders and merge all into one set. Although leaders of
  1610. // instructions change during the execution of this loop, it's irrelevant
  1611. // as we are just searching for the element that we already confirmed is
  1612. // in the list.
  1613. if (Prev == Leader)
  1614. break;
  1615. UnionFind.unionSets(Prev, Leader);
  1616. }
  1617. }
  1618. }
  1619. /// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
  1620. /// the incoming values from this block are executed after the PHI READ.
  1621. ///
  1622. /// Otherwise it could overwrite the incoming value from before the BB with the
  1623. /// value for the next execution. This can happen if the PHI WRITE is added to
  1624. /// the statement with the instruction that defines the incoming value (instead
  1625. /// of the last statement of the same BB). To ensure that the PHI READ and WRITE
  1626. /// are in order, we put both into the statement. PHI WRITEs are always executed
  1627. /// after PHI READs when they are in the same statement.
  1628. ///
  1629. /// TODO: This is an overpessimization. We only have to ensure that the PHI
  1630. /// WRITE is not put into a statement containing the PHI itself. That could also
  1631. /// be done by
  1632. /// - having all (strongly connected) PHIs in a single statement,
  1633. /// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
  1634. /// has a chance of being lifted before a PHI by being in a statement with a
  1635. /// PHI that comes before in the basic block), or
  1636. /// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
  1637. static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
  1638. ArrayRef<Instruction *> ModeledInsts) {
  1639. for (Instruction *Inst : ModeledInsts) {
  1640. PHINode *PHI = dyn_cast<PHINode>(Inst);
  1641. if (!PHI)
  1642. continue;
  1643. int Idx = PHI->getBasicBlockIndex(PHI->getParent());
  1644. if (Idx < 0)
  1645. continue;
  1646. Instruction *IncomingVal =
  1647. dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
  1648. if (!IncomingVal)
  1649. continue;
  1650. UnionFind.unionSets(PHI, IncomingVal);
  1651. }
  1652. }
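// Split a basic block into statements based on the equivalence classes
// computed by joinOperandTree, joinOrderedInstructions and joinOrderedPHIs,
// preserving the order of ordered instructions. An epilogue statement for PHI
// writes is always appended.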
  1653. void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
  1654. Loop *L = LI.getLoopFor(BB);
  1655. // Extracting out modeled instructions saves us from checking
  1656. // shouldModelInst() repeatedly.
  1657. SmallVector<Instruction *, 32> ModeledInsts;
  1658. EquivalenceClasses<Instruction *> UnionFind;
  1659. Instruction *MainInst = nullptr, *MainLeader = nullptr;
  1660. for (Instruction &Inst : *BB) {
  1661. if (!shouldModelInst(&Inst, L))
  1662. continue;
  1663. ModeledInsts.push_back(&Inst);
  1664. UnionFind.insert(&Inst);
  1665. // When a BB is split into multiple statements, the main statement is the
  1666. // one containing the 'main' instruction. We select the first instruction
  1667. // that is unlikely to be removed (because it has side-effects) as the main
  1668. // one. It is used to ensure that at least one statement from the bb has the
  1669. // same name as with -polly-stmt-granularity=bb.
  1670. if (!MainInst && (isa<StoreInst>(Inst) ||
  1671. (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
  1672. MainInst = &Inst;
  1673. }
  1674. joinOperandTree(UnionFind, ModeledInsts);
  1675. joinOrderedInstructions(UnionFind, ModeledInsts);
  1676. joinOrderedPHIs(UnionFind, ModeledInsts);
// The list of instructions per statement (each statement is represented by
// its leader instruction).
  1679. MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
  1680. // The order of statements must be preserved w.r.t. their ordered
  1681. // instructions. Without this explicit scan, we would also use non-ordered
  1682. // instructions (whose order is arbitrary) to determine statement order.
  1683. for (Instruction *Inst : ModeledInsts) {
  1684. if (!isOrderedInstruction(Inst))
  1685. continue;
  1686. auto LeaderIt = UnionFind.findLeader(Inst);
  1687. if (LeaderIt == UnionFind.member_end())
  1688. continue;
  1689. // Insert element for the leader instruction.
  1690. (void)LeaderToInstList[*LeaderIt];
  1691. }
// Collect the instructions of all leaders. UnionFind's member iterators
// unfortunately are not in any specific order.
  1694. for (Instruction *Inst : ModeledInsts) {
  1695. auto LeaderIt = UnionFind.findLeader(Inst);
  1696. if (LeaderIt == UnionFind.member_end())
  1697. continue;
  1698. if (Inst == MainInst)
  1699. MainLeader = *LeaderIt;
  1700. std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
  1701. InstList.push_back(Inst);
  1702. }
  1703. // Finally build the statements.
  1704. int Count = 0;
  1705. long BBIdx = scop->getNextStmtIdx();
  1706. for (auto &Instructions : LeaderToInstList) {
  1707. std::vector<Instruction *> &InstList = Instructions.second;
  1708. // If there is no main instruction, make the first statement the main.
  1709. bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);
  1710. std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
  1711. scop->addScopStmt(BB, Name, L, std::move(InstList));
  1712. Count += 1;
  1713. }
  1714. // Unconditionally add an epilogue (last statement). It contains no
  1715. // instructions, but holds the PHI write accesses for successor basic blocks,
// if the incoming value is not defined in another statement of the same BB.
  1717. // The epilogue becomes the main statement only if there is no other
  1718. // statement that could become main.
  1719. // The epilogue will be removed if no PHIWrite is added to it.
  1720. std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true);
  1721. scop->addScopStmt(BB, EpilogueName, L, {});
  1722. }
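// Create the ScopStmts for @p SR: one statement per non-affine subregion, and
// per-block statements according to the chosen statement granularity.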
  1723. void ScopBuilder::buildStmts(Region &SR) {
  1724. if (scop->isNonAffineSubRegion(&SR)) {
  1725. std::vector<Instruction *> Instructions;
  1726. Loop *SurroundingLoop =
  1727. getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
  1728. for (Instruction &Inst : *SR.getEntry())
  1729. if (shouldModelInst(&Inst, SurroundingLoop))
  1730. Instructions.push_back(&Inst);
  1731. long RIdx = scop->getNextStmtIdx();
  1732. std::string Name = makeStmtName(&SR, RIdx);
  1733. scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
  1734. return;
  1735. }
  1736. for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
  1737. if (I->isSubRegion())
  1738. buildStmts(*I->getNodeAs<Region>());
  1739. else {
  1740. BasicBlock *BB = I->getNodeAs<BasicBlock>();
  1741. switch (StmtGranularity) {
  1742. case GranularityChoice::BasicBlocks:
  1743. buildSequentialBlockStmts(BB);
  1744. break;
  1745. case GranularityChoice::ScalarIndependence:
  1746. buildEqivClassBlockStmts(BB);
  1747. break;
  1748. case GranularityChoice::Stores:
  1749. buildSequentialBlockStmts(BB, true);
  1750. break;
  1751. }
  1752. }
  1753. }
  1754. void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
  1755. Region *NonAffineSubRegion) {
  1756. assert(
  1757. Stmt &&
  1758. "The exit BB is the only one that cannot be represented by a statement");
  1759. assert(Stmt->represents(&BB));
  1760. // We do not build access functions for error blocks, as they may contain
// instructions we cannot model.
  1762. if (SD.isErrorBlock(BB, scop->getRegion()))
  1763. return;
  1764. auto BuildAccessesForInst = [this, Stmt,
  1765. NonAffineSubRegion](Instruction *Inst) {
  1766. PHINode *PHI = dyn_cast<PHINode>(Inst);
  1767. if (PHI)
  1768. buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);
  1769. if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
  1770. assert(Stmt && "Cannot build access function in non-existing statement");
  1771. buildMemoryAccess(MemInst, Stmt);
  1772. }
  1773. // PHI nodes have already been modeled above and terminators that are
  1774. // not part of a non-affine subregion are fully modeled and regenerated
  1775. // from the polyhedral domains. Hence, they do not need to be modeled as
  1776. // explicit data dependences.
  1777. if (!PHI)
  1778. buildScalarDependences(Stmt, Inst);
  1779. };
  1780. const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  1781. bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
  1782. if (IsEntryBlock) {
  1783. for (Instruction *Inst : Stmt->getInstructions())
  1784. BuildAccessesForInst(Inst);
  1785. if (Stmt->isRegionStmt())
  1786. BuildAccessesForInst(BB.getTerminator());
  1787. } else {
  1788. for (Instruction &Inst : BB) {
  1789. if (isIgnoredIntrinsic(&Inst))
  1790. continue;
  1791. // Invariant loads already have been processed.
  1792. if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
  1793. continue;
  1794. BuildAccessesForInst(&Inst);
  1795. }
  1796. }
  1797. }
  1798. MemoryAccess *ScopBuilder::addMemoryAccess(
  1799. ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
  1800. Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
  1801. ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
  1802. MemoryKind Kind) {
  1803. bool isKnownMustAccess = false;
  1804. // Accesses in single-basic block statements are always executed.
  1805. if (Stmt->isBlockStmt())
  1806. isKnownMustAccess = true;
  1807. if (Stmt->isRegionStmt()) {
  1808. // Accesses that dominate the exit block of a non-affine region are always
  1809. // executed. In non-affine regions there may exist MemoryKind::Values that
  1810. // do not dominate the exit. MemoryKind::Values will always dominate the
  1811. // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
  1812. // non-affine region.
  1813. if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
  1814. isKnownMustAccess = true;
  1815. }
  1816. // Non-affine PHI writes do not "happen" at a particular instruction, but
  1817. // after exiting the statement. Therefore they are guaranteed to execute and
  1818. // overwrite the old value.
  1819. if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
  1820. isKnownMustAccess = true;
  1821. if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
  1822. AccType = MemoryAccess::MAY_WRITE;
  1823. auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
  1824. Affine, Subscripts, Sizes, AccessValue, Kind);
  1825. scop->addAccessFunction(Access);
  1826. Stmt->addAccess(Access);
  1827. return Access;
  1828. }
  1829. void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
  1830. MemoryAccess::AccessType AccType,
  1831. Value *BaseAddress, Type *ElementType,
  1832. bool IsAffine,
  1833. ArrayRef<const SCEV *> Subscripts,
  1834. ArrayRef<const SCEV *> Sizes,
  1835. Value *AccessValue) {
  1836. ArrayBasePointers.insert(BaseAddress);
  1837. addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine,
  1838. AccessValue, Subscripts, Sizes, MemoryKind::Array);
  1839. }
  1840. /// Check if @p Expr is divisible by @p Size.
  1841. static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
  1842. assert(Size != 0);
  1843. if (Size == 1)
  1844. return true;
  1845. // Only one factor needs to be divisible.
  1846. if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
  1847. for (auto *FactorExpr : MulExpr->operands())
  1848. if (isDivisible(FactorExpr, Size, SE))
  1849. return true;
  1850. return false;
  1851. }
  1852. // For other n-ary expressions (Add, AddRec, Max,...) all operands need
  1853. // to be divisible.
  1854. if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
  1855. for (auto *OpExpr : NAryExpr->operands())
  1856. if (!isDivisible(OpExpr, Size, SE))
  1857. return false;
  1858. return true;
  1859. }
  1860. auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
  1861. auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
  1862. auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
  1863. return MulSCEV == Expr;
  1864. }
  1865. void ScopBuilder::foldSizeConstantsToRight() {
  1866. isl::union_set Accessed = scop->getAccesses().range();
  1867. for (auto Array : scop->arrays()) {
  1868. if (Array->getNumberOfDimensions() <= 1)
  1869. continue;
  1870. isl::space Space = Array->getSpace();
  1871. Space = Space.align_params(Accessed.get_space());
  1872. if (!Accessed.contains(Space))
  1873. continue;
  1874. isl::set Elements = Accessed.extract_set(Space);
  1875. isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());
  1876. std::vector<int> Int;
  1877. unsigned Dims = unsignedFromIslSize(Elements.tuple_dim());
  1878. for (unsigned i = 0; i < Dims; i++) {
  1879. isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
  1880. DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
  1881. DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);
  1882. isl::basic_set DimHull = DimOnly.affine_hull();
  1883. if (i == Dims - 1) {
  1884. Int.push_back(1);
  1885. Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
  1886. continue;
  1887. }
  1888. if (unsignedFromIslSize(DimHull.dim(isl::dim::div)) == 1) {
  1889. isl::aff Diff = DimHull.get_div(0);
  1890. isl::val Val = Diff.get_denominator_val();
  1891. int ValInt = 1;
  1892. if (Val.is_int()) {
  1893. auto ValAPInt = APIntFromVal(Val);
  1894. if (ValAPInt.isSignedIntN(32))
  1895. ValInt = ValAPInt.getSExtValue();
  1896. } else {
  1897. }
  1898. Int.push_back(ValInt);
  1899. isl::constraint C = isl::constraint::alloc_equality(
  1900. isl::local_space(Transform.get_space()));
  1901. C = C.set_coefficient_si(isl::dim::out, i, ValInt);
  1902. C = C.set_coefficient_si(isl::dim::in, i, -1);
  1903. Transform = Transform.add_constraint(C);
  1904. continue;
  1905. }
  1906. isl::basic_set ZeroSet = isl::basic_set(DimHull);
  1907. ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);
  1908. int ValInt = 1;
  1909. if (ZeroSet.is_equal(DimHull)) {
  1910. ValInt = 0;
  1911. }
  1912. Int.push_back(ValInt);
  1913. Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
  1914. }
  1915. isl::set MappedElements = isl::map(Transform).domain();
  1916. if (!Elements.is_subset(MappedElements))
  1917. continue;
  1918. bool CanFold = true;
  1919. if (Int[0] <= 1)
  1920. CanFold = false;
  1921. unsigned NumDims = Array->getNumberOfDimensions();
  1922. for (unsigned i = 1; i < NumDims - 1; i++)
  1923. if (Int[0] != Int[i] && Int[i])
  1924. CanFold = false;
  1925. if (!CanFold)
  1926. continue;
  1927. for (auto &Access : scop->access_functions())
  1928. if (Access->getScopArrayInfo() == Array)
  1929. Access->setAccessRelation(
  1930. Access->getAccessRelation().apply_range(Transform));
  1931. std::vector<const SCEV *> Sizes;
  1932. for (unsigned i = 0; i < NumDims; i++) {
  1933. auto Size = Array->getDimensionSize(i);
  1934. if (i == NumDims - 1)
  1935. Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0]));
  1936. Sizes.push_back(Size);
  1937. }
  1938. Array->updateSizes(Sizes, false /* CheckConsistency */);
  1939. }
  1940. }
  1941. void ScopBuilder::finalizeAccesses() {
  1942. updateAccessDimensionality();
  1943. foldSizeConstantsToRight();
  1944. foldAccessRelations();
  1945. assumeNoOutOfBounds();
  1946. }
  1947. void ScopBuilder::updateAccessDimensionality() {
  1948. // Check all array accesses for each base pointer and find a (virtual) element
  1949. // size for the base pointer that divides all access functions.
  1950. for (ScopStmt &Stmt : *scop)
  1951. for (MemoryAccess *Access : Stmt) {
  1952. if (!Access->isArrayKind())
  1953. continue;
  1954. ScopArrayInfo *Array =
  1955. const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
  1956. if (Array->getNumberOfDimensions() != 1)
  1957. continue;
  1958. unsigned DivisibleSize = Array->getElemSizeInBytes();
  1959. const SCEV *Subscript = Access->getSubscript(0);
  1960. while (!isDivisible(Subscript, DivisibleSize, SE))
  1961. DivisibleSize /= 2;
  1962. auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8);
  1963. Array->updateElementType(Ty);
  1964. }
  1965. for (auto &Stmt : *scop)
  1966. for (auto &Access : Stmt)
  1967. Access->updateDimensionality();
  1968. }
  1969. void ScopBuilder::foldAccessRelations() {
  1970. for (auto &Stmt : *scop)
  1971. for (auto &Access : Stmt)
  1972. Access->foldAccessRelation();
  1973. }
  1974. void ScopBuilder::assumeNoOutOfBounds() {
  1975. if (PollyIgnoreInbounds)
  1976. return;
  1977. for (auto &Stmt : *scop)
  1978. for (auto &Access : Stmt) {
  1979. isl::set Outside = Access->assumeNoOutOfBound();
  1980. const auto &Loc = Access->getAccessInstruction()
  1981. ? Access->getAccessInstruction()->getDebugLoc()
  1982. : DebugLoc();
  1983. recordAssumption(&RecordedAssumptions, INBOUNDS, Outside, Loc,
  1984. AS_ASSUMPTION);
  1985. }
  1986. }
  1987. void ScopBuilder::ensureValueWrite(Instruction *Inst) {
  1988. // Find the statement that defines the value of Inst. That statement has to
  1989. // write the value to make it available to those statements that read it.
  1990. ScopStmt *Stmt = scop->getStmtFor(Inst);
  1991. // It is possible that the value is synthesizable within a loop (such that it
  1992. // is not part of any statement), but not after the loop (where you need the
  1993. // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
  1994. // avoid this. In case the IR has no such PHI, use the last statement (where
  1995. // the value is synthesizable) to write the value.
  1996. if (!Stmt)
  1997. Stmt = scop->getLastStmtFor(Inst->getParent());
  1998. // Inst not defined within this SCoP.
  1999. if (!Stmt)
  2000. return;
  2001. // Do not process further if the instruction is already written.
  2002. if (Stmt->lookupValueWriteOf(Inst))
  2003. return;
  2004. addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
  2005. true, Inst, ArrayRef<const SCEV *>(),
  2006. ArrayRef<const SCEV *>(), MemoryKind::Value);
  2007. }
  2008. void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
  2009. // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
  2010. // to be able to replace this one. Currently, there is a split responsibility.
  2011. // In a first step, the MemoryAccess is created, but without the
  2012. // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
  2013. // AccessRelation is created. At least for scalar accesses, there is no new
  2014. // information available at ScopStmt::buildAccessRelations(), so we could
  2015. // create the AccessRelation right away. This is what
  2016. // ScopStmt::ensureValueRead(Value*) does.
  2017. auto *Scope = UserStmt->getSurroundingLoop();
  2018. auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
  2019. switch (VUse.getKind()) {
  2020. case VirtualUse::Constant:
  2021. case VirtualUse::Block:
  2022. case VirtualUse::Synthesizable:
  2023. case VirtualUse::Hoisted:
  2024. case VirtualUse::Intra:
  2025. // Uses of these kinds do not need a MemoryAccess.
  2026. break;
  2027. case VirtualUse::ReadOnly:
  2028. // Add MemoryAccess for invariant values only if requested.
  2029. if (!ModelReadOnlyScalars)
  2030. break;
  2031. LLVM_FALLTHROUGH;
  2032. case VirtualUse::Inter:
  2033. // Do not create another MemoryAccess for reloading the value if one already
  2034. // exists.
  2035. if (UserStmt->lookupValueReadOf(V))
  2036. break;
  2037. addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
  2038. true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
  2039. MemoryKind::Value);
  2040. // Inter-statement uses need to write the value in their defining statement.
  2041. if (VUse.isInter())
  2042. ensureValueWrite(cast<Instruction>(V));
  2043. break;
  2044. }
  2045. }
  2046. void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
  2047. BasicBlock *IncomingBlock,
  2048. Value *IncomingValue, bool IsExitBlock) {
  2049. // As the incoming block might turn out to be an error statement ensure we
  2050. // will create an exit PHI SAI object. It is needed during code generation
  2051. // and would be created later anyway.
  2052. if (IsExitBlock)
  2053. scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
  2054. MemoryKind::ExitPHI);
  2055. // This is possible if PHI is in the SCoP's entry block. The incoming blocks
  2056. // from outside the SCoP's region have no statement representation.
  2057. if (!IncomingStmt)
  2058. return;
// Make sure the incoming value is available in the incoming block. This must
// be done before the check for multiple PHI writes because each of the
// multiple exiting edges from a subregion can be the effective written value
// of the subregion. As such, all of them must be made available in the
// subregion statement.
  2064. ensureValueRead(IncomingValue, IncomingStmt);
  2065. // Do not add more than one MemoryAccess per PHINode and ScopStmt.
  2066. if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
  2067. assert(Acc->getAccessInstruction() == PHI);
  2068. Acc->addIncoming(IncomingBlock, IncomingValue);
  2069. return;
  2070. }
  2071. MemoryAccess *Acc = addMemoryAccess(
  2072. IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
  2073. PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
  2074. IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
  2075. assert(Acc);
  2076. Acc->addIncoming(IncomingBlock, IncomingValue);
  2077. }
  2078. void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
  2079. addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
  2080. PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
  2081. MemoryKind::PHI);
  2082. }
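// Assign the statement its domain, i.e., the set of iterations for which it
// is executed, and tag the domain with an isl id referring to the statement.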
  2083. void ScopBuilder::buildDomain(ScopStmt &Stmt) {
  2084. isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
  2085. Stmt.Domain = scop->getDomainConditions(&Stmt);
  2086. Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
  2087. }
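// Record the loops inside the SCoP that surround the statement, outermost
// first; loops fully contained in a region statement are skipped.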
  2088. void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
  2089. isl::set Domain = Stmt.getDomain();
  2090. BasicBlock *BB = Stmt.getEntryBlock();
  2091. Loop *L = LI.getLoopFor(BB);
  2092. while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
  2093. L = L->getParentLoop();
  2094. SmallVector<llvm::Loop *, 8> Loops;
  2095. while (L && Stmt.getParent()->getRegion().contains(L)) {
  2096. Loops.push_back(L);
  2097. L = L->getParentLoop();
  2098. }
  2099. Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
  2100. }
  2101. /// Return the reduction type for a given binary operator.
  2102. static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
  2103. const Instruction *Load) {
  2104. if (!BinOp)
  2105. return MemoryAccess::RT_NONE;
  2106. switch (BinOp->getOpcode()) {
  2107. case Instruction::FAdd:
  2108. if (!BinOp->isFast())
  2109. return MemoryAccess::RT_NONE;
  2110. LLVM_FALLTHROUGH;
  2111. case Instruction::Add:
  2112. return MemoryAccess::RT_ADD;
  2113. case Instruction::Or:
  2114. return MemoryAccess::RT_BOR;
  2115. case Instruction::Xor:
  2116. return MemoryAccess::RT_BXOR;
  2117. case Instruction::And:
  2118. return MemoryAccess::RT_BAND;
  2119. case Instruction::FMul:
  2120. if (!BinOp->isFast())
  2121. return MemoryAccess::RT_NONE;
  2122. LLVM_FALLTHROUGH;
  2123. case Instruction::Mul:
  2124. if (DisableMultiplicativeReductions)
  2125. return MemoryAccess::RT_NONE;
  2126. return MemoryAccess::RT_MUL;
  2127. default:
  2128. return MemoryAccess::RT_NONE;
  2129. }
  2130. }
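// For illustration (an assumed input shape, not code from this file): a
// statement generated from
//
//   for (int i = 0; i < N; i++)
//     sum[0] = sum[0] + A[i];
//
// loads sum[0], feeds it into a single commutative and associative binary
// operator, and stores the result back to sum[0]. checkForReductions pairs
// such loads and stores and, if no other access in the statement may overlap
// their location, marks both accesses as reduction-like.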
  2131. void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
  2132. SmallVector<MemoryAccess *, 2> Loads;
  2133. SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
  2134. // First collect candidate load-store reduction chains by iterating over all
  2135. // stores and collecting possible reduction loads.
  2136. for (MemoryAccess *StoreMA : Stmt) {
  2137. if (StoreMA->isRead())
  2138. continue;
  2139. Loads.clear();
  2140. collectCandidateReductionLoads(StoreMA, Loads);
  2141. for (MemoryAccess *LoadMA : Loads)
  2142. Candidates.push_back(std::make_pair(LoadMA, StoreMA));
  2143. }
  2144. // Then check each possible candidate pair.
  2145. for (const auto &CandidatePair : Candidates) {
  2146. bool Valid = true;
  2147. isl::map LoadAccs = CandidatePair.first->getAccessRelation();
  2148. isl::map StoreAccs = CandidatePair.second->getAccessRelation();
  2149. // Skip those with obviously unequal base addresses.
  2150. if (!LoadAccs.has_equal_space(StoreAccs)) {
  2151. continue;
  2152. }
// And check whether the remaining accesses overlap with other memory accesses.
  2154. isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
  2155. AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
  2156. isl::set AllAccs = AllAccsRel.range();
  2157. for (MemoryAccess *MA : Stmt) {
  2158. if (MA == CandidatePair.first || MA == CandidatePair.second)
  2159. continue;
  2160. isl::map AccRel =
  2161. MA->getAccessRelation().intersect_domain(Stmt.getDomain());
  2162. isl::set Accs = AccRel.range();
  2163. if (AllAccs.has_equal_space(Accs)) {
  2164. isl::set OverlapAccs = Accs.intersect(AllAccs);
  2165. Valid = Valid && OverlapAccs.is_empty();
  2166. }
  2167. }
  2168. if (!Valid)
  2169. continue;
  2170. const LoadInst *Load =
  2171. dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
  2172. MemoryAccess::ReductionType RT =
  2173. getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
// If no overlapping access was found we mark the load and the store as
// reduction-like.
  2176. CandidatePair.first->markAsReductionLike(RT);
  2177. CandidatePair.second->markAsReductionLike(RT);
  2178. }
  2179. }
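/// Verify that all required invariant loads have actually been hoisted: if any
/// statement still carries an array access for such a load, the SCoP is
/// invalidated.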
  2180. void ScopBuilder::verifyInvariantLoads() {
  2181. auto &RIL = scop->getRequiredInvariantLoads();
  2182. for (LoadInst *LI : RIL) {
  2183. assert(LI && scop->contains(LI));
  2184. // If there exists a statement in the scop which has a memory access for
  2185. // @p LI, then mark this scop as infeasible for optimization.
  2186. for (ScopStmt &Stmt : *scop)
  2187. if (Stmt.getArrayAccessOrNULLFor(LI)) {
  2188. scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
  2189. return;
  2190. }
  2191. }
  2192. }
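/// Hoist invariant loads, if enabled: for each access of every statement,
/// compute the context under which it cannot be hoisted (getNonHoistableCtx);
/// accesses for which such a context exists are detached from their statement
/// and registered as SCoP-level invariant loads via addInvariantLoads.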
  2193. void ScopBuilder::hoistInvariantLoads() {
  2194. if (!PollyInvariantLoadHoisting)
  2195. return;
  2196. isl::union_map Writes = scop->getWrites();
  2197. for (ScopStmt &Stmt : *scop) {
  2198. InvariantAccessesTy InvariantAccesses;
  2199. for (MemoryAccess *Access : Stmt) {
  2200. isl::set NHCtx = getNonHoistableCtx(Access, Writes);
  2201. if (!NHCtx.is_null())
  2202. InvariantAccesses.push_back({Access, NHCtx});
  2203. }
// Transfer the invariant memory accesses from the statement to the SCoP.
  2205. for (auto InvMA : InvariantAccesses)
  2206. Stmt.removeMemoryAccess(InvMA.MA);
  2207. addInvariantLoads(Stmt, InvariantAccesses);
  2208. }
  2209. }
  2210. /// Check if an access range is too complex.
  2211. ///
/// An access range is too complex if it contains either many disjuncts or
/// very complex expressions. As a simple heuristic, we consider a set to
/// be too complex if the sum of existentially quantified dimensions and
/// set dimensions is larger than a threshold. This reliably detects both
/// sets with many disjuncts as well as sets with many divisions as they
/// arise in h264.
  2218. ///
  2219. /// @param AccessRange The range to check for complexity.
  2220. ///
  2221. /// @returns True if the access range is too complex.
  2222. static bool isAccessRangeTooComplex(isl::set AccessRange) {
  2223. unsigned NumTotalDims = 0;
  2224. for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
  2225. NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::div));
  2226. NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::set));
  2227. }
  2228. if (NumTotalDims > MaxDimensionsInAccessRange)
  2229. return true;
  2230. return false;
  2231. }
  2232. bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
  2233. isl::union_map Writes) {
  2234. if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) {
  2235. return getNonHoistableCtx(BasePtrMA, Writes).is_null();
  2236. }
  2237. Value *BaseAddr = MA->getOriginalBaseAddr();
  2238. if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
  2239. if (!isa<LoadInst>(BasePtrInst))
  2240. return scop->contains(BasePtrInst);
  2241. return false;
  2242. }
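/// Intersect the computed context with a context provided by the user via
/// -polly-context. As an illustration only (the concrete value is up to the
/// user), a string such as
///
///   -polly-context='[n] -> { : n >= 16 }'
///
/// is parsed as an isl set; it must use exactly the parameters of the computed
/// context (same number and same names), otherwise it is ignored with the
/// diagnostics emitted below.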
  2243. void ScopBuilder::addUserContext() {
  2244. if (UserContextStr.empty())
  2245. return;
  2246. isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
  2247. isl::space Space = scop->getParamSpace();
  2248. isl::size SpaceParams = Space.dim(isl::dim::param);
  2249. if (unsignedFromIslSize(SpaceParams) !=
  2250. unsignedFromIslSize(UserContext.dim(isl::dim::param))) {
  2251. std::string SpaceStr = stringFromIslObj(Space, "null");
  2252. errs() << "Error: the context provided in -polly-context has not the same "
  2253. << "number of dimensions than the computed context. Due to this "
  2254. << "mismatch, the -polly-context option is ignored. Please provide "
  2255. << "the context in the parameter space: " << SpaceStr << ".\n";
  2256. return;
  2257. }
  2258. for (auto i : rangeIslSize(0, SpaceParams)) {
  2259. std::string NameContext =
  2260. scop->getContext().get_dim_name(isl::dim::param, i);
  2261. std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);
  2262. if (NameContext != NameUserContext) {
  2263. std::string SpaceStr = stringFromIslObj(Space, "null");
  2264. errs() << "Error: the name of dimension " << i
  2265. << " provided in -polly-context "
  2266. << "is '" << NameUserContext << "', but the name in the computed "
  2267. << "context is '" << NameContext
  2268. << "'. Due to this name mismatch, "
  2269. << "the -polly-context option is ignored. Please provide "
  2270. << "the context in the parameter space: " << SpaceStr << ".\n";
  2271. return;
  2272. }
  2273. UserContext = UserContext.set_dim_id(isl::dim::param, i,
  2274. Space.get_dim_id(isl::dim::param, i));
  2275. }
  2276. isl::set newContext = scop->getContext().intersect(UserContext);
  2277. scop->setContext(newContext);
  2278. }
  2279. isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
  2280. isl::union_map Writes) {
  2281. // TODO: Loads that are not loop carried, hence are in a statement with
  2282. // zero iterators, are by construction invariant, though we
  2283. // currently "hoist" them anyway. This is necessary because we allow
  2284. // them to be treated as parameters (e.g., in conditions) and our code
  2285. // generation would otherwise use the old value.
  2286. auto &Stmt = *Access->getStatement();
  2287. BasicBlock *BB = Stmt.getEntryBlock();
  2288. if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
  2289. Access->isMemoryIntrinsic())
  2290. return {};
// Skip accesses that have an invariant base pointer which is defined but
// not loaded inside the SCoP. This can happen, e.g., if a readnone call
// returns a pointer that is used as a base address. However, as we want
// to hoist indirect pointers, we allow the base pointer to be defined in
// the region if it is also a memory access. Each ScopArrayInfo object
// that has a base pointer origin has a base pointer that is loaded and
// that is invariant; thus it will be hoisted too. However, if there is
// no base pointer origin we check that the base pointer is defined
// outside the region.
  2300. auto *LI = cast<LoadInst>(Access->getAccessInstruction());
  2301. if (hasNonHoistableBasePtrInScop(Access, Writes))
  2302. return {};
  2303. isl::map AccessRelation = Access->getAccessRelation();
  2304. assert(!AccessRelation.is_empty());
  2305. if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
  2306. return {};
  2307. AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
  2308. isl::set SafeToLoad;
  2309. auto &DL = scop->getFunction().getParent()->getDataLayout();
  2310. if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(),
  2311. LI->getAlign(), DL)) {
  2312. SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
  2313. } else if (BB != LI->getParent()) {
  2314. // Skip accesses in non-affine subregions as they might not be executed
  2315. // under the same condition as the entry of the non-affine subregion.
  2316. return {};
  2317. } else {
  2318. SafeToLoad = AccessRelation.range();
  2319. }
  2320. if (isAccessRangeTooComplex(AccessRelation.range()))
  2321. return {};
  2322. isl::union_map Written = Writes.intersect_range(SafeToLoad);
  2323. isl::set WrittenCtx = Written.params();
  2324. bool IsWritten = !WrittenCtx.is_empty();
  2325. if (!IsWritten)
  2326. return WrittenCtx;
  2327. WrittenCtx = WrittenCtx.remove_divs();
  2328. bool TooComplex =
  2329. unsignedFromIslSize(WrittenCtx.n_basic_set()) >= MaxDisjunctsInDomain;
  2330. if (TooComplex || !isRequiredInvariantLoad(LI))
  2331. return {};
  2332. scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(),
  2333. AS_RESTRICTION, LI->getParent());
  2334. return WrittenCtx;
  2335. }
  2336. static bool isAParameter(llvm::Value *maybeParam, const Function &F) {
  2337. for (const llvm::Argument &Arg : F.args())
  2338. if (&Arg == maybeParam)
  2339. return true;
  2340. return false;
  2341. }
  2342. bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA,
  2343. bool StmtInvalidCtxIsEmpty,
  2344. bool MAInvalidCtxIsEmpty,
  2345. bool NonHoistableCtxIsEmpty) {
  2346. LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
  2347. const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
  2348. if (PollyAllowDereferenceOfAllFunctionParams &&
  2349. isAParameter(LInst->getPointerOperand(), scop->getFunction()))
  2350. return true;
  2351. // TODO: We can provide more information for better but more expensive
  2352. // results.
  2353. if (!isDereferenceableAndAlignedPointer(
  2354. LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(), DL))
  2355. return false;
  2356. // If the location might be overwritten we do not hoist it unconditionally.
  2357. //
  2358. // TODO: This is probably too conservative.
  2359. if (!NonHoistableCtxIsEmpty)
  2360. return false;
  2361. // If a dereferenceable load is in a statement that is modeled precisely we
  2362. // can hoist it.
  2363. if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
  2364. return true;
  2365. // Even if the statement is not modeled precisely we can hoist the load if it
  2366. // does not involve any parameters that might have been specialized by the
  2367. // statement domain.
  2368. for (const SCEV *Subscript : MA->subscripts())
  2369. if (!isa<SCEVConstant>(Subscript))
  2370. return false;
  2371. return true;
  2372. }
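/// Add the hoisted accesses in @p InvMAs to the SCoP's invariant equivalence
/// classes. For each access we compute the execution context under which the
/// hoisted load is used, and we consolidate accesses to the same location
/// (same pointer SCEV, type, and access range) into one class.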
  2373. void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
  2374. InvariantAccessesTy &InvMAs) {
  2375. if (InvMAs.empty())
  2376. return;
  2377. isl::set StmtInvalidCtx = Stmt.getInvalidContext();
  2378. bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
  2379. // Get the context under which the statement is executed but remove the error
  2380. // context under which this statement is reached.
  2381. isl::set DomainCtx = Stmt.getDomain().params();
  2382. DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
  2383. if (unsignedFromIslSize(DomainCtx.n_basic_set()) >= MaxDisjunctsInDomain) {
  2384. auto *AccInst = InvMAs.front().MA->getAccessInstruction();
  2385. scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
  2386. return;
  2387. }
// Project out all parameters that relate to loads in the statement. Otherwise
// we could have cyclic dependences on the constraints under which the
// hoisted loads are executed and we could not determine an order in which to
// pre-load them. This happens because not only lower bounds but also upper
// bounds are part of the domain.
  2393. for (auto &InvMA : InvMAs) {
  2394. auto *MA = InvMA.MA;
  2395. Instruction *AccInst = MA->getAccessInstruction();
  2396. if (SE.isSCEVable(AccInst->getType())) {
  2397. SetVector<Value *> Values;
  2398. for (const SCEV *Parameter : scop->parameters()) {
  2399. Values.clear();
  2400. findValues(Parameter, SE, Values);
  2401. if (!Values.count(AccInst))
  2402. continue;
  2403. isl::id ParamId = scop->getIdForParam(Parameter);
  2404. if (!ParamId.is_null()) {
  2405. int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
  2406. if (Dim >= 0)
  2407. DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
  2408. }
  2409. }
  2410. }
  2411. }
  2412. for (auto &InvMA : InvMAs) {
  2413. auto *MA = InvMA.MA;
  2414. isl::set NHCtx = InvMA.NonHoistableCtx;
  2415. // Check for another invariant access that accesses the same location as
  2416. // MA and if found consolidate them. Otherwise create a new equivalence
  2417. // class at the end of InvariantEquivClasses.
  2418. LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
  2419. Type *Ty = LInst->getType();
  2420. const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
  2421. isl::set MAInvalidCtx = MA->getInvalidContext();
  2422. bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
  2423. bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
  2424. isl::set MACtx;
  2425. // Check if we know that this pointer can be speculatively accessed.
  2426. if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
  2427. NonHoistableCtxIsEmpty)) {
  2428. MACtx = isl::set::universe(DomainCtx.get_space());
  2429. } else {
  2430. MACtx = DomainCtx;
  2431. MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
  2432. MACtx = MACtx.gist_params(scop->getContext());
  2433. }
  2434. bool Consolidated = false;
  2435. for (auto &IAClass : scop->invariantEquivClasses()) {
  2436. if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
  2437. continue;
// If the pointer and the type are equal, check if the access function w.r.t.
// the domain is equal too. It can happen that the domain fixes
// parameter values and these can be different for distinct parts of the
// SCoP. If this happens we cannot consolidate the loads but need to
// create a new invariant load equivalence class.
  2443. auto &MAs = IAClass.InvariantAccesses;
  2444. if (!MAs.empty()) {
  2445. auto *LastMA = MAs.front();
  2446. isl::set AR = MA->getAccessRelation().range();
  2447. isl::set LastAR = LastMA->getAccessRelation().range();
  2448. bool SameAR = AR.is_equal(LastAR);
  2449. if (!SameAR)
  2450. continue;
  2451. }
  2452. // Add MA to the list of accesses that are in this class.
  2453. MAs.push_front(MA);
  2454. Consolidated = true;
  2455. // Unify the execution context of the class and this statement.
  2456. isl::set IAClassDomainCtx = IAClass.ExecutionContext;
  2457. if (!IAClassDomainCtx.is_null())
  2458. IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
  2459. else
  2460. IAClassDomainCtx = MACtx;
  2461. IAClass.ExecutionContext = IAClassDomainCtx;
  2462. break;
  2463. }
  2464. if (Consolidated)
  2465. continue;
  2466. MACtx = MACtx.coalesce();
  2467. // If we did not consolidate MA, thus did not find an equivalence class
  2468. // for it, we create a new one.
  2469. scop->addInvariantEquivClass(
  2470. InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
  2471. }
  2472. }
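/// Collect loads that may form a reduction with @p StoreMA: the stored value
/// must be a single-use, commutative and associative binary operator in the
/// same block as the store, and each candidate load operand must have that
/// operator as its only user and reside in the same block as well.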
  2473. void ScopBuilder::collectCandidateReductionLoads(
  2474. MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
  2475. ScopStmt *Stmt = StoreMA->getStatement();
  2476. auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
  2477. if (!Store)
  2478. return;
// Skip if there is not exactly one binary operator between the load and the store.
  2480. auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
  2481. if (!BinOp)
  2482. return;
// Skip if the binary operator has multiple uses.
  2484. if (BinOp->getNumUses() != 1)
  2485. return;
  2486. // Skip if the opcode of the binary operator is not commutative/associative
  2487. if (!BinOp->isCommutative() || !BinOp->isAssociative())
  2488. return;
// Skip if the binary operator is not in the same block as the store.
  2490. if (BinOp->getParent() != Store->getParent())
  2491. return;
  2492. // Skip if it is a multiplicative reduction and we disabled them
  2493. if (DisableMultiplicativeReductions &&
  2494. (BinOp->getOpcode() == Instruction::Mul ||
  2495. BinOp->getOpcode() == Instruction::FMul))
  2496. return;
  2497. // Check the binary operator operands for a candidate load
  2498. auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
  2499. auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
  2500. if (!PossibleLoad0 && !PossibleLoad1)
  2501. return;
  2502. // A load is only a candidate if it cannot escape (thus has only this use)
  2503. if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
  2504. if (PossibleLoad0->getParent() == Store->getParent())
  2505. Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
  2506. if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
  2507. if (PossibleLoad1->getParent() == Store->getParent())
  2508. Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
  2509. }
/// Find the canonical scop array info object for a set of load-hoisted
/// invariant loads. The canonical array is the one that corresponds to the
/// first load in the list of accesses which is used as base pointer of a
/// scop array.
  2514. static const ScopArrayInfo *findCanonicalArray(Scop &S,
  2515. MemoryAccessList &Accesses) {
  2516. for (MemoryAccess *Access : Accesses) {
  2517. const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
  2518. Access->getAccessInstruction(), MemoryKind::Array);
  2519. if (CanonicalArray)
  2520. return CanonicalArray;
  2521. }
  2522. return nullptr;
  2523. }
/// Check if @p Array serves as base array in an invariant load.
  2525. static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
  2526. for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
  2527. for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
  2528. if (Access2->getScopArrayInfo() == Array)
  2529. return true;
  2530. return false;
  2531. }
  2532. /// Replace the base pointer arrays in all memory accesses referencing @p Old,
  2533. /// with a reference to @p New.
  2534. static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
  2535. const ScopArrayInfo *New) {
  2536. for (ScopStmt &Stmt : S)
  2537. for (MemoryAccess *Access : Stmt) {
  2538. if (Access->getLatestScopArrayInfo() != Old)
  2539. continue;
  2540. isl::id Id = New->getBasePtrId();
  2541. isl::map Map = Access->getAccessRelation();
  2542. Map = Map.set_tuple_id(isl::dim::out, Id);
  2543. Access->setAccessRelation(Map);
  2544. }
  2545. }
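/// Base pointers that are loaded by hoisted invariant loads may have led to
/// the creation of several ScopArrayInfo objects for what is dynamically the
/// same array. Pick one canonical array per invariant equivalence class and
/// redirect the accesses of compatible arrays to it.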
  2546. void ScopBuilder::canonicalizeDynamicBasePtrs() {
  2547. for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
  2548. MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
  2549. const ScopArrayInfo *CanonicalBasePtrSAI =
  2550. findCanonicalArray(*scop, BasePtrAccesses);
  2551. if (!CanonicalBasePtrSAI)
  2552. continue;
  2553. for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
  2554. const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
  2555. BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
  2556. if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
  2557. !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
  2558. continue;
// We currently do not canonicalize arrays where some accesses are
// hoisted as invariant loads. If we did, we would need to update the access
// functions of the invariant loads as well. However, as this is not a
// very common situation, we leave this for now to avoid further
// complexity increases.
  2564. if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI))
  2565. continue;
  2566. replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI);
  2567. }
  2568. }
  2569. }
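/// Build the isl access relation for every MemoryAccess of @p Stmt: create the
/// corresponding ScopArrayInfo object, translate size and subscript SCEVs into
/// isl::pw_aff objects (recording the assumptions taken), and register the
/// access with the SCoP.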
  2570. void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
  2571. for (MemoryAccess *Access : Stmt.MemAccs) {
  2572. Type *ElementType = Access->getElementType();
  2573. MemoryKind Ty;
  2574. if (Access->isPHIKind())
  2575. Ty = MemoryKind::PHI;
  2576. else if (Access->isExitPHIKind())
  2577. Ty = MemoryKind::ExitPHI;
  2578. else if (Access->isValueKind())
  2579. Ty = MemoryKind::Value;
  2580. else
  2581. Ty = MemoryKind::Array;
  2582. // Create isl::pw_aff for SCEVs which describe sizes. Collect all
  2583. // assumptions which are taken. isl::pw_aff objects are cached internally
  2584. // and they are used later by scop.
  2585. for (const SCEV *Size : Access->Sizes) {
  2586. if (!Size)
  2587. continue;
  2588. scop->getPwAff(Size, nullptr, false, &RecordedAssumptions);
  2589. }
  2590. auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
  2591. ElementType, Access->Sizes, Ty);
  2592. // Create isl::pw_aff for SCEVs which describe subscripts. Collect all
  2593. // assumptions which are taken. isl::pw_aff objects are cached internally
  2594. // and they are used later by scop.
  2595. for (const SCEV *Subscript : Access->subscripts()) {
  2596. if (!Access->isAffine() || !Subscript)
  2597. continue;
  2598. scop->getPwAff(Subscript, Stmt.getEntryBlock(), false,
  2599. &RecordedAssumptions);
  2600. }
  2601. Access->buildAccessRelation(SAI);
  2602. scop->addAccessData(Access);
  2603. }
  2604. }
/// Add the minimal/maximal access in @p Set to @p MinMaxAccesses.
  2606. ///
  2607. /// @return True if more accesses should be added, false if we reached the
  2608. /// maximal number of run-time checks to be generated.
  2609. static bool buildMinMaxAccess(isl::set Set,
  2610. Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
  2611. isl::pw_multi_aff MinPMA, MaxPMA;
  2612. isl::pw_aff LastDimAff;
  2613. isl::aff OneAff;
  2614. unsigned Pos;
  2615. Set = Set.remove_divs();
  2616. polly::simplify(Set);
  2617. if (unsignedFromIslSize(Set.n_basic_set()) > RunTimeChecksMaxAccessDisjuncts)
  2618. Set = Set.simple_hull();
  2619. // Restrict the number of parameters involved in the access as the lexmin/
  2620. // lexmax computation will take too long if this number is high.
  2621. //
  2622. // Experiments with a simple test case using an i7 4800MQ:
  2623. //
//   #Parameters involved | Time (in sec)
//                      6 |  0.01
//                      7 |  0.04
//                      8 |  0.12
//                      9 |  0.40
//                     10 |  1.54
//                     11 |  6.78
//                     12 | 30.38
  2632. //
  2633. if (isl_set_n_param(Set.get()) >
  2634. static_cast<isl_size>(RunTimeChecksMaxParameters)) {
  2635. unsigned InvolvedParams = 0;
  2636. for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
  2637. if (Set.involves_dims(isl::dim::param, u, 1))
  2638. InvolvedParams++;
  2639. if (InvolvedParams > RunTimeChecksMaxParameters)
  2640. return false;
  2641. }
  2642. MinPMA = Set.lexmin_pw_multi_aff();
  2643. MaxPMA = Set.lexmax_pw_multi_aff();
  2644. MinPMA = MinPMA.coalesce();
  2645. MaxPMA = MaxPMA.coalesce();
  2646. if (MaxPMA.is_null())
  2647. return false;
  2648. unsigned MaxOutputSize = unsignedFromIslSize(MaxPMA.dim(isl::dim::out));
  2649. // Adjust the last dimension of the maximal access by one as we want to
  2650. // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
  2651. // we test during code generation might now point after the end of the
  2652. // allocated array but we will never dereference it anyway.
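// For illustration (an assumed example, not derived from a particular SCoP):
// if the maximal affine access to an array A is A[n - 1], MaxPMA becomes A[n],
// so [MinPMA, MaxPMA) encloses the accessed region and the generated runtime
// check can compare against a one-past-the-end address.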
  2653. assert(MaxOutputSize >= 1 && "Assumed at least one output dimension");
  2654. Pos = MaxOutputSize - 1;
  2655. LastDimAff = MaxPMA.at(Pos);
  2656. OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
  2657. OneAff = OneAff.add_constant_si(1);
  2658. LastDimAff = LastDimAff.add(OneAff);
  2659. MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);
  2660. if (MinPMA.is_null() || MaxPMA.is_null())
  2661. return false;
  2662. MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));
  2663. return true;
  2664. }
  2665. /// Wrapper function to calculate minimal/maximal accesses to each array.
  2666. bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup,
  2667. Scop::MinMaxVectorTy &MinMaxAccesses) {
  2668. MinMaxAccesses.reserve(AliasGroup.size());
  2669. isl::union_set Domains = scop->getDomains();
  2670. isl::union_map Accesses = isl::union_map::empty(scop->getIslCtx());
  2671. for (MemoryAccess *MA : AliasGroup)
  2672. Accesses = Accesses.unite(MA->getAccessRelation());
  2673. Accesses = Accesses.intersect_domain(Domains);
  2674. isl::union_set Locations = Accesses.range();
  2675. bool LimitReached = false;
  2676. for (isl::set Set : Locations.get_set_list()) {
  2677. LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop);
  2678. if (LimitReached)
  2679. break;
  2680. }
  2681. return !LimitReached;
  2682. }
  2683. static isl::set getAccessDomain(MemoryAccess *MA) {
  2684. isl::set Domain = MA->getStatement()->getDomain();
  2685. Domain = Domain.project_out(isl::dim::set, 0,
  2686. unsignedFromIslSize(Domain.tuple_dim()));
  2687. return Domain.reset_tuple_id();
  2688. }
  2689. bool ScopBuilder::buildAliasChecks() {
  2690. if (!PollyUseRuntimeAliasChecks)
  2691. return true;
  2692. if (buildAliasGroups()) {
  2693. // Aliasing assumptions do not go through addAssumption but we still want to
  2694. // collect statistics so we do it here explicitly.
  2695. if (scop->getAliasGroups().size())
  2696. Scop::incrementNumberOfAliasingAssumptions(1);
  2697. return true;
  2698. }
  2699. // If a problem occurs while building the alias groups we need to delete
  2700. // this SCoP and pretend it wasn't valid in the first place. To this end
  2701. // we make the assumed context infeasible.
  2702. scop->invalidate(ALIASING, DebugLoc());
  2703. LLVM_DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
  2704. << " could not be created. This SCoP has been dismissed.");
  2705. return false;
  2706. }
  2707. std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
  2708. ScopBuilder::buildAliasGroupsForAccesses() {
  2709. AliasSetTracker AST(AA);
  2710. DenseMap<Value *, MemoryAccess *> PtrToAcc;
  2711. DenseSet<const ScopArrayInfo *> HasWriteAccess;
  2712. for (ScopStmt &Stmt : *scop) {
  2713. isl::set StmtDomain = Stmt.getDomain();
  2714. bool StmtDomainEmpty = StmtDomain.is_empty();
  2715. // Statements with an empty domain will never be executed.
  2716. if (StmtDomainEmpty)
  2717. continue;
  2718. for (MemoryAccess *MA : Stmt) {
  2719. if (MA->isScalarKind())
  2720. continue;
  2721. if (!MA->isRead())
  2722. HasWriteAccess.insert(MA->getScopArrayInfo());
  2723. MemAccInst Acc(MA->getAccessInstruction());
  2724. if (MA->isRead() && isa<MemTransferInst>(Acc))
  2725. PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
  2726. else
  2727. PtrToAcc[Acc.getPointerOperand()] = MA;
  2728. AST.add(Acc);
  2729. }
  2730. }
  2731. AliasGroupVectorTy AliasGroups;
  2732. for (AliasSet &AS : AST) {
  2733. if (AS.isMustAlias() || AS.isForwardingAliasSet())
  2734. continue;
  2735. AliasGroupTy AG;
  2736. for (auto &PR : AS)
  2737. AG.push_back(PtrToAcc[PR.getValue()]);
  2738. if (AG.size() < 2)
  2739. continue;
  2740. AliasGroups.push_back(std::move(AG));
  2741. }
  2742. return std::make_tuple(AliasGroups, HasWriteAccess);
  2743. }
  2744. bool ScopBuilder::buildAliasGroups() {
// To create sound alias checks we perform the following steps:
// o) We partition each group into read-only and non-read-only accesses.
// o) For each group with more than one base pointer we then compute minimal
// and maximal accesses to each array of a group in the read-only and
// non-read-only partitions separately.
  2750. AliasGroupVectorTy AliasGroups;
  2751. DenseSet<const ScopArrayInfo *> HasWriteAccess;
  2752. std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses();
  2753. splitAliasGroupsByDomain(AliasGroups);
  2754. for (AliasGroupTy &AG : AliasGroups) {
  2755. if (!scop->hasFeasibleRuntimeContext())
  2756. return false;
  2757. {
  2758. IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
  2759. bool Valid = buildAliasGroup(AG, HasWriteAccess);
  2760. if (!Valid)
  2761. return false;
  2762. }
  2763. if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) {
  2764. scop->invalidate(COMPLEXITY, DebugLoc());
  2765. return false;
  2766. }
  2767. }
  2768. return true;
  2769. }
  2770. bool ScopBuilder::buildAliasGroup(
  2771. AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
  2772. AliasGroupTy ReadOnlyAccesses;
  2773. AliasGroupTy ReadWriteAccesses;
  2774. SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
  2775. SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
  2776. if (AliasGroup.size() < 2)
  2777. return true;
  2778. for (MemoryAccess *Access : AliasGroup) {
  2779. ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
  2780. Access->getAccessInstruction())
  2781. << "Possibly aliasing pointer, use restrict keyword.");
  2782. const ScopArrayInfo *Array = Access->getScopArrayInfo();
  2783. if (HasWriteAccess.count(Array)) {
  2784. ReadWriteArrays.insert(Array);
  2785. ReadWriteAccesses.push_back(Access);
  2786. } else {
  2787. ReadOnlyArrays.insert(Array);
  2788. ReadOnlyAccesses.push_back(Access);
  2789. }
  2790. }
// If there are no read-only pointers and fewer than two read-write pointers,
// no alias check is needed.
  2793. if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
  2794. return true;
  2795. // If there is no read-write pointer, no alias check is needed.
  2796. if (ReadWriteArrays.empty())
  2797. return true;
  2798. // For non-affine accesses, no alias check can be generated as we cannot
  2799. // compute a sufficiently tight lower and upper bound: bail out.
  2800. for (MemoryAccess *MA : AliasGroup) {
  2801. if (!MA->isAffine()) {
  2802. scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
  2803. MA->getAccessInstruction()->getParent());
  2804. return false;
  2805. }
  2806. }
  2807. // Ensure that for all memory accesses for which we generate alias checks,
  2808. // their base pointers are available.
  2809. for (MemoryAccess *MA : AliasGroup) {
  2810. if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
  2811. scop->addRequiredInvariantLoad(
  2812. cast<LoadInst>(BasePtrMA->getAccessInstruction()));
  2813. }
  2814. // scop->getAliasGroups().emplace_back();
  2815. // Scop::MinMaxVectorPairTy &pair = scop->getAliasGroups().back();
  2816. Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
  2817. Scop::MinMaxVectorTy MinMaxAccessesReadOnly;
  2818. bool Valid;
  2819. Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);
  2820. if (!Valid)
  2821. return false;
  2822. // Bail out if the number of values we need to compare is too large.
  2823. // This is important as the number of comparisons grows quadratically with
  2824. // the number of values we need to compare.
  2825. if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
  2826. RunTimeChecksMaxArraysPerGroup)
  2827. return false;
  2828. Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);
  2829. scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
  2830. if (!Valid)
  2831. return false;
  2832. return true;
  2833. }
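/// Split alias groups by the domains of their accesses: an access whose domain
/// is disjoint from the union of the domains collected so far can never execute
/// together with the rest of the group, so it is moved into a separate group
/// and needs no alias check against the others.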
  2834. void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
  2835. for (unsigned u = 0; u < AliasGroups.size(); u++) {
  2836. AliasGroupTy NewAG;
  2837. AliasGroupTy &AG = AliasGroups[u];
  2838. AliasGroupTy::iterator AGI = AG.begin();
  2839. isl::set AGDomain = getAccessDomain(*AGI);
  2840. while (AGI != AG.end()) {
  2841. MemoryAccess *MA = *AGI;
  2842. isl::set MADomain = getAccessDomain(MA);
  2843. if (AGDomain.is_disjoint(MADomain)) {
  2844. NewAG.push_back(MA);
  2845. AGI = AG.erase(AGI);
  2846. } else {
  2847. AGDomain = AGDomain.unite(MADomain);
  2848. AGI++;
  2849. }
  2850. }
  2851. if (NewAG.size() > 1)
  2852. AliasGroups.push_back(std::move(NewAG));
  2853. }
  2854. }
  2855. #ifndef NDEBUG
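/// Check that the "physical" and "virtual" kinds of a use agree; see the
/// comment on verifyUses below for what these kinds mean.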
  2856. static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
  2857. auto PhysUse = VirtualUse::create(S, Op, &LI, false);
  2858. auto VirtUse = VirtualUse::create(S, Op, &LI, true);
  2859. assert(PhysUse.getKind() == VirtUse.getKind());
  2860. }
  2861. /// Check the consistency of every statement's MemoryAccesses.
  2862. ///
/// The check is carried out by expecting the "physical" kind of use (derived
/// from the BasicBlock the instruction resides in) to be the same as the
/// "virtual" kind of use (derived from a statement's MemoryAccess).
  2866. ///
  2867. /// The "physical" uses are taken by ensureValueRead to determine whether to
  2868. /// create MemoryAccesses. When done, the kind of scalar access should be the
  2869. /// same no matter which way it was derived.
  2870. ///
/// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
/// can intentionally influence the kind of uses (which then no longer
/// corresponds to the "physical" kind, hence called "virtual"). The
/// CodeGenerator therefore has to pick up the virtual uses. But here in the
/// ScopBuilder, this has not happened yet, such that virtual and physical uses
/// are equivalent.
  2876. static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
  2877. for (auto *BB : S->getRegion().blocks()) {
  2878. for (auto &Inst : *BB) {
  2879. auto *Stmt = S->getStmtFor(&Inst);
  2880. if (!Stmt)
  2881. continue;
  2882. if (isIgnoredIntrinsic(&Inst))
  2883. continue;
  2884. // Branch conditions are encoded in the statement domains.
  2885. if (Inst.isTerminator() && Stmt->isBlockStmt())
  2886. continue;
  2887. // Verify all uses.
  2888. for (auto &Op : Inst.operands())
  2889. verifyUse(S, Op, LI);
  2890. // Stores do not produce values used by other statements.
  2891. if (isa<StoreInst>(Inst))
  2892. continue;
// For every value defined in the block, also check that a use of that
// value in the same statement would not be an inter-statement use. It can
// still be synthesizable or load-hoisted, but these kinds of instructions
// are not directly copied during code generation.
  2897. auto VirtDef =
  2898. VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
  2899. assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
  2900. VirtDef.getKind() == VirtualUse::Intra ||
  2901. VirtDef.getKind() == VirtualUse::Hoisted);
  2902. }
  2903. }
  2904. if (S->hasSingleExitEdge())
  2905. return;
  2906. // PHINodes in the SCoP region's exit block are also uses to be checked.
  2907. if (!S->getRegion().isTopLevelRegion()) {
  2908. for (auto &Inst : *S->getRegion().getExit()) {
  2909. if (!isa<PHINode>(Inst))
  2910. break;
  2911. for (auto &Op : Inst.operands())
  2912. verifyUse(S, Op, LI);
  2913. }
  2914. }
  2915. }
  2916. #endif
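/// Drive the construction of the polyhedral model for region @p R: create the
/// statements and access functions, build domains and access relations, compute
/// the schedule and context, and add alias checks, bailing out early whenever
/// the SCoP turns out to be infeasible or not profitable.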
  2917. void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
  2918. scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE,
  2919. SD.getNextID()));
  2920. buildStmts(R);
// Create all invariant load instructions first. These are categorized as
// 'synthesizable' and therefore are not part of any ScopStmt, but they still
// need to be created somewhere.
  2924. const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  2925. for (BasicBlock *BB : scop->getRegion().blocks()) {
  2926. if (SD.isErrorBlock(*BB, scop->getRegion()))
  2927. continue;
  2928. for (Instruction &Inst : *BB) {
  2929. LoadInst *Load = dyn_cast<LoadInst>(&Inst);
  2930. if (!Load)
  2931. continue;
  2932. if (!RIL.count(Load))
  2933. continue;
// Invariant loads require a MemoryAccess to be created in some statement.
// It is not important to which statement the MemoryAccess is added
// because it will later be removed from the ScopStmt again. We choose the
// first statement of the basic block the LoadInst is in.
  2938. ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
  2939. assert(!List.empty());
  2940. ScopStmt *RILStmt = List.front();
  2941. buildMemoryAccess(Load, RILStmt);
  2942. }
  2943. }
  2944. buildAccessFunctions();
// In case the region does not have an exiting block we will later (during
// code generation) split the exit block. This will move potential PHI nodes
// from the current exit block into the new region exiting block. Hence, PHI
// nodes that are not part of the region at this point will become part of it.
// To handle these PHI nodes later we will now model their operands as scalar
// accesses. Note that we do not model anything in the exit block if we have
// an exiting block in the region, as there will not be any splitting later.
  2952. if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
  2953. for (Instruction &Inst : *R.getExit()) {
  2954. PHINode *PHI = dyn_cast<PHINode>(&Inst);
  2955. if (!PHI)
  2956. break;
  2957. buildPHIAccesses(nullptr, PHI, nullptr, true);
  2958. }
  2959. }
  2960. // Create memory accesses for global reads since all arrays are now known.
  2961. auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
  2962. for (auto GlobalReadPair : GlobalReads) {
  2963. ScopStmt *GlobalReadStmt = GlobalReadPair.first;
  2964. Instruction *GlobalRead = GlobalReadPair.second;
  2965. for (auto *BP : ArrayBasePointers)
  2966. addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
  2967. BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
  2968. }
  2969. buildInvariantEquivalenceClasses();
  2970. /// A map from basic blocks to their invalid domains.
  2971. DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
  2972. if (!buildDomains(&R, InvalidDomainMap)) {
  2973. LLVM_DEBUG(
  2974. dbgs() << "Bailing-out because buildDomains encountered problems\n");
  2975. return;
  2976. }
  2977. addUserAssumptions(AC, InvalidDomainMap);
  2978. // Initialize the invalid domain.
  2979. for (ScopStmt &Stmt : scop->Stmts)
  2980. if (Stmt.isBlockStmt())
  2981. Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
  2982. else
  2983. Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
  2984. Stmt.getRegion()->getNode())]);
  2985. // Remove empty statements.
  2986. // Exit early in case there are no executable statements left in this scop.
  2987. scop->removeStmtNotInDomainMap();
  2988. scop->simplifySCoP(false);
  2989. if (scop->isEmpty()) {
  2990. LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
  2991. return;
  2992. }
  2993. // The ScopStmts now have enough information to initialize themselves.
  2994. for (ScopStmt &Stmt : *scop) {
  2995. collectSurroundingLoops(Stmt);
  2996. buildDomain(Stmt);
  2997. buildAccessRelations(Stmt);
  2998. if (DetectReductions)
  2999. checkForReductions(Stmt);
  3000. }
  3001. // Check early for a feasible runtime context.
  3002. if (!scop->hasFeasibleRuntimeContext()) {
  3003. LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
  3004. return;
  3005. }
  3006. // Check early for profitability. Afterwards it cannot change anymore,
  3007. // only the runtime context could become infeasible.
  3008. if (!scop->isProfitable(UnprofitableScalarAccs)) {
  3009. scop->invalidate(PROFITABLE, DebugLoc());
  3010. LLVM_DEBUG(
  3011. dbgs() << "Bailing-out because SCoP is not considered profitable\n");
  3012. return;
  3013. }
  3014. buildSchedule();
  3015. finalizeAccesses();
  3016. scop->realignParams();
  3017. addUserContext();
// After the context has been fully constructed, and thus all our knowledge
// about the parameters is in it, we add all recorded assumptions to the
// assumed/invalid context.
  3021. addRecordedAssumptions();
  3022. scop->simplifyContexts();
  3023. if (!buildAliasChecks()) {
  3024. LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
  3025. return;
  3026. }
  3027. hoistInvariantLoads();
  3028. canonicalizeDynamicBasePtrs();
  3029. verifyInvariantLoads();
  3030. scop->simplifySCoP(true);
  3031. // Check late for a feasible runtime context because profitability did not
  3032. // change.
  3033. if (!scop->hasFeasibleRuntimeContext()) {
  3034. LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
  3035. return;
  3036. }
  3037. #ifndef NDEBUG
  3038. verifyUses(scop.get(), LI, DT);
  3039. #endif
  3040. }
  3041. ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
  3042. const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
  3043. ScopDetection &SD, ScalarEvolution &SE,
  3044. OptimizationRemarkEmitter &ORE)
  3045. : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
  3046. DebugLoc Beg, End;
  3047. auto P = getBBPairForRegion(R);
  3048. getDebugLocations(P, Beg, End);
  3049. std::string Msg = "SCoP begins here.";
  3050. ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
  3051. << Msg);
  3052. buildScop(*R, AC);
  3053. LLVM_DEBUG(dbgs() << *scop);
  3054. if (!scop->hasFeasibleRuntimeContext()) {
  3055. InfeasibleScops++;
  3056. Msg = "SCoP ends here but was dismissed.";
  3057. LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n");
  3058. RecordedAssumptions.clear();
  3059. scop.reset();
  3060. } else {
  3061. Msg = "SCoP ends here.";
  3062. ++ScopFound;
  3063. if (scop->getMaxLoopDepth() > 0)
  3064. ++RichScopFound;
  3065. }
  3066. if (R->isTopLevelRegion())
  3067. ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
  3068. << Msg);
  3069. else
  3070. ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
  3071. << Msg);
  3072. }