//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/IslExprBuilder.h"
#include "polly/CodeGen/RuntimeDebugBuilder.h"
#include "polly/Options.h"
#include "polly/ScopInfo.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "isl/ast.h"
#include <deque>

using namespace llvm;
using namespace polly;
static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assumed aligned memory accesses."),
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));

bool PollyDebugPrinting;
static cl::opt<bool, true> DebugPrintingX(
    "polly-codegen-add-debug-printing",
    cl::desc("Add printf calls that show the values loaded/stored."),
    cl::location(PollyDebugPrinting), cl::Hidden, cl::init(false),
    cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> TraceStmts(
    "polly-codegen-trace-stmts",
    cl::desc("Add printf calls that print the statement being executed"),
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> TraceScalars(
    "polly-codegen-trace-scalars",
    cl::desc("Add printf calls that print the values of all scalar values "
             "used in a statement. Requires -polly-codegen-trace-stmts."),
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
BlockGenerator::BlockGenerator(
    PollyIRBuilder &B, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT,
    AllocaMapTy &ScalarMap, EscapeUsersAllocaMapTy &EscapeMap,
    ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder, BasicBlock *StartBlock)
    : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT),
      EntryBB(nullptr), ScalarMap(ScalarMap), EscapeMap(EscapeMap),
      GlobalMap(GlobalMap), StartBlock(StartBlock) {}
Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
                                             ValueMapT &BBMap,
                                             LoopToScevMapT &LTS,
                                             Loop *L) const {
  if (!SE.isSCEVable(Old->getType()))
    return nullptr;

  const SCEV *Scev = SE.getSCEVAtScope(Old, L);
  if (!Scev)
    return nullptr;

  if (isa<SCEVCouldNotCompute>(Scev))
    return nullptr;

  const SCEV *NewScev = SCEVLoopAddRecRewriter::rewrite(Scev, LTS, SE);
  ValueMapT VTV;
  VTV.insert(BBMap.begin(), BBMap.end());
  VTV.insert(GlobalMap.begin(), GlobalMap.end());

  Scop &S = *Stmt.getParent();
  const DataLayout &DL = S.getFunction().getParent()->getDataLayout();
  auto IP = Builder.GetInsertPoint();

  assert(IP != Builder.GetInsertBlock()->end() &&
         "Only instructions can be insert points for SCEVExpander");
  Value *Expanded =
      expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), &*IP, &VTV,
                    StartBlock->getSinglePredecessor());

  BBMap[Old] = Expanded;
  return Expanded;
}
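// To illustrate the synthesis above (a hypothetical example, not taken from an
// actual test case): if the original code computes
//   %idx = add i64 %i, 1
// inside a loop with induction variable %i, the value is not copied verbatim.
// Its SCEV {1,+,1}<%loop> is rewritten by SCEVLoopAddRecRewriter to use the
// new induction variable recorded in LTS, and SCEVExpander then emits a fresh
// computation (e.g. %p_idx) at the current insert point.
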
Value *BlockGenerator::getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
                                   LoopToScevMapT &LTS, Loop *L) const {
  auto lookupGlobally = [this](Value *Old) -> Value * {
    Value *New = GlobalMap.lookup(Old);
    if (!New)
      return nullptr;

    // Required by:
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll
    // * Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
    // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
    // * Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
    // GlobalMap should be a mapping from (value in original SCoP) to (copied
    // value in generated SCoP), without intermediate mappings, which might
    // easily require transitiveness as well.
    if (Value *NewRemapped = GlobalMap.lookup(New))
      New = NewRemapped;

    // No test case for this code.
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  };

  Value *New = nullptr;
  auto VUse = VirtualUse::create(&Stmt, L, Old, true);
  switch (VUse.getKind()) {
  case VirtualUse::Block:
    // BasicBlocks are constants, but the BlockGenerator copies them.
    New = BBMap.lookup(Old);
    break;

  case VirtualUse::Constant:
    // Used by:
    // * Isl/CodeGen/OpenMP/reference-argument-from-non-affine-region.ll
    // Constants should not be redefined. In this case, the GlobalMap just
    // contains a mapping to the same constant, which is unnecessary, but
    // harmless.
    if ((New = lookupGlobally(Old)))
      break;

    assert(!BBMap.count(Old));
    New = Old;
    break;

  case VirtualUse::ReadOnly:
    assert(!GlobalMap.count(Old));

    // Required for:
    // * Isl/CodeGen/MemAccess/create_arrays.ll
    // * Isl/CodeGen/read-only-scalars.ll
    // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
    // For some reason these reload a read-only value. The reloaded value ends
    // up in BBMap, but its value should be identical.
    //
    // Required for:
    // * Isl/CodeGen/OpenMP/single_loop_with_param.ll
    // The parallel subfunctions need to reference the read-only value from the
    // parent function; this is done by reloading them locally.
    if ((New = BBMap.lookup(Old)))
      break;

    New = Old;
    break;

  case VirtualUse::Synthesizable:
    // Used by:
    // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
    // * Isl/CodeGen/OpenMP/recomputed-srem.ll
    // * Isl/CodeGen/OpenMP/reference-other-bb.ll
    // * Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll
    // For some reason synthesizable values end up in GlobalMap. Their values
    // are the same as trySynthesizeNewValue would return. The legacy
    // implementation prioritized GlobalMap, so this is what we do here as well.
    // Ideally, synthesizable values should not end up in GlobalMap.
    if ((New = lookupGlobally(Old)))
      break;

    // Required for:
    // * Isl/CodeGen/RuntimeDebugBuilder/combine_different_values.ll
    // * Isl/CodeGen/getNumberOfIterations.ll
    // * Isl/CodeGen/non_affine_float_compare.ll
    // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
    // Ideally, synthesizable values are synthesized by trySynthesizeNewValue,
    // not precomputed (SCEVExpander has its own caching mechanism).
    // These tests fail without this, but I think trySynthesizeNewValue would
    // just re-synthesize the same instructions.
    if ((New = BBMap.lookup(Old)))
      break;

    New = trySynthesizeNewValue(Stmt, Old, BBMap, LTS, L);
    break;

  case VirtualUse::Hoisted:
    // TODO: Hoisted invariant loads should be found in GlobalMap only, but not
    // redefined locally (which will be ignored anyway). That is, the following
    // assertion should apply: assert(!BBMap.count(Old))
    New = lookupGlobally(Old);
    break;

  case VirtualUse::Intra:
  case VirtualUse::Inter:
    assert(!GlobalMap.count(Old) &&
           "Intra and inter-stmt values are never global");
    New = BBMap.lookup(Old);
    break;
  }
  assert(New && "Unexpected scalar dependence in region!");
  return New;
}
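// To make the dispatch above concrete (hypothetical examples): a use of %sum
// inside the statement that defines it is Intra, and the copy is found in
// BBMap; a use of a value defined in another copied statement is Inter; a
// parameter like %n defined before the SCoP is ReadOnly and used as-is; and an
// expression like %i + 1 over an enclosing loop's induction variable is
// Synthesizable and is recomputed from the new induction variable rather than
// copied.
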
void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
                                    ValueMapT &BBMap, LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to
  // copy them correctly. In the current state, they just crash the code
  // generation as the metadata operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand =
        getNewValue(Stmt, OldOperand, BBMap, LTS, getLoopForStmt(Stmt));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      NewInst->deleteValue();
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  // When copying the instruction onto the Module meant for the GPU,
  // debug metadata attached to an instruction causes all related
  // metadata to be pulled into the Module. This includes the DICompileUnit,
  // which will not be listed in llvm.dbg.cu of the Module since the Module
  // doesn't contain one. This fails the verification of the Module and the
  // subsequent generation of the ASM string.
  if (NewInst->getModule() != Inst->getModule())
    NewInst->setDebugLoc(llvm::DebugLoc());

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}
Value *
BlockGenerator::generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &MA = Stmt.getArrayAccessFor(Inst);
  return generateLocationAccessed(
      Stmt, getLoopForStmt(Stmt),
      Inst.isNull() ? nullptr : Inst.getPointerOperand(), BBMap, LTS,
      NewAccesses, MA.getId().release(), MA.getAccessValue()->getType());
}

Value *BlockGenerator::generateLocationAccessed(
    ScopStmt &Stmt, Loop *L, Value *Pointer, ValueMapT &BBMap,
    LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses, __isl_take isl_id *Id,
    Type *ExpectedType) {
  isl_ast_expr *AccessExpr = isl_id_to_ast_expr_get(NewAccesses, Id);

  if (AccessExpr) {
    AccessExpr = isl_ast_expr_address_of(AccessExpr);
    auto Address = ExprBuilder->create(AccessExpr);

    // Cast the address of this memory access to a pointer type that has the
    // same element type as the original access, but uses the address space of
    // the newly generated pointer.
    auto OldPtrTy = ExpectedType->getPointerTo();
    auto NewPtrTy = Address->getType();
    OldPtrTy = PointerType::get(OldPtrTy->getElementType(),
                                NewPtrTy->getPointerAddressSpace());

    if (OldPtrTy != NewPtrTy)
      Address = Builder.CreateBitOrPointerCast(Address, OldPtrTy);
    return Address;
  }
  assert(
      Pointer &&
      "If expression was not generated, must use the original pointer value");
  return getNewValue(Stmt, Pointer, BBMap, LTS, L);
}
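// For example (hypothetical): when an access is remapped, say A[i] redirected
// to B[2 * i] via an imported access function, NewAccesses carries the isl AST
// expression for the new subscript. ExprBuilder lowers its address to a GEP
// such as
//   %polly.access.B = getelementptr double, double* %B, i64 %mul
// and only the element-type/address-space cast above is applied afterwards;
// without an entry in NewAccesses, the original pointer is remapped instead.
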
Value *
BlockGenerator::getImplicitAddress(MemoryAccess &Access, Loop *L,
                                   LoopToScevMapT &LTS, ValueMapT &BBMap,
                                   __isl_keep isl_id_to_ast_expr *NewAccesses) {
  if (Access.isLatestArrayKind())
    return generateLocationAccessed(*Access.getStatement(), L, nullptr, BBMap,
                                    LTS, NewAccesses, Access.getId().release(),
                                    Access.getAccessValue()->getType());

  return getOrCreateAlloca(Access);
}

Loop *BlockGenerator::getLoopForStmt(const ScopStmt &Stmt) const {
  auto *StmtBB = Stmt.getEntryBlock();
  return LI.getLoopFor(StmtBB);
}
Value *BlockGenerator::generateArrayLoad(ScopStmt &Stmt, LoadInst *Load,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load))
    return PreloadLoad;

  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, LTS, NewAccesses);
  Value *ScalarLoad = Builder.CreateAlignedLoad(NewPointer, Load->getAlign(),
                                                Load->getName() + "_p_scalar_");

  if (PollyDebugPrinting)
    RuntimeDebugBuilder::createCPUPrinter(Builder, "Load from ", NewPointer,
                                          ": ", ScalarLoad, "\n");

  return ScalarLoad;
}

void BlockGenerator::generateArrayStore(ScopStmt &Stmt, StoreInst *Store,
                                        ValueMapT &BBMap, LoopToScevMapT &LTS,
                                        isl_id_to_ast_expr *NewAccesses) {
  MemoryAccess &MA = Stmt.getArrayAccessFor(Store);
  isl::set AccDom = MA.getAccessRelation().domain();
  std::string Subject = MA.getId().get_name();

  generateConditionalExecution(Stmt, AccDom, Subject.c_str(), [&, this]() {
    Value *NewPointer =
        generateLocationAccessed(Stmt, Store, BBMap, LTS, NewAccesses);
    Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap,
                                      LTS, getLoopForStmt(Stmt));

    if (PollyDebugPrinting)
      RuntimeDebugBuilder::createCPUPrinter(Builder, "Store to ", NewPointer,
                                            ": ", ValueOperand, "\n");

    Builder.CreateAlignedStore(ValueOperand, NewPointer, Store->getAlign());
  });
}
bool BlockGenerator::canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst) {
  Loop *L = getLoopForStmt(Stmt);
  return (Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) &&
         canSynthesize(Inst, *Stmt.getParent(), &SE, L);
}

void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst,
                                     ValueMapT &BBMap, LoopToScevMapT &LTS,
                                     isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions control flow. They are explicitly expressed in
  // the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Synthesizable statements will be generated on-demand.
  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateArrayLoad(Stmt, Load, BBMap, LTS, NewAccesses);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Load] = NewLoad;
    return;
  }

  if (auto *Store = dyn_cast<StoreInst>(Inst)) {
    // Identified as redundant by -polly-simplify.
    if (!Stmt.getArrayAccessOrNULLFor(Store))
      return;

    generateArrayStore(Stmt, Store, BBMap, LTS, NewAccesses);
    return;
  }

  if (auto *PHI = dyn_cast<PHINode>(Inst)) {
    copyPHIInstruction(Stmt, PHI, BBMap, LTS);
    return;
  }

  // Skip some special intrinsics for which we do not adjust the semantics to
  // the new schedule. All others are handled like every other instruction.
  if (isIgnoredIntrinsic(Inst))
    return;

  copyInstScalar(Stmt, Inst, BBMap, LTS);
}
void BlockGenerator::removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap) {
  auto NewBB = Builder.GetInsertBlock();
  for (auto I = NewBB->rbegin(); I != NewBB->rend(); I++) {
    Instruction *NewInst = &*I;

    if (!isInstructionTriviallyDead(NewInst))
      continue;

    for (auto Pair : BBMap)
      if (Pair.second == NewInst) {
        BBMap.erase(Pair.first);
      }

    NewInst->eraseFromParent();
    I = NewBB->rbegin();
  }
}
void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                              isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "Only block statements can be copied by the block generator");

  ValueMapT BBMap;

  BasicBlock *BB = Stmt.getBasicBlock();
  copyBB(Stmt, BB, BBMap, LTS, NewAccesses);
  removeDeadInstructions(BB, BBMap);
}

BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  return CopyBB;
}

BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB,
                                   ValueMapT &BBMap, LoopToScevMapT &LTS,
                                   isl_id_to_ast_expr *NewAccesses) {
  BasicBlock *CopyBB = splitBB(BB);
  Builder.SetInsertPoint(&CopyBB->front());
  generateScalarLoads(Stmt, LTS, BBMap, NewAccesses);
  generateBeginStmtTrace(Stmt, LTS, BBMap);

  copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses);

  // After a basic block was copied, store all scalars that escape this block
  // in their allocas.
  generateScalarStores(Stmt, LTS, BBMap, NewAccesses);
  return CopyBB;
}
void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
                            ValueMapT &BBMap, LoopToScevMapT &LTS,
                            isl_id_to_ast_expr *NewAccesses) {
  EntryBB = &CopyBB->getParent()->getEntryBlock();

  // Block statements and the entry blocks of region statements are code
  // generated from instruction lists. This allows us to optimize the
  // instructions that belong to a certain scop statement. As the code
  // structure of region statements might be arbitrarily complex, optimizing
  // the instruction list is not yet supported.
  if (Stmt.isBlockStmt() || (Stmt.isRegionStmt() && Stmt.getEntryBlock() == BB))
    for (Instruction *Inst : Stmt.getInstructions())
      copyInstruction(Stmt, Inst, BBMap, LTS, NewAccesses);
  else
    for (Instruction &Inst : *BB)
      copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses);
}
Value *BlockGenerator::getOrCreateAlloca(const MemoryAccess &Access) {
  assert(!Access.isLatestArrayKind() && "Trying to get alloca for array kind");

  return getOrCreateAlloca(Access.getLatestScopArrayInfo());
}

Value *BlockGenerator::getOrCreateAlloca(const ScopArrayInfo *Array) {
  assert(!Array->isArrayKind() && "Trying to get alloca for array kind");

  auto &Addr = ScalarMap[Array];

  if (Addr) {
    // Allow allocas to be (temporarily) redirected once by adding a new
    // old-alloca-addr to new-addr mapping to GlobalMap. This functionality
    // is used for example by the OpenMP code generation where a first use
    // of a scalar while still in the host code allocates a normal alloca with
    // getOrCreateAlloca. When the values of this scalar are accessed during
    // the generation of the parallel subfunction, these values are copied over
    // to the parallel subfunction and each request for a scalar alloca slot
    // must be forwarded to the temporary in-subfunction slot. This mapping is
    // removed when the subfunction has been generated and again normal host
    // code is generated. Due to the following reasons it is not possible to
    // perform the GlobalMap lookup right after creating the alloca below, but
    // instead we need to check GlobalMap at each call to getOrCreateAlloca:
    //
    //   1) GlobalMap may be changed multiple times (for each parallel loop),
    //   2) The temporary mapping is commonly only known after the initial
    //      alloca has already been generated, and
    //   3) The original alloca value must be restored after leaving the
    //      sub-function.
    if (Value *NewAddr = GlobalMap.lookup(&*Addr))
      return NewAddr;
    return Addr;
  }

  Type *Ty = Array->getElementType();
  Value *ScalarBase = Array->getBasePtr();
  std::string NameExt;
  if (Array->isPHIKind())
    NameExt = ".phiops";
  else
    NameExt = ".s2a";

  const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();

  Addr =
      new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
                     DL.getPrefTypeAlign(Ty), ScalarBase->getName() + NameExt);
  EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
  Addr->insertBefore(&*EntryBB->getFirstInsertionPt());

  return Addr;
}
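// A concrete (hypothetical) sequence for the redirection above: the host code
// creates %sum.s2a via getOrCreateAlloca. While a parallel subfunction is
// being generated, the OpenMP code generation installs
// GlobalMap[%sum.s2a] = %sum.loc, a slot local to the subfunction, so every
// lookup during subfunction emission yields %sum.loc. Once the subfunction is
// finished, the mapping is removed and lookups return %sum.s2a again.
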
void BlockGenerator::handleOutsideUsers(const Scop &S, ScopArrayInfo *Array) {
  Instruction *Inst = cast<Instruction>(Array->getBasePtr());

  // If there are escape users we get the alloca for this instruction and put
  // it in the EscapeMap for later finalization. Lastly, if the instruction was
  // copied multiple times we already did this and can exit.
  if (EscapeMap.count(Inst))
    return;

  EscapeUserVectorTy EscapeUsers;
  for (User *U : Inst->users()) {
    // Non-instruction users will never escape.
    Instruction *UI = dyn_cast<Instruction>(U);
    if (!UI)
      continue;

    if (S.contains(UI))
      continue;

    EscapeUsers.push_back(UI);
  }

  // Exit if no escape uses were found.
  if (EscapeUsers.empty())
    return;

  // Get or create an escape alloca for this instruction.
  auto *ScalarAddr = getOrCreateAlloca(Array);

  // Remember that this instruction has escape uses and the escape alloca.
  EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers));
}
void BlockGenerator::generateScalarLoads(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isWrite())
      continue;

#ifndef NDEBUG
    auto StmtDom =
        Stmt.getDomain().intersect_params(Stmt.getParent()->getContext());
    auto AccDom = MA->getAccessRelation().domain();
    assert(!StmtDom.is_subset(AccDom).is_false() &&
           "Scalar must be loaded in all statement instances");
#endif

    auto *Address =
        getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap, NewAccesses);
    assert((!isa<Instruction>(Address) ||
            DT.dominates(cast<Instruction>(Address)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    BBMap[MA->getAccessValue()] =
        Builder.CreateLoad(Address, Address->getName() + ".reload");
  }
}
Value *BlockGenerator::buildContainsCondition(ScopStmt &Stmt,
                                              const isl::set &Subdomain) {
  isl::ast_build AstBuild = Stmt.getAstBuild();
  isl::set Domain = Stmt.getDomain();

  isl::union_map USchedule = AstBuild.get_schedule();
  USchedule = USchedule.intersect_domain(Domain);

  assert(!USchedule.is_empty());
  isl::map Schedule = isl::map::from_union_map(USchedule);

  isl::set ScheduledDomain = Schedule.range();
  isl::set ScheduledSet = Subdomain.apply(Schedule);

  isl::ast_build RestrictedBuild = AstBuild.restrict(ScheduledDomain);

  isl::ast_expr IsInSet = RestrictedBuild.expr_from(ScheduledSet);
  Value *IsInSetExpr = ExprBuilder->create(IsInSet.copy());
  IsInSetExpr = Builder.CreateICmpNE(
      IsInSetExpr, ConstantInt::get(IsInSetExpr->getType(), 0));

  return IsInSetExpr;
}
void BlockGenerator::generateConditionalExecution(
    ScopStmt &Stmt, const isl::set &Subdomain, StringRef Subject,
    const std::function<void()> &GenThenFunc) {
  isl::set StmtDom = Stmt.getDomain();

  // If the condition is a tautology, don't generate a condition around the
  // code.
  bool IsPartialWrite =
      !StmtDom.intersect_params(Stmt.getParent()->getContext())
           .is_subset(Subdomain);
  if (!IsPartialWrite) {
    GenThenFunc();
    return;
  }

  // Generate the condition.
  Value *Cond = buildContainsCondition(Stmt, Subdomain);

  // Don't call GenThenFunc if it is never executed. An ast index expression
  // might not be defined in this case.
  if (auto *Const = dyn_cast<ConstantInt>(Cond))
    if (Const->isZero())
      return;

  BasicBlock *HeadBlock = Builder.GetInsertBlock();
  StringRef BlockName = HeadBlock->getName();

  // Generate the conditional block.
  SplitBlockAndInsertIfThen(Cond, &*Builder.GetInsertPoint(), false, nullptr,
                            &DT, &LI);
  BranchInst *Branch = cast<BranchInst>(HeadBlock->getTerminator());
  BasicBlock *ThenBlock = Branch->getSuccessor(0);
  BasicBlock *TailBlock = Branch->getSuccessor(1);

  // Assign descriptive names.
  if (auto *CondInst = dyn_cast<Instruction>(Cond))
    CondInst->setName("polly." + Subject + ".cond");
  ThenBlock->setName(BlockName + "." + Subject + ".partial");
  TailBlock->setName(BlockName + ".cont");

  // Put the client code into the conditional block and continue in the merge
  // block afterwards.
  Builder.SetInsertPoint(ThenBlock, ThenBlock->getFirstInsertionPt());
  GenThenFunc();
  Builder.SetInsertPoint(TailBlock, TailBlock->getFirstInsertionPt());
}
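// Example of a partial write guarded by the logic above (hypothetical names):
// a store whose access relation only covers { Stmt[i] : i > 0 } of a domain
// { Stmt[i] : 0 <= i < n } is not a tautology, so the store is emitted behind
// a guard shaped like
//   polly.stmt:                                       ; HeadBlock
//     %polly.MemRef.cond = icmp ne i64 %isInSet, 0
//     br i1 %polly.MemRef.cond, label %polly.stmt.MemRef.partial,
//           label %polly.stmt.cont
// and executes only for the covered statement instances.
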
static std::string getInstName(Value *Val) {
  std::string Result;
  raw_string_ostream OS(Result);
  Val->printAsOperand(OS, false);
  return OS.str();
}
void BlockGenerator::generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT &LTS,
                                            ValueMapT &BBMap) {
  if (!TraceStmts)
    return;

  Scop *S = Stmt.getParent();
  const char *BaseName = Stmt.getBaseName();

  isl::ast_build AstBuild = Stmt.getAstBuild();
  isl::set Domain = Stmt.getDomain();

  isl::union_map USchedule = AstBuild.get_schedule().intersect_domain(Domain);
  isl::map Schedule = isl::map::from_union_map(USchedule);
  assert(Schedule.is_empty().is_false() &&
         "The stmt must have a valid instance");

  isl::multi_pw_aff ScheduleMultiPwAff =
      isl::pw_multi_aff::from_map(Schedule.reverse());
  isl::ast_build RestrictedBuild = AstBuild.restrict(Schedule.range());

  // Sequence of strings to print.
  SmallVector<llvm::Value *, 8> Values;

  // Print the name of the statement.
  // TODO: Indent by the depth of the statement instance in the schedule tree.
  Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, BaseName));
  Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "("));

  // Add the coordinate of the statement instance.
  int DomDims = ScheduleMultiPwAff.dim(isl::dim::out);
  for (int i = 0; i < DomDims; i += 1) {
    if (i > 0)
      Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ","));

    isl::ast_expr IsInSet =
        RestrictedBuild.expr_from(ScheduleMultiPwAff.get_pw_aff(i));
    Values.push_back(ExprBuilder->create(IsInSet.copy()));
  }

  if (TraceScalars) {
    Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ")"));
    DenseSet<Instruction *> Encountered;

    // Add the value of each scalar (and the result of PHIs) used in the
    // statement.
    // TODO: Values used in region-statements.
    for (Instruction *Inst : Stmt.insts()) {
      if (!RuntimeDebugBuilder::isPrintable(Inst->getType()))
        continue;

      if (isa<PHINode>(Inst)) {
        Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, " "));
        Values.push_back(RuntimeDebugBuilder::getPrintableString(
            Builder, getInstName(Inst)));
        Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "="));
        Values.push_back(getNewValue(Stmt, Inst, BBMap, LTS,
                                     LI.getLoopFor(Inst->getParent())));
      } else {
        for (Value *Op : Inst->operand_values()) {
          // Do not print values that cannot change during the execution of the
          // SCoP.
          auto *OpInst = dyn_cast<Instruction>(Op);
          if (!OpInst)
            continue;
          if (!S->contains(OpInst))
            continue;

          // Print each scalar at most once, and exclude values defined in the
          // statement itself.
          if (Encountered.count(OpInst))
            continue;

          Values.push_back(
              RuntimeDebugBuilder::getPrintableString(Builder, " "));
          Values.push_back(RuntimeDebugBuilder::getPrintableString(
              Builder, getInstName(OpInst)));
          Values.push_back(
              RuntimeDebugBuilder::getPrintableString(Builder, "="));
          Values.push_back(getNewValue(Stmt, OpInst, BBMap, LTS,
                                       LI.getLoopFor(Inst->getParent())));
          Encountered.insert(OpInst);
        }
      }

      Encountered.insert(Inst);
    }

    Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "\n"));
  } else {
    Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ")\n"));
  }

  RuntimeDebugBuilder::createCPUPrinter(Builder, ArrayRef<Value *>(Values));
}
void BlockGenerator::generateScalarStores(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  Loop *L = LI.getLoopFor(Stmt.getBasicBlock());

  assert(Stmt.isBlockStmt() &&
         "Region statements need to use the generateScalarStores() function "
         "in the RegionGenerator");

  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    isl::set AccDom = MA->getAccessRelation().domain();
    std::string Subject = MA->getId().get_name();

    generateConditionalExecution(
        Stmt, AccDom, Subject.c_str(), [&, this, MA]() {
          Value *Val = MA->getAccessValue();
          if (MA->isAnyPHIKind()) {
            assert(MA->getIncoming().size() >= 1 &&
                   "Block statements have exactly one exiting block, or "
                   "multiple but with the same incoming block and value");
            assert(std::all_of(MA->getIncoming().begin(),
                               MA->getIncoming().end(),
                               [&](std::pair<BasicBlock *, Value *> p) -> bool {
                                 return p.first == Stmt.getBasicBlock();
                               }) &&
                   "Incoming block must be statement's block");
            Val = MA->getIncoming()[0].second;
          }
          auto Address = getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS,
                                            BBMap, NewAccesses);

          Val = getNewValue(Stmt, Val, BBMap, LTS, L);
          assert((!isa<Instruction>(Val) ||
                  DT.dominates(cast<Instruction>(Val)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          assert((!isa<Instruction>(Address) ||
                  DT.dominates(cast<Instruction>(Address)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");

          // The new Val might have a different type than the old Val due to
          // ScalarEvolution looking through bitcasts.
          if (Val->getType() != Address->getType()->getPointerElementType())
            Address = Builder.CreateBitOrPointerCast(
                Address, Val->getType()->getPointerTo());

          Builder.CreateStore(Val, Address);
        });
  }
}
void BlockGenerator::createScalarInitialization(Scop &S) {
  BasicBlock *ExitBB = S.getExit();
  BasicBlock *PreEntryBB = S.getEnteringBlock();

  Builder.SetInsertPoint(&*StartBlock->begin());

  for (auto &Array : S.arrays()) {
    if (Array->getNumberOfDimensions() != 0)
      continue;

    if (Array->isPHIKind()) {
      // For PHI nodes, the only values we need to store are the ones that
      // reach the PHI node from outside the region. In general there should
      // only be one such incoming edge and this edge should enter through
      // 'PreEntryBB'.
      auto PHI = cast<PHINode>(Array->getBasePtr());

      for (auto BI = PHI->block_begin(), BE = PHI->block_end(); BI != BE; BI++)
        if (!S.contains(*BI) && *BI != PreEntryBB)
          llvm_unreachable("Incoming edges from outside the scop should always "
                           "come from PreEntryBB");

      int Idx = PHI->getBasicBlockIndex(PreEntryBB);
      if (Idx < 0)
        continue;

      Value *ScalarValue = PHI->getIncomingValue(Idx);

      Builder.CreateStore(ScalarValue, getOrCreateAlloca(Array));
      continue;
    }

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (Inst && S.contains(Inst))
      continue;

    // PHI nodes that are not marked as such in their SAI object are either
    // exit PHI nodes we model as common scalars but without initialization, or
    // incoming phi nodes that need to be initialized. Check if the first is
    // the case for Inst and do not create and initialize memory if so.
    if (auto *PHI = dyn_cast_or_null<PHINode>(Inst))
      if (!S.hasSingleExitEdge() && PHI->getBasicBlockIndex(ExitBB) >= 0)
        continue;

    Builder.CreateStore(Array->getBasePtr(), getOrCreateAlloca(Array));
  }
}
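// Sketch of the PHI case handled above (hypothetical CFG): for
//   preentry -> entry:  %phi = phi [ %init, %preentry ], [ %next, %latch ]
// only %init, the value arriving from outside the region, is stored into the
// corresponding .phiops slot here; the in-region incoming values are written
// by the generated statement copies themselves.
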
void BlockGenerator::createScalarFinalization(Scop &S) {
  // The exit block of the __unoptimized__ region.
  BasicBlock *ExitBB = S.getExitingBlock();
  // The merge block __just after__ the region and the optimized region.
  BasicBlock *MergeBB = S.getExit();

  // The exit block of the __optimized__ region.
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());
  for (const auto &EscapeMapping : EscapeMap) {
    // Extract the escaping instruction and the escaping users as well as the
    // alloca the instruction was demoted to.
    Instruction *EscapeInst = EscapeMapping.first;
    const auto &EscapeMappingValue = EscapeMapping.second;
    const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second;
    Value *ScalarAddr = EscapeMappingValue.first;

    // Reload the demoted instruction in the optimized version of the SCoP.
    Value *EscapeInstReload =
        Builder.CreateLoad(ScalarAddr, EscapeInst->getName() + ".final_reload");
    EscapeInstReload =
        Builder.CreateBitOrPointerCast(EscapeInstReload, EscapeInst->getType());

    // Create the merge PHI that merges the optimized and unoptimized version.
    PHINode *MergePHI = PHINode::Create(EscapeInst->getType(), 2,
                                        EscapeInst->getName() + ".merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());

    // Add the respective values to the merge PHI.
    MergePHI->addIncoming(EscapeInstReload, OptExitBB);
    MergePHI->addIncoming(EscapeInst, ExitBB);

    // The information of scalar evolution about the escaping instruction needs
    // to be revoked so the new merged instruction will be used.
    if (SE.isSCEVable(EscapeInst->getType()))
      SE.forgetValue(EscapeInst);

    // Replace all uses of the demoted instruction with the merge PHI.
    for (Instruction *EUser : EscapeUsers)
      EUser->replaceUsesOfWith(EscapeInst, MergePHI);
  }
}
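// Resulting IR shape (hypothetical names): for a value %v defined in the SCoP
// and used after it, the merge block receives
//   %v.merge = phi double [ %v.final_reload, %polly.exiting ],
//                         [ %v, %scop.exiting ]
// and every escape user is rewritten to use %v.merge, so the code after the
// region is fed by whichever version (original or optimized) actually ran.
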
void BlockGenerator::findOutsideUsers(Scop &S) {
  for (auto &Array : S.arrays()) {
    if (Array->getNumberOfDimensions() != 0)
      continue;

    if (Array->isPHIKind())
      continue;

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (!Inst)
      continue;

    // Scop invariant hoisting moves some of the base pointers out of the scop.
    // We can ignore these, as the invariant load hoisting already registers
    // the relevant outside users.
    if (!S.contains(Inst))
      continue;

    handleOutsideUsers(S, Array);
  }
}
void BlockGenerator::createExitPHINodeMerges(Scop &S) {
  if (S.hasSingleExitEdge())
    return;

  auto *ExitBB = S.getExitingBlock();
  auto *MergeBB = S.getExit();
  auto *AfterMergeBB = MergeBB->getSingleSuccessor();
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());

  for (auto &SAI : S.arrays()) {
    auto *Val = SAI->getBasePtr();

    // Only Value-like scalars need a merge PHI. Exit block PHIs receive either
    // the original PHI's value or the reloaded incoming values from the
    // generated code. An llvm::Value, in contrast, is merged between the
    // original code's value and the generated one.
    if (!SAI->isExitPHIKind())
      continue;

    PHINode *PHI = dyn_cast<PHINode>(Val);
    if (!PHI)
      continue;

    if (PHI->getParent() != AfterMergeBB)
      continue;

    std::string Name = PHI->getName().str();
    Value *ScalarAddr = getOrCreateAlloca(SAI);
    Value *Reload = Builder.CreateLoad(ScalarAddr, Name + ".ph.final_reload");
    Reload = Builder.CreateBitOrPointerCast(Reload, PHI->getType());
    Value *OriginalValue = PHI->getIncomingValueForBlock(MergeBB);
    assert((!isa<Instruction>(OriginalValue) ||
            cast<Instruction>(OriginalValue)->getParent() != MergeBB) &&
           "Original value must not be one we just generated.");
    auto *MergePHI = PHINode::Create(PHI->getType(), 2, Name + ".ph.merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());
    MergePHI->addIncoming(Reload, OptExitBB);
    MergePHI->addIncoming(OriginalValue, ExitBB);
    int Idx = PHI->getBasicBlockIndex(MergeBB);
    PHI->setIncomingValue(Idx, MergePHI);
  }
}
void BlockGenerator::invalidateScalarEvolution(Scop &S) {
  for (auto &Stmt : S)
    if (Stmt.isCopyStmt())
      continue;
    else if (Stmt.isBlockStmt())
      for (auto &Inst : *Stmt.getBasicBlock())
        SE.forgetValue(&Inst);
    else if (Stmt.isRegionStmt())
      for (auto *BB : Stmt.getRegion()->blocks())
        for (auto &Inst : *BB)
          SE.forgetValue(&Inst);
    else
      llvm_unreachable("Unexpected statement type found");

  // Invalidate SCEV of loops surrounding the EscapeUsers.
  for (const auto &EscapeMapping : EscapeMap) {
    const EscapeUserVectorTy &EscapeUsers = EscapeMapping.second.second;
    for (Instruction *EUser : EscapeUsers) {
      if (Loop *L = LI.getLoopFor(EUser->getParent()))
        while (L) {
          SE.forgetLoop(L);
          L = L->getParentLoop();
        }
    }
  }
}

void BlockGenerator::finalizeSCoP(Scop &S) {
  findOutsideUsers(S);
  createScalarInitialization(S);
  createExitPHINodeMerges(S);
  createScalarFinalization(S);
  invalidateScalarEvolution(S);
}
VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           isl_map *Schedule)
    : BlockGenerator(BlockGen), VLTS(VLTS), Schedule(Schedule) {
  assert(Schedule && "No statement domain provided");
}
Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(FixedVectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  auto *PointerTy = cast<PointerType>(Val->getType());
  unsigned AddrSpace = PointerTy->getAddressSpace();

  Type *ScalarType = PointerTy->getElementType();
  auto *FVTy = FixedVectorType::get(ScalarType, Width);

  return PointerType::get(FVTy, AddrSpace);
}
Value *VectorBlockGenerator::generateStrideOneLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses, bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  auto *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[Offset],
                                               VLTS[Offset], NewAccesses);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(Align(8));

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}
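// Illustration of the NegativeStride path (hypothetical, width 4): for an
// access like A[-i], the lanes must hold A[-i], A[-i-1], A[-i-2], A[-i-3].
// The code above therefore loads the contiguous block starting at the address
// of the last lane (Offset = VectorWidth - 1) and reverses it:
//   %vec = load <4 x double>, <4 x double>* %vector_ptr
//   %rev = shufflevector <4 x double> %vec, <4 x double> %vec,
//                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
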
Value *VectorBlockGenerator::generateStrideZeroLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  auto *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, VLTS[0], NewAccesses);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(Align(8));

  Constant *SplatVector = Constant::getNullValue(
      FixedVectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}

Value *VectorBlockGenerator::generateUnknownStrideLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  int VectorWidth = getVectorWidth();
  auto *Pointer = Load->getPointerOperand();
  auto *FVTy = FixedVectorType::get(
      dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(FVTy);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[i],
                                                 VLTS[i], NewAccesses);
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}
void VectorBlockGenerator::generateLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load)) {
    VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad,
                                                Load->getName() + "_p");
    return;
  }

  if (!VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateArrayLoad(Stmt, Load, ScalarMaps[i], VLTS[i], NewAccesses);
    return;
  }

  const MemoryAccess &Access = Stmt.getArrayAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  Value *NewLoad;
  if (Access.isStrideZero(isl::manage_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0], NewAccesses);
  else if (Access.isStrideOne(isl::manage_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses);
  else if (Access.isStrideX(isl::manage_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses, true);
  else
    NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps, NewAccesses);

  VectorMap[Load] = NewLoad;
}
void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
                                     ScalarMaps, getLoopForStmt(Stmt));

  assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  auto *DestType = FixedVectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForStmt(Stmt);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}
void VectorBlockGenerator::copyStore(
    ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &Access = Stmt.getArrayAccessFor(Store);

  auto *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForStmt(Stmt));

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Store, VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl::manage_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
    Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[0],
                                                 VLTS[0], NewAccesses);

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      Store->setAlignment(Align(8));
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[i],
                                                   VLTS[i], NewAccesses);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}
bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Value *Operand : Inst->operands())
    if (VectorMap.count(Operand))
      return true;
  return false;
}

bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}
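// Illustration (hypothetical, width 4): if %x was computed as a vector value,
// the loop above fills ScalarMaps[0..3][%x] with per-lane extracts such as
//   %x.lane0 = extractelement <4 x double> %x_p_vec, i32 0
//   ...
//   %x.lane3 = extractelement <4 x double> %x_p_vec, i32 3
// so that a later scalarized copy of an instruction using %x can pick the
// value for its lane from the corresponding map.
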
void VectorBlockGenerator::copyInstScalarized(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
                                    VLTS[VectorLane], NewAccesses);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as vector value.
  auto *FVTy = FixedVectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(FVTy);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return VLTS.size(); }
void VectorBlockGenerator::copyInstruction(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Stmt, Load, VectorMap, ScalarMaps, NewAccesses);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (auto *Store = dyn_cast<StoreInst>(Inst)) {
      // Identified as redundant by -polly-simplify.
      if (!Stmt.getArrayAccessOrNULLFor(Store))
        return;

      copyStore(Stmt, Store, VectorMap, ScalarMaps, NewAccesses);
      return;
    }

    if (auto *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
      return;
    }

    if (auto *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: We generate scalar instructions if we do not know how to
    // generate vector code.
  }

  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses);
}

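// Reload each scalar (non-array) read access and make it available to all
// vector lanes: the scalar is loaded as a single-element vector and splatted
// via a shufflevector with an all-zero mask. Roughly, assuming a double input
// and vector width 4 (IR names illustrative):
//
//   %x.s2a_p_vec_p = bitcast double* %x.s2a to <1 x double>*
//   %x.s2a.reload  = load <1 x double>, <1 x double>* %x.s2a_p_vec_p
//   %x.s2a_p_splat = shufflevector <1 x double> %x.s2a.reload,
//                                  <1 x double> %x.s2a.reload,
//                                  <4 x i32> zeroinitializer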
void VectorBlockGenerator::generateScalarVectorLoads(
    ScopStmt &Stmt, ValueMapT &VectorBlockMap) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isWrite())
      continue;

    auto *Address = getOrCreateAlloca(*MA);
    Type *VectorPtrType = getVectorPtrTy(Address, 1);
    Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType,
                                             Address->getName() + "_p_vec_p");
    auto *Val = Builder.CreateLoad(VectorPtr, Address->getName() + ".reload");
    Constant *SplatVector = Constant::getNullValue(
        FixedVectorType::get(Builder.getInt32Ty(), getVectorWidth()));

    Value *VectorVal = Builder.CreateShuffleVector(
        Val, Val, SplatVector, Address->getName() + "_p_splat");
    VectorBlockMap[MA->getAccessValue()] = VectorVal;
  }
}

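// In a vectorized loop no scalar (non-array) writes are expected; reaching
// one here indicates a bug, hence the unreachable.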
void VectorBlockGenerator::verifyNoScalarStores(ScopStmt &Stmt) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isRead())
      continue;

    llvm_unreachable("Scalar stores not expected in vector loop");
  }
}

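// Generate vectorized code for one block statement: split off a fresh basic
// block, reload scalar inputs as splat vectors, and copy every instruction
// with either vector or scalarized semantics.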
void VectorBlockGenerator::copyStmt(
    ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "TODO: Only block statements can be copied by the vector block "
         "generator");

  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(&CopyBB->front());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vectorMap (if it calculates <vectorwidth> values at a time)
  // or, if the values are calculated using scalar operations, once in every
  // dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  generateScalarVectorLoads(Stmt, VectorBlockMap);

  for (Instruction *Inst : Stmt.getInstructions())
    copyInstruction(Stmt, Inst, VectorBlockMap, ScalarBlockMap, NewAccesses);

  verifyNoScalarStores(Stmt);
}

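// Update the dominator tree for a newly copied block: its immediate dominator
// becomes the end-block copy of the original block's immediate dominator, if
// that copy exists already. Returns the start-block copy of the original
// immediate dominator, or nullptr if it has not been copied yet.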
BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB,
                                             BasicBlock *BBCopy) {
  BasicBlock *BBIDom = DT.getNode(BB)->getIDom()->getBlock();
  BasicBlock *BBCopyIDom = EndBlockMap.lookup(BBIDom);

  if (BBCopyIDom)
    DT.changeImmediateDominator(BBCopy, BBCopyIDom);

  return StartBlockMap.lookup(BBIDom);
}

// This is to determine whether an llvm::Value (defined in @p BB) is usable
// when leaving a subregion. The straightforward
// DT.dominates(BB, R->getExitBlock()) does not work in cases where the exit
// block has edges from outside the region. In that case the llvm::Value would
// never be usable in the exit block. The RegionGenerator however creates a
// new exit block ('ExitBBCopy') for the subregion's exiting edges only. We
// need to determine whether an llvm::Value is usable in there. We do this by
// checking whether it dominates all exiting blocks individually.
static bool isDominatingSubregionExit(const DominatorTree &DT, Region *R,
                                      BasicBlock *BB) {
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    if (!DT.dominates(BB, ExitingBB))
      return false;
  }

  return true;
}

// Find the direct dominator of the subregion's exit block if the subregion
// was simplified.
static BasicBlock *findExitDominator(DominatorTree &DT, Region *R) {
  BasicBlock *Common = nullptr;

  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    // First exiting edge.
    if (!Common) {
      Common = ExitingBB;
      continue;
    }

    Common = DT.findNearestCommonDominator(Common, ExitingBB);
  }

  assert(Common && R->contains(Common));
  return Common;
}

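// Copy a non-affine subregion statement: visit the blocks of the region in
// breadth-first order, copy each one with the BlockGenerator while repairing
// the dominator tree on the fly, then reconstruct the region's control flow
// and a dedicated exit block by hand.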
void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                               isl_id_to_ast_expr *IdToAstExp) {
  assert(Stmt.isRegionStmt() &&
         "Only region statements can be copied by the region generator");

  // Forget all old mappings.
  StartBlockMap.clear();
  EndBlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();

  // Collection of all values related to this subregion.
  ValueMapT ValueMap;

  // The region represented by the statement.
  Region *R = Stmt.getRegion();

  // Create a dedicated entry for the region where we can reload all demoted
  // inputs.
  BasicBlock *EntryBB = R->getEntry();
  BasicBlock *EntryBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                       &*Builder.GetInsertPoint(), &DT, &LI);
  EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry");
  Builder.SetInsertPoint(&EntryBBCopy->front());

  ValueMapT &EntryBBMap = RegionMaps[EntryBBCopy];
  generateScalarLoads(Stmt, LTS, EntryBBMap, IdToAstExp);
  generateBeginStmtTrace(Stmt, LTS, EntryBBMap);

  for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI)
    if (!R->contains(*PI)) {
      StartBlockMap[*PI] = EntryBBCopy;
      EndBlockMap[*PI] = EntryBBCopy;
    }

  // Iterate over all blocks in the region in a breadth-first search.
  std::deque<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 8> SeenBlocks;
  Blocks.push_back(EntryBB);
  SeenBlocks.insert(EntryBB);

  while (!Blocks.empty()) {
    BasicBlock *BB = Blocks.front();
    Blocks.pop_front();

    // First split the block and update dominance information.
    BasicBlock *BBCopy = splitBB(BB);
    BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy);

    // Get the mapping for this block and initialize it with either the scalar
    // loads from the generated entering block (which dominates all blocks of
    // this subregion) or the maps of the immediate dominator, if part of the
    // subregion. The latter necessarily includes the former.
    ValueMapT *InitBBMap;
    if (BBCopyIDom) {
      assert(RegionMaps.count(BBCopyIDom));
      InitBBMap = &RegionMaps[BBCopyIDom];
    } else
      InitBBMap = &EntryBBMap;
    auto Inserted = RegionMaps.insert(std::make_pair(BBCopy, *InitBBMap));
    ValueMapT &RegionMap = Inserted.first->second;

    // Copy the block with the BlockGenerator.
    Builder.SetInsertPoint(&BBCopy->front());
    copyBB(Stmt, BB, BBCopy, RegionMap, LTS, IdToAstExp);

    // In order to remap PHI nodes we also store basic block mappings.
    StartBlockMap[BB] = BBCopy;
    EndBlockMap[BB] = Builder.GetInsertBlock();

    // Add values to incomplete PHI nodes waiting for this block to be copied.
    for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB])
      addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB, LTS);
    IncompletePHINodeMap[BB].clear();

    // And continue with new successors inside the region.
    for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++)
      if (R->contains(*SI) && SeenBlocks.insert(*SI))
        Blocks.push_back(*SI);

    // Remember value in case it is visible after this subregion.
    if (isDominatingSubregionExit(DT, R, BB))
      ValueMap.insert(RegionMap.begin(), RegionMap.end());
  }

  // Now create a new dedicated region exit block and add it to the region map.
  BasicBlock *ExitBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                      &*Builder.GetInsertPoint(), &DT, &LI);
  ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit");
  StartBlockMap[R->getExit()] = ExitBBCopy;
  EndBlockMap[R->getExit()] = ExitBBCopy;

  BasicBlock *ExitDomBBCopy = EndBlockMap.lookup(findExitDominator(DT, R));
  assert(ExitDomBBCopy &&
         "Common exit dominator must be within region; at least the entry node "
         "must match");
  DT.changeImmediateDominator(ExitBBCopy, ExitDomBBCopy);

  // As the block generator does not handle control flow, we need to add the
  // region control flow by hand after all blocks have been copied.
  for (BasicBlock *BB : SeenBlocks) {
    BasicBlock *BBCopyStart = StartBlockMap[BB];
    BasicBlock *BBCopyEnd = EndBlockMap[BB];
    Instruction *TI = BB->getTerminator();
    if (isa<UnreachableInst>(TI)) {
      while (!BBCopyEnd->empty())
        BBCopyEnd->begin()->eraseFromParent();
      new UnreachableInst(BBCopyEnd->getContext(), BBCopyEnd);
      continue;
    }

    Instruction *BICopy = BBCopyEnd->getTerminator();

    ValueMapT &RegionMap = RegionMaps[BBCopyStart];
    RegionMap.insert(StartBlockMap.begin(), StartBlockMap.end());

    Builder.SetInsertPoint(BICopy);
    copyInstScalar(Stmt, TI, RegionMap, LTS);
    BICopy->eraseFromParent();
  }

  // Add counting PHI nodes to all loops in the region that can be used as
  // replacement for SCEVs referring to the old loop.
  for (BasicBlock *BB : SeenBlocks) {
    Loop *L = LI.getLoopFor(BB);
    if (L == nullptr || L->getHeader() != BB || !R->contains(L))
      continue;

    BasicBlock *BBCopy = StartBlockMap[BB];
    Value *NullVal = Builder.getInt32(0);
    PHINode *LoopPHI =
        PHINode::Create(Builder.getInt32Ty(), 2, "polly.subregion.iv");
    Instruction *LoopPHIInc = BinaryOperator::CreateAdd(
        LoopPHI, Builder.getInt32(1), "polly.subregion.iv.inc");
    LoopPHI->insertBefore(&BBCopy->front());
    LoopPHIInc->insertBefore(BBCopy->getTerminator());

    for (auto *PredBB : make_range(pred_begin(BB), pred_end(BB))) {
      if (!R->contains(PredBB))
        continue;
      if (L->contains(PredBB))
        LoopPHI->addIncoming(LoopPHIInc, EndBlockMap[PredBB]);
      else
        LoopPHI->addIncoming(NullVal, EndBlockMap[PredBB]);
    }

    for (auto *PredBBCopy : make_range(pred_begin(BBCopy), pred_end(BBCopy)))
      if (LoopPHI->getBasicBlockIndex(PredBBCopy) < 0)
        LoopPHI->addIncoming(NullVal, PredBBCopy);

    LTS[L] = SE.getUnknown(LoopPHI);
  }

  // Continue generating code in the exit block.
  Builder.SetInsertPoint(&*ExitBBCopy->getFirstInsertionPt());

  // Write values visible to other statements.
  generateScalarStores(Stmt, LTS, ValueMap, IdToAstExp);
  StartBlockMap.clear();
  EndBlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();
}

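// Build a PHI in the (copied) subregion exit that merges the values a
// PHI-kind write receives from each exiting block of the subregion.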
PHINode *RegionGenerator::buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS,
                                       ValueMapT &BBMap, Loop *L) {
  ScopStmt *Stmt = MA->getStatement();
  Region *SubR = Stmt->getRegion();
  auto Incoming = MA->getIncoming();

  PollyIRBuilder::InsertPointGuard IPGuard(Builder);
  PHINode *OrigPHI = cast<PHINode>(MA->getAccessInstruction());
  BasicBlock *NewSubregionExit = Builder.GetInsertBlock();

  // This can happen if the subregion is simplified after the ScopStmts
  // have been created; simplification happens as part of CodeGeneration.
  if (OrigPHI->getParent() != SubR->getExit()) {
    BasicBlock *FormerExit = SubR->getExitingBlock();
    if (FormerExit)
      NewSubregionExit = StartBlockMap.lookup(FormerExit);
  }

  PHINode *NewPHI = PHINode::Create(OrigPHI->getType(), Incoming.size(),
                                    "polly." + OrigPHI->getName(),
                                    NewSubregionExit->getFirstNonPHI());

  // Add the incoming values to the PHI.
  for (auto &Pair : Incoming) {
    BasicBlock *OrigIncomingBlock = Pair.first;
    BasicBlock *NewIncomingBlockStart = StartBlockMap.lookup(OrigIncomingBlock);
    BasicBlock *NewIncomingBlockEnd = EndBlockMap.lookup(OrigIncomingBlock);
    Builder.SetInsertPoint(NewIncomingBlockEnd->getTerminator());
    assert(RegionMaps.count(NewIncomingBlockStart));
    assert(RegionMaps.count(NewIncomingBlockEnd));
    ValueMapT *LocalBBMap = &RegionMaps[NewIncomingBlockStart];

    Value *OrigIncomingValue = Pair.second;
    Value *NewIncomingValue =
        getNewValue(*Stmt, OrigIncomingValue, *LocalBBMap, LTS, L);
    NewPHI->addIncoming(NewIncomingValue, NewIncomingBlockEnd);
  }

  return NewPHI;
}

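// Compute the value a scalar write should store when leaving the subregion:
// PHI-kind accesses may need a merge PHI in the exit block, while plain value
// accesses just forward the already copied value.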
Value *RegionGenerator::getExitScalar(MemoryAccess *MA, LoopToScevMapT &LTS,
                                      ValueMapT &BBMap) {
  ScopStmt *Stmt = MA->getStatement();

  // TODO: Add some test cases that ensure this is really the right choice.
  Loop *L = LI.getLoopFor(Stmt->getRegion()->getExit());

  if (MA->isAnyPHIKind()) {
    auto Incoming = MA->getIncoming();
    assert(!Incoming.empty() &&
           "PHI WRITEs must originate from at least one incoming block");

    // If there is only one incoming value, we do not need to create a PHI.
    if (Incoming.size() == 1) {
      Value *OldVal = Incoming[0].second;
      return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
    }

    return buildExitPHI(MA, LTS, BBMap, L);
  }

  // MemoryKind::Value accesses leaving the subregion must dominate the exit
  // block; just pass the copied value.
  Value *OldVal = MA->getAccessValue();
  return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
}

void RegionGenerator::generateScalarStores(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.getRegion() &&
         "Block statements need to use the generateScalarStores() "
         "function in the BlockGenerator");

  // Get the exit scalar values before generating the writes.
  // This is necessary because RegionGenerator::getExitScalar may insert
  // PHINodes that depend on the region's exiting blocks. But
  // BlockGenerator::generateConditionalExecution may insert a new basic block
  // such that the current basic block is not a direct successor of the exiting
  // blocks anymore. Hence, build the PHINodes while the current block is still
  // the direct successor.
  SmallDenseMap<MemoryAccess *, Value *> NewExitScalars;
  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    Value *NewVal = getExitScalar(MA, LTS, BBMap);
    NewExitScalars[MA] = NewVal;
  }

  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    isl::set AccDom = MA->getAccessRelation().domain();
    std::string Subject = MA->getId().get_name();
    generateConditionalExecution(
        Stmt, AccDom, Subject.c_str(), [&, this, MA]() {
          Value *NewVal = NewExitScalars.lookup(MA);
          assert(NewVal && "The exit scalar must be determined before");
          Value *Address = getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS,
                                              BBMap, NewAccesses);
          assert((!isa<Instruction>(NewVal) ||
                  DT.dominates(cast<Instruction>(NewVal)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          assert((!isa<Instruction>(Address) ||
                  DT.dominates(cast<Instruction>(Address)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          Builder.CreateStore(NewVal, Address);
        });
  }
}

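// Add the incoming value from \p IncomingBB to \p PHICopy, the copy of the
// original \p PHI. If the incoming block has not been generated yet, the
// operation is queued in IncompletePHINodeMap and performed once the block
// has been copied.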
void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, PHINode *PHI,
                                      PHINode *PHICopy, BasicBlock *IncomingBB,
                                      LoopToScevMapT &LTS) {
  // If the incoming block was not yet copied, mark this PHI as incomplete.
  // Once the block is copied, the incoming value will be added.
  BasicBlock *BBCopyStart = StartBlockMap[IncomingBB];
  BasicBlock *BBCopyEnd = EndBlockMap[IncomingBB];
  if (!BBCopyStart) {
    assert(!BBCopyEnd);
    assert(Stmt.represents(IncomingBB) &&
           "Bad incoming block for PHI in non-affine region");
    IncompletePHINodeMap[IncomingBB].push_back(std::make_pair(PHI, PHICopy));
    return;
  }

  assert(RegionMaps.count(BBCopyStart) &&
         "Incoming PHI block did not have a BBMap");
  ValueMapT &BBCopyMap = RegionMaps[BBCopyStart];

  Value *OpCopy = nullptr;

  if (Stmt.represents(IncomingBB)) {
    Value *Op = PHI->getIncomingValueForBlock(IncomingBB);

    // If the current insert block is different from the PHI's incoming block,
    // change it; otherwise do not.
    auto IP = Builder.GetInsertPoint();
    if (IP->getParent() != BBCopyEnd)
      Builder.SetInsertPoint(BBCopyEnd->getTerminator());
    OpCopy = getNewValue(Stmt, Op, BBCopyMap, LTS, getLoopForStmt(Stmt));
    if (IP->getParent() != BBCopyEnd)
      Builder.SetInsertPoint(&*IP);
  } else {
    // All edges from outside the non-affine region become a single edge
    // in the new copy of the non-affine region. Make sure to only add the
    // corresponding edge the first time we encounter a basic block from
    // outside the non-affine region.
    if (PHICopy->getBasicBlockIndex(BBCopyEnd) >= 0)
      return;

    // Get the reloaded value.
    OpCopy = getNewValue(Stmt, PHI, BBCopyMap, LTS, getLoopForStmt(Stmt));
  }

  assert(OpCopy && "Incoming PHI value was not copied properly");
  PHICopy->addIncoming(OpCopy, BBCopyEnd);
}

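// Create the copy of a PHI node, register it in \p BBMap, and then fill in
// its incoming values, possibly deferring them via addOperandToPHI when a
// predecessor has not been copied yet.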
void RegionGenerator::copyPHIInstruction(ScopStmt &Stmt, PHINode *PHI,
                                         ValueMapT &BBMap,
                                         LoopToScevMapT &LTS) {
  unsigned NumIncoming = PHI->getNumIncomingValues();
  PHINode *PHICopy =
      Builder.CreatePHI(PHI->getType(), NumIncoming, "polly." + PHI->getName());
  PHICopy->moveBefore(PHICopy->getParent()->getFirstNonPHI());
  BBMap[PHI] = PHICopy;

  for (BasicBlock *IncomingBB : PHI->blocks())
    addOperandToPHI(Stmt, PHI, PHICopy, IncomingBB, LTS);
}