//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/IslExprBuilder.h"
#include "polly/CodeGen/RuntimeDebugBuilder.h"
#include "polly/Options.h"
#include "polly/ScopInfo.h"
#include "polly/Support/ISLTools.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "isl/ast.h"
#include <deque>

using namespace llvm;
using namespace polly;

static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assumed aligned memory accesses."),
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));

bool PollyDebugPrinting;
static cl::opt<bool, true> DebugPrintingX(
    "polly-codegen-add-debug-printing",
    cl::desc("Add printf calls that show the values loaded/stored."),
    cl::location(PollyDebugPrinting), cl::Hidden, cl::init(false),
    cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> TraceStmts(
    "polly-codegen-trace-stmts",
    cl::desc("Add printf calls that print the statement being executed"),
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> TraceScalars(
    "polly-codegen-trace-scalars",
    cl::desc("Add printf calls that print the values of all scalar values "
             "used in a statement. Requires -polly-codegen-trace-stmts."),
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
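
// Illustrative usage of the options above (a sketch, not part of the original
// source): they are registered with LLVM's cl::opt machinery, so assuming an
// opt binary with Polly enabled, an invocation along the lines of
//
//   opt -polly-codegen-trace-stmts -polly-codegen-trace-scalars ... input.ll
//
// instruments the generated code to print each executed statement instance
// together with the scalar values it uses.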
BlockGenerator::BlockGenerator(
    PollyIRBuilder &B, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT,
    AllocaMapTy &ScalarMap, EscapeUsersAllocaMapTy &EscapeMap,
    ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder, BasicBlock *StartBlock)
    : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT),
      EntryBB(nullptr), ScalarMap(ScalarMap), EscapeMap(EscapeMap),
      GlobalMap(GlobalMap), StartBlock(StartBlock) {}

Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
                                             ValueMapT &BBMap,
                                             LoopToScevMapT &LTS,
                                             Loop *L) const {
  if (!SE.isSCEVable(Old->getType()))
    return nullptr;

  const SCEV *Scev = SE.getSCEVAtScope(Old, L);
  if (!Scev)
    return nullptr;

  if (isa<SCEVCouldNotCompute>(Scev))
    return nullptr;

  const SCEV *NewScev = SCEVLoopAddRecRewriter::rewrite(Scev, LTS, SE);
  ValueMapT VTV;
  VTV.insert(BBMap.begin(), BBMap.end());
  VTV.insert(GlobalMap.begin(), GlobalMap.end());

  Scop &S = *Stmt.getParent();
  const DataLayout &DL = S.getFunction().getParent()->getDataLayout();
  auto IP = Builder.GetInsertPoint();

  assert(IP != Builder.GetInsertBlock()->end() &&
         "Only instructions can be insert points for SCEVExpander");
  Value *Expanded =
      expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), &*IP, &VTV,
                    StartBlock->getSinglePredecessor());

  BBMap[Old] = Expanded;
  return Expanded;
}
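
// An illustrative sketch of the rewriting above (the concrete shapes are
// assumed, not taken from a test case): if the original loop L has the
// canonical induction SCEV {0,+,1}<L> and the generated loop's counter is
// %polly.indvar, then LTS maps L to the SCEV of %polly.indvar.
// SCEVLoopAddRecRewriter re-expresses an add-recurrence such as {%base,+,4}<L>
// in terms of that mapping, and expandCodeFor then emits the corresponding
// instructions at the current insert point.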
Value *BlockGenerator::getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
                                   LoopToScevMapT &LTS, Loop *L) const {
  auto lookupGlobally = [this](Value *Old) -> Value * {
    Value *New = GlobalMap.lookup(Old);
    if (!New)
      return nullptr;

    // Required by:
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll
    // * Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
    // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
    // * Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
    // GlobalMap should be a mapping from (value in original SCoP) to (copied
    // value in generated SCoP), without intermediate mappings, which might
    // otherwise require transitive lookups as well.
    if (Value *NewRemapped = GlobalMap.lookup(New))
      New = NewRemapped;

    // No test case for this code.
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  };

  Value *New = nullptr;
  auto VUse = VirtualUse::create(&Stmt, L, Old, true);
  switch (VUse.getKind()) {
  case VirtualUse::Block:
    // BasicBlocks are constants, but the BlockGenerator copies them.
    New = BBMap.lookup(Old);
    break;

  case VirtualUse::Constant:
    // Used by:
    // * Isl/CodeGen/OpenMP/reference-argument-from-non-affine-region.ll
    // Constants should not be redefined. In this case, the GlobalMap just
    // contains a mapping to the same constant, which is unnecessary, but
    // harmless.
    if ((New = lookupGlobally(Old)))
      break;

    assert(!BBMap.count(Old));
    New = Old;
    break;

  case VirtualUse::ReadOnly:
    assert(!GlobalMap.count(Old));

    // Required for:
    // * Isl/CodeGen/MemAccess/create_arrays.ll
    // * Isl/CodeGen/read-only-scalars.ll
    // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
    // For some reason these reload a read-only value. The reloaded value ends
    // up in BBMap, but its value should be identical.
    //
    // Required for:
    // * Isl/CodeGen/OpenMP/single_loop_with_param.ll
    // The parallel subfunctions need to reference the read-only value from
    // the parent function; this is done by reloading them locally.
    if ((New = BBMap.lookup(Old)))
      break;

    New = Old;
    break;

  case VirtualUse::Synthesizable:
    // Used by:
    // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
    // * Isl/CodeGen/OpenMP/recomputed-srem.ll
    // * Isl/CodeGen/OpenMP/reference-other-bb.ll
    // * Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll
    // For some reason synthesizable values end up in GlobalMap. Their values
    // are the same as trySynthesizeNewValue would return. The legacy
    // implementation prioritized GlobalMap, so this is what we do here as
    // well. Ideally, synthesizable values should not end up in GlobalMap.
    if ((New = lookupGlobally(Old)))
      break;

    // Required for:
    // * Isl/CodeGen/RuntimeDebugBuilder/combine_different_values.ll
    // * Isl/CodeGen/getNumberOfIterations.ll
    // * Isl/CodeGen/non_affine_float_compare.ll
    // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
    // Ideally, synthesizable values are synthesized by trySynthesizeNewValue,
    // not precomputed (SCEVExpander has its own caching mechanism).
    // These tests fail without this, but I think trySynthesizeNewValue would
    // just re-synthesize the same instructions.
    if ((New = BBMap.lookup(Old)))
      break;

    New = trySynthesizeNewValue(Stmt, Old, BBMap, LTS, L);
    break;

  case VirtualUse::Hoisted:
    // TODO: Hoisted invariant loads should be found in GlobalMap only, but not
    // redefined locally (which will be ignored anyway). That is, the following
    // assertion should apply: assert(!BBMap.count(Old))
    New = lookupGlobally(Old);
    break;

  case VirtualUse::Intra:
  case VirtualUse::Inter:
    assert(!GlobalMap.count(Old) &&
           "Intra and inter-stmt values are never global");
    New = BBMap.lookup(Old);
    break;
  }
  assert(New && "Unexpected scalar dependence in region!");
  return New;
}
void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
                                    ValueMapT &BBMap, LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to
  // copy them correctly. At the current state, they just crash the code
  // generation as the meta-data operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand =
        getNewValue(Stmt, OldOperand, BBMap, LTS, getLoopForStmt(Stmt));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      NewInst->deleteValue();
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  // When copying the instruction onto the Module meant for the GPU,
  // debug metadata attached to an instruction causes all related
  // metadata to be pulled into the Module. This includes the DICompileUnit,
  // which will not be listed in llvm.dbg.cu of the Module since the Module
  // doesn't contain one. This fails the verification of the Module and the
  // subsequent generation of the ASM string.
  if (NewInst->getModule() != Inst->getModule())
    NewInst->setDebugLoc(llvm::DebugLoc());

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

Value *
BlockGenerator::generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &MA = Stmt.getArrayAccessFor(Inst);
  return generateLocationAccessed(
      Stmt, getLoopForStmt(Stmt),
      Inst.isNull() ? nullptr : Inst.getPointerOperand(), BBMap, LTS,
      NewAccesses, MA.getId().release(), MA.getAccessValue()->getType());
}

Value *BlockGenerator::generateLocationAccessed(
    ScopStmt &Stmt, Loop *L, Value *Pointer, ValueMapT &BBMap,
    LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses, __isl_take isl_id *Id,
    Type *ExpectedType) {
  isl_ast_expr *AccessExpr = isl_id_to_ast_expr_get(NewAccesses, Id);

  if (AccessExpr) {
    AccessExpr = isl_ast_expr_address_of(AccessExpr);
    auto Address = ExprBuilder->create(AccessExpr);

    // Cast the address of this memory access to a pointer type that has the
    // same element type as the original access, but uses the address space of
    // the newly generated pointer.
    auto OldPtrTy = ExpectedType->getPointerTo();
    auto NewPtrTy = Address->getType();
    OldPtrTy = PointerType::getWithSamePointeeType(
        OldPtrTy, NewPtrTy->getPointerAddressSpace());

    if (OldPtrTy != NewPtrTy)
      Address = Builder.CreateBitOrPointerCast(Address, OldPtrTy);
    return Address;
  }
  assert(
      Pointer &&
      "If expression was not generated, must use the original pointer value");
  return getNewValue(Stmt, Pointer, BBMap, LTS, L);
}

Value *
BlockGenerator::getImplicitAddress(MemoryAccess &Access, Loop *L,
                                   LoopToScevMapT &LTS, ValueMapT &BBMap,
                                   __isl_keep isl_id_to_ast_expr *NewAccesses) {
  if (Access.isLatestArrayKind())
    return generateLocationAccessed(*Access.getStatement(), L, nullptr, BBMap,
                                    LTS, NewAccesses, Access.getId().release(),
                                    Access.getAccessValue()->getType());

  return getOrCreateAlloca(Access);
}

Loop *BlockGenerator::getLoopForStmt(const ScopStmt &Stmt) const {
  auto *StmtBB = Stmt.getEntryBlock();
  return LI.getLoopFor(StmtBB);
}

Value *BlockGenerator::generateArrayLoad(ScopStmt &Stmt, LoadInst *Load,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load))
    return PreloadLoad;

  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, LTS, NewAccesses);
  Value *ScalarLoad =
      Builder.CreateAlignedLoad(Load->getType(), NewPointer, Load->getAlign(),
                                Load->getName() + "_p_scalar_");

  if (PollyDebugPrinting)
    RuntimeDebugBuilder::createCPUPrinter(Builder, "Load from ", NewPointer,
                                          ": ", ScalarLoad, "\n");

  return ScalarLoad;
}

void BlockGenerator::generateArrayStore(ScopStmt &Stmt, StoreInst *Store,
                                        ValueMapT &BBMap, LoopToScevMapT &LTS,
                                        isl_id_to_ast_expr *NewAccesses) {
  MemoryAccess &MA = Stmt.getArrayAccessFor(Store);
  isl::set AccDom = MA.getAccessRelation().domain();
  std::string Subject = MA.getId().get_name();

  generateConditionalExecution(Stmt, AccDom, Subject.c_str(), [&, this]() {
    Value *NewPointer =
        generateLocationAccessed(Stmt, Store, BBMap, LTS, NewAccesses);
    Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap,
                                      LTS, getLoopForStmt(Stmt));

    if (PollyDebugPrinting)
      RuntimeDebugBuilder::createCPUPrinter(Builder, "Store to ", NewPointer,
                                            ": ", ValueOperand, "\n");

    Builder.CreateAlignedStore(ValueOperand, NewPointer, Store->getAlign());
  });
}
bool BlockGenerator::canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst) {
  Loop *L = getLoopForStmt(Stmt);
  return (Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) &&
         canSynthesize(Inst, *Stmt.getParent(), &SE, L);
}

void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst,
                                     ValueMapT &BBMap, LoopToScevMapT &LTS,
                                     isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Synthesizable statements will be generated on-demand.
  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateArrayLoad(Stmt, Load, BBMap, LTS, NewAccesses);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Load] = NewLoad;
    return;
  }

  if (auto *Store = dyn_cast<StoreInst>(Inst)) {
    // Identified as redundant by -polly-simplify.
    if (!Stmt.getArrayAccessOrNULLFor(Store))
      return;

    generateArrayStore(Stmt, Store, BBMap, LTS, NewAccesses);
    return;
  }

  if (auto *PHI = dyn_cast<PHINode>(Inst)) {
    copyPHIInstruction(Stmt, PHI, BBMap, LTS);
    return;
  }

  // Skip some special intrinsics for which we do not adjust the semantics to
  // the new schedule. All others are handled like every other instruction.
  if (isIgnoredIntrinsic(Inst))
    return;

  copyInstScalar(Stmt, Inst, BBMap, LTS);
}

void BlockGenerator::removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap) {
  auto NewBB = Builder.GetInsertBlock();
  for (auto I = NewBB->rbegin(); I != NewBB->rend(); I++) {
    Instruction *NewInst = &*I;

    if (!isInstructionTriviallyDead(NewInst))
      continue;

    for (auto Pair : BBMap)
      if (Pair.second == NewInst) {
        BBMap.erase(Pair.first);
      }

    NewInst->eraseFromParent();
    I = NewBB->rbegin();
  }
}

void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                              isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "Only block statements can be copied by the block generator");

  ValueMapT BBMap;

  BasicBlock *BB = Stmt.getBasicBlock();
  copyBB(Stmt, BB, BBMap, LTS, NewAccesses);
  removeDeadInstructions(BB, BBMap);
}

BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  return CopyBB;
}

BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB,
                                   ValueMapT &BBMap, LoopToScevMapT &LTS,
                                   isl_id_to_ast_expr *NewAccesses) {
  BasicBlock *CopyBB = splitBB(BB);
  Builder.SetInsertPoint(&CopyBB->front());
  generateScalarLoads(Stmt, LTS, BBMap, NewAccesses);
  generateBeginStmtTrace(Stmt, LTS, BBMap);

  copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses);

  // After a basic block was copied store all scalars that escape this block
  // in their alloca.
  generateScalarStores(Stmt, LTS, BBMap, NewAccesses);
  return CopyBB;
}

void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
                            ValueMapT &BBMap, LoopToScevMapT &LTS,
                            isl_id_to_ast_expr *NewAccesses) {
  EntryBB = &CopyBB->getParent()->getEntryBlock();

  // Block statements and the entry blocks of region statements are code
  // generated from instruction lists. This allows us to optimize the
  // instructions that belong to a certain scop statement. As the code
  // structure of region statements might be arbitrarily complex, optimizing
  // the instruction list is not yet supported.
  if (Stmt.isBlockStmt() || (Stmt.isRegionStmt() && Stmt.getEntryBlock() == BB))
    for (Instruction *Inst : Stmt.getInstructions())
      copyInstruction(Stmt, Inst, BBMap, LTS, NewAccesses);
  else
    for (Instruction &Inst : *BB)
      copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses);
}

Value *BlockGenerator::getOrCreateAlloca(const MemoryAccess &Access) {
  assert(!Access.isLatestArrayKind() && "Trying to get alloca for array kind");

  return getOrCreateAlloca(Access.getLatestScopArrayInfo());
}

Value *BlockGenerator::getOrCreateAlloca(const ScopArrayInfo *Array) {
  assert(!Array->isArrayKind() && "Trying to get alloca for array kind");

  auto &Addr = ScalarMap[Array];

  if (Addr) {
    // Allow allocas to be (temporarily) redirected once by adding a new
    // old-alloca-addr to new-addr mapping to GlobalMap. This functionality
    // is used for example by the OpenMP code generation where a first use
    // of a scalar while still in the host code allocates a normal alloca with
    // getOrCreateAlloca. When the values of this scalar are accessed during
    // the generation of the parallel subfunction, these values are copied
    // over to the parallel subfunction and each request for a scalar alloca
    // slot must be forwarded to the temporary in-subfunction slot. This
    // mapping is removed when the subfunction has been generated and again
    // normal host code is generated. Due to the following reasons it is not
    // possible to perform the GlobalMap lookup right after creating the
    // alloca below, but instead we need to check GlobalMap at each call to
    // getOrCreateAlloca:
    //
    //   1) GlobalMap may be changed multiple times (for each parallel loop),
    //   2) The temporary mapping is commonly only known after the initial
    //      alloca has already been generated, and
    //   3) The original alloca value must be restored after leaving the
    //      sub-function.
    if (Value *NewAddr = GlobalMap.lookup(&*Addr))
      return NewAddr;
    return Addr;
  }

  Type *Ty = Array->getElementType();
  Value *ScalarBase = Array->getBasePtr();
  std::string NameExt;
  if (Array->isPHIKind())
    NameExt = ".phiops";
  else
    NameExt = ".s2a";

  const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();

  Addr =
      new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
                     DL.getPrefTypeAlign(Ty), ScalarBase->getName() + NameExt);
  EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
  Addr->insertBefore(&*EntryBB->getFirstInsertionPt());

  return Addr;
}
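
// For illustration (names follow the NameExt logic above; the value %val is
// hypothetical): for a value-kind scalar %val of type double, the function
// above creates something like
//
//   %val.s2a = alloca double
//
// in the function's entry block (or %val.phiops for a PHI-kind array), and
// every later request for the same ScopArrayInfo returns this alloca unless
// GlobalMap temporarily redirects it.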
void BlockGenerator::handleOutsideUsers(const Scop &S, ScopArrayInfo *Array) {
  Instruction *Inst = cast<Instruction>(Array->getBasePtr());

  // If there are escape users we get the alloca for this instruction and put
  // it in the EscapeMap for later finalization. Lastly, if the instruction
  // was copied multiple times we already did this and can exit.
  if (EscapeMap.count(Inst))
    return;

  EscapeUserVectorTy EscapeUsers;
  for (User *U : Inst->users()) {
    // Non-instruction users will never escape.
    Instruction *UI = dyn_cast<Instruction>(U);
    if (!UI)
      continue;

    if (S.contains(UI))
      continue;

    EscapeUsers.push_back(UI);
  }

  // Exit if no escape uses were found.
  if (EscapeUsers.empty())
    return;

  // Get or create an escape alloca for this instruction.
  auto *ScalarAddr = getOrCreateAlloca(Array);

  // Remember that this instruction has escape uses and the escape alloca.
  EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers));
}

void BlockGenerator::generateScalarLoads(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isWrite())
      continue;

#ifndef NDEBUG
    auto StmtDom =
        Stmt.getDomain().intersect_params(Stmt.getParent()->getContext());
    auto AccDom = MA->getAccessRelation().domain();
    assert(!StmtDom.is_subset(AccDom).is_false() &&
           "Scalar must be loaded in all statement instances");
#endif

    auto *Address =
        getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap, NewAccesses);
    assert((!isa<Instruction>(Address) ||
            DT.dominates(cast<Instruction>(Address)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    BBMap[MA->getAccessValue()] = Builder.CreateLoad(
        MA->getElementType(), Address, Address->getName() + ".reload");
  }
}

Value *BlockGenerator::buildContainsCondition(ScopStmt &Stmt,
                                              const isl::set &Subdomain) {
  isl::ast_build AstBuild = Stmt.getAstBuild();
  isl::set Domain = Stmt.getDomain();

  isl::union_map USchedule = AstBuild.get_schedule();
  USchedule = USchedule.intersect_domain(Domain);

  assert(!USchedule.is_empty());
  isl::map Schedule = isl::map::from_union_map(USchedule);

  isl::set ScheduledDomain = Schedule.range();
  isl::set ScheduledSet = Subdomain.apply(Schedule);

  isl::ast_build RestrictedBuild = AstBuild.restrict(ScheduledDomain);

  isl::ast_expr IsInSet = RestrictedBuild.expr_from(ScheduledSet);
  Value *IsInSetExpr = ExprBuilder->create(IsInSet.copy());
  IsInSetExpr = Builder.CreateICmpNE(
      IsInSetExpr, ConstantInt::get(IsInSetExpr->getType(), 0));

  return IsInSetExpr;
}

void BlockGenerator::generateConditionalExecution(
    ScopStmt &Stmt, const isl::set &Subdomain, StringRef Subject,
    const std::function<void()> &GenThenFunc) {
  isl::set StmtDom = Stmt.getDomain();

  // If the condition is a tautology, don't generate a condition around the
  // code.
  bool IsPartialWrite =
      !StmtDom.intersect_params(Stmt.getParent()->getContext())
           .is_subset(Subdomain);
  if (!IsPartialWrite) {
    GenThenFunc();
    return;
  }

  // Generate the condition.
  Value *Cond = buildContainsCondition(Stmt, Subdomain);

  // Don't call GenThenFunc if it is never executed. An ast index expression
  // might not be defined in this case.
  if (auto *Const = dyn_cast<ConstantInt>(Cond))
    if (Const->isZero())
      return;

  BasicBlock *HeadBlock = Builder.GetInsertBlock();
  StringRef BlockName = HeadBlock->getName();

  // Generate the conditional block.
  SplitBlockAndInsertIfThen(Cond, &*Builder.GetInsertPoint(), false, nullptr,
                            &DT, &LI);
  BranchInst *Branch = cast<BranchInst>(HeadBlock->getTerminator());
  BasicBlock *ThenBlock = Branch->getSuccessor(0);
  BasicBlock *TailBlock = Branch->getSuccessor(1);

  // Assign descriptive names.
  if (auto *CondInst = dyn_cast<Instruction>(Cond))
    CondInst->setName("polly." + Subject + ".cond");
  ThenBlock->setName(BlockName + "." + Subject + ".partial");
  TailBlock->setName(BlockName + ".cont");

  // Put the client code into the conditional block and continue in the merge
  // block afterwards.
  Builder.SetInsertPoint(ThenBlock, ThenBlock->getFirstInsertionPt());
  GenThenFunc();
  Builder.SetInsertPoint(TailBlock, TailBlock->getFirstInsertionPt());
}
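
// The control flow produced above looks roughly like this (block and value
// names follow the setName calls above; <subject> stands for the Subject
// string, "head" for the original insert block's name):
//
//   head:
//     %polly.<subject>.cond = icmp ne ...
//     br i1 %polly.<subject>.cond, label %head.<subject>.partial,
//           label %head.cont
//   head.<subject>.partial:          ; filled by GenThenFunc
//     br label %head.cont
//   head.cont:                       ; code generation continues here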
static std::string getInstName(Value *Val) {
  std::string Result;
  raw_string_ostream OS(Result);
  Val->printAsOperand(OS, false);
  return OS.str();
}

void BlockGenerator::generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT &LTS,
                                            ValueMapT &BBMap) {
  if (!TraceStmts)
    return;

  Scop *S = Stmt.getParent();
  const char *BaseName = Stmt.getBaseName();

  isl::ast_build AstBuild = Stmt.getAstBuild();
  isl::set Domain = Stmt.getDomain();

  isl::union_map USchedule = AstBuild.get_schedule().intersect_domain(Domain);
  isl::map Schedule = isl::map::from_union_map(USchedule);
  assert(Schedule.is_empty().is_false() &&
         "The stmt must have a valid instance");

  isl::multi_pw_aff ScheduleMultiPwAff =
      isl::pw_multi_aff::from_map(Schedule.reverse());
  isl::ast_build RestrictedBuild = AstBuild.restrict(Schedule.range());

  // Sequence of strings to print.
  SmallVector<llvm::Value *, 8> Values;

  // Print the name of the statement.
  // TODO: Indent by the depth of the statement instance in the schedule tree.
  Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, BaseName));
  Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "("));

  // Add the coordinate of the statement instance.
  for (unsigned i : rangeIslSize(0, ScheduleMultiPwAff.dim(isl::dim::out))) {
    if (i > 0)
      Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ","));

    isl::ast_expr IsInSet = RestrictedBuild.expr_from(ScheduleMultiPwAff.at(i));
    Values.push_back(ExprBuilder->create(IsInSet.copy()));
  }

  if (TraceScalars) {
    Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ")"));
    DenseSet<Instruction *> Encountered;

    // Add the value of each scalar (and the result of PHIs) used in the
    // statement.
    // TODO: Values used in region-statements.
    for (Instruction *Inst : Stmt.insts()) {
      if (!RuntimeDebugBuilder::isPrintable(Inst->getType()))
        continue;

      if (isa<PHINode>(Inst)) {
        Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, " "));
        Values.push_back(RuntimeDebugBuilder::getPrintableString(
            Builder, getInstName(Inst)));
        Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "="));
        Values.push_back(getNewValue(Stmt, Inst, BBMap, LTS,
                                     LI.getLoopFor(Inst->getParent())));
      } else {
        for (Value *Op : Inst->operand_values()) {
          // Do not print values that cannot change during the execution of
          // the SCoP.
          auto *OpInst = dyn_cast<Instruction>(Op);
          if (!OpInst)
            continue;
          if (!S->contains(OpInst))
            continue;

          // Print each scalar at most once, and exclude values defined in the
          // statement itself.
          if (Encountered.count(OpInst))
            continue;

          Values.push_back(
              RuntimeDebugBuilder::getPrintableString(Builder, " "));
          Values.push_back(RuntimeDebugBuilder::getPrintableString(
              Builder, getInstName(OpInst)));
          Values.push_back(
              RuntimeDebugBuilder::getPrintableString(Builder, "="));
          Values.push_back(getNewValue(Stmt, OpInst, BBMap, LTS,
                                       LI.getLoopFor(Inst->getParent())));
          Encountered.insert(OpInst);
        }
      }

      Encountered.insert(Inst);
    }

    Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "\n"));
  } else {
    Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ")\n"));
  }

  RuntimeDebugBuilder::createCPUPrinter(Builder, ArrayRef<Value *>(Values));
}

void BlockGenerator::generateScalarStores(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  Loop *L = LI.getLoopFor(Stmt.getBasicBlock());

  assert(Stmt.isBlockStmt() &&
         "Region statements need to use the generateScalarStores() function "
         "in the RegionGenerator");

  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    isl::set AccDom = MA->getAccessRelation().domain();
    std::string Subject = MA->getId().get_name();

    generateConditionalExecution(
        Stmt, AccDom, Subject.c_str(), [&, this, MA]() {
          Value *Val = MA->getAccessValue();
          if (MA->isAnyPHIKind()) {
            assert(MA->getIncoming().size() >= 1 &&
                   "Block statements have exactly one exiting block, or "
                   "multiple but with same incoming block and value");
            assert(std::all_of(MA->getIncoming().begin(),
                               MA->getIncoming().end(),
                               [&](std::pair<BasicBlock *, Value *> p) -> bool {
                                 return p.first == Stmt.getBasicBlock();
                               }) &&
                   "Incoming block must be statement's block");
            Val = MA->getIncoming()[0].second;
          }
          auto Address = getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS,
                                            BBMap, NewAccesses);

          Val = getNewValue(Stmt, Val, BBMap, LTS, L);
          assert((!isa<Instruction>(Val) ||
                  DT.dominates(cast<Instruction>(Val)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          assert((!isa<Instruction>(Address) ||
                  DT.dominates(cast<Instruction>(Address)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");

          // The new Val might have a different type than the old Val due to
          // ScalarEvolution looking through bitcasts.
          Address = Builder.CreateBitOrPointerCast(
              Address, Val->getType()->getPointerTo(
                           Address->getType()->getPointerAddressSpace()));

          Builder.CreateStore(Val, Address);
        });
  }
}

void BlockGenerator::createScalarInitialization(Scop &S) {
  BasicBlock *ExitBB = S.getExit();
  BasicBlock *PreEntryBB = S.getEnteringBlock();

  Builder.SetInsertPoint(&*StartBlock->begin());

  for (auto &Array : S.arrays()) {
    if (Array->getNumberOfDimensions() != 0)
      continue;

    if (Array->isPHIKind()) {
      // For PHI nodes, the only values we need to store are the ones that
      // reach the PHI node from outside the region. In general there should
      // only be one such incoming edge and this edge should enter through
      // 'PreEntryBB'.
      auto PHI = cast<PHINode>(Array->getBasePtr());

      for (auto BI = PHI->block_begin(), BE = PHI->block_end(); BI != BE; BI++)
        if (!S.contains(*BI) && *BI != PreEntryBB)
          llvm_unreachable("Incoming edges from outside the scop should always "
                           "come from PreEntryBB");

      int Idx = PHI->getBasicBlockIndex(PreEntryBB);
      if (Idx < 0)
        continue;

      Value *ScalarValue = PHI->getIncomingValue(Idx);

      Builder.CreateStore(ScalarValue, getOrCreateAlloca(Array));
      continue;
    }

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (Inst && S.contains(Inst))
      continue;

    // PHI nodes that are not marked as such in their SAI object are either
    // exit PHI nodes we model as common scalars but without initialization,
    // or incoming PHI nodes that need to be initialized. Check if the first
    // is the case for Inst and do not create and initialize memory if so.
    if (auto *PHI = dyn_cast_or_null<PHINode>(Inst))
      if (!S.hasSingleExitEdge() && PHI->getBasicBlockIndex(ExitBB) >= 0)
        continue;

    Builder.CreateStore(Array->getBasePtr(), getOrCreateAlloca(Array));
  }
}

void BlockGenerator::createScalarFinalization(Scop &S) {
  // The exit block of the __unoptimized__ region.
  BasicBlock *ExitBB = S.getExitingBlock();
  // The merge block __just after__ the region and the optimized region.
  BasicBlock *MergeBB = S.getExit();

  // The exit block of the __optimized__ region.
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());
  for (const auto &EscapeMapping : EscapeMap) {
    // Extract the escaping instruction and the escaping users as well as the
    // alloca the instruction was demoted to.
    Instruction *EscapeInst = EscapeMapping.first;
    const auto &EscapeMappingValue = EscapeMapping.second;
    const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second;
    auto *ScalarAddr = cast<AllocaInst>(&*EscapeMappingValue.first);

    // Reload the demoted instruction in the optimized version of the SCoP.
    Value *EscapeInstReload =
        Builder.CreateLoad(ScalarAddr->getAllocatedType(), ScalarAddr,
                           EscapeInst->getName() + ".final_reload");
    EscapeInstReload =
        Builder.CreateBitOrPointerCast(EscapeInstReload, EscapeInst->getType());

    // Create the merge PHI that merges the optimized and unoptimized version.
    PHINode *MergePHI = PHINode::Create(EscapeInst->getType(), 2,
                                        EscapeInst->getName() + ".merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());

    // Add the respective values to the merge PHI.
    MergePHI->addIncoming(EscapeInstReload, OptExitBB);
    MergePHI->addIncoming(EscapeInst, ExitBB);

    // The information of scalar evolution about the escaping instruction needs
    // to be revoked so the new merged instruction will be used.
    if (SE.isSCEVable(EscapeInst->getType()))
      SE.forgetValue(EscapeInst);

    // Replace all uses of the demoted instruction with the merge PHI.
    for (Instruction *EUser : EscapeUsers)
      EUser->replaceUsesOfWith(EscapeInst, MergePHI);
  }
}
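
// Sketch of the merge produced above for one escaping value %x (the block
// names are illustrative, the value names follow the code above):
//
//   merge:
//     %x.merge = phi [ %x.final_reload, %polly.exiting ], [ %x, %orig.exiting ]
//
// Afterwards, all escape users refer to %x.merge instead of %x.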
void BlockGenerator::findOutsideUsers(Scop &S) {
  for (auto &Array : S.arrays()) {
    if (Array->getNumberOfDimensions() != 0)
      continue;

    if (Array->isPHIKind())
      continue;

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (!Inst)
      continue;

    // Scop invariant hoisting moves some of the base pointers out of the
    // scop. We can ignore these, as the invariant load hoisting already
    // registers the relevant outside users.
    if (!S.contains(Inst))
      continue;

    handleOutsideUsers(S, Array);
  }
}

void BlockGenerator::createExitPHINodeMerges(Scop &S) {
  if (S.hasSingleExitEdge())
    return;

  auto *ExitBB = S.getExitingBlock();
  auto *MergeBB = S.getExit();
  auto *AfterMergeBB = MergeBB->getSingleSuccessor();
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());

  for (auto &SAI : S.arrays()) {
    auto *Val = SAI->getBasePtr();

    // Only Value-like scalars need a merge PHI. Exit block PHIs receive
    // either the original PHI's value or the reloaded incoming values from
    // the generated code. An llvm::Value is merged between the original
    // code's value and the generated one.
    if (!SAI->isExitPHIKind())
      continue;

    PHINode *PHI = dyn_cast<PHINode>(Val);
    if (!PHI)
      continue;

    if (PHI->getParent() != AfterMergeBB)
      continue;

    std::string Name = PHI->getName().str();
    Value *ScalarAddr = getOrCreateAlloca(SAI);
    Value *Reload = Builder.CreateLoad(SAI->getElementType(), ScalarAddr,
                                       Name + ".ph.final_reload");
    Reload = Builder.CreateBitOrPointerCast(Reload, PHI->getType());
    Value *OriginalValue = PHI->getIncomingValueForBlock(MergeBB);
    assert((!isa<Instruction>(OriginalValue) ||
            cast<Instruction>(OriginalValue)->getParent() != MergeBB) &&
           "Original value must not be one we just generated.");
    auto *MergePHI = PHINode::Create(PHI->getType(), 2, Name + ".ph.merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());
    MergePHI->addIncoming(Reload, OptExitBB);
    MergePHI->addIncoming(OriginalValue, ExitBB);
    int Idx = PHI->getBasicBlockIndex(MergeBB);
    PHI->setIncomingValue(Idx, MergePHI);
  }
}

void BlockGenerator::invalidateScalarEvolution(Scop &S) {
  for (auto &Stmt : S)
    if (Stmt.isCopyStmt())
      continue;
    else if (Stmt.isBlockStmt())
      for (auto &Inst : *Stmt.getBasicBlock())
        SE.forgetValue(&Inst);
    else if (Stmt.isRegionStmt())
      for (auto *BB : Stmt.getRegion()->blocks())
        for (auto &Inst : *BB)
          SE.forgetValue(&Inst);
    else
      llvm_unreachable("Unexpected statement type found");

  // Invalidate SCEV of loops surrounding the EscapeUsers.
  for (const auto &EscapeMapping : EscapeMap) {
    const EscapeUserVectorTy &EscapeUsers = EscapeMapping.second.second;
    for (Instruction *EUser : EscapeUsers) {
      if (Loop *L = LI.getLoopFor(EUser->getParent()))
        while (L) {
          SE.forgetLoop(L);
          L = L->getParentLoop();
        }
    }
  }
}

void BlockGenerator::finalizeSCoP(Scop &S) {
  findOutsideUsers(S);
  createScalarInitialization(S);
  createExitPHINodeMerges(S);
  createScalarFinalization(S);
  invalidateScalarEvolution(S);
}

VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           isl_map *Schedule)
    : BlockGenerator(BlockGen), VLTS(VLTS), Schedule(Schedule) {
  assert(Schedule && "No statement domain provided");
}

Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(FixedVectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Value *VectorBlockGenerator::generateStrideOneLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses, bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  Type *VectorType = FixedVectorType::get(Load->getType(), VectorWidth);
  Type *VectorPtrType =
      PointerType::get(VectorType, Load->getPointerAddressSpace());
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[Offset],
                                               VLTS[Offset], NewAccesses);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad = Builder.CreateLoad(VectorType, VectorPtr,
                                         Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(Align(8));

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}
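
// For illustration: with VectorWidth == 4 and NegativeStride set, the pointer
// is taken from lane 3 (which for a stride -1 access holds the lowest
// address), the full vector is loaded from there, and the shuffle mask built
// above is <3, 2, 1, 0>, reversing the loaded elements back into original
// iteration order.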
Value *VectorBlockGenerator::generateStrideZeroLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  Type *VectorType = FixedVectorType::get(Load->getType(), 1);
  Type *VectorPtrType =
      PointerType::get(VectorType, Load->getPointerAddressSpace());
  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, VLTS[0], NewAccesses);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad = Builder.CreateLoad(VectorType, VectorPtr,
                                            Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(Align(8));

  Constant *SplatVector = Constant::getNullValue(
      FixedVectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}
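
// The splat above is expressed as a shufflevector with an all-zero mask,
// i.e. for a vector width of 4 the generated IR has the shape
//
//   %splat = shufflevector <1 x ty> %x, <1 x ty> %x, <4 x i32> zeroinitializer
//
// which broadcasts the single loaded element into every lane (the value
// names are illustrative).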
  905. Value *VectorBlockGenerator::generateUnknownStrideLoad(
  906. ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
  907. __isl_keep isl_id_to_ast_expr *NewAccesses) {
  908. int VectorWidth = getVectorWidth();
  909. Type *ElemTy = Load->getType();
  910. auto *FVTy = FixedVectorType::get(ElemTy, VectorWidth);
  911. Value *Vector = UndefValue::get(FVTy);
  912. for (int i = 0; i < VectorWidth; i++) {
  913. Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[i],
  914. VLTS[i], NewAccesses);
  915. Value *ScalarLoad =
  916. Builder.CreateLoad(ElemTy, NewPointer, Load->getName() + "_p_scalar_");
  917. Vector = Builder.CreateInsertElement(
  918. Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  919. }
  920. return Vector;
  921. }
  922. void VectorBlockGenerator::generateLoad(
  923. ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
  924. VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  925. if (Value *PreloadLoad = GlobalMap.lookup(Load)) {
  926. VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad,
  927. Load->getName() + "_p");
  928. return;
  929. }
  930. if (!VectorType::isValidElementType(Load->getType())) {
  931. for (int i = 0; i < getVectorWidth(); i++)
  932. ScalarMaps[i][Load] =
  933. generateArrayLoad(Stmt, Load, ScalarMaps[i], VLTS[i], NewAccesses);
  934. return;
  935. }
  936. const MemoryAccess &Access = Stmt.getArrayAccessFor(Load);
  937. // Make sure we have scalar values available to access the pointer to
  938. // the data location.
  939. extractScalarValues(Load, VectorMap, ScalarMaps);
  940. Value *NewLoad;
  941. if (Access.isStrideZero(isl::manage_copy(Schedule)))
  942. NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0], NewAccesses);
  943. else if (Access.isStrideOne(isl::manage_copy(Schedule)))
  944. NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses);
  945. else if (Access.isStrideX(isl::manage_copy(Schedule), -1))
  946. NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses, true);
  947. else
  948. NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps, NewAccesses);
  949. VectorMap[Load] = NewLoad;
  950. }
  951. void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
  952. ValueMapT &VectorMap,
  953. VectorValueMapT &ScalarMaps) {
  954. int VectorWidth = getVectorWidth();
  955. Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
  956. ScalarMaps, getLoopForStmt(Stmt));
  957. assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
  958. const CastInst *Cast = dyn_cast<CastInst>(Inst);
  959. auto *DestType = FixedVectorType::get(Inst->getType(), VectorWidth);
  960. VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
  961. }
  962. void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
  963. ValueMapT &VectorMap,
  964. VectorValueMapT &ScalarMaps) {
  965. Loop *L = getLoopForStmt(Stmt);
  966. Value *OpZero = Inst->getOperand(0);
  967. Value *OpOne = Inst->getOperand(1);
  968. Value *NewOpZero, *NewOpOne;
  969. NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
  970. NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);
  971. Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
  972. Inst->getName() + "p_vec");
  973. VectorMap[Inst] = NewInst;
  974. }
  975. void VectorBlockGenerator::copyStore(
  976. ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
  977. VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  978. const MemoryAccess &Access = Stmt.getArrayAccessFor(Store);
  979. Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap,
  980. ScalarMaps, getLoopForStmt(Stmt));
  981. // Make sure we have scalar values available to access the pointer to
  982. // the data location.
  983. extractScalarValues(Store, VectorMap, ScalarMaps);
  984. if (Access.isStrideOne(isl::manage_copy(Schedule))) {
  985. Type *VectorType = FixedVectorType::get(Store->getValueOperand()->getType(),
  986. getVectorWidth());
  987. Type *VectorPtrType =
  988. PointerType::get(VectorType, Store->getPointerAddressSpace());
  989. Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[0],
  990. VLTS[0], NewAccesses);
  991. Value *VectorPtr =
  992. Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  993. StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
  994. if (!Aligned)
  995. Store->setAlignment(Align(8));
  996. } else {
  997. for (unsigned i = 0; i < ScalarMaps.size(); i++) {
  998. Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
  999. Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[i],
  1000. VLTS[i], NewAccesses);
  1001. Builder.CreateStore(Scalar, NewPointer);
  1002. }
  1003. }
  1004. }
  1005. bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
  1006. ValueMapT &VectorMap) {
  1007. for (Value *Operand : Inst->operands())
  1008. if (VectorMap.count(Operand))
  1009. return true;
  1010. return false;
  1011. }
  1012. bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
  1013. ValueMapT &VectorMap,
  1014. VectorValueMapT &ScalarMaps) {
  1015. bool HasVectorOperand = false;
  1016. int VectorWidth = getVectorWidth();
  1017. for (Value *Operand : Inst->operands()) {
  1018. ValueMapT::iterator VecOp = VectorMap.find(Operand);
  1019. if (VecOp == VectorMap.end())
  1020. continue;
  1021. HasVectorOperand = true;
  1022. Value *NewVector = VecOp->second;
  1023. for (int i = 0; i < VectorWidth; ++i) {
  1024. ValueMapT &SM = ScalarMaps[i];
  1025. // If there is one scalar extracted, all scalar elements should have
  1026. // already been extracted by the code here. So no need to check for the
  1027. // existence of all of them.
  1028. if (SM.count(Operand))
  1029. break;
  1030. SM[Operand] =
  1031. Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
  1032. }
  1033. }
  1034. return HasVectorOperand;
  1035. }
  1036. void VectorBlockGenerator::copyInstScalarized(
  1037. ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
  1038. VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  1039. bool HasVectorOperand;
  1040. int VectorWidth = getVectorWidth();
  1041. HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
  1042. for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
  1043. BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
  1044. VLTS[VectorLane], NewAccesses);
  1045. if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
  1046. return;
  1047. // Make the result available as vector value.
  1048. auto *FVTy = FixedVectorType::get(Inst->getType(), VectorWidth);
  1049. Value *Vector = UndefValue::get(FVTy);
  1050. for (int i = 0; i < VectorWidth; i++)
  1051. Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
  1052. Builder.getInt32(i));
  1053. VectorMap[Inst] = Vector;
  1054. }
int VectorBlockGenerator::getVectorWidth() { return VLTS.size(); }

void VectorBlockGenerator::copyInstruction(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions control the flow of execution. They are
  // explicitly expressed in the generated AST and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Stmt, Load, VectorMap, ScalarMaps, NewAccesses);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (auto *Store = dyn_cast<StoreInst>(Inst)) {
      // Identified as redundant by -polly-simplify.
      if (!Stmt.getArrayAccessOrNULLFor(Store))
        return;

      copyStore(Stmt, Store, VectorMap, ScalarMaps, NewAccesses);
      return;
    }

    if (auto *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
      return;
    }

    if (auto *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we do not know how to
    // generate vector code.
  }

  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses);
}
void VectorBlockGenerator::generateScalarVectorLoads(
    ScopStmt &Stmt, ValueMapT &VectorBlockMap) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isWrite())
      continue;

    auto *Address = getOrCreateAlloca(*MA);
    Type *VectorType = FixedVectorType::get(MA->getElementType(), 1);
    Type *VectorPtrType = PointerType::get(
        VectorType, Address->getType()->getPointerAddressSpace());
    Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType,
                                             Address->getName() + "_p_vec_p");
    auto *Val = Builder.CreateLoad(VectorType, VectorPtr,
                                   Address->getName() + ".reload");
    Constant *SplatVector = Constant::getNullValue(
        FixedVectorType::get(Builder.getInt32Ty(), getVectorWidth()));

    Value *VectorVal = Builder.CreateShuffleVector(
        Val, Val, SplatVector, Address->getName() + "_p_splat");
    VectorBlockMap[MA->getAccessValue()] = VectorVal;
  }
}
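// Illustration of the splat emitted above for a demoted scalar read %x with
// VectorWidth == 4 (value names and element type invented for this sketch):
//
//   %x_p_vec_p = bitcast double* %x.addr to <1 x double>*
//   %x.reload  = load <1 x double>, <1 x double>* %x_p_vec_p
//   %x_p_splat = shufflevector <1 x double> %x.reload,
//                              <1 x double> %x.reload,
//                              <4 x i32> zeroinitializer
//
// The zeroinitializer mask replicates lane 0 into all lanes, so every vector
// lane observes the same scalar input.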
void VectorBlockGenerator::verifyNoScalarStores(ScopStmt &Stmt) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isRead())
      continue;

    llvm_unreachable("Scalar stores not expected in vector loop");
  }
}

void VectorBlockGenerator::copyStmt(
    ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "TODO: Only block statements can be copied by the vector block "
         "generator");

  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(&CopyBB->front());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // in the vector map once (as it is calculating >vectorwidth< values at a
  // time), or, if the values are calculated using scalar operations, it
  // appears once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  generateScalarVectorLoads(Stmt, VectorBlockMap);

  for (Instruction *Inst : Stmt.getInstructions())
    copyInstruction(Stmt, Inst, VectorBlockMap, ScalarBlockMap, NewAccesses);

  verifyNoScalarStores(Stmt);
}
BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB,
                                             BasicBlock *BBCopy) {
  BasicBlock *BBIDom = DT.getNode(BB)->getIDom()->getBlock();
  BasicBlock *BBCopyIDom = EndBlockMap.lookup(BBIDom);

  if (BBCopyIDom)
    DT.changeImmediateDominator(BBCopy, BBCopyIDom);

  return StartBlockMap.lookup(BBIDom);
}

// This is to determine whether an llvm::Value (defined in @p BB) is usable
// when leaving a subregion. The straightforward
// DT.dominates(BB, R->getExitBlock()) does not work in cases where the exit
// block has edges from outside the region. In that case the llvm::Value would
// never be usable in the exit block. The RegionGenerator however creates a
// new exit block ('ExitBBCopy') for the subregion's exiting edges only. We
// need to determine whether an llvm::Value is usable in there. We do this by
// checking whether it dominates all exiting blocks individually.
static bool isDominatingSubregionExit(const DominatorTree &DT, Region *R,
                                      BasicBlock *BB) {
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    if (!DT.dominates(BB, ExitingBB))
      return false;
  }
  return true;
}
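// A sketch of the situation described above (block names invented for this
// illustration). The subregion R contains {Entry, A, B}; Exit also has an
// incoming edge from Outside, which is not part of R:
//
//          Entry
//          /   \
//         A     B      Outside
//          \    |     /
//           \   |    /
//            Exit <-'
//
// Entry does not dominate Exit because of the edge from Outside, yet it does
// dominate both exiting blocks A and B. Values defined in Entry are therefore
// usable in the dedicated exit block ExitBBCopy, which receives only the
// exiting edges of R.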
// Find the direct dominator of the subregion's exit block if the subregion
// was simplified.
static BasicBlock *findExitDominator(DominatorTree &DT, Region *R) {
  BasicBlock *Common = nullptr;
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    // First exiting edge.
    if (!Common) {
      Common = ExitingBB;
      continue;
    }

    Common = DT.findNearestCommonDominator(Common, ExitingBB);
  }

  assert(Common && R->contains(Common));
  return Common;
}
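// In the sketch above, the nearest common dominator of the exiting blocks A
// and B is Entry, so Entry's copy becomes the immediate dominator of the new
// exit block created in copyStmt below. For a subregion with a single exiting
// block, that block itself is returned.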
void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                               isl_id_to_ast_expr *IdToAstExp) {
  assert(Stmt.isRegionStmt() &&
         "Only region statements can be copied by the region generator");

  // Forget all old mappings.
  StartBlockMap.clear();
  EndBlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();

  // Collection of all values related to this subregion.
  ValueMapT ValueMap;

  // The region represented by the statement.
  Region *R = Stmt.getRegion();

  // Create a dedicated entry for the region where we can reload all demoted
  // inputs.
  BasicBlock *EntryBB = R->getEntry();
  BasicBlock *EntryBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                       &*Builder.GetInsertPoint(), &DT, &LI);
  EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry");
  Builder.SetInsertPoint(&EntryBBCopy->front());

  ValueMapT &EntryBBMap = RegionMaps[EntryBBCopy];
  generateScalarLoads(Stmt, LTS, EntryBBMap, IdToAstExp);
  generateBeginStmtTrace(Stmt, LTS, EntryBBMap);

  for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI)
    if (!R->contains(*PI)) {
      StartBlockMap[*PI] = EntryBBCopy;
      EndBlockMap[*PI] = EntryBBCopy;
    }

  // Iterate over all blocks in the region in a breadth-first search.
  std::deque<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 8> SeenBlocks;
  Blocks.push_back(EntryBB);
  SeenBlocks.insert(EntryBB);

  while (!Blocks.empty()) {
    BasicBlock *BB = Blocks.front();
    Blocks.pop_front();

    // First split the block and update dominance information.
    BasicBlock *BBCopy = splitBB(BB);
    BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy);

    // Get the mapping for this block and initialize it with either the scalar
    // loads from the generated entering block (which dominates all blocks of
    // this subregion) or the maps of the immediate dominator, if part of the
    // subregion. The latter necessarily includes the former.
    ValueMapT *InitBBMap;
    if (BBCopyIDom) {
      assert(RegionMaps.count(BBCopyIDom));
      InitBBMap = &RegionMaps[BBCopyIDom];
    } else
      InitBBMap = &EntryBBMap;
    auto Inserted = RegionMaps.insert(std::make_pair(BBCopy, *InitBBMap));
    ValueMapT &RegionMap = Inserted.first->second;

    // Copy the block with the BlockGenerator.
    Builder.SetInsertPoint(&BBCopy->front());
    copyBB(Stmt, BB, BBCopy, RegionMap, LTS, IdToAstExp);

    // In order to remap PHI nodes we also store basic block mappings.
    StartBlockMap[BB] = BBCopy;
    EndBlockMap[BB] = Builder.GetInsertBlock();

    // Add values to incomplete PHI nodes waiting for this block to be copied.
    for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB])
      addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB, LTS);
    IncompletePHINodeMap[BB].clear();

    // And continue with new successors inside the region.
    for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++)
      if (R->contains(*SI) && SeenBlocks.insert(*SI))
        Blocks.push_back(*SI);

    // Remember values in case they are visible after this subregion.
    if (isDominatingSubregionExit(DT, R, BB))
      ValueMap.insert(RegionMap.begin(), RegionMap.end());
  }

  // Now create a new dedicated region exit block and add it to the region
  // map.
  BasicBlock *ExitBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                      &*Builder.GetInsertPoint(), &DT, &LI);
  ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit");
  StartBlockMap[R->getExit()] = ExitBBCopy;
  EndBlockMap[R->getExit()] = ExitBBCopy;

  BasicBlock *ExitDomBBCopy = EndBlockMap.lookup(findExitDominator(DT, R));
  assert(ExitDomBBCopy &&
         "Common exit dominator must be within region; at least the entry "
         "node must match");
  DT.changeImmediateDominator(ExitBBCopy, ExitDomBBCopy);

  // As the block generator doesn't handle control flow, we need to add the
  // region control flow by hand after all blocks have been copied.
  for (BasicBlock *BB : SeenBlocks) {
    BasicBlock *BBCopyStart = StartBlockMap[BB];
    BasicBlock *BBCopyEnd = EndBlockMap[BB];
    Instruction *TI = BB->getTerminator();
    if (isa<UnreachableInst>(TI)) {
      while (!BBCopyEnd->empty())
        BBCopyEnd->begin()->eraseFromParent();
      new UnreachableInst(BBCopyEnd->getContext(), BBCopyEnd);
      continue;
    }

    Instruction *BICopy = BBCopyEnd->getTerminator();

    ValueMapT &RegionMap = RegionMaps[BBCopyStart];
    RegionMap.insert(StartBlockMap.begin(), StartBlockMap.end());

    Builder.SetInsertPoint(BICopy);
    copyInstScalar(Stmt, TI, RegionMap, LTS);
    BICopy->eraseFromParent();
  }

  // Add counting PHI nodes to all loops in the region that can be used as
  // replacement for SCEVs referring to the old loop.
  for (BasicBlock *BB : SeenBlocks) {
    Loop *L = LI.getLoopFor(BB);
    if (L == nullptr || L->getHeader() != BB || !R->contains(L))
      continue;

    BasicBlock *BBCopy = StartBlockMap[BB];
    Value *NullVal = Builder.getInt32(0);
    PHINode *LoopPHI =
        PHINode::Create(Builder.getInt32Ty(), 2, "polly.subregion.iv");
    Instruction *LoopPHIInc = BinaryOperator::CreateAdd(
        LoopPHI, Builder.getInt32(1), "polly.subregion.iv.inc");
    LoopPHI->insertBefore(&BBCopy->front());
    LoopPHIInc->insertBefore(BBCopy->getTerminator());

    for (auto *PredBB : make_range(pred_begin(BB), pred_end(BB))) {
      if (!R->contains(PredBB))
        continue;
      if (L->contains(PredBB))
        LoopPHI->addIncoming(LoopPHIInc, EndBlockMap[PredBB]);
      else
        LoopPHI->addIncoming(NullVal, EndBlockMap[PredBB]);
    }

    for (auto *PredBBCopy : make_range(pred_begin(BBCopy), pred_end(BBCopy)))
      if (LoopPHI->getBasicBlockIndex(PredBBCopy) < 0)
        LoopPHI->addIncoming(NullVal, PredBBCopy);

    LTS[L] = SE.getUnknown(LoopPHI);
  }

  // Continue generating code in the exit block.
  Builder.SetInsertPoint(&*ExitBBCopy->getFirstInsertionPt());

  // Write values visible to other statements.
  generateScalarStores(Stmt, LTS, ValueMap, IdToAstExp);

  StartBlockMap.clear();
  EndBlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();
}
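// Illustration of the counting PHI inserted above for a copied loop header.
// The PHI and increment names are the ones the code chooses; the surrounding
// block names are invented for this sketch:
//
//   polly.stmt.header:
//     %polly.subregion.iv = phi i32 [ 0, %preheader.copy ],
//                                   [ %polly.subregion.iv.inc, %latch.copy ]
//     ...
//     %polly.subregion.iv.inc = add i32 %polly.subregion.iv, 1
//
// SCEV expressions that referred to the original loop's induction variable
// are later rewritten in terms of this counter via the LTS mapping.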
PHINode *RegionGenerator::buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS,
                                       ValueMapT &BBMap, Loop *L) {
  ScopStmt *Stmt = MA->getStatement();
  Region *SubR = Stmt->getRegion();
  auto Incoming = MA->getIncoming();

  PollyIRBuilder::InsertPointGuard IPGuard(Builder);
  PHINode *OrigPHI = cast<PHINode>(MA->getAccessInstruction());
  BasicBlock *NewSubregionExit = Builder.GetInsertBlock();

  // This can happen if the subregion is simplified after the ScopStmts
  // have been created; simplification happens as part of CodeGeneration.
  if (OrigPHI->getParent() != SubR->getExit()) {
    BasicBlock *FormerExit = SubR->getExitingBlock();
    if (FormerExit)
      NewSubregionExit = StartBlockMap.lookup(FormerExit);
  }

  PHINode *NewPHI = PHINode::Create(OrigPHI->getType(), Incoming.size(),
                                    "polly." + OrigPHI->getName(),
                                    NewSubregionExit->getFirstNonPHI());

  // Add the incoming values to the PHI.
  for (auto &Pair : Incoming) {
    BasicBlock *OrigIncomingBlock = Pair.first;
    BasicBlock *NewIncomingBlockStart = StartBlockMap.lookup(OrigIncomingBlock);
    BasicBlock *NewIncomingBlockEnd = EndBlockMap.lookup(OrigIncomingBlock);
    Builder.SetInsertPoint(NewIncomingBlockEnd->getTerminator());
    assert(RegionMaps.count(NewIncomingBlockStart));
    assert(RegionMaps.count(NewIncomingBlockEnd));

    ValueMapT *LocalBBMap = &RegionMaps[NewIncomingBlockStart];

    Value *OrigIncomingValue = Pair.second;
    Value *NewIncomingValue =
        getNewValue(*Stmt, OrigIncomingValue, *LocalBBMap, LTS, L);
    NewPHI->addIncoming(NewIncomingValue, NewIncomingBlockEnd);
  }

  return NewPHI;
}
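// Example of the PHI materialized above for an exit PHI %merge with two
// incoming edges (block and value names invented for this sketch; the
// "polly." prefix is the one the code uses):
//
//   polly.stmt.exit:
//     %polly.merge = phi double [ %then.val.copy, %polly.stmt.then ],
//                               [ %else.val.copy, %polly.stmt.else ]
//
// The InsertPointGuard restores the builder's previous insert point once the
// incoming values have been generated in their defining blocks.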
Value *RegionGenerator::getExitScalar(MemoryAccess *MA, LoopToScevMapT &LTS,
                                      ValueMapT &BBMap) {
  ScopStmt *Stmt = MA->getStatement();

  // TODO: Add some test cases that ensure this is really the right choice.
  Loop *L = LI.getLoopFor(Stmt->getRegion()->getExit());

  if (MA->isAnyPHIKind()) {
    auto Incoming = MA->getIncoming();
    assert(!Incoming.empty() &&
           "PHI WRITEs must originate from at least one incoming block");

    // If there is only one incoming value, we do not need to create a PHI.
    if (Incoming.size() == 1) {
      Value *OldVal = Incoming[0].second;
      return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
    }

    return buildExitPHI(MA, LTS, BBMap, L);
  }

  // MemoryKind::Value accesses leaving the subregion must dominate the exit
  // block; just pass the copied value.
  Value *OldVal = MA->getAccessValue();
  return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
}
void RegionGenerator::generateScalarStores(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.getRegion() &&
         "Block statements need to use the generateScalarStores() "
         "function in the BlockGenerator");

  // Get the exit scalar values before generating the writes.
  // This is necessary because RegionGenerator::getExitScalar may insert
  // PHINodes that depend on the region's exiting blocks. But
  // BlockGenerator::generateConditionalExecution may insert a new basic block
  // such that the current basic block is not a direct successor of the
  // exiting blocks anymore. Hence, build the PHINodes while the current block
  // is still the direct successor.
  SmallDenseMap<MemoryAccess *, Value *> NewExitScalars;
  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    Value *NewVal = getExitScalar(MA, LTS, BBMap);
    NewExitScalars[MA] = NewVal;
  }

  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    isl::set AccDom = MA->getAccessRelation().domain();
    std::string Subject = MA->getId().get_name();
    generateConditionalExecution(
        Stmt, AccDom, Subject.c_str(), [&, this, MA]() {
          Value *NewVal = NewExitScalars.lookup(MA);
          assert(NewVal && "The exit scalar must be determined before");
          Value *Address = getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS,
                                              BBMap, NewAccesses);
          assert((!isa<Instruction>(NewVal) ||
                  DT.dominates(cast<Instruction>(NewVal)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          assert((!isa<Instruction>(Address) ||
                  DT.dominates(cast<Instruction>(Address)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          Builder.CreateStore(NewVal, Address);
        });
  }
}
void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, PHINode *PHI,
                                      PHINode *PHICopy, BasicBlock *IncomingBB,
                                      LoopToScevMapT &LTS) {
  // If the incoming block was not yet copied, mark this PHI as incomplete.
  // Once the block is copied, the incoming value will be added.
  BasicBlock *BBCopyStart = StartBlockMap[IncomingBB];
  BasicBlock *BBCopyEnd = EndBlockMap[IncomingBB];
  if (!BBCopyStart) {
    assert(!BBCopyEnd);
    assert(Stmt.represents(IncomingBB) &&
           "Bad incoming block for PHI in non-affine region");
    IncompletePHINodeMap[IncomingBB].push_back(std::make_pair(PHI, PHICopy));
    return;
  }

  assert(RegionMaps.count(BBCopyStart) &&
         "Incoming PHI block did not have a BBMap");
  ValueMapT &BBCopyMap = RegionMaps[BBCopyStart];

  Value *OpCopy = nullptr;

  if (Stmt.represents(IncomingBB)) {
    Value *Op = PHI->getIncomingValueForBlock(IncomingBB);

    // If the current insert block is different from the PHI's incoming block,
    // change it; otherwise do not.
    auto IP = Builder.GetInsertPoint();
    if (IP->getParent() != BBCopyEnd)
      Builder.SetInsertPoint(BBCopyEnd->getTerminator());
    OpCopy = getNewValue(Stmt, Op, BBCopyMap, LTS, getLoopForStmt(Stmt));
    if (IP->getParent() != BBCopyEnd)
      Builder.SetInsertPoint(&*IP);
  } else {
    // All edges from outside the non-affine region become a single edge
    // in the new copy of the non-affine region. Make sure to only add the
    // corresponding edge the first time we encounter a basic block from
    // outside the non-affine region.
    if (PHICopy->getBasicBlockIndex(BBCopyEnd) >= 0)
      return;

    // Get the reloaded value.
    OpCopy = getNewValue(Stmt, PHI, BBCopyMap, LTS, getLoopForStmt(Stmt));
  }

  assert(OpCopy && "Incoming PHI value was not copied properly");
  PHICopy->addIncoming(OpCopy, BBCopyEnd);
}
void RegionGenerator::copyPHIInstruction(ScopStmt &Stmt, PHINode *PHI,
                                         ValueMapT &BBMap,
                                         LoopToScevMapT &LTS) {
  unsigned NumIncoming = PHI->getNumIncomingValues();
  PHINode *PHICopy =
      Builder.CreatePHI(PHI->getType(), NumIncoming, "polly." + PHI->getName());
  PHICopy->moveBefore(PHICopy->getParent()->getFirstNonPHI());
  BBMap[PHI] = PHICopy;

  for (BasicBlock *IncomingBB : PHI->blocks())
    addOperandToPHI(Stmt, PHI, PHICopy, IncomingBB, LTS);
}
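// Taken together, copyPHIInstruction and addOperandToPHI implement a
// two-phase scheme: the PHI copy is created as soon as its block is visited,
// but an incoming value whose block has not been copied yet is parked in
// IncompletePHINodeMap and filled in later, when the breadth-first traversal
// in RegionGenerator::copyStmt reaches that block.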