//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/IslExprBuilder.h"
#include "polly/CodeGen/RuntimeDebugBuilder.h"
#include "polly/Options.h"
#include "polly/ScopInfo.h"
#include "polly/Support/ISLTools.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "isl/ast.h"
#include <deque>

using namespace llvm;
using namespace polly;

static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assumed aligned memory accesses."),
                             cl::Hidden, cl::cat(PollyCategory));

bool PollyDebugPrinting;
static cl::opt<bool, true> DebugPrintingX(
    "polly-codegen-add-debug-printing",
    cl::desc("Add printf calls that show the values loaded/stored."),
    cl::location(PollyDebugPrinting), cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool> TraceStmts(
    "polly-codegen-trace-stmts",
    cl::desc("Add printf calls that print the statement being executed"),
    cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool> TraceScalars(
    "polly-codegen-trace-scalars",
    cl::desc("Add printf calls that print the values of all scalar values "
             "used in a statement. Requires -polly-codegen-trace-stmts."),
    cl::Hidden, cl::cat(PollyCategory));

BlockGenerator::BlockGenerator(
    PollyIRBuilder &B, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT,
    AllocaMapTy &ScalarMap, EscapeUsersAllocaMapTy &EscapeMap,
    ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder, BasicBlock *StartBlock)
    : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT),
      EntryBB(nullptr), ScalarMap(ScalarMap), EscapeMap(EscapeMap),
      GlobalMap(GlobalMap), StartBlock(StartBlock) {}

Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
                                             ValueMapT &BBMap,
                                             LoopToScevMapT &LTS,
                                             Loop *L) const {
  if (!SE.isSCEVable(Old->getType()))
    return nullptr;

  const SCEV *Scev = SE.getSCEVAtScope(Old, L);
  if (!Scev)
    return nullptr;

  if (isa<SCEVCouldNotCompute>(Scev))
    return nullptr;

  const SCEV *NewScev = SCEVLoopAddRecRewriter::rewrite(Scev, LTS, SE);
  ValueMapT VTV;
  VTV.insert(BBMap.begin(), BBMap.end());
  VTV.insert(GlobalMap.begin(), GlobalMap.end());

  Scop &S = *Stmt.getParent();
  const DataLayout &DL = S.getFunction().getParent()->getDataLayout();
  auto IP = Builder.GetInsertPoint();

  assert(IP != Builder.GetInsertBlock()->end() &&
         "Only instructions can be insert points for SCEVExpander");
  Value *Expanded =
      expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), &*IP, &VTV,
                    StartBlock->getSinglePredecessor());

  BBMap[Old] = Expanded;
  return Expanded;
}

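// For illustration (the concrete SCEV is schematic, not taken from a specific
// test case): if a value's SCEV at scope L is {0,+,4}<%loop>, i.e. 4 * i for
// the induction variable i of %loop, and LTS maps %loop onto the new loop
// counter, SCEVLoopAddRecRewriter produces the corresponding add-recurrence
// over the new loop and SCEVExpander re-synthesizes it as fresh instructions
// at the current insert point, using VTV to remap any referenced values.
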
Value *BlockGenerator::getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
                                   LoopToScevMapT &LTS, Loop *L) const {
  auto lookupGlobally = [this](Value *Old) -> Value * {
    Value *New = GlobalMap.lookup(Old);
    if (!New)
      return nullptr;

    // Required by:
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll
    // * Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
    // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
    // * Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
    // GlobalMap should be a mapping from (value in original SCoP) to (copied
    // value in generated SCoP), without intermediate mappings, which might
    // easily require transitiveness as well.
    if (Value *NewRemapped = GlobalMap.lookup(New))
      New = NewRemapped;

    // No test case for this code.
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  };

  Value *New = nullptr;
  auto VUse = VirtualUse::create(&Stmt, L, Old, true);
  switch (VUse.getKind()) {
  case VirtualUse::Block:
    // BasicBlocks are constants, but the BlockGenerator copies them.
    New = BBMap.lookup(Old);
    break;

  case VirtualUse::Constant:
    // Used by:
    // * Isl/CodeGen/OpenMP/reference-argument-from-non-affine-region.ll
    // Constants should not be redefined. In this case, the GlobalMap just
    // contains a mapping to the same constant, which is unnecessary, but
    // harmless.
    if ((New = lookupGlobally(Old)))
      break;

    assert(!BBMap.count(Old));
    New = Old;
    break;

  case VirtualUse::ReadOnly:
    assert(!GlobalMap.count(Old));

    // Required for:
    // * Isl/CodeGen/MemAccess/create_arrays.ll
    // * Isl/CodeGen/read-only-scalars.ll
    // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
    // For some reason these reload a read-only value. The reloaded value ends
    // up in BBMap, but its value should be identical.
    //
    // Required for:
    // * Isl/CodeGen/OpenMP/single_loop_with_param.ll
    // The parallel subfunctions need to reference the read-only value from the
    // parent function; this is done by reloading them locally.
    if ((New = BBMap.lookup(Old)))
      break;

    New = Old;
    break;

  case VirtualUse::Synthesizable:
    // Used by:
    // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
    // * Isl/CodeGen/OpenMP/recomputed-srem.ll
    // * Isl/CodeGen/OpenMP/reference-other-bb.ll
    // * Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll
    // For some reason synthesizable values end up in GlobalMap. Their values
    // are the same as trySynthesizeNewValue would return. The legacy
    // implementation prioritized GlobalMap, so this is what we do here as well.
    // Ideally, synthesizable values should not end up in GlobalMap.
    if ((New = lookupGlobally(Old)))
      break;

    // Required for:
    // * Isl/CodeGen/RuntimeDebugBuilder/combine_different_values.ll
    // * Isl/CodeGen/getNumberOfIterations.ll
    // * Isl/CodeGen/non_affine_float_compare.ll
    // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
    // Ideally, synthesizable values are synthesized by trySynthesizeNewValue,
    // not precomputed (SCEVExpander has its own caching mechanism).
    // These tests fail without this, but I think trySynthesizeNewValue would
    // just re-synthesize the same instructions.
    if ((New = BBMap.lookup(Old)))
      break;

    New = trySynthesizeNewValue(Stmt, Old, BBMap, LTS, L);
    break;

  case VirtualUse::Hoisted:
    // TODO: Hoisted invariant loads should be found in GlobalMap only, but not
    // redefined locally (which will be ignored anyway). That is, the following
    // assertion should apply: assert(!BBMap.count(Old))
    New = lookupGlobally(Old);
    break;

  case VirtualUse::Intra:
  case VirtualUse::Inter:
    assert(!GlobalMap.count(Old) &&
           "Intra and inter-stmt values are never global");
    New = BBMap.lookup(Old);
    break;
  }
  assert(New && "Unexpected scalar dependence in region!");
  return New;
}

void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
                                    ValueMapT &BBMap, LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to
  // copy them correctly. In the current state, they just crash the code
  // generation as the metadata operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand =
        getNewValue(Stmt, OldOperand, BBMap, LTS, getLoopForStmt(Stmt));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      NewInst->deleteValue();
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  // When copying the instruction onto the Module meant for the GPU,
  // debug metadata attached to an instruction causes all related
  // metadata to be pulled into the Module. This includes the DICompileUnit,
  // which will not be listed in llvm.dbg.cu of the Module since the Module
  // doesn't contain one. This fails the verification of the Module and the
  // subsequent generation of the ASM string.
  if (NewInst->getModule() != Inst->getModule())
    NewInst->setDebugLoc(llvm::DebugLoc());

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

Value *
BlockGenerator::generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &MA = Stmt.getArrayAccessFor(Inst);
  return generateLocationAccessed(
      Stmt, getLoopForStmt(Stmt),
      Inst.isNull() ? nullptr : Inst.getPointerOperand(), BBMap, LTS,
      NewAccesses, MA.getId().release(), MA.getAccessValue()->getType());
}

Value *BlockGenerator::generateLocationAccessed(
    ScopStmt &Stmt, Loop *L, Value *Pointer, ValueMapT &BBMap,
    LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses, __isl_take isl_id *Id,
    Type *ExpectedType) {
  isl_ast_expr *AccessExpr = isl_id_to_ast_expr_get(NewAccesses, Id);

  if (AccessExpr) {
    AccessExpr = isl_ast_expr_address_of(AccessExpr);
    auto Address = ExprBuilder->create(AccessExpr);

    // Cast the address of this memory access to a pointer type that has the
    // same element type as the original access, but uses the address space of
    // the newly generated pointer.
    auto OldPtrTy = ExpectedType->getPointerTo();
    auto NewPtrTy = Address->getType();
    OldPtrTy = PointerType::getWithSamePointeeType(
        OldPtrTy, NewPtrTy->getPointerAddressSpace());

    if (OldPtrTy != NewPtrTy)
      Address = Builder.CreateBitOrPointerCast(Address, OldPtrTy);
    return Address;
  }
  assert(
      Pointer &&
      "If expression was not generated, must use the original pointer value");
  return getNewValue(Stmt, Pointer, BBMap, LTS, L);
}

Value *
BlockGenerator::getImplicitAddress(MemoryAccess &Access, Loop *L,
                                   LoopToScevMapT &LTS, ValueMapT &BBMap,
                                   __isl_keep isl_id_to_ast_expr *NewAccesses) {
  if (Access.isLatestArrayKind())
    return generateLocationAccessed(*Access.getStatement(), L, nullptr, BBMap,
                                    LTS, NewAccesses, Access.getId().release(),
                                    Access.getAccessValue()->getType());

  return getOrCreateAlloca(Access);
}

Loop *BlockGenerator::getLoopForStmt(const ScopStmt &Stmt) const {
  auto *StmtBB = Stmt.getEntryBlock();
  return LI.getLoopFor(StmtBB);
}

Value *BlockGenerator::generateArrayLoad(ScopStmt &Stmt, LoadInst *Load,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load))
    return PreloadLoad;

  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, LTS, NewAccesses);
  Value *ScalarLoad =
      Builder.CreateAlignedLoad(Load->getType(), NewPointer, Load->getAlign(),
                                Load->getName() + "_p_scalar_");

  if (PollyDebugPrinting)
    RuntimeDebugBuilder::createCPUPrinter(Builder, "Load from ", NewPointer,
                                          ": ", ScalarLoad, "\n");

  return ScalarLoad;
}

void BlockGenerator::generateArrayStore(ScopStmt &Stmt, StoreInst *Store,
                                        ValueMapT &BBMap, LoopToScevMapT &LTS,
                                        isl_id_to_ast_expr *NewAccesses) {
  MemoryAccess &MA = Stmt.getArrayAccessFor(Store);
  isl::set AccDom = MA.getAccessRelation().domain();
  std::string Subject = MA.getId().get_name();

  generateConditionalExecution(Stmt, AccDom, Subject.c_str(), [&, this]() {
    Value *NewPointer =
        generateLocationAccessed(Stmt, Store, BBMap, LTS, NewAccesses);
    Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap,
                                      LTS, getLoopForStmt(Stmt));

    if (PollyDebugPrinting)
      RuntimeDebugBuilder::createCPUPrinter(Builder, "Store to ", NewPointer,
                                            ": ", ValueOperand, "\n");

    Builder.CreateAlignedStore(ValueOperand, NewPointer, Store->getAlign());
  });
}

bool BlockGenerator::canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst) {
  Loop *L = getLoopForStmt(Stmt);
  return (Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) &&
         canSynthesize(Inst, *Stmt.getParent(), &SE, L);
}

void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst,
                                     ValueMapT &BBMap, LoopToScevMapT &LTS,
                                     isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions control flow; they are explicitly expressed in
  // the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Synthesizable statements will be generated on demand.
  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateArrayLoad(Stmt, Load, BBMap, LTS, NewAccesses);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Load] = NewLoad;
    return;
  }

  if (auto *Store = dyn_cast<StoreInst>(Inst)) {
    // Identified as redundant by -polly-simplify.
    if (!Stmt.getArrayAccessOrNULLFor(Store))
      return;

    generateArrayStore(Stmt, Store, BBMap, LTS, NewAccesses);
    return;
  }

  if (auto *PHI = dyn_cast<PHINode>(Inst)) {
    copyPHIInstruction(Stmt, PHI, BBMap, LTS);
    return;
  }

  // Skip some special intrinsics for which we do not adjust the semantics to
  // the new schedule. All others are handled like every other instruction.
  if (isIgnoredIntrinsic(Inst))
    return;

  copyInstScalar(Stmt, Inst, BBMap, LTS);
}

void BlockGenerator::removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap) {
  auto NewBB = Builder.GetInsertBlock();
  for (auto I = NewBB->rbegin(); I != NewBB->rend(); I++) {
    Instruction *NewInst = &*I;

    if (!isInstructionTriviallyDead(NewInst))
      continue;

    for (auto Pair : BBMap)
      if (Pair.second == NewInst) {
        BBMap.erase(Pair.first);
      }

    NewInst->eraseFromParent();
    I = NewBB->rbegin();
  }
}

void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                              __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "Only block statements can be copied by the block generator");

  ValueMapT BBMap;

  BasicBlock *BB = Stmt.getBasicBlock();
  copyBB(Stmt, BB, BBMap, LTS, NewAccesses);
  removeDeadInstructions(BB, BBMap);
}

BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  return CopyBB;
}

BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB,
                                   ValueMapT &BBMap, LoopToScevMapT &LTS,
                                   isl_id_to_ast_expr *NewAccesses) {
  BasicBlock *CopyBB = splitBB(BB);
  Builder.SetInsertPoint(&CopyBB->front());
  generateScalarLoads(Stmt, LTS, BBMap, NewAccesses);
  generateBeginStmtTrace(Stmt, LTS, BBMap);

  copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses);

  // After a basic block was copied store all scalars that escape this block in
  // their alloca.
  generateScalarStores(Stmt, LTS, BBMap, NewAccesses);
  return CopyBB;
}

void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
                            ValueMapT &BBMap, LoopToScevMapT &LTS,
                            isl_id_to_ast_expr *NewAccesses) {
  EntryBB = &CopyBB->getParent()->getEntryBlock();

  // Block statements and the entry blocks of region statements are code
  // generated from instruction lists. This allows us to optimize the
  // instructions that belong to a certain scop statement. As the code
  // structure of region statements might be arbitrarily complex, optimizing
  // the instruction list is not yet supported.
  if (Stmt.isBlockStmt() || (Stmt.isRegionStmt() && Stmt.getEntryBlock() == BB))
    for (Instruction *Inst : Stmt.getInstructions())
      copyInstruction(Stmt, Inst, BBMap, LTS, NewAccesses);
  else
    for (Instruction &Inst : *BB)
      copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses);
}

Value *BlockGenerator::getOrCreateAlloca(const MemoryAccess &Access) {
  assert(!Access.isLatestArrayKind() && "Trying to get alloca for array kind");

  return getOrCreateAlloca(Access.getLatestScopArrayInfo());
}

Value *BlockGenerator::getOrCreateAlloca(const ScopArrayInfo *Array) {
  assert(!Array->isArrayKind() && "Trying to get alloca for array kind");

  auto &Addr = ScalarMap[Array];

  if (Addr) {
    // Allow allocas to be (temporarily) redirected once by adding a new
    // old-alloca-addr to new-addr mapping to GlobalMap. This functionality
    // is used for example by the OpenMP code generation where a first use
    // of a scalar while still in the host code allocates a normal alloca with
    // getOrCreateAlloca. When the values of this scalar are accessed during
    // the generation of the parallel subfunction, these values are copied over
    // to the parallel subfunction and each request for a scalar alloca slot
    // must be forwarded to the temporary in-subfunction slot. This mapping is
    // removed when the subfunction has been generated and again normal host
    // code is generated. Due to the following reasons it is not possible to
    // perform the GlobalMap lookup right after creating the alloca below, but
    // instead we need to check GlobalMap at each call to getOrCreateAlloca:
    //
    //   1) GlobalMap may be changed multiple times (for each parallel loop),
    //   2) The temporary mapping is commonly only known after the initial
    //      alloca has already been generated, and
    //   3) The original alloca value must be restored after leaving the
    //      sub-function.
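    //
    // Schematically (names made up for illustration): the host code creates
    //   %x.s2a = alloca double
    // and ScalarMap[Array] points at it. While the parallel subfunction is
    // being emitted, a GlobalMap entry %x.s2a -> %x.s2a.subfn redirects every
    // lookup to the subfunction-local slot; once the subfunction is finished,
    // the entry is dropped and lookups return the original %x.s2a again.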
    if (Value *NewAddr = GlobalMap.lookup(&*Addr))
      return NewAddr;
    return Addr;
  }

  Type *Ty = Array->getElementType();
  Value *ScalarBase = Array->getBasePtr();
  std::string NameExt;
  if (Array->isPHIKind())
    NameExt = ".phiops";
  else
    NameExt = ".s2a";

  const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();

  Addr =
      new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
                     DL.getPrefTypeAlign(Ty), ScalarBase->getName() + NameExt);
  EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
  Addr->insertBefore(&*EntryBB->getFirstInsertionPt());

  return Addr;
}

void BlockGenerator::handleOutsideUsers(const Scop &S, ScopArrayInfo *Array) {
  Instruction *Inst = cast<Instruction>(Array->getBasePtr());

  // If the instruction was copied multiple times we have already collected its
  // escape users and can exit. Otherwise, gather the escape users and the
  // alloca the instruction was demoted to, and put them in the EscapeMap for
  // later finalization.
  if (EscapeMap.count(Inst))
    return;

  EscapeUserVectorTy EscapeUsers;
  for (User *U : Inst->users()) {
    // Non-instruction users will never escape.
    Instruction *UI = dyn_cast<Instruction>(U);
    if (!UI)
      continue;

    if (S.contains(UI))
      continue;

    EscapeUsers.push_back(UI);
  }

  // Exit if no escape uses were found.
  if (EscapeUsers.empty())
    return;

  // Get or create an escape alloca for this instruction.
  auto *ScalarAddr = getOrCreateAlloca(Array);

  // Remember that this instruction has escape uses and the escape alloca.
  EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers));
}

void BlockGenerator::generateScalarLoads(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isWrite())
      continue;

#ifndef NDEBUG
    auto StmtDom =
        Stmt.getDomain().intersect_params(Stmt.getParent()->getContext());
    auto AccDom = MA->getAccessRelation().domain();
    assert(!StmtDom.is_subset(AccDom).is_false() &&
           "Scalar must be loaded in all statement instances");
#endif

    auto *Address =
        getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap, NewAccesses);
    assert((!isa<Instruction>(Address) ||
            DT.dominates(cast<Instruction>(Address)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    BBMap[MA->getAccessValue()] = Builder.CreateLoad(
        MA->getElementType(), Address, Address->getName() + ".reload");
  }
}

Value *BlockGenerator::buildContainsCondition(ScopStmt &Stmt,
                                              const isl::set &Subdomain) {
  isl::ast_build AstBuild = Stmt.getAstBuild();
  isl::set Domain = Stmt.getDomain();

  isl::union_map USchedule = AstBuild.get_schedule();
  USchedule = USchedule.intersect_domain(Domain);

  assert(!USchedule.is_empty());
  isl::map Schedule = isl::map::from_union_map(USchedule);

  isl::set ScheduledDomain = Schedule.range();
  isl::set ScheduledSet = Subdomain.apply(Schedule);

  isl::ast_build RestrictedBuild = AstBuild.restrict(ScheduledDomain);

  isl::ast_expr IsInSet = RestrictedBuild.expr_from(ScheduledSet);
  Value *IsInSetExpr = ExprBuilder->create(IsInSet.copy());
  IsInSetExpr = Builder.CreateICmpNE(
      IsInSetExpr, ConstantInt::get(IsInSetExpr->getType(), 0));

  return IsInSetExpr;
}

void BlockGenerator::generateConditionalExecution(
    ScopStmt &Stmt, const isl::set &Subdomain, StringRef Subject,
    const std::function<void()> &GenThenFunc) {
  isl::set StmtDom = Stmt.getDomain();

  // If the condition is a tautology, don't generate a condition around the
  // code.
  bool IsPartialWrite =
      !StmtDom.intersect_params(Stmt.getParent()->getContext())
           .is_subset(Subdomain);
  if (!IsPartialWrite) {
    GenThenFunc();
    return;
  }

  // Generate the condition.
  Value *Cond = buildContainsCondition(Stmt, Subdomain);

  // Don't call GenThenFunc if it is never executed. An ast index expression
  // might not be defined in this case.
  if (auto *Const = dyn_cast<ConstantInt>(Cond))
    if (Const->isZero())
      return;

  BasicBlock *HeadBlock = Builder.GetInsertBlock();
  StringRef BlockName = HeadBlock->getName();

  // Generate the conditional block.
  SplitBlockAndInsertIfThen(Cond, &*Builder.GetInsertPoint(), false, nullptr,
                            &DT, &LI);
  BranchInst *Branch = cast<BranchInst>(HeadBlock->getTerminator());
  BasicBlock *ThenBlock = Branch->getSuccessor(0);
  BasicBlock *TailBlock = Branch->getSuccessor(1);

  // Assign descriptive names.
  if (auto *CondInst = dyn_cast<Instruction>(Cond))
    CondInst->setName("polly." + Subject + ".cond");
  ThenBlock->setName(BlockName + "." + Subject + ".partial");
  TailBlock->setName(BlockName + ".cont");

  // Put the client code into the conditional block and continue in the merge
  // block afterwards.
  Builder.SetInsertPoint(ThenBlock, ThenBlock->getFirstInsertionPt());
  GenThenFunc();
  Builder.SetInsertPoint(TailBlock, TailBlock->getFirstInsertionPt());
}

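// For a partial write, the resulting control flow looks schematically like
// this (block names assume Subject == "MemRef_A"):
//
//   head:                      ; original insert block
//     %polly.MemRef_A.cond = icmp ne ...
//     br i1 %polly.MemRef_A.cond, label %head.MemRef_A.partial,
//           label %head.cont
//   head.MemRef_A.partial:     ; code emitted by GenThenFunc
//     br label %head.cont
//   head.cont:                 ; code generation continues here
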
static std::string getInstName(Value *Val) {
  std::string Result;
  raw_string_ostream OS(Result);
  Val->printAsOperand(OS, false);
  return OS.str();
}

void BlockGenerator::generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT &LTS,
                                            ValueMapT &BBMap) {
  if (!TraceStmts)
    return;

  Scop *S = Stmt.getParent();
  const char *BaseName = Stmt.getBaseName();

  isl::ast_build AstBuild = Stmt.getAstBuild();
  isl::set Domain = Stmt.getDomain();

  isl::union_map USchedule = AstBuild.get_schedule().intersect_domain(Domain);
  isl::map Schedule = isl::map::from_union_map(USchedule);
  assert(Schedule.is_empty().is_false() &&
         "The stmt must have a valid instance");

  isl::multi_pw_aff ScheduleMultiPwAff =
      isl::pw_multi_aff::from_map(Schedule.reverse());
  isl::ast_build RestrictedBuild = AstBuild.restrict(Schedule.range());

  // Sequence of strings to print.
  SmallVector<llvm::Value *, 8> Values;

  // Print the name of the statement.
  // TODO: Indent by the depth of the statement instance in the schedule tree.
  Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, BaseName));
  Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "("));

  // Add the coordinate of the statement instance.
  for (unsigned i : rangeIslSize(0, ScheduleMultiPwAff.dim(isl::dim::out))) {
    if (i > 0)
      Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ","));

    isl::ast_expr IsInSet = RestrictedBuild.expr_from(ScheduleMultiPwAff.at(i));
    Values.push_back(ExprBuilder->create(IsInSet.copy()));
  }

  if (TraceScalars) {
    Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ")"));
    DenseSet<Instruction *> Encountered;

    // Add the value of each scalar (and the result of PHIs) used in the
    // statement.
    // TODO: Values used in region-statements.
    for (Instruction *Inst : Stmt.insts()) {
      if (!RuntimeDebugBuilder::isPrintable(Inst->getType()))
        continue;

      if (isa<PHINode>(Inst)) {
        Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, " "));
        Values.push_back(RuntimeDebugBuilder::getPrintableString(
            Builder, getInstName(Inst)));
        Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "="));
        Values.push_back(getNewValue(Stmt, Inst, BBMap, LTS,
                                     LI.getLoopFor(Inst->getParent())));
      } else {
        for (Value *Op : Inst->operand_values()) {
          // Do not print values that cannot change during the execution of the
          // SCoP.
          auto *OpInst = dyn_cast<Instruction>(Op);
          if (!OpInst)
            continue;
          if (!S->contains(OpInst))
            continue;

          // Print each scalar at most once, and exclude values defined in the
          // statement itself.
          if (Encountered.count(OpInst))
            continue;

          Values.push_back(
              RuntimeDebugBuilder::getPrintableString(Builder, " "));
          Values.push_back(RuntimeDebugBuilder::getPrintableString(
              Builder, getInstName(OpInst)));
          Values.push_back(
              RuntimeDebugBuilder::getPrintableString(Builder, "="));
          Values.push_back(getNewValue(Stmt, OpInst, BBMap, LTS,
                                       LI.getLoopFor(Inst->getParent())));
          Encountered.insert(OpInst);
        }
      }

      Encountered.insert(Inst);
    }

    Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "\n"));
  } else {
    Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ")\n"));
  }

  RuntimeDebugBuilder::createCPUPrinter(Builder, ArrayRef<Value *>(Values));
}

void BlockGenerator::generateScalarStores(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  Loop *L = LI.getLoopFor(Stmt.getBasicBlock());

  assert(Stmt.isBlockStmt() &&
         "Region statements need to use the generateScalarStores() function in "
         "the RegionGenerator");

  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    isl::set AccDom = MA->getAccessRelation().domain();
    std::string Subject = MA->getId().get_name();

    generateConditionalExecution(
        Stmt, AccDom, Subject.c_str(), [&, this, MA]() {
          Value *Val = MA->getAccessValue();
          if (MA->isAnyPHIKind()) {
            assert(MA->getIncoming().size() >= 1 &&
                   "Block statements have exactly one exiting block, or "
                   "multiple but with the same incoming block and value");
            assert(std::all_of(MA->getIncoming().begin(),
                               MA->getIncoming().end(),
                               [&](std::pair<BasicBlock *, Value *> p) -> bool {
                                 return p.first == Stmt.getBasicBlock();
                               }) &&
                   "Incoming block must be statement's block");
            Val = MA->getIncoming()[0].second;
          }
          auto Address = getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS,
                                            BBMap, NewAccesses);

          Val = getNewValue(Stmt, Val, BBMap, LTS, L);
          assert((!isa<Instruction>(Val) ||
                  DT.dominates(cast<Instruction>(Val)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          assert((!isa<Instruction>(Address) ||
                  DT.dominates(cast<Instruction>(Address)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");

          // The new Val might have a different type than the old Val due to
          // ScalarEvolution looking through bitcasts.
          Address = Builder.CreateBitOrPointerCast(
              Address, Val->getType()->getPointerTo(
                           Address->getType()->getPointerAddressSpace()));

          Builder.CreateStore(Val, Address);
        });
  }
}

void BlockGenerator::createScalarInitialization(Scop &S) {
  BasicBlock *ExitBB = S.getExit();
  BasicBlock *PreEntryBB = S.getEnteringBlock();

  Builder.SetInsertPoint(&*StartBlock->begin());

  for (auto &Array : S.arrays()) {
    if (Array->getNumberOfDimensions() != 0)
      continue;

    if (Array->isPHIKind()) {
      // For PHI nodes, the only values we need to store are the ones that
      // reach the PHI node from outside the region. In general there should
      // only be one such incoming edge and this edge should enter through
      // 'PreEntryBB'.
      auto PHI = cast<PHINode>(Array->getBasePtr());

      for (auto BI = PHI->block_begin(), BE = PHI->block_end(); BI != BE; BI++)
        if (!S.contains(*BI) && *BI != PreEntryBB)
          llvm_unreachable("Incoming edges from outside the scop should always "
                           "come from PreEntryBB");

      int Idx = PHI->getBasicBlockIndex(PreEntryBB);
      if (Idx < 0)
        continue;

      Value *ScalarValue = PHI->getIncomingValue(Idx);

      Builder.CreateStore(ScalarValue, getOrCreateAlloca(Array));
      continue;
    }

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (Inst && S.contains(Inst))
      continue;

    // PHI nodes that are not marked as such in their SAI object are either exit
    // PHI nodes we model as common scalars but without initialization, or
    // incoming phi nodes that need to be initialized. Check if the first is the
    // case for Inst and do not create and initialize memory if so.
    if (auto *PHI = dyn_cast_or_null<PHINode>(Inst))
      if (!S.hasSingleExitEdge() && PHI->getBasicBlockIndex(ExitBB) >= 0)
        continue;

    Builder.CreateStore(Array->getBasePtr(), getOrCreateAlloca(Array));
  }
}

void BlockGenerator::createScalarFinalization(Scop &S) {
  // The exit block of the __unoptimized__ region.
  BasicBlock *ExitBB = S.getExitingBlock();
  // The merge block __just after__ the region and the optimized region.
  BasicBlock *MergeBB = S.getExit();

  // The exit block of the __optimized__ region.
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());
  for (const auto &EscapeMapping : EscapeMap) {
    // Extract the escaping instruction and the escaping users as well as the
    // alloca the instruction was demoted to.
    Instruction *EscapeInst = EscapeMapping.first;
    const auto &EscapeMappingValue = EscapeMapping.second;
    const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second;
    auto *ScalarAddr = cast<AllocaInst>(&*EscapeMappingValue.first);

    // Reload the demoted instruction in the optimized version of the SCoP.
    Value *EscapeInstReload =
        Builder.CreateLoad(ScalarAddr->getAllocatedType(), ScalarAddr,
                           EscapeInst->getName() + ".final_reload");
    EscapeInstReload =
        Builder.CreateBitOrPointerCast(EscapeInstReload, EscapeInst->getType());

    // Create the merge PHI that merges the optimized and unoptimized version.
    PHINode *MergePHI = PHINode::Create(EscapeInst->getType(), 2,
                                        EscapeInst->getName() + ".merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());

    // Add the respective values to the merge PHI.
    MergePHI->addIncoming(EscapeInstReload, OptExitBB);
    MergePHI->addIncoming(EscapeInst, ExitBB);

    // The information of scalar evolution about the escaping instruction needs
    // to be revoked so the new merged instruction will be used.
    if (SE.isSCEVable(EscapeInst->getType()))
      SE.forgetValue(EscapeInst);

    // Replace all uses of the demoted instruction with the merge PHI.
    for (Instruction *EUser : EscapeUsers)
      EUser->replaceUsesOfWith(EscapeInst, MergePHI);
  }
}

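// Schematically, for an escaping value %v (all names illustrative only), the
// merge block after the region ends up with:
//
//   merge:
//     %v.merge = phi [ %v.final_reload, %polly.exiting ],
//                    [ %v, %orig.exiting ]
//
// and every use of %v outside the region is rewritten to %v.merge, so the
// correct definition is picked regardless of whether the original or the
// optimized version of the region was executed.
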
void BlockGenerator::findOutsideUsers(Scop &S) {
  for (auto &Array : S.arrays()) {
    if (Array->getNumberOfDimensions() != 0)
      continue;

    if (Array->isPHIKind())
      continue;

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (!Inst)
      continue;

    // Scop invariant hoisting moves some of the base pointers out of the scop.
    // We can ignore these, as the invariant load hoisting already registers the
    // relevant outside users.
    if (!S.contains(Inst))
      continue;

    handleOutsideUsers(S, Array);
  }
}

void BlockGenerator::createExitPHINodeMerges(Scop &S) {
  if (S.hasSingleExitEdge())
    return;

  auto *ExitBB = S.getExitingBlock();
  auto *MergeBB = S.getExit();
  auto *AfterMergeBB = MergeBB->getSingleSuccessor();
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());

  for (auto &SAI : S.arrays()) {
    auto *Val = SAI->getBasePtr();

    // Only Value-like scalars need a merge PHI. Exit block PHIs receive either
    // the original PHI's value or the reloaded incoming values from the
    // generated code. An llvm::Value is merged between the original code's
    // value or the generated one.
    if (!SAI->isExitPHIKind())
      continue;

    PHINode *PHI = dyn_cast<PHINode>(Val);
    if (!PHI)
      continue;

    if (PHI->getParent() != AfterMergeBB)
      continue;

    std::string Name = PHI->getName().str();
    Value *ScalarAddr = getOrCreateAlloca(SAI);
    Value *Reload = Builder.CreateLoad(SAI->getElementType(), ScalarAddr,
                                       Name + ".ph.final_reload");
    Reload = Builder.CreateBitOrPointerCast(Reload, PHI->getType());
    Value *OriginalValue = PHI->getIncomingValueForBlock(MergeBB);
    assert((!isa<Instruction>(OriginalValue) ||
            cast<Instruction>(OriginalValue)->getParent() != MergeBB) &&
           "Original value must not be one we just generated.");
    auto *MergePHI = PHINode::Create(PHI->getType(), 2, Name + ".ph.merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());
    MergePHI->addIncoming(Reload, OptExitBB);
    MergePHI->addIncoming(OriginalValue, ExitBB);
    int Idx = PHI->getBasicBlockIndex(MergeBB);
    PHI->setIncomingValue(Idx, MergePHI);
  }
}

void BlockGenerator::invalidateScalarEvolution(Scop &S) {
  for (auto &Stmt : S)
    if (Stmt.isCopyStmt())
      continue;
    else if (Stmt.isBlockStmt())
      for (auto &Inst : *Stmt.getBasicBlock())
        SE.forgetValue(&Inst);
    else if (Stmt.isRegionStmt())
      for (auto *BB : Stmt.getRegion()->blocks())
        for (auto &Inst : *BB)
          SE.forgetValue(&Inst);
    else
      llvm_unreachable("Unexpected statement type found");

  // Invalidate SCEV of loops surrounding the EscapeUsers.
  for (const auto &EscapeMapping : EscapeMap) {
    const EscapeUserVectorTy &EscapeUsers = EscapeMapping.second.second;
    for (Instruction *EUser : EscapeUsers) {
      if (Loop *L = LI.getLoopFor(EUser->getParent()))
        while (L) {
          SE.forgetLoop(L);
          L = L->getParentLoop();
        }
    }
  }
}

void BlockGenerator::finalizeSCoP(Scop &S) {
  findOutsideUsers(S);
  createScalarInitialization(S);
  createExitPHINodeMerges(S);
  createScalarFinalization(S);
  invalidateScalarEvolution(S);
}

VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           isl_map *Schedule)
    : BlockGenerator(BlockGen), VLTS(VLTS), Schedule(Schedule) {
  assert(Schedule && "No statement domain provided");
}

Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(FixedVectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Value *VectorBlockGenerator::generateStrideOneLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses, bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  Type *VectorType = FixedVectorType::get(Load->getType(), VectorWidth);
  Type *VectorPtrType =
      PointerType::get(VectorType, Load->getPointerAddressSpace());
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[Offset],
                                               VLTS[Offset], NewAccesses);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad = Builder.CreateLoad(VectorType, VectorPtr,
                                         Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(Align(8));

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}

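// For a stride-one access with vector width 4, the generated IR is roughly
// (types and names schematic):
//
//   %vector_ptr    = bitcast double* %addr to <4 x double>*
//   %v_p_vec_full  = load <4 x double>, <4 x double>* %vector_ptr
//
// For a stride of -1 the loaded vector is additionally reversed:
//
//   %v_reverse = shufflevector <4 x double> %v_p_vec_full, <4 x double> ...,
//                              <4 x i32> <i32 3, i32 2, i32 1, i32 0>
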
Value *VectorBlockGenerator::generateStrideZeroLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  Type *VectorType = FixedVectorType::get(Load->getType(), 1);
  Type *VectorPtrType =
      PointerType::get(VectorType, Load->getPointerAddressSpace());
  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, VLTS[0], NewAccesses);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad = Builder.CreateLoad(VectorType, VectorPtr,
                                            Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(Align(8));

  Constant *SplatVector = Constant::getNullValue(
      FixedVectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}

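// A stride-zero access loads a single element and broadcasts it to all lanes.
// Schematically, for vector width 4:
//
//   %v_p_splat_one = load <1 x double>, <1 x double>* %v_p_vec_p
//   %v_p_splat     = shufflevector <1 x double> %v_p_splat_one,
//                                  <1 x double> %v_p_splat_one,
//                                  <4 x i32> zeroinitializer
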
Value *VectorBlockGenerator::generateUnknownStrideLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  int VectorWidth = getVectorWidth();
  Type *ElemTy = Load->getType();
  auto *FVTy = FixedVectorType::get(ElemTy, VectorWidth);

  Value *Vector = UndefValue::get(FVTy);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[i],
                                                 VLTS[i], NewAccesses);
    Value *ScalarLoad =
        Builder.CreateLoad(ElemTy, NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}

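// With an unknown stride there is no single wide load to emit; each lane
// performs its own scalar load (a software gather) and the results are
// combined with insertelement. Schematically, for width 4:
//
//   %s0 = load double, double* %addr0
//   %v0 = insertelement <4 x double> undef, double %s0, i32 0
//   %s1 = load double, double* %addr1
//   %v1 = insertelement <4 x double> %v0, double %s1, i32 1
//   ...
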
void VectorBlockGenerator::generateLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load)) {
    VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad,
                                                Load->getName() + "_p");
    return;
  }

  if (!VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateArrayLoad(Stmt, Load, ScalarMaps[i], VLTS[i], NewAccesses);
    return;
  }

  const MemoryAccess &Access = Stmt.getArrayAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  Value *NewLoad;
  if (Access.isStrideZero(isl::manage_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0], NewAccesses);
  else if (Access.isStrideOne(isl::manage_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses);
  else if (Access.isStrideX(isl::manage_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses, true);
  else
    NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps, NewAccesses);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
                                     ScalarMaps, getLoopForStmt(Stmt));

  assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  auto *DestType = FixedVectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForStmt(Stmt);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

void VectorBlockGenerator::copyStore(
    ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &Access = Stmt.getArrayAccessFor(Store);

  Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForStmt(Stmt));

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Store, VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl::manage_copy(Schedule))) {
    Type *VectorType = FixedVectorType::get(Store->getValueOperand()->getType(),
                                            getVectorWidth());
    Type *VectorPtrType =
        PointerType::get(VectorType, Store->getPointerAddressSpace());
    Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[0],
                                                 VLTS[0], NewAccesses);

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      Store->setAlignment(Align(8));
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[i],
                                                   VLTS[i], NewAccesses);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Value *Operand : Inst->operands())
    if (VectorMap.count(Operand))
      return true;
  return false;
}

bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

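// Example (schematic names): if %val is mapped to a <4 x double> vector
// %val.vec in VectorMap, extractScalarValues fills the per-lane maps with
//
//   ScalarMaps[0][%val] = extractelement <4 x double> %val.vec, i32 0
//   ScalarMaps[1][%val] = extractelement <4 x double> %val.vec, i32 1
//   ...
//
// so that lane-wise (scalarized) code generation can refer to the individual
// elements.
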
void VectorBlockGenerator::copyInstScalarized(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
                                    VLTS[VectorLane], NewAccesses);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as vector value.
  auto *FVTy = FixedVectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(FVTy);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return VLTS.size(); }

void VectorBlockGenerator::copyInstruction(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions steer the control flow. They are explicitly
  // expressed in the generated AST and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Stmt, Load, VectorMap, ScalarMaps, NewAccesses);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (auto *Store = dyn_cast<StoreInst>(Inst)) {
      // Identified as redundant by -polly-simplify.
      if (!Stmt.getArrayAccessOrNULLFor(Store))
        return;

      copyStore(Stmt, Store, VectorMap, ScalarMaps, NewAccesses);
      return;
    }

    if (auto *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
      return;
    }

    if (auto *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we do not know how to
    // generate vector code.
  }

  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses);
}
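// Load each scalar (non-array) read access once and broadcast it to all
// vector lanes. For width 4 the generated IR looks roughly like this (names
// illustrative):
//
//   %x_p_vec_p = bitcast double* %x to <1 x double>*
//   %x.reload  = load <1 x double>, <1 x double>* %x_p_vec_p
//   %x_p_splat = shufflevector <1 x double> %x.reload, <1 x double> %x.reload,
//                              <4 x i32> zeroinitializer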
void VectorBlockGenerator::generateScalarVectorLoads(
    ScopStmt &Stmt, ValueMapT &VectorBlockMap) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isWrite())
      continue;

    auto *Address = getOrCreateAlloca(*MA);
    Type *VectorType = FixedVectorType::get(MA->getElementType(), 1);
    Type *VectorPtrType = PointerType::get(
        VectorType, Address->getType()->getPointerAddressSpace());
    Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType,
                                             Address->getName() + "_p_vec_p");
    auto *Val = Builder.CreateLoad(VectorType, VectorPtr,
                                   Address->getName() + ".reload");
    Constant *SplatVector = Constant::getNullValue(
        FixedVectorType::get(Builder.getInt32Ty(), getVectorWidth()));

    Value *VectorVal = Builder.CreateShuffleVector(
        Val, Val, SplatVector, Address->getName() + "_p_splat");
    VectorBlockMap[MA->getAccessValue()] = VectorVal;
  }
}
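// Statements handed to the vector block generator are expected to contain no
// scalar writes; hitting one here indicates a bug in the caller.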
void VectorBlockGenerator::verifyNoScalarStores(ScopStmt &Stmt) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isRead())
      continue;

    llvm_unreachable("Scalar stores not expected in vector loop");
  }
}
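// Copy a block statement in vector mode: split off a new 'polly.stmt.' block,
// broadcast all scalar reads to vector values, copy each instruction either
// as a vector operation or scalarized per lane, and verify that no scalar
// stores remain.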
void VectorBlockGenerator::copyStmt(
    ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "TODO: Only block statements can be copied by the vector block "
         "generator");

  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(&CopyBB->front());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vector map (as it is calculating >vectorwidth< values at a
  // time) or, if the values are calculated using scalar operations, once in
  // every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  generateScalarVectorLoads(Stmt, VectorBlockMap);

  for (Instruction *Inst : Stmt.getInstructions())
    copyInstruction(Stmt, Inst, VectorBlockMap, ScalarBlockMap, NewAccesses);

  verifyNoScalarStores(Stmt);
}
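// Transfer the dominance relation from the original region to its copy: the
// immediate dominator of the copied block becomes the copy of the original
// block's immediate dominator, if that copy exists yet. Returns the start
// block of the copied immediate dominator so the caller can pick the right
// value map.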
BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB,
                                             BasicBlock *BBCopy) {
  BasicBlock *BBIDom = DT.getNode(BB)->getIDom()->getBlock();
  BasicBlock *BBCopyIDom = EndBlockMap.lookup(BBIDom);

  if (BBCopyIDom)
    DT.changeImmediateDominator(BBCopy, BBCopyIDom);

  return StartBlockMap.lookup(BBIDom);
}
// This is to determine whether an llvm::Value (defined in @p BB) is usable
// when leaving a subregion. The straightforward DT.dominates(BB,
// R->getExitBlock()) does not work in cases where the exit block has edges
// from outside the region. In that case the llvm::Value would never be usable
// in the exit block. The RegionGenerator however creates a new exit block
// ('ExitBBCopy') for the subregion's exiting edges only. We need to determine
// whether an llvm::Value is usable in there. We do this by checking whether
// it dominates all exiting blocks individually.
static bool isDominatingSubregionExit(const DominatorTree &DT, Region *R,
                                      BasicBlock *BB) {
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    if (!DT.dominates(BB, ExitingBB))
      return false;
  }

  return true;
}
// Find the direct dominator of the subregion's exit block if the subregion
// was simplified.
static BasicBlock *findExitDominator(DominatorTree &DT, Region *R) {
  BasicBlock *Common = nullptr;
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    // First exiting edge.
    if (!Common) {
      Common = ExitingBB;
      continue;
    }

    Common = DT.findNearestCommonDominator(Common, ExitingBB);
  }

  assert(Common && R->contains(Common));
  return Common;
}
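// Copy a non-affine subregion statement in four phases: (1) create a
// dedicated entry block and reload all demoted inputs there, (2) copy every
// block of the subregion in BFS order while repairing dominance, (3) recreate
// the region-internal control flow by copying the terminators, and (4) insert
// counting PHIs for loops inside the region and write back the exit scalars.
// The counting PHIs look roughly like this (names illustrative):
//
//   %polly.subregion.iv     = phi i32 [ 0, %entry ],
//                                     [ %polly.subregion.iv.inc, %latch ]
//   %polly.subregion.iv.inc = add i32 %polly.subregion.iv, 1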
void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                               __isl_keep isl_id_to_ast_expr *IdToAstExp) {
  assert(Stmt.isRegionStmt() &&
         "Only region statements can be copied by the region generator");

  // Forget all old mappings.
  StartBlockMap.clear();
  EndBlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();

  // Collection of all values related to this subregion.
  ValueMapT ValueMap;

  // The region represented by the statement.
  Region *R = Stmt.getRegion();

  // Create a dedicated entry for the region where we can reload all demoted
  // inputs.
  BasicBlock *EntryBB = R->getEntry();
  BasicBlock *EntryBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                       &*Builder.GetInsertPoint(), &DT, &LI);
  EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry");
  Builder.SetInsertPoint(&EntryBBCopy->front());

  ValueMapT &EntryBBMap = RegionMaps[EntryBBCopy];
  generateScalarLoads(Stmt, LTS, EntryBBMap, IdToAstExp);
  generateBeginStmtTrace(Stmt, LTS, EntryBBMap);

  for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI)
    if (!R->contains(*PI)) {
      StartBlockMap[*PI] = EntryBBCopy;
      EndBlockMap[*PI] = EntryBBCopy;
    }

  // Iterate over all blocks in the region in a breadth-first search.
  std::deque<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 8> SeenBlocks;
  Blocks.push_back(EntryBB);
  SeenBlocks.insert(EntryBB);

  while (!Blocks.empty()) {
    BasicBlock *BB = Blocks.front();
    Blocks.pop_front();

    // First split the block and update dominance information.
    BasicBlock *BBCopy = splitBB(BB);
    BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy);

    // Get the mapping for this block and initialize it with either the scalar
    // loads from the generated entering block (which dominates all blocks of
    // this subregion) or the maps of the immediate dominator, if part of the
    // subregion. The latter necessarily includes the former.
    ValueMapT *InitBBMap;
    if (BBCopyIDom) {
      assert(RegionMaps.count(BBCopyIDom));
      InitBBMap = &RegionMaps[BBCopyIDom];
    } else
      InitBBMap = &EntryBBMap;
    auto Inserted = RegionMaps.insert(std::make_pair(BBCopy, *InitBBMap));
    ValueMapT &RegionMap = Inserted.first->second;

    // Copy the block with the BlockGenerator.
    Builder.SetInsertPoint(&BBCopy->front());
    copyBB(Stmt, BB, BBCopy, RegionMap, LTS, IdToAstExp);

    // In order to remap PHI nodes we also store basic block mappings.
    StartBlockMap[BB] = BBCopy;
    EndBlockMap[BB] = Builder.GetInsertBlock();

    // Add values to incomplete PHI nodes waiting for this block to be copied.
    for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB])
      addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB, LTS);
    IncompletePHINodeMap[BB].clear();

    // And continue with new successors inside the region.
    for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++)
      if (R->contains(*SI) && SeenBlocks.insert(*SI))
        Blocks.push_back(*SI);

    // Remember value in case it is visible after this subregion.
    if (isDominatingSubregionExit(DT, R, BB))
      ValueMap.insert(RegionMap.begin(), RegionMap.end());
  }

  // Now create a new dedicated region exit block and add it to the region map.
  BasicBlock *ExitBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                      &*Builder.GetInsertPoint(), &DT, &LI);
  ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit");
  StartBlockMap[R->getExit()] = ExitBBCopy;
  EndBlockMap[R->getExit()] = ExitBBCopy;

  BasicBlock *ExitDomBBCopy = EndBlockMap.lookup(findExitDominator(DT, R));
  assert(ExitDomBBCopy &&
         "Common exit dominator must be within region; at least the entry node "
         "must match");
  DT.changeImmediateDominator(ExitBBCopy, ExitDomBBCopy);

  // As the block generator doesn't handle control flow we need to add the
  // region control flow by hand after all blocks have been copied.
  for (BasicBlock *BB : SeenBlocks) {
    BasicBlock *BBCopyStart = StartBlockMap[BB];
    BasicBlock *BBCopyEnd = EndBlockMap[BB];
    Instruction *TI = BB->getTerminator();
    if (isa<UnreachableInst>(TI)) {
      while (!BBCopyEnd->empty())
        BBCopyEnd->begin()->eraseFromParent();
      new UnreachableInst(BBCopyEnd->getContext(), BBCopyEnd);
      continue;
    }

    Instruction *BICopy = BBCopyEnd->getTerminator();

    ValueMapT &RegionMap = RegionMaps[BBCopyStart];
    RegionMap.insert(StartBlockMap.begin(), StartBlockMap.end());

    Builder.SetInsertPoint(BICopy);
    copyInstScalar(Stmt, TI, RegionMap, LTS);
    BICopy->eraseFromParent();
  }

  // Add counting PHI nodes to all loops in the region that can be used as
  // replacement for SCEVs referring to the old loop.
  for (BasicBlock *BB : SeenBlocks) {
    Loop *L = LI.getLoopFor(BB);
    if (L == nullptr || L->getHeader() != BB || !R->contains(L))
      continue;

    BasicBlock *BBCopy = StartBlockMap[BB];
    Value *NullVal = Builder.getInt32(0);
    PHINode *LoopPHI =
        PHINode::Create(Builder.getInt32Ty(), 2, "polly.subregion.iv");
    Instruction *LoopPHIInc = BinaryOperator::CreateAdd(
        LoopPHI, Builder.getInt32(1), "polly.subregion.iv.inc");
    LoopPHI->insertBefore(&BBCopy->front());
    LoopPHIInc->insertBefore(BBCopy->getTerminator());

    for (auto *PredBB : predecessors(BB)) {
      if (!R->contains(PredBB))
        continue;
      if (L->contains(PredBB))
        LoopPHI->addIncoming(LoopPHIInc, EndBlockMap[PredBB]);
      else
        LoopPHI->addIncoming(NullVal, EndBlockMap[PredBB]);
    }

    for (auto *PredBBCopy : predecessors(BBCopy))
      if (LoopPHI->getBasicBlockIndex(PredBBCopy) < 0)
        LoopPHI->addIncoming(NullVal, PredBBCopy);

    LTS[L] = SE.getUnknown(LoopPHI);
  }

  // Continue generating code in the exit block.
  Builder.SetInsertPoint(&*ExitBBCopy->getFirstInsertionPt());

  // Write values visible to other statements.
  generateScalarStores(Stmt, LTS, ValueMap, IdToAstExp);
  StartBlockMap.clear();
  EndBlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();
}
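// Create a PHI in the copied exit block that merges the per-exiting-edge
// values of a PHI-kind write. Each incoming value is remapped through the
// RegionMap of its (copied) incoming block.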
PHINode *RegionGenerator::buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS,
                                       ValueMapT &BBMap, Loop *L) {
  ScopStmt *Stmt = MA->getStatement();
  Region *SubR = Stmt->getRegion();
  auto Incoming = MA->getIncoming();

  PollyIRBuilder::InsertPointGuard IPGuard(Builder);
  PHINode *OrigPHI = cast<PHINode>(MA->getAccessInstruction());
  BasicBlock *NewSubregionExit = Builder.GetInsertBlock();

  // This can happen if the subregion is simplified after the ScopStmts
  // have been created; simplification happens as part of CodeGeneration.
  if (OrigPHI->getParent() != SubR->getExit()) {
    BasicBlock *FormerExit = SubR->getExitingBlock();
    if (FormerExit)
      NewSubregionExit = StartBlockMap.lookup(FormerExit);
  }

  PHINode *NewPHI = PHINode::Create(OrigPHI->getType(), Incoming.size(),
                                    "polly." + OrigPHI->getName(),
                                    NewSubregionExit->getFirstNonPHI());

  // Add the incoming values to the PHI.
  for (auto &Pair : Incoming) {
    BasicBlock *OrigIncomingBlock = Pair.first;
    BasicBlock *NewIncomingBlockStart = StartBlockMap.lookup(OrigIncomingBlock);
    BasicBlock *NewIncomingBlockEnd = EndBlockMap.lookup(OrigIncomingBlock);
    Builder.SetInsertPoint(NewIncomingBlockEnd->getTerminator());
    assert(RegionMaps.count(NewIncomingBlockStart));
    assert(RegionMaps.count(NewIncomingBlockEnd));
    ValueMapT *LocalBBMap = &RegionMaps[NewIncomingBlockStart];

    Value *OrigIncomingValue = Pair.second;
    Value *NewIncomingValue =
        getNewValue(*Stmt, OrigIncomingValue, *LocalBBMap, LTS, L);
    NewPHI->addIncoming(NewIncomingValue, NewIncomingBlockEnd);
  }

  return NewPHI;
}
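// Compute the value an exiting scalar write stores: for PHI-kind accesses
// with multiple incoming edges this builds a merge PHI (buildExitPHI); with a
// single edge, or for plain value accesses, the remapped value is used
// directly.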
Value *RegionGenerator::getExitScalar(MemoryAccess *MA, LoopToScevMapT &LTS,
                                      ValueMapT &BBMap) {
  ScopStmt *Stmt = MA->getStatement();

  // TODO: Add some test cases that ensure this is really the right choice.
  Loop *L = LI.getLoopFor(Stmt->getRegion()->getExit());

  if (MA->isAnyPHIKind()) {
    auto Incoming = MA->getIncoming();
    assert(!Incoming.empty() &&
           "PHI WRITEs must originate from at least one incoming block");

    // If there is only one incoming value, we do not need to create a PHI.
    if (Incoming.size() == 1) {
      Value *OldVal = Incoming[0].second;
      return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
    }

    return buildExitPHI(MA, LTS, BBMap, L);
  }

  // MemoryKind::Value accesses leaving the subregion must dominate the exit
  // block; just pass the copied value.
  Value *OldVal = MA->getAccessValue();
  return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
}
void RegionGenerator::generateScalarStores(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.getRegion() &&
         "Block statements need to use the generateScalarStores() "
         "function in the BlockGenerator");

  // Get the exit scalar values before generating the writes.
  // This is necessary because RegionGenerator::getExitScalar may insert
  // PHINodes that depend on the region's exiting blocks. But
  // BlockGenerator::generateConditionalExecution may insert a new basic block
  // such that the current basic block is not a direct successor of the exiting
  // blocks anymore. Hence, build the PHINodes while the current block is still
  // the direct successor.
  SmallDenseMap<MemoryAccess *, Value *> NewExitScalars;
  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    Value *NewVal = getExitScalar(MA, LTS, BBMap);
    NewExitScalars[MA] = NewVal;
  }

  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    isl::set AccDom = MA->getAccessRelation().domain();
    std::string Subject = MA->getId().get_name();
    generateConditionalExecution(
        Stmt, AccDom, Subject.c_str(), [&, this, MA]() {
          Value *NewVal = NewExitScalars.lookup(MA);
          assert(NewVal && "The exit scalar must be determined before");
          Value *Address = getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS,
                                              BBMap, NewAccesses);
          assert((!isa<Instruction>(NewVal) ||
                  DT.dominates(cast<Instruction>(NewVal)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          assert((!isa<Instruction>(Address) ||
                  DT.dominates(cast<Instruction>(Address)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          Builder.CreateStore(NewVal, Address);
        });
  }
}
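// Add the copy of the value incoming from @p IncomingBB to @p PHICopy. If
// @p IncomingBB has not been copied yet, the PHI is queued in
// IncompletePHINodeMap and completed once the block is generated.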
void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, PHINode *PHI,
                                      PHINode *PHICopy, BasicBlock *IncomingBB,
                                      LoopToScevMapT &LTS) {
  // If the incoming block was not yet copied, mark this PHI as incomplete.
  // Once the block is copied, the incoming value will be added.
  BasicBlock *BBCopyStart = StartBlockMap[IncomingBB];
  BasicBlock *BBCopyEnd = EndBlockMap[IncomingBB];
  if (!BBCopyStart) {
    assert(!BBCopyEnd);
    assert(Stmt.represents(IncomingBB) &&
           "Bad incoming block for PHI in non-affine region");
    IncompletePHINodeMap[IncomingBB].push_back(std::make_pair(PHI, PHICopy));
    return;
  }

  assert(RegionMaps.count(BBCopyStart) &&
         "Incoming PHI block did not have a BBMap");
  ValueMapT &BBCopyMap = RegionMaps[BBCopyStart];

  Value *OpCopy = nullptr;

  if (Stmt.represents(IncomingBB)) {
    Value *Op = PHI->getIncomingValueForBlock(IncomingBB);

    // If the current insert block is different from the PHI's incoming block,
    // change it; otherwise do not.
    auto IP = Builder.GetInsertPoint();
    if (IP->getParent() != BBCopyEnd)
      Builder.SetInsertPoint(BBCopyEnd->getTerminator());
    OpCopy = getNewValue(Stmt, Op, BBCopyMap, LTS, getLoopForStmt(Stmt));
    if (IP->getParent() != BBCopyEnd)
      Builder.SetInsertPoint(&*IP);
  } else {
    // All edges from outside the non-affine region become a single edge
    // in the new copy of the non-affine region. Make sure to only add the
    // corresponding edge the first time we encounter a basic block from
    // outside the non-affine region.
    if (PHICopy->getBasicBlockIndex(BBCopyEnd) >= 0)
      return;

    // Get the reloaded value.
    OpCopy = getNewValue(Stmt, PHI, BBCopyMap, LTS, getLoopForStmt(Stmt));
  }

  assert(OpCopy && "Incoming PHI value was not copied properly");
  PHICopy->addIncoming(OpCopy, BBCopyEnd);
}
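// Copy a PHI node of the non-affine region itself. The copy is created empty
// and its incoming values are filled in by addOperandToPHI, possibly deferred
// until the corresponding incoming blocks have been copied.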
void RegionGenerator::copyPHIInstruction(ScopStmt &Stmt, PHINode *PHI,
                                         ValueMapT &BBMap,
                                         LoopToScevMapT &LTS) {
  unsigned NumIncoming = PHI->getNumIncomingValues();
  PHINode *PHICopy = Builder.CreatePHI(PHI->getType(), NumIncoming,
                                       "polly." + PHI->getName());
  PHICopy->moveBefore(PHICopy->getParent()->getFirstNonPHI());
  BBMap[PHI] = PHICopy;

  for (BasicBlock *IncomingBB : PHI->blocks())
    addOperandToPHI(Stmt, PHI, PHICopy, IncomingBB, LTS);
}