  1. //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This is the LLVM vectorization plan. It represents a candidate for
  11. /// vectorization, allowing to plan and optimize how to vectorize a given loop
  12. /// before generating LLVM-IR.
  13. /// The vectorizer uses vectorization plans to estimate the costs of potential
  14. /// candidates and if profitable to execute the desired plan, generating vector
  15. /// LLVM-IR code.
  16. ///
  17. //===----------------------------------------------------------------------===//
  18. #include "VPlan.h"
  19. #include "VPlanDominatorTree.h"
  20. #include "llvm/ADT/DepthFirstIterator.h"
  21. #include "llvm/ADT/PostOrderIterator.h"
  22. #include "llvm/ADT/STLExtras.h"
  23. #include "llvm/ADT/SmallVector.h"
  24. #include "llvm/ADT/Twine.h"
  25. #include "llvm/Analysis/IVDescriptors.h"
  26. #include "llvm/Analysis/LoopInfo.h"
  27. #include "llvm/IR/BasicBlock.h"
  28. #include "llvm/IR/CFG.h"
  29. #include "llvm/IR/InstrTypes.h"
  30. #include "llvm/IR/Instruction.h"
  31. #include "llvm/IR/Instructions.h"
  32. #include "llvm/IR/Type.h"
  33. #include "llvm/IR/Value.h"
  34. #include "llvm/Support/Casting.h"
  35. #include "llvm/Support/CommandLine.h"
  36. #include "llvm/Support/Debug.h"
  37. #include "llvm/Support/ErrorHandling.h"
  38. #include "llvm/Support/GenericDomTreeConstruction.h"
  39. #include "llvm/Support/GraphWriter.h"
  40. #include "llvm/Support/raw_ostream.h"
  41. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  42. #include <cassert>
  43. #include <iterator>
  44. #include <string>
  45. #include <vector>
  46. using namespace llvm;
  47. extern cl::opt<bool> EnableVPlanNativePath;
  48. #define DEBUG_TYPE "vplan"
  49. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  50. raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
  51. const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
  52. VPSlotTracker SlotTracker(
  53. (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
  54. V.print(OS, SlotTracker);
  55. return OS;
  56. }
  57. #endif
/// Compute an IR Value identifying the vector lane selected by this VPLane,
/// for a vectorization factor \p VF, emitting any needed instructions via
/// \p Builder.
Value *VPLane::getAsRuntimeExpr(IRBuilder<> &Builder,
                                const ElementCount &VF) const {
  switch (LaneKind) {
  case VPLane::Kind::ScalableLast:
    // For scalable vectors the concrete lane is only known at runtime:
    // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
    return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
                             Builder.getInt32(VF.getKnownMinValue() - Lane));
  case VPLane::Kind::First:
    // Fixed lane, counted from the start of the vector.
    return Builder.getInt32(Lane);
  }
  llvm_unreachable("Unknown lane kind");
}
/// Construct a VPValue with subclass id \p SC, optional underlying IR value
/// \p UV and optional defining \p Def. Registers the new value with its
/// defining VPDef so def-use bookkeeping stays consistent from construction.
VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
    : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
  if (Def)
    Def->addDefinedValue(this);
}

VPValue::~VPValue() {
  assert(Users.empty() && "trying to delete a VPValue with remaining users");
  // Mirror the registration done in the constructor.
  if (Def)
    Def->removeDefinedValue(this);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this VPValue: delegate to the defining recipe when there is one,
/// otherwise print the value as a plain operand.
void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
  if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
    R->print(OS, "", SlotTracker);
  else
    printAsOperand(OS, SlotTracker);
}

/// Dump this VPValue to dbgs(), numbering values via the enclosing plan when
/// the defining recipe is attached to one.
void VPValue::dump() const {
  const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
  VPSlotTracker SlotTracker(
      (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
  print(dbgs(), SlotTracker);
  dbgs() << "\n";
}

/// Dump this VPDef to dbgs(); same slot-tracker handling as VPValue::dump().
void VPDef::dump() const {
  const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
  VPSlotTracker SlotTracker(
      (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
  print(dbgs(), "", SlotTracker);
  dbgs() << "\n";
}
#endif
  102. // Get the top-most entry block of \p Start. This is the entry block of the
  103. // containing VPlan. This function is templated to support both const and non-const blocks
  104. template <typename T> static T *getPlanEntry(T *Start) {
  105. T *Next = Start;
  106. T *Current = Start;
  107. while ((Next = Next->getParent()))
  108. Current = Next;
  109. SmallSetVector<T *, 8> WorkList;
  110. WorkList.insert(Current);
  111. for (unsigned i = 0; i < WorkList.size(); i++) {
  112. T *Current = WorkList[i];
  113. if (Current->getNumPredecessors() == 0)
  114. return Current;
  115. auto &Predecessors = Current->getPredecessors();
  116. WorkList.insert(Predecessors.begin(), Predecessors.end());
  117. }
  118. llvm_unreachable("VPlan without any entry node without predecessors");
  119. }
  120. VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
  121. const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
  122. /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
  123. const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
  124. const VPBlockBase *Block = this;
  125. while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
  126. Block = Region->getEntry();
  127. return cast<VPBasicBlock>(Block);
  128. }
  129. VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
  130. VPBlockBase *Block = this;
  131. while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
  132. Block = Region->getEntry();
  133. return cast<VPBasicBlock>(Block);
  134. }
/// Associate this block with \p ParentPlan. The plan pointer is only stored
/// on the plan's entry block; getPlan() walks up to the entry to retrieve it.
void VPBlockBase::setPlan(VPlan *ParentPlan) {
  assert(ParentPlan->getEntry() == this &&
         "Can only set plan on its entry block.");
  Plan = ParentPlan;
}
  140. /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
  141. const VPBasicBlock *VPBlockBase::getExitBasicBlock() const {
  142. const VPBlockBase *Block = this;
  143. while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
  144. Block = Region->getExit();
  145. return cast<VPBasicBlock>(Block);
  146. }
  147. VPBasicBlock *VPBlockBase::getExitBasicBlock() {
  148. VPBlockBase *Block = this;
  149. while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
  150. Block = Region->getExit();
  151. return cast<VPBasicBlock>(Block);
  152. }
  153. VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
  154. if (!Successors.empty() || !Parent)
  155. return this;
  156. assert(Parent->getExit() == this &&
  157. "Block w/o successors not the exit of its parent.");
  158. return Parent->getEnclosingBlockWithSuccessors();
  159. }
  160. VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
  161. if (!Predecessors.empty() || !Parent)
  162. return this;
  163. assert(Parent->getEntry() == this &&
  164. "Block w/o predecessors not the entry of its parent.");
  165. return Parent->getEnclosingBlockWithPredecessors();
  166. }
// The condition bit is modeled as a single-operand VPUser; return its sole
// operand, or null when no condition bit is set.
VPValue *VPBlockBase::getCondBit() {
  return CondBitUser.getSingleOperandOrNull();
}

const VPValue *VPBlockBase::getCondBit() const {
  return CondBitUser.getSingleOperandOrNull();
}

// Reset the condition bit of this block to \p CV via its single-op user.
void VPBlockBase::setCondBit(VPValue *CV) { CondBitUser.resetSingleOpUser(CV); }

// The block predicate is likewise held by a single-operand VPUser.
VPValue *VPBlockBase::getPredicate() {
  return PredicateUser.getSingleOperandOrNull();
}

const VPValue *VPBlockBase::getPredicate() const {
  return PredicateUser.getSingleOperandOrNull();
}

// Reset the block predicate to \p CV via its single-op user.
void VPBlockBase::setPredicate(VPValue *CV) {
  PredicateUser.resetSingleOpUser(CV);
}
/// Delete all blocks reachable from \p Entry.
void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
  // Materialize the depth-first list up front: deleting blocks while
  // traversing would invalidate the traversal's successor links.
  SmallVector<VPBlockBase *, 8> Blocks(depth_first(Entry));

  for (VPBlockBase *Block : Blocks)
    delete Block;
}
  188. VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
  189. iterator It = begin();
  190. while (It != end() && It->isPhi())
  191. It++;
  192. return It;
  193. }
/// Return the generated scalar Value for \p Def at \p Instance (part, lane).
Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
  // Values without a defining recipe are live-ins and are uniform across all
  // parts/lanes; return the underlying IR value directly.
  if (!Def->getDef())
    return Def->getLiveInIRValue();

  // Prefer an already-generated scalar cached for this exact (part, lane).
  if (hasScalarValue(Def, Instance)) {
    return Data
        .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
  }

  assert(hasVectorValue(Def, Instance.Part));
  auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
  if (!VecPart->getType()->isVectorTy()) {
    // The per-part value is already scalar; it can only serve lane 0.
    assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
    return VecPart;
  }
  // Otherwise extract the requested lane from the per-part vector value.
  // TODO: Cache created scalar values.
  Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
  auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
  // set(Def, Extract, Instance);
  return Extract;
}
  213. BasicBlock *
  214. VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
  215. // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
  216. // Pred stands for Predessor. Prev stands for Previous - last visited/created.
  217. BasicBlock *PrevBB = CFG.PrevBB;
  218. BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
  219. PrevBB->getParent(), CFG.LastBB);
  220. LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
  221. // Hook up the new basic block to its predecessors.
  222. for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
  223. VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
  224. auto &PredVPSuccessors = PredVPBB->getSuccessors();
  225. BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
  226. // In outer loop vectorization scenario, the predecessor BBlock may not yet
  227. // be visited(backedge). Mark the VPBasicBlock for fixup at the end of
  228. // vectorization. We do not encounter this case in inner loop vectorization
  229. // as we start out by building a loop skeleton with the vector loop header
  230. // and latch blocks. As a result, we never enter this function for the
  231. // header block in the non VPlan-native path.
  232. if (!PredBB) {
  233. assert(EnableVPlanNativePath &&
  234. "Unexpected null predecessor in non VPlan-native path");
  235. CFG.VPBBsToFix.push_back(PredVPBB);
  236. continue;
  237. }
  238. assert(PredBB && "Predecessor basic-block not found building successor.");
  239. auto *PredBBTerminator = PredBB->getTerminator();
  240. LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
  241. if (isa<UnreachableInst>(PredBBTerminator)) {
  242. assert(PredVPSuccessors.size() == 1 &&
  243. "Predecessor ending w/o branch must have single successor.");
  244. PredBBTerminator->eraseFromParent();
  245. BranchInst::Create(NewBB, PredBB);
  246. } else {
  247. assert(PredVPSuccessors.size() == 2 &&
  248. "Predecessor ending with branch must have two successors.");
  249. unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
  250. assert(!PredBBTerminator->getSuccessor(idx) &&
  251. "Trying to reset an existing successor block.");
  252. PredBBTerminator->setSuccessor(idx, NewBB);
  253. }
  254. }
  255. return NewBB;
  256. }
/// Generate IR for this VPBasicBlock: create (or reuse) an IR basic block and
/// execute all contained recipes into it.
void VPBasicBlock::execute(VPTransformState *State) {
  bool Replica = State->Instance && !State->Instance->isFirstIteration();
  VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
  VPBlockBase *SingleHPred = nullptr;
  BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.

  // 1. Create an IR basic block, or reuse the last one if possible.
  // The last IR basic block is reused, as an optimization, in three cases:
  // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
  // B. when the current VPBB has a single (hierarchical) predecessor which
  //    is PrevVPBB and the latter has a single (hierarchical) successor; and
  // C. when the current VPBB is an entry of a region replica - where PrevVPBB
  //    is the exit of this region from a previous instance, or the predecessor
  //    of this region.
  if (PrevVPBB && /* A */
      !((SingleHPred = getSingleHierarchicalPredecessor()) &&
        SingleHPred->getExitBasicBlock() == PrevVPBB &&
        PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
      !(Replica && getPredecessors().empty())) {       /* C */
    NewBB = createEmptyBasicBlock(State->CFG);
    State->Builder.SetInsertPoint(NewBB);
    // Temporarily terminate with unreachable until CFG is rewired.
    UnreachableInst *Terminator = State->Builder.CreateUnreachable();
    State->Builder.SetInsertPoint(Terminator);
    // Register NewBB in its loop. In innermost loops its the same for all BB's.
    Loop *L = State->LI->getLoopFor(State->CFG.LastBB);
    L->addBasicBlockToLoop(NewBB, *State->LI);
    State->CFG.PrevBB = NewBB;
  }

  // 2. Fill the IR basic block with IR instructions.
  LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
                    << " in BB:" << NewBB->getName() << '\n');

  State->CFG.VPBB2IRBB[this] = NewBB;
  State->CFG.PrevVPBB = this;

  for (VPRecipeBase &Recipe : Recipes)
    Recipe.execute(*State);

  VPValue *CBV;
  if (EnableVPlanNativePath && (CBV = getCondBit())) {
    assert(CBV->getUnderlyingValue() &&
           "Unexpected null underlying value for condition bit");

    // Condition bit value in a VPBasicBlock is used as the branch selector. In
    // the VPlan-native path case, since all branches are uniform we generate a
    // branch instruction using the condition value from vector lane 0 and dummy
    // successors. The successors are fixed later when the successor blocks are
    // visited.
    Value *NewCond = State->get(CBV, {0, 0});

    // Replace the temporary unreachable terminator with the new conditional
    // branch.
    auto *CurrentTerminator = NewBB->getTerminator();
    assert(isa<UnreachableInst>(CurrentTerminator) &&
           "Expected to replace unreachable terminator with conditional "
           "branch.");
    auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond);
    CondBr->setSuccessor(0, nullptr);
    ReplaceInstWithInst(CurrentTerminator, CondBr);
  }
  LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
}
/// Replace all uses of values defined by this block's recipes with
/// \p NewValue, and reset every recipe operand to \p NewValue, severing the
/// def-use links into and out of this block.
void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
  for (VPRecipeBase &R : Recipes) {
    for (auto *Def : R.definedValues())
      Def->replaceAllUsesWith(NewValue);

    for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
      R.setOperand(I, NewValue);
  }
}
/// Split this block at \p SplitAt: move the recipes from \p SplitAt
/// (inclusive) to the end into a newly created successor block, which inherits
/// this block's successors and is returned.
VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
  assert((SplitAt == end() || SplitAt->getParent() == this) &&
         "can only split at a position in the same block");

  SmallVector<VPBlockBase *, 2> Succs(successors());
  // First, disconnect the current block from its successors.
  for (VPBlockBase *Succ : Succs)
    VPBlockUtils::disconnectBlocks(this, Succ);

  // Create new empty block after the block to split.
  auto *SplitBlock = new VPBasicBlock(getName() + ".split");
  VPBlockUtils::insertBlockAfter(SplitBlock, this);

  // Add successors for block to split to new block.
  for (VPBlockBase *Succ : Succs)
    VPBlockUtils::connectBlocks(SplitBlock, Succ);

  // Finally, move the recipes starting at SplitAt to new block. Iterate with
  // early-increment since moving a recipe unlinks it from this list.
  for (VPRecipeBase &ToMove :
       make_early_inc_range(make_range(SplitAt, this->end())))
    ToMove.moveBefore(*SplitBlock, SplitBlock->end());

  return SplitBlock;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the names of this block's successors, comma-separated, or "No
/// successors" when there are none.
void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
  if (getSuccessors().empty()) {
    O << Indent << "No successors\n";
  } else {
    O << Indent << "Successor(s): ";
    ListSeparator LS;
    for (auto *Succ : getSuccessors())
      O << LS << Succ->getName();
    O << '\n';
  }
}

/// Print this basic block: its name, optional block predicate, contained
/// recipes, successors and optional condition bit.
void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
                         VPSlotTracker &SlotTracker) const {
  O << Indent << getName() << ":\n";
  if (const VPValue *Pred = getPredicate()) {
    O << Indent << "BlockPredicate:";
    Pred->printAsOperand(O, SlotTracker);
    // Also show which block defines the predicate, when it is a VPInstruction.
    if (const auto *PredInst = dyn_cast<VPInstruction>(Pred))
      O << " (" << PredInst->getParent()->getName() << ")";
    O << '\n';
  }

  auto RecipeIndent = Indent + " ";
  for (const VPRecipeBase &Recipe : *this) {
    Recipe.print(O, RecipeIndent, SlotTracker);
    O << '\n';
  }

  printSuccessors(O, Indent);

  if (const VPValue *CBV = getCondBit()) {
    O << Indent << "CondBit: ";
    CBV->printAsOperand(O, SlotTracker);
    if (const auto *CBI = dyn_cast<VPInstruction>(CBV))
      O << " (" << CBI->getParent()->getName() << ")";
    O << '\n';
  }
}
#endif
/// Recursively drop all references in the blocks contained in this region;
/// see VPBasicBlock::dropAllReferences.
void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
  for (VPBlockBase *Block : depth_first(Entry))
    // Drop all references in VPBasicBlocks and replace all uses with
    // DummyValue.
    Block->dropAllReferences(NewValue);
}
/// Generate IR for this region. Non-replicator regions are executed once in
/// reverse post-order; replicator regions are executed once per (part, lane)
/// pair, producing scalar code for each instance.
void VPRegionBlock::execute(VPTransformState *State) {
  ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);

  if (!isReplicator()) {
    // Visit the VPBlocks connected to "this", starting from it.
    for (VPBlockBase *Block : RPOT) {
      if (EnableVPlanNativePath) {
        // The inner loop vectorization path does not represent loop preheader
        // and exit blocks as part of the VPlan. In the VPlan-native path, skip
        // vectorizing loop preheader block. In future, we may replace this
        // check with the check for loop preheader.
        if (Block->getNumPredecessors() == 0)
          continue;

        // Skip vectorizing loop exit block. In future, we may replace this
        // check with the check for loop exit.
        if (Block->getNumSuccessors() == 0)
          continue;
      }

      LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
      Block->execute(State);
    }
    return;
  }

  assert(!State->Instance && "Replicating a Region with non-null instance.");

  // Enter replicating mode.
  State->Instance = VPIteration(0, 0);

  for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
    State->Instance->Part = Part;
    // Replication per lane requires a known, fixed number of lanes.
    assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
    for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
         ++Lane) {
      State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
      // Visit the VPBlocks connected to \p this, starting from it.
      for (VPBlockBase *Block : RPOT) {
        LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
        Block->execute(State);
      }
    }
  }

  // Exit replicating mode.
  State->Instance.reset();
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this region: a "<xVFxUF>" marker for replicator regions ("<x1>"
/// otherwise), followed by all contained blocks in depth-first order.
void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
  auto NewIndent = Indent + " ";
  for (auto *BlockBase : depth_first(Entry)) {
    O << '\n';
    BlockBase->print(O, NewIndent, SlotTracker);
  }
  O << Indent << "}\n";
  printSuccessors(O, Indent);
}
#endif
/// Return true if this recipe may write to memory; conservative for recipe
/// kinds not listed explicitly.
bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    // Only the store form of a widened memory instruction writes.
    return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    // Defer to the underlying IR instruction.
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    // These recipe kinds never write; in asserts builds cross-check against
    // the underlying IR instruction when one is attached.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    // Conservatively assume any other recipe may write to memory.
    return true;
  }
}
/// Return true if this recipe may read from memory; conservative for recipe
/// kinds not listed explicitly. Mirrors mayWriteToMemory() above.
bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    // Only the load form of a widened memory instruction reads.
    return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    // Defer to the underlying IR instruction.
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    // These recipe kinds never read; in asserts builds cross-check against
    // the underlying IR instruction when one is attached.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    // Conservatively assume any other recipe may read from memory.
    return true;
  }
}
/// Return true if this recipe may have side effects (beyond reading/writing
/// memory); conservative for recipe kinds not listed explicitly.
bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    // These recipe kinds are side-effect free; in asserts builds cross-check
    // against the underlying IR instruction when one is attached.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPReplicateSC: {
    // A replicated instruction has exactly the side effects of its underlying
    // IR instruction.
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    // Conservatively assume any other recipe may have side effects.
    return true;
  }
}
/// Insert this (currently unlinked) recipe into \p InsertPos's block,
/// immediately before \p InsertPos.
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  Parent = InsertPos->getParent();
  Parent->getRecipeList().insert(InsertPos->getIterator(), this);
}

/// Insert this (currently unlinked) recipe into \p InsertPos's block,
/// immediately after \p InsertPos.
void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  Parent = InsertPos->getParent();
  Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
}
/// Unlink this recipe from its parent block without destroying it; the caller
/// takes ownership.
void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

/// Unlink this recipe from its parent block and destroy it (iplist::erase
/// deletes the element). \return an iterator to the next recipe in the list.
iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}
/// Move this recipe out of its current block to immediately after
/// \p InsertPos (possibly in a different block).
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

/// Move this recipe out of its current block into \p BB at position \p I
/// (which must be an iterator into \p BB, or BB.end()).
void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  assert(I == BB.end() || I->getParent() == &BB);
  removeFromParent();
  Parent = &BB;
  BB.getRecipeList().insert(I, this);
}
  559. void VPInstruction::generateInstruction(VPTransformState &State,
  560. unsigned Part) {
  561. IRBuilder<> &Builder = State.Builder;
  562. Builder.SetCurrentDebugLocation(DL);
  563. if (Instruction::isBinaryOp(getOpcode())) {
  564. Value *A = State.get(getOperand(0), Part);
  565. Value *B = State.get(getOperand(1), Part);
  566. Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
  567. State.set(this, V, Part);
  568. return;
  569. }
  570. switch (getOpcode()) {
  571. case VPInstruction::Not: {
  572. Value *A = State.get(getOperand(0), Part);
  573. Value *V = Builder.CreateNot(A);
  574. State.set(this, V, Part);
  575. break;
  576. }
  577. case VPInstruction::ICmpULE: {
  578. Value *IV = State.get(getOperand(0), Part);
  579. Value *TC = State.get(getOperand(1), Part);
  580. Value *V = Builder.CreateICmpULE(IV, TC);
  581. State.set(this, V, Part);
  582. break;
  583. }
  584. case Instruction::Select: {
  585. Value *Cond = State.get(getOperand(0), Part);
  586. Value *Op1 = State.get(getOperand(1), Part);
  587. Value *Op2 = State.get(getOperand(2), Part);
  588. Value *V = Builder.CreateSelect(Cond, Op1, Op2);
  589. State.set(this, V, Part);
  590. break;
  591. }
  592. case VPInstruction::ActiveLaneMask: {
  593. // Get first lane of vector induction variable.
  594. Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
  595. // Get the original loop tripcount.
  596. Value *ScalarTC = State.get(getOperand(1), Part);
  597. auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
  598. auto *PredTy = VectorType::get(Int1Ty, State.VF);
  599. Instruction *Call = Builder.CreateIntrinsic(
  600. Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
  601. {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
  602. State.set(this, Call, Part);
  603. break;
  604. }
  605. case VPInstruction::FirstOrderRecurrenceSplice: {
  606. // Generate code to combine the previous and current values in vector v3.
  607. //
  608. // vector.ph:
  609. // v_init = vector(..., ..., ..., a[-1])
  610. // br vector.body
  611. //
  612. // vector.body
  613. // i = phi [0, vector.ph], [i+4, vector.body]
  614. // v1 = phi [v_init, vector.ph], [v2, vector.body]
  615. // v2 = a[i, i+1, i+2, i+3];
  616. // v3 = vector(v1(3), v2(0, 1, 2))
  617. // For the first part, use the recurrence phi (v1), otherwise v2.
  618. auto *V1 = State.get(getOperand(0), 0);
  619. Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
  620. if (!PartMinus1->getType()->isVectorTy()) {
  621. State.set(this, PartMinus1, Part);
  622. } else {
  623. Value *V2 = State.get(getOperand(1), Part);
  624. State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
  625. }
  626. break;
  627. }
  628. case VPInstruction::CanonicalIVIncrement:
  629. case VPInstruction::CanonicalIVIncrementNUW: {
  630. Value *Next = nullptr;
  631. if (Part == 0) {
  632. bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
  633. auto *Phi = State.get(getOperand(0), 0);
  634. // The loop step is equal to the vectorization factor (num of SIMD
  635. // elements) times the unroll factor (num of SIMD instructions).
  636. Value *Step =
  637. createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
  638. Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
  639. } else {
  640. Next = State.get(this, 0);
  641. }
  642. State.set(this, Next, Part);
  643. break;
  644. }
  645. case VPInstruction::BranchOnCount: {
  646. if (Part != 0)
  647. break;
  648. // First create the compare.
  649. Value *IV = State.get(getOperand(0), Part);
  650. Value *TC = State.get(getOperand(1), Part);
  651. Value *Cond = Builder.CreateICmpEQ(IV, TC);
  652. // Now create the branch.
  653. auto *Plan = getParent()->getPlan();
  654. VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
  655. VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
  656. if (Header->empty()) {
  657. assert(EnableVPlanNativePath &&
  658. "empty entry block only expected in VPlanNativePath");
  659. Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
  660. }
  661. // TODO: Once the exit block is modeled in VPlan, use it instead of going
  662. // through State.CFG.LastBB.
  663. BasicBlock *Exit =
  664. cast<BranchInst>(State.CFG.LastBB->getTerminator())->getSuccessor(0);
  665. Builder.CreateCondBr(Cond, Exit, State.CFG.VPBB2IRBB[Header]);
  666. Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
  667. break;
  668. }
  669. default:
  670. llvm_unreachable("Unsupported opcode for instruction");
  671. }
  672. }
  673. void VPInstruction::execute(VPTransformState &State) {
  674. assert(!State.Instance && "VPInstruction executing an Instance");
  675. IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  676. State.Builder.setFastMathFlags(FMF);
  677. for (unsigned Part = 0; Part < State.UF; ++Part)
  678. generateInstruction(State, Part);
  679. }
  680. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  681. void VPInstruction::dump() const {
  682. VPSlotTracker SlotTracker(getParent()->getPlan());
  683. print(dbgs(), "", SlotTracker);
  684. }
/// Print this VPInstruction in textual VPlan form:
///   EMIT [<result> = ] <opcode>[<FMF>] <operands...> [, !dbg <loc>]
void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  // Only instructions that produce a value print a result operand.
  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  // VPInstruction-specific opcodes print symbolic names; anything else falls
  // back to the LLVM IR opcode name.
  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::ICmpULE:
    O << "icmp ule";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::CanonicalIVIncrement:
    O << "VF * UF + ";
    break;
  case VPInstruction::CanonicalIVIncrementNUW:
    O << "VF * UF +(nuw) ";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count ";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }

  // Fast-math flags; prints nothing when no flags are set.
  O << FMF;

  for (const VPValue *Operand : operands()) {
    O << " ";
    Operand->printAsOperand(O, SlotTracker);
  }

  // Append the debug location, if one is attached.
  if (DL) {
    O << ", !dbg ";
    DL.print(O);
  }
}
  733. #endif
/// Set the fast-math flags used when generating code for this instruction.
/// Only valid on floating-point opcodes, enforced by the assert below.
void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
  // Make sure the VPInstruction is a floating-point operation.
  assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
          Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
          Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
          Opcode == Instruction::FCmp) &&
         "this op can't take fast-math flags");
  FMF = FMFNew;
}
/// Set up live-in values (trip count, backedge-taken count, vector trip
/// count) in \p State for all unroll parts, and optionally reset the
/// canonical IV's start value (used when vectorizing the epilogue loop).
void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
                             Value *CanonicalIVStartValue,
                             VPTransformState &State) {
  // Check if the trip count is needed, and if so build it.
  if (TripCount && TripCount->getNumUsers()) {
    for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
      State.set(TripCount, TripCountV, Part);
  }

  // Check if the backedge taken count is needed, and if so build it.
  if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
    // Materialize trip-count - 1 in the preheader (before PrevBB's
    // terminator), broadcast to a vector unless VF is scalar.
    IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
    auto *TCMO = Builder.CreateSub(TripCountV,
                                   ConstantInt::get(TripCountV->getType(), 1),
                                   "trip.count.minus.1");
    auto VF = State.VF;
    Value *VTCMO =
        VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
    for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
      State.set(BackedgeTakenCount, VTCMO, Part);
  }

  // The vector trip count is always needed; all parts share the same value.
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(&VectorTripCount, VectorTripCountV, Part);

  // When vectorizing the epilogue loop, the canonical induction start value
  // needs to be changed from zero to the value after the main vector loop.
  if (CanonicalIVStartValue) {
    // Wrap the IR start value as an external VPValue so the plan can refer to
    // it; addExternalDef records it with the plan.
    VPValue *VPV = new VPValue(CanonicalIVStartValue);
    addExternalDef(VPV);
    auto *IV = getCanonicalIV();
    assert(all_of(IV->users(),
                  [](const VPUser *U) {
                    auto *VPI = cast<VPInstruction>(U);
                    return VPI->getOpcode() ==
                               VPInstruction::CanonicalIVIncrement ||
                           VPI->getOpcode() ==
                               VPInstruction::CanonicalIVIncrementNUW;
                  }) &&
           "the canonical IV should only be used by its increments when "
           "resetting the start value");
    IV->setOperand(0, VPV);
  }
}
/// Generate the code inside the body of the vectorized loop. Assumes a single
/// LoopVectorBody basic-block was created for this. Introduce additional
/// basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
  // 0. Set the reverse mapping from VPValues to Values for code generation.
  for (auto &Entry : Value2VPValue)
    State->VPValue2Value[Entry.second] = Entry.first;

  BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
  State->CFG.VectorPreHeader = VectorPreHeaderBB;
  BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
  assert(VectorHeaderBB && "Loop preheader does not have a single successor.");

  // 1. Make room to generate basic-blocks inside loop body if needed.
  // Split the header so its original contents end up in a dedicated latch
  // block; the loop info is updated to include the new block.
  BasicBlock *VectorLatchBB = VectorHeaderBB->splitBasicBlock(
      VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch");
  Loop *L = State->LI->getLoopFor(VectorHeaderBB);
  L->addBasicBlockToLoop(VectorLatchBB, *State->LI);
  // Remove the edge between Header and Latch to allow other connections.
  // Temporarily terminate with unreachable until CFG is rewired.
  // Note: this asserts the generated code's assumption that
  // getFirstInsertionPt() can be dereferenced into an Instruction.
  VectorHeaderBB->getTerminator()->eraseFromParent();
  State->Builder.SetInsertPoint(VectorHeaderBB);
  UnreachableInst *Terminator = State->Builder.CreateUnreachable();
  State->Builder.SetInsertPoint(Terminator);

  // 2. Generate code in loop body.
  State->CFG.PrevVPBB = nullptr;
  State->CFG.PrevBB = VectorHeaderBB;
  State->CFG.LastBB = VectorLatchBB;

  // Execute every VPlan block in depth-first order, emitting IR.
  for (VPBlockBase *Block : depth_first(Entry))
    Block->execute(State);

  // Setup branch terminator successors for VPBBs in VPBBsToFix based on
  // VPBB's successors.
  for (auto VPBB : State->CFG.VPBBsToFix) {
    assert(EnableVPlanNativePath &&
           "Unexpected VPBBsToFix in non VPlan-native path");
    BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
    assert(BB && "Unexpected null basic block for VPBB");

    unsigned Idx = 0;
    auto *BBTerminator = BB->getTerminator();

    for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) {
      VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock();
      BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]);
      ++Idx;
    }
  }

  // 3. Merge the temporary latch created with the last basic-block filled.
  BasicBlock *LastBB = State->CFG.PrevBB;
  assert(isa<BranchInst>(LastBB->getTerminator()) &&
         "Expected VPlan CFG to terminate with branch");

  // Move both the branch and check from LastBB to VectorLatchBB.
  auto *LastBranch = cast<BranchInst>(LastBB->getTerminator());
  LastBranch->moveBefore(VectorLatchBB->getTerminator());
  VectorLatchBB->getTerminator()->eraseFromParent();
  // Move condition so it is guaranteed to be next to branch. This is only done
  // to avoid excessive test updates.
  // TODO: Remove special handling once the increments for all inductions are
  // modeled explicitly in VPlan.
  cast<Instruction>(LastBranch->getCondition())->moveBefore(LastBranch);
  // Connect LastBB to VectorLatchBB to facilitate their merge.
  BranchInst::Create(VectorLatchBB, LastBB);

  // Merge LastBB with Latch.
  bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI);
  (void)Merged;
  assert(Merged && "Could not merge last basic block with latch.");
  VectorLatchBB = LastBB;

  // Fix the latch value of canonical, reduction and first-order recurrences
  // phis in the vector loop.
  VPBasicBlock *Header = Entry->getEntryBasicBlock();
  if (Header->empty()) {
    assert(EnableVPlanNativePath);
    Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
  }
  for (VPRecipeBase &R : Header->phis()) {
    // Skip phi-like recipes that generate their backedege values themselves.
    // TODO: Model their backedge values explicitly.
    if (isa<VPWidenIntOrFpInductionRecipe>(&R) || isa<VPWidenPHIRecipe>(&R))
      continue;

    auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
    // For canonical IV, first-order recurrences and in-order reduction phis,
    // only a single part is generated, which provides the last part from the
    // previous iteration. For non-ordered reductions all UF parts are
    // generated.
    bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
                            isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
                            cast<VPReductionPHIRecipe>(PhiR)->isOrdered();
    unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
    for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
      Value *Phi = State->get(PhiR, Part);
      Value *Val = State->get(PhiR->getBackedgeValue(),
                              SinglePartNeeded ? State->UF - 1 : Part);
      cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
    }
  }

  // We do not attempt to preserve DT for outer loop vectorization currently.
  if (!EnableVPlanNativePath)
    updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB,
                        L->getExitBlock());
}
  882. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  883. LLVM_DUMP_METHOD
  884. void VPlan::print(raw_ostream &O) const {
  885. VPSlotTracker SlotTracker(this);
  886. O << "VPlan '" << Name << "' {";
  887. if (VectorTripCount.getNumUsers() > 0) {
  888. O << "\nLive-in ";
  889. VectorTripCount.printAsOperand(O, SlotTracker);
  890. O << " = vector-trip-count\n";
  891. }
  892. if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
  893. O << "\nLive-in ";
  894. BackedgeTakenCount->printAsOperand(O, SlotTracker);
  895. O << " = backedge-taken count\n";
  896. }
  897. for (const VPBlockBase *Block : depth_first(getEntry())) {
  898. O << '\n';
  899. Block->print(O, "", SlotTracker);
  900. }
  901. O << "}\n";
  902. }
  903. LLVM_DUMP_METHOD
  904. void VPlan::printDOT(raw_ostream &O) const {
  905. VPlanPrinter Printer(O, *this);
  906. Printer.dump();
  907. }
// Print the plan in plain-text form to the debug output stream.
LLVM_DUMP_METHOD
void VPlan::dump() const { print(dbgs()); }
  910. #endif
/// Update \p DT for the newly generated vector-loop CFG: insert every block
/// between the loop header and latch into the tree, then make the latch the
/// immediate dominator of the loop exit.
void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
                                BasicBlock *LoopLatchBB,
                                BasicBlock *LoopExitBB) {
  BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor();
  assert(LoopHeaderBB && "Loop preheader does not have a single successor.");
  // The vector body may be more than a single basic-block by this point.
  // Update the dominator tree information inside the vector body by propagating
  // it from header to latch, expecting only triangular control-flow, if any.
  BasicBlock *PostDomSucc = nullptr;
  for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
    // Get the list of successors of this block.
    std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
    assert(Succs.size() <= 2 &&
           "Basic block in vector loop has more than 2 successors.");
    PostDomSucc = Succs[0];
    if (Succs.size() == 1) {
      // Straight-line flow: the sole successor is immediately dominated by BB.
      assert(PostDomSucc->getSinglePredecessor() &&
             "PostDom successor has more than one predecessor.");
      DT->addNewBlock(PostDomSucc, BB);
      continue;
    }
    // Two successors: expect a triangle where one successor (InterimSucc)
    // branches into the other (PostDomSucc); both are immediately dominated
    // by BB.
    BasicBlock *InterimSucc = Succs[1];
    if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
      // Succs[0] is actually the interim block; swap the roles.
      PostDomSucc = Succs[1];
      InterimSucc = Succs[0];
    }
    assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
           "One successor of a basic block does not lead to the other.");
    assert(InterimSucc->getSinglePredecessor() &&
           "Interim successor has more than one predecessor.");
    assert(PostDomSucc->hasNPredecessors(2) &&
           "PostDom successor has more than two predecessors.");
    DT->addNewBlock(InterimSucc, BB);
    DT->addNewBlock(PostDomSucc, BB);
  }
  // Latch block is a new dominator for the loop exit.
  DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
  assert(DT->verify(DominatorTree::VerificationLevel::Fast));
}
  950. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Return the dot-graph identifier for \p Block: "N<id>", or "cluster_N<id>"
/// for regions (dot treats "cluster_"-prefixed subgraphs as clusters).
/// NOTE(review): the returned Twine references a temporary; this is only safe
/// because callers stream it immediately — confirm it is never stored.
Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
  return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
         Twine(getOrCreateBID(Block));
}
/// Return a printable name for \p Block: its own name when set, otherwise a
/// generated "VPB<id>" fallback based on the block's printer-assigned id.
/// NOTE(review): as with getUID, the Twine must be consumed immediately.
Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
  const std::string &Name = Block->getName();
  if (!Name.empty())
    return Name;
  return "VPB" + Twine(getOrCreateBID(Block));
}
/// Emit the whole plan as a "dot" digraph: graph-level attributes first, then
/// every block (and its edges) in depth-first order.
void VPlanPrinter::dump() {
  Depth = 1;
  bumpIndent(0);
  OS << "digraph VPlan {\n";
  OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
  if (!Plan.getName().empty())
    OS << "\\n" << DOT::EscapeString(Plan.getName());
  // Include the backedge-taken count expression in the graph label, if any.
  if (Plan.BackedgeTakenCount) {
    OS << ", where:\\n";
    Plan.BackedgeTakenCount->print(OS, SlotTracker);
    OS << " := BackedgeTakenCount";
  }
  OS << "\"]\n";
  OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
  OS << "edge [fontname=Courier, fontsize=30]\n";
  OS << "compound=true\n";
  for (const VPBlockBase *Block : depth_first(Plan.getEntry()))
    dumpBlock(Block);
  OS << "}\n";
}
  981. void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
  982. if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
  983. dumpBasicBlock(BasicBlock);
  984. else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
  985. dumpRegion(Region);
  986. else
  987. llvm_unreachable("Unsupported kind of VPBlock.");
  988. }
  989. void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
  990. bool Hidden, const Twine &Label) {
  991. // Due to "dot" we print an edge between two regions as an edge between the
  992. // exit basic block and the entry basic of the respective regions.
  993. const VPBlockBase *Tail = From->getExitBasicBlock();
  994. const VPBlockBase *Head = To->getEntryBasicBlock();
  995. OS << Indent << getUID(Tail) << " -> " << getUID(Head);
  996. OS << " [ label=\"" << Label << '\"';
  997. if (Tail != From)
  998. OS << " ltail=" << getUID(From);
  999. if (Head != To)
  1000. OS << " lhead=" << getUID(To);
  1001. if (Hidden)
  1002. OS << "; splines=none";
  1003. OS << "]\n";
  1004. }
  1005. void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
  1006. auto &Successors = Block->getSuccessors();
  1007. if (Successors.size() == 1)
  1008. drawEdge(Block, Successors.front(), false, "");
  1009. else if (Successors.size() == 2) {
  1010. drawEdge(Block, Successors.front(), false, "T");
  1011. drawEdge(Block, Successors.back(), false, "F");
  1012. } else {
  1013. unsigned SuccessorNumber = 0;
  1014. for (auto *Successor : Successors)
  1015. drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
  1016. }
  1017. }
/// Emit a dot node for \p BasicBlock whose label is the plain-text dump of the
/// block, wrapped line-by-line in quoted "\l"-terminated segments, followed by
/// the block's outgoing edges.
void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
  // Implement dot-formatted dump by performing plain-text dump into the
  // temporary storage followed by some post-processing.
  OS << Indent << getUID(BasicBlock) << " [label =\n";
  bumpIndent(1);
  std::string Str;
  raw_string_ostream SS(Str);
  // Use no indentation as we need to wrap the lines into quotes ourselves.
  BasicBlock->print(SS, "", SlotTracker);

  // We need to process each line of the output separately, so split
  // single-string plain-text dump.
  SmallVector<StringRef, 0> Lines;
  StringRef(Str).rtrim('\n').split(Lines, "\n");

  // Each line becomes one quoted, escaped, "\l"-left-justified segment.
  auto EmitLine = [&](StringRef Line, StringRef Suffix) {
    OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
  };

  // Don't need the "+" after the last line.
  for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
    EmitLine(Line, " +\n");
  EmitLine(Lines.back(), "\n");

  bumpIndent(-1);
  OS << Indent << "]\n";
  dumpEdges(BasicBlock);
}
/// Emit \p Region as a dot subgraph containing all of its inner blocks, then
/// emit the region's own outgoing edges.
void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
  OS << Indent << "subgraph " << getUID(Region) << " {\n";
  bumpIndent(1);
  // Label replicating regions "<xVFxUF>" and non-replicating ones "<x1>".
  OS << Indent << "fontname=Courier\n"
     << Indent << "label=\""
     << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
     << DOT::EscapeString(Region->getName()) << "\"\n";
  // Dump the blocks of the region.
  assert(Region->getEntry() && "Region contains no inner blocks.");
  for (const VPBlockBase *Block : depth_first(Region->getEntry()))
    dumpBlock(Block);
  bumpIndent(-1);
  OS << Indent << "}\n";
  dumpEdges(Region);
}
  1057. void VPlanIngredient::print(raw_ostream &O) const {
  1058. if (auto *Inst = dyn_cast<Instruction>(V)) {
  1059. if (!Inst->getType()->isVoidTy()) {
  1060. Inst->printAsOperand(O, false);
  1061. O << " = ";
  1062. }
  1063. O << Inst->getOpcodeName() << " ";
  1064. unsigned E = Inst->getNumOperands();
  1065. if (E > 0) {
  1066. Inst->getOperand(0)->printAsOperand(O, false);
  1067. for (unsigned I = 1; I < E; ++I)
  1068. Inst->getOperand(I)->printAsOperand(O << ", ", false);
  1069. }
  1070. } else // !Inst
  1071. V->printAsOperand(O, false);
  1072. }
  1073. void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
  1074. VPSlotTracker &SlotTracker) const {
  1075. O << Indent << "WIDEN-CALL ";
  1076. auto *CI = cast<CallInst>(getUnderlyingInstr());
  1077. if (CI->getType()->isVoidTy())
  1078. O << "void ";
  1079. else {
  1080. printAsOperand(O, SlotTracker);
  1081. O << " = ";
  1082. }
  1083. O << "call @" << CI->getCalledFunction()->getName() << "(";
  1084. printOperands(O, SlotTracker);
  1085. O << ")";
  1086. }
  1087. void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
  1088. VPSlotTracker &SlotTracker) const {
  1089. O << Indent << "WIDEN-SELECT ";
  1090. printAsOperand(O, SlotTracker);
  1091. O << " = select ";
  1092. getOperand(0)->printAsOperand(O, SlotTracker);
  1093. O << ", ";
  1094. getOperand(1)->printAsOperand(O, SlotTracker);
  1095. O << ", ";
  1096. getOperand(2)->printAsOperand(O, SlotTracker);
  1097. O << (InvariantCond ? " (condition is loop invariant)" : "");
  1098. }
  1099. void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
  1100. VPSlotTracker &SlotTracker) const {
  1101. O << Indent << "WIDEN ";
  1102. printAsOperand(O, SlotTracker);
  1103. O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
  1104. printOperands(O, SlotTracker);
  1105. }
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    // With a truncate, emit a multi-line label; the "\l\"" / " +" sequences
    // are the dot-format line-continuation convention used by dumpBasicBlock.
    // First line shows the original IV ingredient, second the recipe's result.
    O << "\\l\"";
    O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\" ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);
}
  1117. #endif
  1118. bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
  1119. auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
  1120. auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
  1121. return StartC && StartC->isZero() && StepC && StepC->isOne();
  1122. }
  1123. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  1124. void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
  1125. VPSlotTracker &SlotTracker) const {
  1126. O << Indent << "WIDEN-GEP ";
  1127. O << (IsPtrLoopInvariant ? "Inv" : "Var");
  1128. size_t IndicesNumber = IsIndexLoopInvariant.size();
  1129. for (size_t I = 0; I < IndicesNumber; ++I)
  1130. O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
  1131. O << " ";
  1132. printAsOperand(O, SlotTracker);
  1133. O << " = getelementptr ";
  1134. printOperands(O, SlotTracker);
  1135. }
  1136. void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
  1137. VPSlotTracker &SlotTracker) const {
  1138. O << Indent << "WIDEN-PHI ";
  1139. auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  1140. // Unless all incoming values are modeled in VPlan print the original PHI
  1141. // directly.
  1142. // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
  1143. // values as VPValues.
  1144. if (getNumOperands() != OriginalPhi->getNumOperands()) {
  1145. O << VPlanIngredient(OriginalPhi);
  1146. return;
  1147. }
  1148. printAsOperand(O, SlotTracker);
  1149. O << " = phi ";
  1150. printOperands(O, SlotTracker);
  1151. }
  1152. void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
  1153. VPSlotTracker &SlotTracker) const {
  1154. O << Indent << "BLEND ";
  1155. Phi->printAsOperand(O, false);
  1156. O << " =";
  1157. if (getNumIncomingValues() == 1) {
  1158. // Not a User of any mask: not really blending, this is a
  1159. // single-predecessor phi.
  1160. O << " ";
  1161. getIncomingValue(0)->printAsOperand(O, SlotTracker);
  1162. } else {
  1163. for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
  1164. O << " ";
  1165. getIncomingValue(I)->printAsOperand(O, SlotTracker);
  1166. O << "/";
  1167. getMask(I)->printAsOperand(O, SlotTracker);
  1168. }
  1169. }
  1170. }
void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  // Printed as: REDUCE <res> = <chain> +[FMF] reduce.<op> (<vec>[, <cond>])
  O << Indent << "REDUCE ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  // FP reductions carry the underlying instruction's fast-math flags.
  if (isa<FPMathOperator>(getUnderlyingInstr()))
    O << getUnderlyingInstr()->getFastMathFlags();
  O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
  getVecOp()->printAsOperand(O, SlotTracker);
  // The condition operand is optional and printed only when present.
  if (getCondOp()) {
    O << ", ";
    getCondOp()->printAsOperand(O, SlotTracker);
  }
  O << ")";
}
  1188. void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
  1189. VPSlotTracker &SlotTracker) const {
  1190. O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
  1191. if (!getUnderlyingInstr()->getType()->isVoidTy()) {
  1192. printAsOperand(O, SlotTracker);
  1193. O << " = ";
  1194. }
  1195. O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
  1196. printOperands(O, SlotTracker);
  1197. if (AlsoPack)
  1198. O << " (S->V)";
  1199. }
  1200. void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
  1201. VPSlotTracker &SlotTracker) const {
  1202. O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  1203. printAsOperand(O, SlotTracker);
  1204. O << " = ";
  1205. printOperands(O, SlotTracker);
  1206. }
  1207. void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
  1208. VPSlotTracker &SlotTracker) const {
  1209. O << Indent << "WIDEN ";
  1210. if (!isStore()) {
  1211. printAsOperand(O, SlotTracker);
  1212. O << " = ";
  1213. }
  1214. O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
  1215. printOperands(O, SlotTracker);
  1216. }
  1217. #endif
/// Create the scalar phi for the canonical induction variable at the start of
/// the vector loop header; all unroll parts map to this single phi.
void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
  Value *Start = getStartValue()->getLiveInIRValue();
  PHINode *EntryPart = PHINode::Create(
      Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
  // Only the preheader incoming value is added here; the backedge incoming
  // value is filled in later, once the latch is wired up.
  EntryPart->addIncoming(Start, State.CFG.VectorPreHeader);
  EntryPart->setDebugLoc(DL);
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, EntryPart, Part);
}
  1227. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  1228. void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
  1229. VPSlotTracker &SlotTracker) const {
  1230. O << Indent << "EMIT ";
  1231. printAsOperand(O, SlotTracker);
  1232. O << " = CANONICAL-INDUCTION";
  1233. }
  1234. #endif
/// Widen the canonical IV: for each unroll part, produce the vector IV value
/// splat(iv) + (Part * VF + <0, 1, ..., VF-1>), or a scalar add when VF == 1.
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  Value *CanonicalIV = State.get(getOperand(0), 0);
  Type *STy = CanonicalIV->getType();
  // Emit in the header, just before its terminator.
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  ElementCount VF = State.VF;
  // Broadcast the scalar IV to a vector, unless VF is scalar.
  Value *VStart = VF.isScalar()
                      ? CanonicalIV
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    // Base step for this part is Part * VF; for vectors, add a per-lane
    // 0..VF-1 step vector on top of the splatted base.
    Value *VStep = createStepForVF(Builder, STy, VF, Part);
    if (VF.isVector()) {
      VStep = Builder.CreateVectorSplat(VF, VStep);
      VStep =
          Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
    }
    Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
    State.set(this, CanonicalVectorIV, Part);
  }
}
  1253. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  1254. void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
  1255. VPSlotTracker &SlotTracker) const {
  1256. O << Indent << "EMIT ";
  1257. printAsOperand(O, SlotTracker);
  1258. O << " = WIDEN-CANONICAL-INDUCTION ";
  1259. printOperands(O, SlotTracker);
  1260. }
  1261. #endif
/// Create the vector phi for a first-order recurrence. For vector VF the
/// start value is inserted into the last lane of a poison vector in the
/// preheader (the earlier lanes are unused), matching the splice pattern
/// documented in FirstOrderRecurrenceSplice above.
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  if (State.VF.isVector()) {
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    // Build the init vector in the preheader so it dominates the loop.
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
    // The last-lane index is the runtime VF minus one (VF may be scalable).
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence. Only part 0 is set here; the
  // backedge incoming value is added later when the latch is connected.
  PHINode *EntryPart = PHINode::Create(
      VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, State.CFG.VectorPreHeader);
  State.set(this, EntryPart, 0);
}
  1285. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  1286. void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
  1287. VPSlotTracker &SlotTracker) const {
  1288. O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  1289. printAsOperand(O, SlotTracker);
  1290. O << " = phi ";
  1291. printOperands(O, SlotTracker);
  1292. }
  1293. #endif
/// Create the reduction phi(s) in the vector loop header and wire their
/// preheader incoming values: the start value for part 0, the reduction
/// identity for all other parts.
void VPReductionPHIRecipe::execute(VPTransformState &State) {
  PHINode *PN = cast<PHINode>(getUnderlyingValue());
  auto &Builder = State.Builder;

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy =
      ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.LI->getLoopFor(HeaderBB)->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  // Ordered (in-order) reductions get a single phi regardless of UF.
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart =
        PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part);
  }

  // Reductions do not have to start at zero. They can start with
  // any loop invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
    // MinMax reduction have the start value as their identify.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      // Splat the start value in the preheader so it dominates the loop.
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    // Other reductions use the kind's identity element; for vector phis the
    // start value occupies lane 0 on top of an identity splat.
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, State.CFG.VectorPreHeader);
  }
}
  1349. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this recipe as "<Indent>WIDEN-REDUCTION-PHI <result> = phi <operands>"
/// using \p SlotTracker to name the involved VPValues.
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
  1357. #endif
// Explicit instantiation of the generic dominator-tree construction for
// VPlan's CFG (VPDominatorTree over VPBlockBase).
template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
/// Replace every use of this VPValue in all of its users with \p New.
///
/// Note the careful iteration scheme: rewriting the last operand of a user
/// that refers to this value causes that user to be removed from Users, which
/// compacts the list in place. The outer loop therefore re-reads
/// getNumUsers() each iteration and only advances the index when the user
/// count stayed the same.
void VPValue::replaceAllUsesWith(VPValue *New) {
  for (unsigned J = 0; J < getNumUsers();) {
    VPUser *User = Users[J];
    // Snapshot the user count before rewriting: setOperand may drop User from
    // our user list once its last reference to this value is replaced.
    unsigned NumUsers = getNumUsers();
    for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
      if (User->getOperand(I) == this)
        User->setOperand(I, New);
    // If a user got removed after updating the current user, the next user to
    // update will be moved to the current position, so we only need to
    // increment the index if the number of users did not change.
    if (NumUsers == getNumUsers())
      J++;
  }
}
  1373. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  1374. void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
  1375. if (const Value *UV = getUnderlyingValue()) {
  1376. OS << "ir<";
  1377. UV->printAsOperand(OS, false);
  1378. OS << ">";
  1379. return;
  1380. }
  1381. unsigned Slot = Tracker.getSlot(this);
  1382. if (Slot == unsigned(-1))
  1383. OS << "<badref>";
  1384. else
  1385. OS << "vp<%" << Tracker.getSlot(this) << ">";
  1386. }
  1387. void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
  1388. interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
  1389. Op->printAsOperand(O, SlotTracker);
  1390. });
  1391. }
  1392. #endif
  1393. void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
  1394. Old2NewTy &Old2New,
  1395. InterleavedAccessInfo &IAI) {
  1396. ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
  1397. for (VPBlockBase *Base : RPOT) {
  1398. visitBlock(Base, Old2New, IAI);
  1399. }
  1400. }
/// Translate the IR-level interleave groups of \p IAI for the recipes in
/// \p Block, recording the result in InterleaveGroupMap. Regions are handled
/// by recursing into visitRegion; \p Old2New caches the IR-group ->
/// VPInstruction-group mapping so each group is cloned only once.
void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
                                         InterleavedAccessInfo &IAI) {
  if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
    for (VPRecipeBase &VPI : *VPBB) {
      // Header phis have no underlying memory instruction to look up.
      if (isa<VPHeaderPHIRecipe>(&VPI))
        continue;
      assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
      auto *VPInst = cast<VPInstruction>(&VPI);
      auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
      // Skip instructions that are not part of any interleave group.
      auto *IG = IAI.getInterleaveGroup(Inst);
      if (!IG)
        continue;
      // Create the VPInstruction-based clone of this group on first sight.
      auto NewIGIter = Old2New.find(IG);
      if (NewIGIter == Old2New.end())
        Old2New[IG] = new InterleaveGroup<VPInstruction>(
            IG->getFactor(), IG->isReverse(), IG->getAlign());
      // Mirror the group's insert position onto the cloned group.
      if (Inst == IG->getInsertPos())
        Old2New[IG]->setInsertPos(VPInst);
      InterleaveGroupMap[VPInst] = Old2New[IG];
      // NOTE(review): for reversed groups the member is inserted with the
      // negated factor wrapped in Align — mirrors how insertMember encodes
      // reversed strides; confirm against InterleaveGroup::insertMember.
      InterleaveGroupMap[VPInst]->insertMember(
          VPInst, IG->getIndex(Inst),
          Align(IG->isReverse() ? (-1) * int(IG->getFactor())
                                : IG->getFactor()));
    }
  } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
    visitRegion(Region, Old2New, IAI);
  else
    llvm_unreachable("Unsupported kind of VPBlock.");
}
  1430. VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
  1431. InterleavedAccessInfo &IAI) {
  1432. Old2NewTy Old2New;
  1433. visitRegion(cast<VPRegionBlock>(Plan.getEntry()), Old2New, IAI);
  1434. }
  1435. void VPSlotTracker::assignSlot(const VPValue *V) {
  1436. assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!");
  1437. Slots[V] = NextSlot++;
  1438. }
  1439. void VPSlotTracker::assignSlots(const VPlan &Plan) {
  1440. for (const VPValue *V : Plan.VPExternalDefs)
  1441. assignSlot(V);
  1442. assignSlot(&Plan.VectorTripCount);
  1443. if (Plan.BackedgeTakenCount)
  1444. assignSlot(Plan.BackedgeTakenCount);
  1445. ReversePostOrderTraversal<
  1446. VPBlockRecursiveTraversalWrapper<const VPBlockBase *>>
  1447. RPOT(VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(
  1448. Plan.getEntry()));
  1449. for (const VPBasicBlock *VPBB :
  1450. VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
  1451. for (const VPRecipeBase &Recipe : *VPBB)
  1452. for (VPValue *Def : Recipe.definedValues())
  1453. assignSlot(Def);
  1454. }
  1455. bool vputils::onlyFirstLaneUsed(VPValue *Def) {
  1456. return all_of(Def->users(), [Def](VPUser *U) {
  1457. return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(Def);
  1458. });
  1459. }