// VPlanRecipes.cpp
  1. //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file contains implementations for different VPlan recipes.
  11. ///
  12. //===----------------------------------------------------------------------===//
  13. #include "VPlan.h"
  14. #include "llvm/ADT/STLExtras.h"
  15. #include "llvm/ADT/SmallVector.h"
  16. #include "llvm/ADT/Twine.h"
  17. #include "llvm/Analysis/IVDescriptors.h"
  18. #include "llvm/IR/BasicBlock.h"
  19. #include "llvm/IR/IRBuilder.h"
  20. #include "llvm/IR/Instruction.h"
  21. #include "llvm/IR/Instructions.h"
  22. #include "llvm/IR/Type.h"
  23. #include "llvm/IR/Value.h"
  24. #include "llvm/Support/Casting.h"
  25. #include "llvm/Support/CommandLine.h"
  26. #include "llvm/Support/Debug.h"
  27. #include "llvm/Support/raw_ostream.h"
  28. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  29. #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
  30. #include <cassert>
  31. using namespace llvm;
  32. using VectorParts = SmallVector<Value *, 2>;
  33. extern cl::opt<bool> EnableVPlanNativePath;
  34. #define LV_NAME "loop-vectorize"
  35. #define DEBUG_TYPE LV_NAME
/// Returns true if the recipe may write to memory.
/// Unlisted recipe kinds conservatively fall through to the default and are
/// assumed to write.
bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    // A widened memory instruction writes iff it is a store.
    return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    // Replicated/widened-call recipes defer to the underlying scalar
    // instruction (e.g. a call's memory attributes).
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPBranchOnMaskSC:
  case VPScalarIVStepsSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    // These recipe kinds never write; the assert cross-checks against the
    // underlying IR instruction when one is attached.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    // Conservative answer for any recipe not classified above.
    return true;
  }
}
/// Returns true if the recipe may read from memory.
/// Mirrors mayWriteToMemory(); unlisted recipe kinds conservatively return
/// true.
bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    // A widened memory instruction reads iff it is a load (i.e. not a store).
    return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    // Defer to the underlying scalar instruction's memory behavior.
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPBranchOnMaskSC:
  case VPScalarIVStepsSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    // These recipe kinds never read; the assert cross-checks against the
    // underlying IR instruction when one is attached.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    // Conservative answer for any recipe not classified above.
    return true;
  }
}
/// Returns true if the recipe may have side-effects.
/// Unlisted recipe kinds conservatively return true.
bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPDerivedIVSC:
  case VPPredInstPHISC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenPointerInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC:
  case VPScalarIVStepsSC: {
    // Side-effect free recipe kinds; the assert cross-checks against the
    // underlying IR instruction when one is attached.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPReplicateSC: {
    // Replicated recipes inherit side-effects from the scalar instruction.
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}
  128. void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
  129. auto Lane = VPLane::getLastLaneForVF(State.VF);
  130. VPValue *ExitValue = getOperand(0);
  131. if (vputils::isUniformAfterVectorization(ExitValue))
  132. Lane = VPLane::getFirstLane();
  133. Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
  134. State.Builder.GetInsertBlock());
  135. }
  136. void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  137. assert(!Parent && "Recipe already in some VPBasicBlock");
  138. assert(InsertPos->getParent() &&
  139. "Insertion position not in any VPBasicBlock");
  140. Parent = InsertPos->getParent();
  141. Parent->getRecipeList().insert(InsertPos->getIterator(), this);
  142. }
/// Insert this (currently unlinked) recipe into \p BB at position \p I, which
/// must be an iterator into \p BB's recipe list (or BB.end()).
void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  Parent = &BB;
  BB.getRecipeList().insert(I, this);
}
  150. void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  151. assert(!Parent && "Recipe already in some VPBasicBlock");
  152. assert(InsertPos->getParent() &&
  153. "Insertion position not in any VPBasicBlock");
  154. Parent = InsertPos->getParent();
  155. Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
  156. }
  157. void VPRecipeBase::removeFromParent() {
  158. assert(getParent() && "Recipe not in any VPBasicBlock");
  159. getParent()->getRecipeList().remove(getIterator());
  160. Parent = nullptr;
  161. }
/// Remove this recipe from its parent block and delete it; returns an
/// iterator to the next recipe in the list.
iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}
/// Unlink this recipe from its current block and re-insert it right after
/// \p InsertPos (which must be in a block).
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}
/// Unlink this recipe from its current block and re-insert it into \p BB at
/// position \p I.
void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}
/// Generate the IR for unroll part \p Part of this VPInstruction.
/// Binary ops are handled generically; the remaining VPInstruction opcodes
/// each get a dedicated case below. The branch opcodes perform CFG surgery:
/// they replace the block's temporary unreachable terminator.
void VPInstruction::generateInstruction(VPTransformState &State,
                                        unsigned Part) {
  IRBuilderBase &Builder = State.Builder;
  Builder.SetCurrentDebugLocation(DL);
  // Generic path for all LLVM binary operators.
  if (Instruction::isBinaryOp(getOpcode())) {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    Value *V =
        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
    State.set(this, V, Part);
    return;
  }
  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    Value *V = Builder.CreateNot(A, Name);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ICmpULE: {
    // Unsigned IV <= trip-count compare, e.g. for tail-folded masks.
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *V = Builder.CreateICmpULE(IV, TC, Name);
    State.set(this, V, Part);
    break;
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), Part);
    // Emit llvm.get.active.lane.mask producing a <VF x i1> predicate.
    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    Instruction *Call = Builder.CreateIntrinsic(
        Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
        {VIVElem0, ScalarTC}, nullptr, Name);
    State.set(this, Call, Part);
    break;
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))
    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy()) {
      // Scalar (VF == 1) case: the "splice" is just the previous value.
      State.set(this, PartMinus1, Part);
    } else {
      Value *V2 = State.get(getOperand(1), Part);
      State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name),
                Part);
    }
    break;
  }
  case VPInstruction::CanonicalIVIncrement:
  case VPInstruction::CanonicalIVIncrementNUW: {
    // Compute the increment once (Part 0) and reuse it for all other parts.
    Value *Next = nullptr;
    if (Part == 0) {
      bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
      auto *Phi = State.get(getOperand(0), 0);
      // The loop step is equal to the vectorization factor (num of SIMD
      // elements) times the unroll factor (num of SIMD instructions).
      Value *Step =
          createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
      Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false);
    } else {
      Next = State.get(this, 0);
    }
    State.set(this, Next, Part);
    break;
  }
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::CanonicalIVIncrementForPartNUW: {
    bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW;
    auto *IV = State.get(getOperand(0), VPIteration(0, 0));
    if (Part == 0) {
      // Part 0 is the unmodified canonical IV.
      State.set(this, IV, Part);
      break;
    }
    // The canonical IV is incremented by the vectorization factor (num of SIMD
    // elements) times the unroll part.
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false);
    State.set(this, Next, Part);
    break;
  }
  case VPInstruction::BranchOnCond: {
    // Terminators are emitted once, for Part 0 only.
    if (Part != 0)
      break;
    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now and
    // to forward destination(s) later when they are created.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  case VPInstruction::BranchOnCount: {
    // Terminators are emitted once, for Part 0 only.
    if (Part != 0)
      break;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);
    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to the
    // forward destination (the exit/middle block) later when it is created.
    // Note that CreateCondBr expects a valid BB as first argument, so we need
    // to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}
/// Generate IR for this VPInstruction for all unroll parts, with this
/// instruction's fast-math flags installed on the builder for the duration.
void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  // FMFGuard restores the builder's previous fast-math flags on scope exit.
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  State.Builder.setFastMathFlags(FMF);
  for (unsigned Part = 0; Part < State.UF; ++Part)
    generateInstruction(State, Part);
}
  328. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Debug helper: print this VPInstruction to dbgs() with fresh slot numbers.
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}
/// Print this VPInstruction as "EMIT [result =] <opcode> <operands>".
/// VPInstruction-specific opcodes get custom mnemonics; plain LLVM opcodes
/// fall back to Instruction::getOpcodeName. Output strings are matched by
/// lit tests, so they must stay stable.
void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }
  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::ICmpULE:
    O << "icmp ule";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::CanonicalIVIncrement:
    O << "VF * UF + ";
    break;
  case VPInstruction::CanonicalIVIncrementNUW:
    O << "VF * UF +(nuw) ";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part + ";
    break;
  case VPInstruction::CanonicalIVIncrementForPartNUW:
    O << "VF * Part +(nuw) ";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count ";
    break;
  default:
    // Plain LLVM opcode (e.g. add, select).
    O << Instruction::getOpcodeName(getOpcode());
  }
  O << FMF;
  for (const VPValue *Operand : operands()) {
    O << " ";
    Operand->printAsOperand(O, SlotTracker);
  }
  // Append the debug location, if any.
  if (DL) {
    O << ", !dbg ";
    DL.print(O);
  }
}
  390. #endif
  391. void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
  392. // Make sure the VPInstruction is a floating-point operation.
  393. assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
  394. Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
  395. Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
  396. Opcode == Instruction::FCmp) &&
  397. "this op can't take fast-math flags");
  398. FMF = FMFNew;
  399. }
/// Widen the underlying call for each unroll part, either via a vector
/// intrinsic (when VectorIntrinsicID is set) or via a vectorized library
/// function found through VFDatabase.
void VPWidenCallRecipe::execute(VPTransformState &State) {
  auto &CI = *cast<CallInst>(getUnderlyingInstr());
  assert(!isa<DbgInfoIntrinsic>(CI) &&
         "DbgInfoIntrinsic should have been dropped during VPlan construction");
  State.setDebugLocFromInst(&CI);
  // Widened types of the original arguments. Note: computed but only the
  // per-part TysForDecl below feeds intrinsic declaration lookup.
  SmallVector<Type *, 4> Tys;
  for (Value *ArgOperand : CI.args())
    Tys.push_back(
        ToVectorTy(ArgOperand->getType(), State.VF.getKnownMinValue()));
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    // Overload types for the intrinsic declaration; slot 0 is the return type.
    SmallVector<Type *, 2> TysForDecl = {CI.getType()};
    SmallVector<Value *, 4> Args;
    for (const auto &I : enumerate(operands())) {
      // Some intrinsics have a scalar argument - don't replace it with a
      // vector.
      Value *Arg;
      if (VectorIntrinsicID == Intrinsic::not_intrinsic ||
          !isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
        Arg = State.get(I.value(), Part);
      else
        // Scalar operand: take lane 0 of part 0.
        Arg = State.get(I.value(), VPIteration(0, 0));
      if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
        TysForDecl.push_back(Arg->getType());
      Args.push_back(Arg);
    }
    Function *VectorF;
    if (VectorIntrinsicID != Intrinsic::not_intrinsic) {
      // Use vector version of the intrinsic.
      if (State.VF.isVector())
        TysForDecl[0] =
            VectorType::get(CI.getType()->getScalarType(), State.VF);
      Module *M = State.Builder.GetInsertBlock()->getModule();
      VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
      assert(VectorF && "Can't retrieve vector intrinsic.");
    } else {
      // Use vector version of the function call.
      const VFShape Shape = VFShape::get(CI, State.VF, false /*HasGlobalPred*/);
#ifndef NDEBUG
      assert(VFDatabase(CI).getVectorizedFunction(Shape) != nullptr &&
             "Can't create vector function.");
#endif
      VectorF = VFDatabase(CI).getVectorizedFunction(Shape);
    }
    // Preserve operand bundles and fast-math flags from the scalar call.
    SmallVector<OperandBundleDef, 1> OpBundles;
    CI.getOperandBundlesAsDefs(OpBundles);
    CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
    if (isa<FPMathOperator>(V))
      V->copyFastMathFlags(&CI);
    State.set(this, V, Part);
    State.addMetadata(V, &CI);
  }
}
  452. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  453. void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
  454. VPSlotTracker &SlotTracker) const {
  455. O << Indent << "WIDEN-CALL ";
  456. auto *CI = cast<CallInst>(getUnderlyingInstr());
  457. if (CI->getType()->isVoidTy())
  458. O << "void ";
  459. else {
  460. printAsOperand(O, SlotTracker);
  461. O << " = ";
  462. }
  463. O << "call @" << CI->getCalledFunction()->getName() << "(";
  464. printOperands(O, SlotTracker);
  465. O << ")";
  466. if (VectorIntrinsicID)
  467. O << " (using vector intrinsic)";
  468. else
  469. O << " (using library function)";
  470. }
  471. void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
  472. VPSlotTracker &SlotTracker) const {
  473. O << Indent << "WIDEN-SELECT ";
  474. printAsOperand(O, SlotTracker);
  475. O << " = select ";
  476. getOperand(0)->printAsOperand(O, SlotTracker);
  477. O << ", ";
  478. getOperand(1)->printAsOperand(O, SlotTracker);
  479. O << ", ";
  480. getOperand(2)->printAsOperand(O, SlotTracker);
  481. O << (InvariantCond ? " (condition is loop invariant)" : "");
  482. }
  483. #endif
  484. void VPWidenSelectRecipe::execute(VPTransformState &State) {
  485. auto &I = *cast<SelectInst>(getUnderlyingInstr());
  486. State.setDebugLocFromInst(&I);
  487. // The condition can be loop invariant but still defined inside the
  488. // loop. This means that we can't just use the original 'cond' value.
  489. // We have to take the 'vectorized' value and pick the first lane.
  490. // Instcombine will make this a no-op.
  491. auto *InvarCond =
  492. InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;
  493. for (unsigned Part = 0; Part < State.UF; ++Part) {
  494. Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
  495. Value *Op0 = State.get(getOperand(1), Part);
  496. Value *Op1 = State.get(getOperand(2), Part);
  497. Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
  498. State.set(this, Sel, Part);
  499. State.addMetadata(Sel, &I);
  500. }
  501. }
/// Widen the underlying instruction for each unroll part. Handles unary and
/// binary ops, freeze, compares, and casts; opcodes handled by dedicated
/// recipes (call, br, phi, gep, select) must not reach here.
void VPWidenRecipe::execute(VPTransformState &State) {
  auto &I = *cast<Instruction>(getUnderlyingValue());
  auto &Builder = State.Builder;
  switch (I.getOpcode()) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    State.setDebugLocFromInst(&I);
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));
      Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
      // CreateNAryOp may constant-fold, so only copy flags onto real
      // instructions.
      if (auto *VecOp = dyn_cast<Instruction>(V)) {
        VecOp->copyIRFlags(&I);
        // If the instruction is vectorized and was in a basic block that needed
        // predication, we can't propagate poison-generating flags (nuw/nsw,
        // exact, etc.). The control flow has been linearized and the
        // instruction is no longer guarded by the predicate, which could make
        // the flag properties to no longer hold.
        if (State.MayGeneratePoisonRecipes.contains(this))
          VecOp->dropPoisonGeneratingFlags();
      }
      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V, &I);
    }
    break;
  }
  case Instruction::Freeze: {
    State.setDebugLocFromInst(&I);
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);
      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    bool FCmp = (I.getOpcode() == Instruction::FCmp);
    auto *Cmp = cast<CmpInst>(&I);
    State.setDebugLocFromInst(Cmp);
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        Builder.setFastMathFlags(Cmp->getFastMathFlags());
        C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C, &I);
    }
    break;
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast: {
    auto *CI = cast<CastInst>(&I);
    State.setDebugLocFromInst(CI);
    /// Vectorize casts.
    Type *DestTy = (State.VF.isScalar())
                       ? CI->getType()
                       : VectorType::get(CI->getType(), State.VF);
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
      State.set(this, Cast, Part);
      State.addMetadata(Cast, &I);
    }
    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.
}
  618. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  619. void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
  620. VPSlotTracker &SlotTracker) const {
  621. O << Indent << "WIDEN ";
  622. printAsOperand(O, SlotTracker);
  623. const Instruction *UI = getUnderlyingInstr();
  624. O << " = " << UI->getOpcodeName() << " ";
  625. if (auto *Cmp = dyn_cast<CmpInst>(UI))
  626. O << CmpInst::getPredicateName(Cmp->getPredicate()) << " ";
  627. printOperands(O, SlotTracker);
  628. }
/// Print a WIDEN-INDUCTION recipe. The escaped "\l" sequences are DOT
/// (graphviz) line-break markers used when dumping VPlans as graphs; keep
/// them byte-exact.
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    // Truncated induction: print the original IV and the truncated result.
    O << "\\l\"";
    O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\" ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);
  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
  642. #endif
  643. bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
  644. auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
  645. auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
  646. return StartC && StartC->isZero() && StepC && StepC->isOne();
  647. }
  648. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print a DERIVED-IV recipe as "result = DERIVED-IV start + iv * step".
void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  // NOTE(review): Indent is streamed a second time here, producing
  // "<indent>result<indent>= DERIVED-IV ..." — looks like it was meant to be
  // just " = "; confirm against the vplan-printing lit tests.
  O << Indent << "= DERIVED-IV ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << " + ";
  getCanonicalIV()->printAsOperand(O, SlotTracker);
  O << " * ";
  getStepValue()->printAsOperand(O, SlotTracker);
  // Mention the narrowing only when the result type differs from the step's.
  if (IndDesc.getStep()->getType() != ResultTy)
    O << " (truncated to " << *ResultTy << ")";
}
  662. #endif
  663. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print a SCALAR-STEPS recipe as "result = SCALAR-STEPS operands".
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  // NOTE(review): as in VPDerivedIVRecipe::print, Indent is streamed twice;
  // verify the intended separator is " = " against the printing tests.
  O << Indent << "= SCALAR-STEPS ";
  printOperands(O, SlotTracker);
}
  671. #endif
/// Widen a GEP: either broadcast a clone of the scalar GEP (when every
/// operand is loop-invariant) or build a new GEP per unroll part, using
/// vector operands only for the loop-varying values.
void VPWidenGEPRecipe::execute(VPTransformState &State) {
  auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
  // Construct a vector GEP by widening the operands of the scalar GEP as
  // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
  // results in a vector of pointers when at least one operand of the GEP
  // is vector-typed. Thus, to keep the representation compact, we only use
  // vector-typed operands for loop-varying values.

  if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
    // If we are vectorizing, but the GEP has only loop-invariant operands,
    // the GEP we build (by only using vector-typed operands for
    // loop-varying values) would be a scalar pointer. Thus, to ensure we
    // produce a vector of pointers, we need to either arbitrarily pick an
    // operand to broadcast, or broadcast a clone of the original GEP.
    // Here, we broadcast a clone of the original.
    //
    // TODO: If at some point we decide to scalarize instructions having
    //       loop-invariant operands, this special case will no longer be
    //       required. We would add the scalarization decision to
    //       collectLoopScalars() and teach getVectorValue() to broadcast
    //       the lane-zero scalar value.
    auto *Clone = State.Builder.Insert(GEP->clone());
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // Each unroll part gets its own splat of the cloned scalar GEP.
      Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
      State.set(this, EntryPart, Part);
      State.addMetadata(EntryPart, GEP);
    }
  } else {
    // If the GEP has at least one loop-varying operand, we are sure to
    // produce a vector of pointers. But if we are only unrolling, we want
    // to produce a scalar GEP for each unroll part. Thus, the GEP we
    // produce with the code below will be scalar (if VF == 1) or vector
    // (otherwise). Note that for the unroll-only case, we still maintain
    // values in the vector mapping with initVector, as we do for other
    // instructions.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // The pointer operand of the new GEP. If it's loop-invariant, we
      // won't broadcast it.
      auto *Ptr = IsPtrLoopInvariant
                      ? State.get(getOperand(0), VPIteration(0, 0))
                      : State.get(getOperand(0), Part);

      // Collect all the indices for the new GEP. If any index is
      // loop-invariant, we won't broadcast it.
      SmallVector<Value *, 4> Indices;
      for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
        VPValue *Operand = getOperand(I);
        if (IsIndexLoopInvariant[I - 1])
          Indices.push_back(State.get(Operand, VPIteration(0, 0)));
        else
          Indices.push_back(State.get(Operand, Part));
      }

      // If the GEP instruction is vectorized and was in a basic block that
      // needed predication, we can't propagate the poison-generating 'inbounds'
      // flag. The control flow has been linearized and the GEP is no longer
      // guarded by the predicate, which could make the 'inbounds' properties to
      // no longer hold.
      bool IsInBounds =
          GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;

      // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
      // but it should be a vector, otherwise.
      auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
                                             Indices, "", IsInBounds);
      assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
             "NewGEP is not a pointer vector");
      State.set(this, NewGEP, Part);
      State.addMetadata(NewGEP, GEP);
    }
  }
}
  740. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  741. void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
  742. VPSlotTracker &SlotTracker) const {
  743. O << Indent << "WIDEN-GEP ";
  744. O << (IsPtrLoopInvariant ? "Inv" : "Var");
  745. size_t IndicesNumber = IsIndexLoopInvariant.size();
  746. for (size_t I = 0; I < IndicesNumber; ++I)
  747. O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
  748. O << " ";
  749. printAsOperand(O, SlotTracker);
  750. O << " = getelementptr ";
  751. printOperands(O, SlotTracker);
  752. }
  753. #endif
  754. void VPBlendRecipe::execute(VPTransformState &State) {
  755. State.setDebugLocFromInst(Phi);
  756. // We know that all PHIs in non-header blocks are converted into
  757. // selects, so we don't have to worry about the insertion order and we
  758. // can just use the builder.
  759. // At this point we generate the predication tree. There may be
  760. // duplications since this is a simple recursive scan, but future
  761. // optimizations will clean it up.
  762. unsigned NumIncoming = getNumIncomingValues();
  763. // Generate a sequence of selects of the form:
  764. // SELECT(Mask3, In3,
  765. // SELECT(Mask2, In2,
  766. // SELECT(Mask1, In1,
  767. // In0)))
  768. // Note that Mask0 is never used: lanes for which no path reaches this phi and
  769. // are essentially undef are taken from In0.
  770. VectorParts Entry(State.UF);
  771. for (unsigned In = 0; In < NumIncoming; ++In) {
  772. for (unsigned Part = 0; Part < State.UF; ++Part) {
  773. // We might have single edge PHIs (blocks) - use an identity
  774. // 'select' for the first PHI operand.
  775. Value *In0 = State.get(getIncomingValue(In), Part);
  776. if (In == 0)
  777. Entry[Part] = In0; // Initialize with the first incoming value.
  778. else {
  779. // Select between the current value and the previous incoming edge
  780. // based on the incoming mask.
  781. Value *Cond = State.get(getMask(In), Part);
  782. Entry[Part] =
  783. State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
  784. }
  785. }
  786. }
  787. for (unsigned Part = 0; Part < State.UF; ++Part)
  788. State.set(this, Entry[Part], Part);
  789. }
  790. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  791. void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
  792. VPSlotTracker &SlotTracker) const {
  793. O << Indent << "BLEND ";
  794. Phi->printAsOperand(O, false);
  795. O << " =";
  796. if (getNumIncomingValues() == 1) {
  797. // Not a User of any mask: not really blending, this is a
  798. // single-predecessor phi.
  799. O << " ";
  800. getIncomingValue(0)->printAsOperand(O, SlotTracker);
  801. } else {
  802. for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
  803. O << " ";
  804. getIncomingValue(I)->printAsOperand(O, SlotTracker);
  805. O << "/";
  806. getMask(I)->printAsOperand(O, SlotTracker);
  807. }
  808. }
  809. }
  810. void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
  811. VPSlotTracker &SlotTracker) const {
  812. O << Indent << "REDUCE ";
  813. printAsOperand(O, SlotTracker);
  814. O << " = ";
  815. getChainOp()->printAsOperand(O, SlotTracker);
  816. O << " +";
  817. if (isa<FPMathOperator>(getUnderlyingInstr()))
  818. O << getUnderlyingInstr()->getFastMathFlags();
  819. O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
  820. getVecOp()->printAsOperand(O, SlotTracker);
  821. if (getCondOp()) {
  822. O << ", ";
  823. getCondOp()->printAsOperand(O, SlotTracker);
  824. }
  825. O << ")";
  826. if (RdxDesc->IntermediateStore)
  827. O << " (with final reduction value stored in invariant address sank "
  828. "outside of loop)";
  829. }
  830. void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
  831. VPSlotTracker &SlotTracker) const {
  832. O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
  833. if (!getUnderlyingInstr()->getType()->isVoidTy()) {
  834. printAsOperand(O, SlotTracker);
  835. O << " = ";
  836. }
  837. if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
  838. O << "call @" << CB->getCalledFunction()->getName() << "(";
  839. interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
  840. O, [&O, &SlotTracker](VPValue *Op) {
  841. Op->printAsOperand(O, SlotTracker);
  842. });
  843. O << ")";
  844. } else {
  845. O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
  846. printOperands(O, SlotTracker);
  847. }
  848. if (AlsoPack)
  849. O << " (S->V)";
  850. }
  851. #endif
  852. void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
  853. assert(State.Instance && "Branch on Mask works only on single instance.");
  854. unsigned Part = State.Instance->Part;
  855. unsigned Lane = State.Instance->Lane.getKnownLane();
  856. Value *ConditionBit = nullptr;
  857. VPValue *BlockInMask = getMask();
  858. if (BlockInMask) {
  859. ConditionBit = State.get(BlockInMask, Part);
  860. if (ConditionBit->getType()->isVectorTy())
  861. ConditionBit = State.Builder.CreateExtractElement(
  862. ConditionBit, State.Builder.getInt32(Lane));
  863. } else // Block in mask is all-one.
  864. ConditionBit = State.Builder.getTrue();
  865. // Replace the temporary unreachable terminator with a new conditional branch,
  866. // whose two destinations will be set later when they are created.
  867. auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
  868. assert(isa<UnreachableInst>(CurrentTerminator) &&
  869. "Expected to replace unreachable terminator with conditional branch.");
  870. auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
  871. CondBr->setSuccessor(0, nullptr);
  872. ReplaceInstWithInst(CurrentTerminator, CondBr);
  873. }
/// Create the phi that merges the value produced inside a predicated block
/// with the value flowing around it, for a single (part, lane) instance.
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Predicated instruction PHI works per instance.");
  Instruction *ScalarPredInst =
      cast<Instruction>(State.get(getOperand(0), *State.Instance));
  BasicBlock *PredicatedBB = ScalarPredInst->getParent();
  BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
  assert(PredicatingBB && "Predicated block has no single predecessor.");
  assert(isa<VPReplicateRecipe>(getOperand(0)) &&
         "operand must be VPReplicateRecipe");

  // By current pack/unpack logic we need to generate only a single phi node: if
  // a vector value for the predicated instruction exists at this point it means
  // the instruction has vector users only, and a phi for the vector value is
  // needed. In this case the recipe of the predicated instruction is marked to
  // also do that packing, thereby "hoisting" the insert-element sequence.
  // Otherwise, a phi node for the scalar value is needed.
  unsigned Part = State.Instance->Part;
  if (State.hasVectorValue(getOperand(0), Part)) {
    // Packed case: the operand's vector value is an insert-element produced
    // inside the predicated block; phi between that and the pre-insert vector.
    Value *VectorValue = State.get(getOperand(0), Part);
    InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
    PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
    VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
    VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
    if (State.hasVectorValue(this, Part))
      State.reset(this, VPhi, Part);
    else
      State.set(this, VPhi, Part);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), VPhi, Part);
  } else {
    // Scalar case: phi between poison (lane not executed) and the scalar
    // result produced in the predicated block.
    Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
                     PredicatingBB);
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    if (State.hasScalarValue(this, *State.Instance))
      State.reset(this, Phi, *State.Instance);
    else
      State.set(this, Phi, *State.Instance);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), Phi, *State.Instance);
  }
}
  918. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  919. void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
  920. VPSlotTracker &SlotTracker) const {
  921. O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  922. printAsOperand(O, SlotTracker);
  923. O << " = ";
  924. printOperands(O, SlotTracker);
  925. }
  926. void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
  927. VPSlotTracker &SlotTracker) const {
  928. O << Indent << "WIDEN ";
  929. if (!isStore()) {
  930. getVPSingleValue()->printAsOperand(O, SlotTracker);
  931. O << " = ";
  932. }
  933. O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
  934. printOperands(O, SlotTracker);
  935. }
  936. #endif
  937. void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
  938. Value *Start = getStartValue()->getLiveInIRValue();
  939. PHINode *EntryPart = PHINode::Create(
  940. Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
  941. BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  942. EntryPart->addIncoming(Start, VectorPH);
  943. EntryPart->setDebugLoc(DL);
  944. for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
  945. State.set(this, EntryPart, Part);
  946. }
  947. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  948. void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
  949. VPSlotTracker &SlotTracker) const {
  950. O << Indent << "EMIT ";
  951. printAsOperand(O, SlotTracker);
  952. O << " = CANONICAL-INDUCTION";
  953. }
  954. #endif
  955. bool VPCanonicalIVPHIRecipe::isCanonical(const InductionDescriptor &ID,
  956. Type *Ty) const {
  957. if (Ty != getScalarType())
  958. return false;
  959. // The start value of ID must match the start value of this canonical
  960. // induction.
  961. if (getStartValue()->getLiveInIRValue() != ID.getStartValue())
  962. return false;
  963. ConstantInt *Step = ID.getConstIntStepValue();
  964. // ID must also be incremented by one. IK_IntInduction always increment the
  965. // induction by Step, but the binary op may not be set.
  966. return ID.getKind() == InductionDescriptor::IK_IntInduction && Step &&
  967. Step->isOne();
  968. }
  969. bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
  970. return IsScalarAfterVectorization &&
  971. (!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
  972. }
  973. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  974. void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
  975. VPSlotTracker &SlotTracker) const {
  976. O << Indent << "EMIT ";
  977. printAsOperand(O, SlotTracker);
  978. O << " = WIDEN-POINTER-INDUCTION ";
  979. getStartValue()->printAsOperand(O, SlotTracker);
  980. O << ", " << *IndDesc.getStep();
  981. }
  982. #endif
  983. void VPExpandSCEVRecipe::execute(VPTransformState &State) {
  984. assert(!State.Instance && "cannot be used in per-lane");
  985. const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
  986. SCEVExpander Exp(SE, DL, "induction");
  987. Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
  988. &*State.Builder.GetInsertPoint());
  989. for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
  990. State.set(this, Res, Part);
  991. }
  992. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  993. void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
  994. VPSlotTracker &SlotTracker) const {
  995. O << Indent << "EMIT ";
  996. getVPSingleValue()->printAsOperand(O, SlotTracker);
  997. O << " = EXPAND SCEV " << *Expr;
  998. }
  999. #endif
  1000. void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  1001. Value *CanonicalIV = State.get(getOperand(0), 0);
  1002. Type *STy = CanonicalIV->getType();
  1003. IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  1004. ElementCount VF = State.VF;
  1005. Value *VStart = VF.isScalar()
  1006. ? CanonicalIV
  1007. : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  1008. for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
  1009. Value *VStep = createStepForVF(Builder, STy, VF, Part);
  1010. if (VF.isVector()) {
  1011. VStep = Builder.CreateVectorSplat(VF, VStep);
  1012. VStep =
  1013. Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
  1014. }
  1015. Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
  1016. State.set(this, CanonicalVectorIV, Part);
  1017. }
  1018. }
  1019. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  1020. void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
  1021. VPSlotTracker &SlotTracker) const {
  1022. O << Indent << "EMIT ";
  1023. printAsOperand(O, SlotTracker);
  1024. O << " = WIDEN-CANONICAL-INDUCTION ";
  1025. printOperands(O, SlotTracker);
  1026. }
  1027. #endif
/// Create the header phi for a first-order recurrence. For vector VFs the
/// start value is inserted into the last lane of a poison vector (emitted in
/// the preheader); the phi itself is placed at the header's first insertion
/// point.
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  if (State.VF.isVector()) {
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    // Emit the init-value insertion in the preheader, restoring the current
    // insert point afterwards via the guard.
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    // Last lane index is RuntimeVF - 1; RuntimeVF is computed at runtime for
    // scalable VFs.
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence. Only the preheader incoming
  // value is wired up here.
  PHINode *EntryPart = PHINode::Create(
      VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, VectorPH);
  State.set(this, EntryPart, 0);
}
  1052. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  1053. void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
  1054. VPSlotTracker &SlotTracker) const {
  1055. O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  1056. printAsOperand(O, SlotTracker);
  1057. O << " = phi ";
  1058. printOperands(O, SlotTracker);
  1059. }
  1060. #endif
/// Create the header phi(s) for a reduction and wire up their preheader
/// incoming values: the start value for part 0 and the identity for the
/// remaining parts.
void VPReductionPHIRecipe::execute(VPTransformState &State) {
  PHINode *PN = cast<PHINode>(getUnderlyingValue());
  auto &Builder = State.Builder;

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy =
      ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  // Ordered reductions use a single phi (part 0 only).
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart =
        PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  // Reductions do not have to start at zero. They can start with
  // any loop invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
    // MinMax reduction have the start value as their identify.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      // Splat the start value in the preheader; it serves as both the start
      // value and the identity for the non-first parts.
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      // Vector start value: the identity splat with the scalar start value
      // inserted at lane 0 (emitted in the preheader).
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}
  1117. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  1118. void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
  1119. VPSlotTracker &SlotTracker) const {
  1120. O << Indent << "WIDEN-REDUCTION-PHI ";
  1121. printAsOperand(O, SlotTracker);
  1122. O << " = phi ";
  1123. printOperands(O, SlotTracker);
  1124. }
  1125. #endif
/// Widen a non-induction phi in the VPlan-native path: create an empty vector
/// phi whose type is taken from the incoming value selected by StartIdx; the
/// phi's operands are filled in at the end of vector code generation.
void VPWidenPHIRecipe::execute(VPTransformState &State) {
  assert(EnableVPlanNativePath &&
         "Non-native vplans are not expected to have VPWidenPHIRecipes.");
  // Currently we enter here in the VPlan-native path for non-induction
  // PHIs where all control flow is uniform. We simply widen these PHIs.
  // Create a vector phi with no operands - the vector phi operands will be
  // set at the end of vector code generation.
  VPBasicBlock *Parent = getParent();
  VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
  unsigned StartIdx = 0;
  // For phis in header blocks of loop regions, use the index of the value
  // coming from the preheader.
  if (LoopRegion->getEntryBasicBlock() == Parent) {
    for (unsigned I = 0; I < getNumOperands(); ++I) {
      // Match the incoming block against the exiting block of the region's
      // single predecessor (i.e. the preheader).
      if (getIncomingBlock(I) ==
          LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
        StartIdx = I;
    }
  }
  Value *Op0 = State.get(getOperand(StartIdx), 0);
  Type *VecTy = Op0->getType();
  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
  State.set(this, VecPhi, 0);
}
  1150. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  1151. void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
  1152. VPSlotTracker &SlotTracker) const {
  1153. O << Indent << "WIDEN-PHI ";
  1154. auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  1155. // Unless all incoming values are modeled in VPlan print the original PHI
  1156. // directly.
  1157. // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
  1158. // values as VPValues.
  1159. if (getNumOperands() != OriginalPhi->getNumOperands()) {
  1160. O << VPlanIngredient(OriginalPhi);
  1161. return;
  1162. }
  1163. printAsOperand(O, SlotTracker);
  1164. O << " = phi ";
  1165. printOperands(O, SlotTracker);
  1166. }
  1167. #endif
  1168. // TODO: It would be good to use the existing VPWidenPHIRecipe instead and
  1169. // remove VPActiveLaneMaskPHIRecipe.
  1170. void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
  1171. BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  1172. for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
  1173. Value *StartMask = State.get(getOperand(0), Part);
  1174. PHINode *EntryPart =
  1175. State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
  1176. EntryPart->addIncoming(StartMask, VectorPH);
  1177. EntryPart->setDebugLoc(DL);
  1178. State.set(this, EntryPart, Part);
  1179. }
  1180. }
  1181. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  1182. void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
  1183. VPSlotTracker &SlotTracker) const {
  1184. O << Indent << "ACTIVE-LANE-MASK-PHI ";
  1185. printAsOperand(O, SlotTracker);
  1186. O << " = phi ";
  1187. printOperands(O, SlotTracker);
  1188. }
  1189. #endif