
//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines common loop utility functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "loop-utils"

static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
static const char *LLVMLoopDisableLICM = "llvm.licm.disable";

bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                   MemorySSAUpdater *MSSAU,
                                   bool PreserveLCSSA) {
  bool Changed = false;

  // We reuse a vector for the in-loop predecessors.
  SmallVector<BasicBlock *, 4> InLoopPredecessors;

  auto RewriteExit = [&](BasicBlock *BB) {
    assert(InLoopPredecessors.empty() &&
           "Must start with an empty predecessors list!");
    auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });

    // See if there are any non-loop predecessors of this exit block and
    // keep track of the in-loop predecessors.
    bool IsDedicatedExit = true;
    for (auto *PredBB : predecessors(BB))
      if (L->contains(PredBB)) {
        if (isa<IndirectBrInst>(PredBB->getTerminator()))
          // We cannot rewrite exiting edges from an indirectbr.
          return false;
        if (isa<CallBrInst>(PredBB->getTerminator()))
          // We cannot rewrite exiting edges from a callbr.
          return false;

        InLoopPredecessors.push_back(PredBB);
      } else {
        IsDedicatedExit = false;
      }

    assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");

    // Nothing to do if this is already a dedicated exit.
    if (IsDedicatedExit)
      return false;

    auto *NewExitBB = SplitBlockPredecessors(
        BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);

    if (!NewExitBB)
      LLVM_DEBUG(
          dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
                 << *L << "\n");
    else
      LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
                        << NewExitBB->getName() << "\n");
    return true;
  };

  // Walk the exit blocks directly rather than building up a data structure
  // for them, but only visit each one once.
  SmallPtrSet<BasicBlock *, 4> Visited;
  for (auto *BB : L->blocks())
    for (auto *SuccBB : successors(BB)) {
      // We're looking for exit blocks so skip in-loop successors.
      if (L->contains(SuccBB))
        continue;

      // Visit each exit block exactly once.
      if (!Visited.insert(SuccBB).second)
        continue;

      Changed |= RewriteExit(SuccBB);
    }

  return Changed;
}
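
// Illustrative sketch (not from the original source): given an exit block
// %exit reached both from the loop latch and from an unrelated block, the
// rewrite above peels the in-loop predecessors into a fresh block:
//
//   latch ----\             latch --> %exit.loopexit --\
//              > %exit  =>                              > %exit
//   other ----/             other ----------------------/
//
// so %exit.loopexit becomes the dedicated exit whose only predecessors are
// inside the loop.
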
/// Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
  SmallVector<Instruction *, 8> UsedOutside;

  for (auto *Block : L->getBlocks())
    // FIXME: I believe that this could use copy_if if the Inst reference
    // could be adapted into a pointer.
    for (auto &Inst : *Block) {
      auto Users = Inst.users();
      if (any_of(Users, [&](User *U) {
            auto *Use = cast<Instruction>(U);
            return !L->contains(Use->getParent());
          }))
        UsedOutside.push_back(&Inst);
    }

  return UsedOutside;
}

void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
  // By definition, all loop passes need the LoopInfo analysis and the
  // Dominator tree it depends on. Because they all participate in the loop
  // pass manager, they must also preserve these.
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addPreserved<LoopInfoWrapperPass>();

  // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
  // here because users shouldn't directly get them from this header.
  extern char &LoopSimplifyID;
  extern char &LCSSAID;
  AU.addRequiredID(LoopSimplifyID);
  AU.addPreservedID(LoopSimplifyID);
  AU.addRequiredID(LCSSAID);
  AU.addPreservedID(LCSSAID);
  // This is used in the LPPassManager to perform LCSSA verification on passes
  // which preserve LCSSA form.
  AU.addRequired<LCSSAVerificationPass>();
  AU.addPreserved<LCSSAVerificationPass>();

  // Loop passes are designed to run inside of a loop pass manager which means
  // that any function analyses they require must be required by the first
  // loop pass in the manager (so that it is computed before the loop pass
  // manager runs) and preserved by all loop passes in the manager. To make
  // this reasonably robust, the set needed for most loop passes is maintained
  // here. If your loop pass requires an analysis not listed here, you will
  // need to carefully audit the loop pass manager nesting structure that
  // results.
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
  AU.addPreserved<SCEVAAWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
  // FIXME: When all loop passes preserve MemorySSA, it can be required and
  // preserved here instead of the individual handling in each pass.
}

/// Manually defined generic "LoopPass" dependency initialization. This is used
/// to initialize the exact set of passes from above in \c
/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
/// with:
///
///   INITIALIZE_PASS_DEPENDENCY(LoopPass)
///
/// As-if "LoopPass" were a pass.
void llvm::initializeLoopPassPass(PassRegistry &Registry) {
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
}

/// Create MDNode for input string.
static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {
  LLVMContext &Context = TheLoop->getHeader()->getContext();
  Metadata *MDs[] = {
      MDString::get(Context, Name),
      ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
  return MDNode::get(Context, MDs);
}

/// Set input string into loop metadata while keeping other values intact.
/// If the string is already in the loop metadata, update its value if it
/// differs.
void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
                                   unsigned V) {
  SmallVector<Metadata *, 4> MDs(1);
  // If the loop already has metadata, retain it.
  MDNode *LoopID = TheLoop->getLoopID();
  if (LoopID) {
    for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
      MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
      // If it is of the form key = value, try to parse it.
      if (Node->getNumOperands() == 2) {
        MDString *S = dyn_cast<MDString>(Node->getOperand(0));
        if (S && S->getString().equals(StringMD)) {
          ConstantInt *IntMD =
              mdconst::extract_or_null<ConstantInt>(Node->getOperand(1));
          if (IntMD && IntMD->getSExtValue() == V)
            // It is already in place. Do nothing.
            return;
          // We need to update the value, so just skip it here; it will be
          // added after copying the other existing nodes.
          continue;
        }
      }
      MDs.push_back(Node);
    }
  }
  // Add new metadata.
  MDs.push_back(createStringMetadata(TheLoop, StringMD, V));
  // Replace current metadata node with new one.
  LLVMContext &Context = TheLoop->getHeader()->getContext();
  MDNode *NewLoopID = MDNode::get(Context, MDs);
  // Set operand 0 to refer to the loop id itself.
  NewLoopID->replaceOperandWith(0, NewLoopID);
  TheLoop->setLoopID(NewLoopID);
}
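
// For illustration (values are hypothetical): after
// addStringMetadataToLoop(L, "llvm.loop.unroll.count", 4), the loop's
// !llvm.loop attachment would look roughly like
//
//   !0 = distinct !{!0, !1}                  ; self-referential loop ID
//   !1 = !{!"llvm.loop.unroll.count", i32 4}
//
// with any unrelated key/value nodes of the old loop ID carried over.
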
Optional<ElementCount>
llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {
  Optional<int> Width =
      getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");

  if (Width.hasValue()) {
    Optional<int> IsScalable = getOptionalIntLoopAttribute(
        TheLoop, "llvm.loop.vectorize.scalable.enable");
    return ElementCount::get(*Width, IsScalable.getValueOr(false));
  }

  return None;
}
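
// For example, a loop carrying
//   !{!"llvm.loop.vectorize.width", i32 4}
//   !{!"llvm.loop.vectorize.scalable.enable", i1 true}
// yields a scalable ElementCount of 4 (i.e. <vscale x 4> lanes); without the
// scalable attribute, the width is treated as a fixed ElementCount.
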
Optional<MDNode *> llvm::makeFollowupLoopID(
    MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
    const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
  if (!OrigLoopID) {
    if (AlwaysNew)
      return nullptr;
    return None;
  }

  assert(OrigLoopID->getOperand(0) == OrigLoopID);

  bool InheritAllAttrs = !InheritOptionsExceptPrefix;
  bool InheritSomeAttrs =
      InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
  SmallVector<Metadata *, 8> MDs;
  MDs.push_back(nullptr);

  bool Changed = false;
  if (InheritAllAttrs || InheritSomeAttrs) {
    for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) {
      MDNode *Op = cast<MDNode>(Existing.get());

      auto InheritThisAttribute = [InheritSomeAttrs,
                                   InheritOptionsExceptPrefix](MDNode *Op) {
        if (!InheritSomeAttrs)
          return false;

        // Skip malformed attribute metadata nodes.
        if (Op->getNumOperands() == 0)
          return true;
        Metadata *NameMD = Op->getOperand(0).get();
        if (!isa<MDString>(NameMD))
          return true;
        StringRef AttrName = cast<MDString>(NameMD)->getString();

        // Do not inherit excluded attributes.
        return !AttrName.startswith(InheritOptionsExceptPrefix);
      };

      if (InheritThisAttribute(Op))
        MDs.push_back(Op);
      else
        Changed = true;
    }
  } else {
    // Modified if we dropped at least one attribute.
    Changed = OrigLoopID->getNumOperands() > 1;
  }

  bool HasAnyFollowup = false;
  for (StringRef OptionName : FollowupOptions) {
    MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
    if (!FollowupNode)
      continue;

    HasAnyFollowup = true;
    for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
      MDs.push_back(Option.get());
      Changed = true;
    }
  }

  // The attributes of the followup loop were not specified explicitly, so
  // signal to the transformation pass to add suitable attributes.
  if (!AlwaysNew && !HasAnyFollowup)
    return None;

  // If no attributes were added or removed, the previous loop ID can be
  // reused.
  if (!AlwaysNew && !Changed)
    return OrigLoopID;

  // No attributes is equivalent to having no !llvm.loop metadata at all.
  if (MDs.size() == 1)
    return nullptr;

  // Build the new loop ID.
  MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
  FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
  return FollowupLoopID;
}
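
// Illustrative use (hypothetical metadata): given a loop ID
//
//   !0 = distinct !{!0, !1, !2}
//   !1 = !{!"llvm.loop.unroll.count", i32 4}
//   !2 = !{!"llvm.loop.unroll.followup_unrolled", !3}
//   !3 = !{!"llvm.loop.unroll.disable"}
//
// a call such as makeFollowupLoopID(!0,
// {"llvm.loop.unroll.followup_unrolled"}, "llvm.loop.unroll.") drops the
// attributes with the excluded "llvm.loop.unroll." prefix and appends the
// followup attribute !3, producing a new ID of the form !{<self>, !3} for
// the unrolled loop.
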
bool llvm::hasDisableAllTransformsHint(const Loop *L) {
  return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced);
}

bool llvm::hasDisableLICMTransformsHint(const Loop *L) {
  return getBooleanLoopAttribute(L, LLVMLoopDisableLICM);
}

TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
  if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
    return TM_SuppressedByUser;

  Optional<int> Count =
      getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
  if (Count.hasValue())
    return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;

  if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
    return TM_ForcedByUser;

  if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
    return TM_ForcedByUser;

  if (hasDisableAllTransformsHint(L))
    return TM_Disable;

  return TM_Unspecified;
}
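
// These attributes typically originate from source-level pragmas; e.g.
// "#pragma clang loop unroll(disable)" lowers to "llvm.loop.unroll.disable"
// (TM_SuppressedByUser above), and "#pragma clang loop unroll_count(8)"
// lowers to "llvm.loop.unroll.count" = 8 (TM_ForcedByUser).
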
TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
  if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
    return TM_SuppressedByUser;

  Optional<int> Count =
      getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
  if (Count.hasValue())
    return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;

  if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
    return TM_ForcedByUser;

  if (hasDisableAllTransformsHint(L))
    return TM_Disable;

  return TM_Unspecified;
}

TransformationMode llvm::hasVectorizeTransformation(const Loop *L) {
  Optional<bool> Enable =
      getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");

  if (Enable == false)
    return TM_SuppressedByUser;

  Optional<ElementCount> VectorizeWidth =
      getOptionalElementCountLoopAttribute(L);
  Optional<int> InterleaveCount =
      getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");

  // 'Forcing' vector width and interleave count to one effectively disables
  // this transformation.
  if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
      InterleaveCount == 1)
    return TM_SuppressedByUser;

  if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
    return TM_Disable;

  if (Enable == true)
    return TM_ForcedByUser;

  if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
    return TM_Disable;

  if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
    return TM_Enable;

  if (hasDisableAllTransformsHint(L))
    return TM_Disable;

  return TM_Unspecified;
}
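
// Sketch of the resulting policy (assuming clang's loop pragmas): an explicit
// "vectorize(enable)" forces the transformation (TM_ForcedByUser); a forced
// width of 1 together with interleave_count(1), but without an explicit
// enable, merely disables it (TM_Disable); and "llvm.loop.isvectorized"
// marks an already-vectorized loop so it is not transformed again.
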
TransformationMode llvm::hasDistributeTransformation(const Loop *L) {
  if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
    return TM_ForcedByUser;

  if (hasDisableAllTransformsHint(L))
    return TM_Disable;

  return TM_Unspecified;
}

TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {
  if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
    return TM_SuppressedByUser;

  if (hasDisableAllTransformsHint(L))
    return TM_Disable;

  return TM_Unspecified;
}

/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
SmallVector<DomTreeNode *, 16>
llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
  SmallVector<DomTreeNode *, 16> Worklist;
  auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
    // Only include subregions in the top level loop.
    BasicBlock *BB = DTN->getBlock();
    if (CurLoop->contains(BB))
      Worklist.push_back(DTN);
  };

  AddRegionToWorklist(N);

  for (size_t I = 0; I < Worklist.size(); I++) {
    for (DomTreeNode *Child : Worklist[I]->children())
      AddRegionToWorklist(Child);
  }

  return Worklist;
}

void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
                          LoopInfo *LI, MemorySSA *MSSA) {
  assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
  auto *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader should exist!");

  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

  // Now that we know the removal is safe, remove the loop by changing the
  // branch from the preheader to go to the single exit block.
  //
  // Because we're deleting a large chunk of code at once, the sequence in
  // which we remove things is very important to avoid invalidation issues.

  // Tell ScalarEvolution that the loop is deleted. Do this before
  // deleting the loop so that ScalarEvolution can look at the loop
  // to determine what it needs to clean up.
  if (SE)
    SE->forgetLoop(L);

  auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator());
  assert(OldBr && "Preheader must end with a branch");
  assert(OldBr->isUnconditional() && "Preheader must have a single successor");
  // Connect the preheader to the exit block. Keep the old edge to the header
  // around to perform the dominator tree update in two separate steps
  // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
  // preheader -> header.
  //
  //
  // 0. Preheader         1. Preheader          2. Preheader
  //        |                  |   |                   |
  //        V                  |   V                   |
  //   Header <--\         |  Header <--\          |  Header <--\
  //    |  |     |         |   |  |     |          |   |  |     |
  //    |  V     |         |   |  V     |          |   |  V     |
  //    | Body --/         |   | Body --/          |   | Body --/
  //    V                  V   V                   V   V
  //   Exit                Exit                    Exit
  //
  // By doing this in two separate steps we can perform the dominator tree
  // update without using the batch update API.
  //
  // Even when the loop is never executed, we cannot remove the edge from the
  // source block to the exit block. Consider the case where the unexecuted
  // loop branches back to an outer loop. If we deleted the loop and removed
  // the edge coming to this inner loop, this will break the outer loop
  // structure (by deleting the backedge of the outer loop). If the outer loop
  // is indeed a non-loop, it will be deleted in a future iteration of the
  // loop deletion pass.
  IRBuilder<> Builder(OldBr);

  auto *ExitBlock = L->getUniqueExitBlock();
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
  if (ExitBlock) {
    assert(ExitBlock && "Should have a unique exit block!");
    assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");

    Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
    // Remove the old branch. The conditional branch becomes a new terminator.
    OldBr->eraseFromParent();

    // Rewrite phis in the exit block to get their inputs from the Preheader
    // instead of the exiting block.
    for (PHINode &P : ExitBlock->phis()) {
      // Set the zero'th element of Phi to be from the preheader and remove
      // all other incoming values. Given the loop has dedicated exits, all
      // other incoming values must be from the exiting blocks.
      int PredIndex = 0;
      P.setIncomingBlock(PredIndex, Preheader);
      // Removes all incoming values from all other exiting blocks (including
      // duplicate values from an exiting block).
      // Nuke all entries except the zero'th entry which is the preheader
      // entry.
      // NOTE! We need to remove Incoming Values in reverse order as done
      // below, to keep the indices valid for deletion (removeIncomingValues
      // updates getNumIncomingValues and shifts all values down into the
      // operand being deleted).
      for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
        P.removeIncomingValue(e - i, false);

      assert((P.getNumIncomingValues() == 1 &&
              P.getIncomingBlock(PredIndex) == Preheader) &&
             "Should have exactly one value and that's from the preheader!");
    }

    if (DT) {
      DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
      if (MSSA) {
        MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}},
                            *DT);
        if (VerifyMemorySSA)
          MSSA->verifyMemorySSA();
      }
    }

    // Disconnect the loop body by branching directly to its exit.
    Builder.SetInsertPoint(Preheader->getTerminator());
    Builder.CreateBr(ExitBlock);
    // Remove the old branch.
    Preheader->getTerminator()->eraseFromParent();
  } else {
    assert(L->hasNoExitBlocks() &&
           "Loop should have either zero or one exit blocks.");

    Builder.SetInsertPoint(OldBr);
    Builder.CreateUnreachable();
    Preheader->getTerminator()->eraseFromParent();
  }

  if (DT) {
    DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
    if (MSSA) {
      MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}},
                          *DT);
      SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(),
                                                   L->block_end());
      MSSAU->removeBlocks(DeadBlockSet);
      if (VerifyMemorySSA)
        MSSA->verifyMemorySSA();
    }
  }

  // Use a set to deduplicate and a vector to guarantee deterministic
  // ordering.
  llvm::SmallDenseSet<std::pair<DIVariable *, DIExpression *>, 4> DeadDebugSet;
  llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst;

  if (ExitBlock) {
    // Given LCSSA form is satisfied, we should not have users of instructions
    // within the dead loop outside of the loop. However, LCSSA doesn't take
    // unreachable uses into account. We handle them here.
    // We could do this after dropping all references (in that case all users
    // inside the loop would already be eliminated, leaving less work to do),
    // but according to the API doc of User::dropAllReferences the only valid
    // operation after dropping references is deletion. So let's substitute
    // all uses of instructions from the loop with undef values of the
    // corresponding type first.
    for (auto *Block : L->blocks())
      for (Instruction &I : *Block) {
        auto *Undef = UndefValue::get(I.getType());
        for (Use &U : llvm::make_early_inc_range(I.uses())) {
          if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
            if (L->contains(Usr->getParent()))
              continue;
          // If we have a DT, we can check that uses outside the loop occur
          // only in unreachable blocks.
          if (DT)
            assert(!DT->isReachableFromEntry(U) &&
                   "Unexpected user in reachable block");
          U.set(Undef);
        }
        auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
        if (!DVI)
          continue;
        auto Key =
            DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()});
        if (Key != DeadDebugSet.end())
          continue;
        DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()});
        DeadDebugInst.push_back(DVI);
      }

    // After the loop has been deleted all the values defined and modified
    // inside the loop are going to be unavailable.
    // Since debug values in the loop have been deleted, inserting an undef
    // dbg.value truncates the range of any dbg.value before the loop where
    // the loop used to be. This is particularly important for constant
    // values.
    DIBuilder DIB(*ExitBlock->getModule());
    Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
    assert(InsertDbgValueBefore &&
           "There should be a non-PHI instruction in exit block, else these "
           "instructions will have no parent.");
    for (auto *DVI : DeadDebugInst)
      DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()),
                                  DVI->getVariable(), DVI->getExpression(),
                                  DVI->getDebugLoc(), InsertDbgValueBefore);
  }

  // Remove the block from the reference counting scheme, so that we can
  // delete it freely later.
  for (auto *Block : L->blocks())
    Block->dropAllReferences();

  if (MSSA && VerifyMemorySSA)
    MSSA->verifyMemorySSA();

  if (LI) {
    // Erase the instructions and the blocks without having to worry
    // about ordering because we already dropped the references.
    // NOTE: This iteration is safe because erasing the block does not remove
    // its entry from the loop's block list. We do that in the next section.
    for (BasicBlock *BB : L->blocks())
      BB->eraseFromParent();

    // Finally, remove the blocks from LoopInfo. This has to happen late
    // because otherwise our loop iterators won't work.
    SmallPtrSet<BasicBlock *, 8> blocks;
    blocks.insert(L->block_begin(), L->block_end());
    for (BasicBlock *BB : blocks)
      LI->removeBlock(BB);

    // The last step is to update LoopInfo now that we've eliminated this
    // loop.
    // Note: LoopInfo::erase removes the given loop and relinks its subloops
    // to its parent, while removeLoop/removeChildLoop remove the given loop
    // without relinking its subloops, which is what we want.
    if (Loop *ParentLoop = L->getParentLoop()) {
      Loop::iterator I = find(*ParentLoop, L);
      assert(I != ParentLoop->end() && "Couldn't find loop");
      ParentLoop->removeChildLoop(I);
    } else {
      Loop::iterator I = find(*LI, L);
      assert(I != LI->end() && "Couldn't find loop");
      LI->removeLoop(I);
    }
    LI->destroy(L);
  }
}

static Loop *getOutermostLoop(Loop *L) {
  while (Loop *Parent = L->getParentLoop())
    L = Parent;
  return L;
}

void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
                             LoopInfo &LI, MemorySSA *MSSA) {
  auto *Latch = L->getLoopLatch();
  assert(Latch && "multiple latches not yet supported");
  auto *Header = L->getHeader();
  Loop *OutermostLoop = getOutermostLoop(L);

  SE.forgetLoop(L);

  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

  // Update the CFG and domtree. We chose to special-case a couple of common
  // cases for code quality and test readability reasons.
  [&]() -> void {
    if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) {
      if (!BI->isConditional()) {
        DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
        (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU,
                                  MSSAU.get());
        return;
      }

      // Conditional latch/exit - note that the latch can be shared by an
      // inner and an outer loop, so the other target doesn't need to be an
      // exit.
      if (L->isLoopExiting(Latch)) {
        // TODO: Generalize ConstantFoldTerminator so that it can be used
        // here without invalidating LCSSA or MemorySSA. (Tricky case for
        // LCSSA: header is an exit block of a preceding sibling loop w/o
        // dedicated exits.)
        const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0;
        BasicBlock *ExitBB = BI->getSuccessor(ExitIdx);

        DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
        Header->removePredecessor(Latch, true);

        IRBuilder<> Builder(BI);
        auto *NewBI = Builder.CreateBr(ExitBB);
        // Transfer the metadata to the new branch instruction (minus the
        // loop info since this is no longer a loop).
        NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg,
                                  LLVMContext::MD_annotation});

        BI->eraseFromParent();
        DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}});
        if (MSSA)
          MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT);
        return;
      }
    }

    // General case. By splitting the backedge, and then explicitly making it
    // unreachable, we gracefully handle corner cases such as switch and
    // invoke terminators.
    auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());

    DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
    (void)changeToUnreachable(BackedgeBB->getTerminator(),
                              /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
  }();

  // Erase (and destroy) this loop instance. Handles relinking sub-loops
  // and blocks within the loop as needed.
  LI.erase(L);

  // If the loop we broke had a parent, then changeToUnreachable might have
  // caused a block to be removed from the parent loop (see loop_nest_lcssa
  // test case in zero-btc.ll for an example), thus changing the parent's
  // exit blocks. If that happened, we need to rebuild LCSSA on the outermost
  // loop which might have had a block removed.
  if (OutermostLoop != L)
    formLCSSARecursively(*OutermostLoop, DT, &LI, &SE);
}

/// Checks if \p L has an exiting latch branch. There may also be other
/// exiting blocks. Returns the branch instruction terminating the loop
/// latch if the above check succeeds, nullptr otherwise.
static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) {
  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return nullptr;

  BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
    return nullptr;

  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
          LatchBR->getSuccessor(1) == L->getHeader()) &&
         "At least one edge out of the latch must go to the header");

  return LatchBR;
}

/// Return the estimated trip count for any exiting branch which dominates
/// the loop latch.
static Optional<uint64_t>
getEstimatedTripCount(BranchInst *ExitingBranch, Loop *L,
                      uint64_t &OrigExitWeight) {
  // To estimate the number of times the loop body was executed, we want to
  // know the number of times the backedge was taken, vs. the number of times
  // we exited the loop.
  uint64_t LoopWeight, ExitWeight;
  if (!ExitingBranch->extractProfMetadata(LoopWeight, ExitWeight))
    return None;

  if (L->contains(ExitingBranch->getSuccessor(1)))
    std::swap(LoopWeight, ExitWeight);

  if (!ExitWeight)
    // We don't have a way to represent a predicated-infinite trip count, so
    // bail out.
    return None;

  OrigExitWeight = ExitWeight;

  // Estimated exit count is a ratio of the loop weight by the weight of the
  // edge exiting the loop, rounded to nearest.
  uint64_t ExitCount = llvm::divideNearest(LoopWeight, ExitWeight);
  // Estimated trip count is one plus estimated exit count.
  return ExitCount + 1;
}
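
// Worked example (hypothetical weights): a latch branch carrying
// !{!"branch_weights", i32 300, i32 100}, i.e. 300 backedge-taken events per
// 100 exits, gives ExitCount = divideNearest(300, 100) = 3 and therefore an
// estimated trip count of 4.
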
Optional<unsigned>
llvm::getLoopEstimatedTripCount(Loop *L,
                                unsigned *EstimatedLoopInvocationWeight) {
  // Currently we take the estimated exit count only from the loop latch,
  // ignoring other exiting blocks. This can overestimate the trip count
  // if we exit through another exit, but can never underestimate it.
  // TODO: incorporate information from other exits
  if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) {
    uint64_t ExitWeight;
    if (Optional<uint64_t> EstTripCount =
            getEstimatedTripCount(LatchBranch, L, ExitWeight)) {
      if (EstimatedLoopInvocationWeight)
        *EstimatedLoopInvocationWeight = ExitWeight;
      return *EstTripCount;
    }
  }
  return None;
}

bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
                                     unsigned EstimatedloopInvocationWeight) {
  // At the moment we support changing the estimated trip count of the latch
  // branch only. We could extend this API to manipulate estimated trip
  // counts for any exit.
  BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
  if (!LatchBranch)
    return false;

  // Calculate taken and exit weights.
  unsigned LatchExitWeight = 0;
  unsigned BackedgeTakenWeight = 0;

  if (EstimatedTripCount > 0) {
    LatchExitWeight = EstimatedloopInvocationWeight;
    BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight;
  }

  // Swap the weights if the backedge is taken when the condition is "false".
  if (LatchBranch->getSuccessor(0) != L->getHeader())
    std::swap(BackedgeTakenWeight, LatchExitWeight);

  MDBuilder MDB(LatchBranch->getContext());

  // Set/Update profile metadata.
  LatchBranch->setMetadata(
      LLVMContext::MD_prof,
      MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight));

  return true;
}
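
// For instance (hypothetical values), setLoopEstimatedTripCount(L, 100, 1)
// attaches !{!"branch_weights", i32 99, i32 1} to the latch branch (when
// successor 0 is the header): 99 backedge-taken events per exit, which
// getLoopEstimatedTripCount reads back as divideNearest(99, 1) + 1 == 100.
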
bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
                                              ScalarEvolution &SE) {
  Loop *OuterL = InnerLoop->getParentLoop();
  if (!OuterL)
    return true;

  // Get the backedge taken count for the inner loop.
  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
  const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
  if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
      !InnerLoopBECountSC->getType()->isIntegerTy())
    return false;

  // Get whether the count is invariant to the outer loop.
  ScalarEvolution::LoopDisposition LD =
      SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
  if (LD != ScalarEvolution::LoopInvariant)
    return false;

  return true;
}

CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
  switch (RK) {
  default:
    llvm_unreachable("Unknown min/max recurrence kind");
  case RecurKind::UMin:
    return CmpInst::ICMP_ULT;
  case RecurKind::UMax:
    return CmpInst::ICMP_UGT;
  case RecurKind::SMin:
    return CmpInst::ICMP_SLT;
  case RecurKind::SMax:
    return CmpInst::ICMP_SGT;
  case RecurKind::FMin:
    return CmpInst::FCMP_OLT;
  case RecurKind::FMax:
    return CmpInst::FCMP_OGT;
  }
}

Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal,
                               RecurKind RK, Value *Left, Value *Right) {
  if (auto VTy = dyn_cast<VectorType>(Left->getType()))
    StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
  Value *Cmp =
      Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
  return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
}

Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
                            Value *Right) {
  CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK);
  Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
  Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
  return Select;
}

// Helper to generate an ordered reduction.
Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
                                 unsigned Op, RecurKind RdxKind) {
  unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();

  // Extract and apply reduction ops in ascending order:
  // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ...) + Scl[VF-1]
  Value *Result = Acc;
  for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
    Value *Ext =
        Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));

    if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
      Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
                                   "bin.rdx");
    } else {
      assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
             "Invalid min/max");
      Result = createMinMaxOp(Builder, RdxKind, Result, Ext);
    }
  }

  return Result;
}

// Helper to generate a log2 shuffle reduction.
Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
                                 unsigned Op, RecurKind RdxKind) {
  unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
  // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
  // and vector ops, reducing the set of values being computed by half each
  // round.
  assert(isPowerOf2_32(VF) &&
         "Reduction emission only supported for pow2 vectors!");
  // Note: fast-math-flags are controlled by the builder configuration and are
  // assumed to apply to all generated arithmetic instructions. Other
  // poison-generating flags (nsw/nuw/inbounds/inrange/exact) are not part
  // of the builder configuration, and since they're not passed explicitly,
  // will never be relevant here. Note that it would be generally unsound to
  // propagate these from an intrinsic call to the expansion anyways as we
  // change the order of operations.
  Value *TmpVec = Src;
  SmallVector<int, 32> ShuffleMask(VF);
  for (unsigned i = VF; i != 1; i >>= 1) {
    // Move the upper half of the vector to the lower half.
    for (unsigned j = 0; j != i / 2; ++j)
      ShuffleMask[j] = i / 2 + j;

    // Fill the rest of the mask with undef.
    std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);

    Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");

    if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
      TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
                                   "bin.rdx");
    } else {
      assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
             "Invalid min/max");
      TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
    }
  }
  // The result is in the first element of the vector.
  return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}
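
// To make the rounds concrete: for VF = 4 the loop above emits two rounds,
//   round 1: mask <2, 3, -1, -1>  - combines lanes {0,2} and {1,3}
//   round 2: mask <1, -1, -1, -1> - combines lanes {0,1}
// after which lane 0 holds the final reduction value and is extracted.
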
Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder,
                                            const TargetTransformInfo *TTI,
                                            Value *Src,
                                            const RecurrenceDescriptor &Desc,
                                            PHINode *OrigPhi) {
  assert(RecurrenceDescriptor::isSelectCmpRecurrenceKind(
             Desc.getRecurrenceKind()) &&
         "Unexpected reduction kind");
  Value *InitVal = Desc.getRecurrenceStartValue();
  Value *NewVal = nullptr;

  // First use the original phi to determine the new value we're trying to
  // select from in the loop.
  SelectInst *SI = nullptr;
  for (auto *U : OrigPhi->users()) {
    if ((SI = dyn_cast<SelectInst>(U)))
      break;
  }
  assert(SI && "One user of the original phi should be a select");

  if (SI->getTrueValue() == OrigPhi)
    NewVal = SI->getFalseValue();
  else {
    assert(SI->getFalseValue() == OrigPhi &&
           "At least one input to the select should be the original Phi");
    NewVal = SI->getTrueValue();
  }

  // Create a splat vector with the start value and compare this to the
  // vector we want to reduce.
  ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
  Value *Right = Builder.CreateVectorSplat(EC, InitVal);
  Value *Cmp =
      Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");

  // If any predicate is true, it means that we want to select the new value.
  Cmp = Builder.CreateOrReduce(Cmp);
  return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
}

Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
                                         const TargetTransformInfo *TTI,
                                         Value *Src, RecurKind RdxKind) {
  auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
  switch (RdxKind) {
  case RecurKind::Add:
    return Builder.CreateAddReduce(Src);
  case RecurKind::Mul:
    return Builder.CreateMulReduce(Src);
  case RecurKind::And:
    return Builder.CreateAndReduce(Src);
  case RecurKind::Or:
    return Builder.CreateOrReduce(Src);
  case RecurKind::Xor:
    return Builder.CreateXorReduce(Src);
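  // Note on the neutral element below: -0.0 (not +0.0) is the identity for
  // fadd, since x + (-0.0) == x for every x including +0.0, whereas
  // (-0.0) + (+0.0) == +0.0 would lose the sign of a negative zero.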
  case RecurKind::FMulAdd:
  case RecurKind::FAdd:
    return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy),
                                    Src);
  case RecurKind::FMul:
    return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src);
  case RecurKind::SMax:
    return Builder.CreateIntMaxReduce(Src, true);
  case RecurKind::SMin:
    return Builder.CreateIntMinReduce(Src, true);
  case RecurKind::UMax:
    return Builder.CreateIntMaxReduce(Src, false);
  case RecurKind::UMin:
    return Builder.CreateIntMinReduce(Src, false);
  case RecurKind::FMax:
    return Builder.CreateFPMaxReduce(Src);
  case RecurKind::FMin:
    return Builder.CreateFPMinReduce(Src);
  default:
    llvm_unreachable("Unhandled opcode");
  }
}

Value *llvm::createTargetReduction(IRBuilderBase &B,
                                   const TargetTransformInfo *TTI,
                                   const RecurrenceDescriptor &Desc, Value *Src,
                                   PHINode *OrigPhi) {
  // TODO: Support in-order reductions based on the recurrence descriptor.
  // All ops in the reduction inherit fast-math-flags from the recurrence
  // descriptor.
  IRBuilderBase::FastMathFlagGuard FMFGuard(B);
  B.setFastMathFlags(Desc.getFastMathFlags());

  RecurKind RK = Desc.getRecurrenceKind();
  if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK))
    return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi);

  return createSimpleTargetReduction(B, TTI, Src, RK);
}

Value *llvm::createOrderedReduction(IRBuilderBase &B,
                                    const RecurrenceDescriptor &Desc,
                                    Value *Src, Value *Start) {
  assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||
          Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
         "Unexpected reduction kind");
  assert(Src->getType()->isVectorTy() && "Expected a vector type");
  assert(!Start->getType()->isVectorTy() && "Expected a scalar type");

  return B.CreateFAddReduce(Start, Src);
}

void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
  auto *VecOp = dyn_cast<Instruction>(I);
  if (!VecOp)
    return;
  auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
                                            : dyn_cast<Instruction>(OpValue);
  if (!Intersection)
    return;
  const unsigned Opcode = Intersection->getOpcode();
  VecOp->copyIRFlags(Intersection);
  for (auto *V : VL) {
    auto *Instr = dyn_cast<Instruction>(V);
    if (!Instr)
      continue;
    if (OpValue == nullptr || Opcode == Instr->getOpcode())
      VecOp->andIRFlags(V);
  }
}
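
// In effect, the vector instruction keeps only the IR flags common to all of
// the scalar instructions it replaces: e.g. combining {add nsw, add} yields
// a vector add without nsw, because andIRFlags intersects the flag sets.
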
  968. bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,
  969. ScalarEvolution &SE) {
  970. const SCEV *Zero = SE.getZero(S->getType());
  971. return SE.isAvailableAtLoopEntry(S, L) &&
  972. SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero);
  973. }
  974. bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
  975. ScalarEvolution &SE) {
  976. const SCEV *Zero = SE.getZero(S->getType());
  977. return SE.isAvailableAtLoopEntry(S, L) &&
  978. SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
  979. }
  980. bool llvm::cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
  981. bool Signed) {
  982. unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
  983. APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
  984. APInt::getMinValue(BitWidth);
  985. auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
  986. return SE.isAvailableAtLoopEntry(S, L) &&
  987. SE.isLoopEntryGuardedByCond(L, Predicate, S,
  988. SE.getConstant(Min));
  989. }
  990. bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
  991. bool Signed) {
  992. unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
  993. APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) :
  994. APInt::getMaxValue(BitWidth);
  995. auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
  996. return SE.isAvailableAtLoopEntry(S, L) &&
  997. SE.isLoopEntryGuardedByCond(L, Predicate, S,
  998. SE.getConstant(Max));
  999. }
  1000. //===----------------------------------------------------------------------===//
  1001. // rewriteLoopExitValues - Optimize IV users outside the loop.
  1002. // As a side effect, reduces the amount of IV processing within the loop.
  1003. //===----------------------------------------------------------------------===//
  1004. static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {
  1005. SmallPtrSet<const Instruction *, 8> Visited;
  1006. SmallVector<const Instruction *, 8> WorkList;
  1007. Visited.insert(I);
  1008. WorkList.push_back(I);
  1009. while (!WorkList.empty()) {
  1010. const Instruction *Curr = WorkList.pop_back_val();
  1011. // This use is outside the loop, nothing to do.
  1012. if (!L->contains(Curr))
  1013. continue;
  1014. // Do we assume it is a "hard" use which will not be eliminated easily?
  1015. if (Curr->mayHaveSideEffects())
  1016. return true;
  1017. // Otherwise, add all its users to worklist.
  1018. for (auto U : Curr->users()) {
  1019. auto *UI = cast<Instruction>(U);
  1020. if (Visited.insert(UI).second)
  1021. WorkList.push_back(UI);
  1022. }
  1023. }
  1024. return false;
  1025. }
  1026. // Collect information about PHI nodes which can be transformed in
  1027. // rewriteLoopExitValues.
  1028. struct RewritePhi {
  1029. PHINode *PN; // For which PHI node is this replacement?
  1030. unsigned Ith; // For which incoming value?
  1031. const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting.
  1032. Instruction *ExpansionPoint; // Where we'd like to expand that SCEV?
  1033. bool HighCost; // Is this expansion a high-cost?
  1034. RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
  1035. bool H)
  1036. : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
  1037. HighCost(H) {}
  1038. };
  1039. // Check whether it is possible to delete the loop after rewriting exit
  1040. // value. If it is possible, ignore ReplaceExitValue and do rewriting
  1041. // aggressively.
  1042. static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  1043. BasicBlock *Preheader = L->getLoopPreheader();
  1044. // If there is no preheader, the loop will not be deleted.
  1045. if (!Preheader)
  1046. return false;
  1047. // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
  1048. // We obviate multiple ExitingBlocks case for simplicity.
  1049. // TODO: If we see testcase with multiple ExitingBlocks can be deleted
  1050. // after exit value rewriting, we can enhance the logic here.
  1051. SmallVector<BasicBlock *, 4> ExitingBlocks;
  1052. L->getExitingBlocks(ExitingBlocks);
  1053. SmallVector<BasicBlock *, 8> ExitBlocks;
  1054. L->getUniqueExitBlocks(ExitBlocks);
  1055. if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
  1056. return false;
  1057. BasicBlock *ExitBlock = ExitBlocks[0];
  1058. BasicBlock::iterator BI = ExitBlock->begin();
  1059. while (PHINode *P = dyn_cast<PHINode>(BI)) {
  1060. Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
  1061. // If the Incoming value of P is found in RewritePhiSet, we know it
  1062. // could be rewritten to use a loop invariant value in transformation
  1063. // phase later. Skip it in the loop invariant check below.
  1064. bool found = false;
  1065. for (const RewritePhi &Phi : RewritePhiSet) {
  1066. unsigned i = Phi.Ith;
  1067. if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
  1068. found = true;
  1069. break;
  1070. }
  1071. }
  1072. Instruction *I;
  1073. if (!found && (I = dyn_cast<Instruction>(Incoming)))
  1074. if (!L->hasLoopInvariantOperands(I))
  1075. return false;
  1076. ++BI;
  1077. }
  1078. for (auto *BB : L->blocks())
  1079. if (llvm::any_of(*BB, [](Instruction &I) {
  1080. return I.mayHaveSideEffects();
  1081. }))
  1082. return false;
  1083. return true;
  1084. }
int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
                                ScalarEvolution *SE,
                                const TargetTransformInfo *TTI,
                                SCEVExpander &Rewriter, DominatorTree *DT,
                                ReplaceExitVal ReplaceExitValue,
                                SmallVector<WeakTrackingVH, 16> &DeadInsts) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
  // the exit blocks of the loop to find them.
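  // For illustration (hypothetical IR, not from any particular test): an exit
  // block in LCSSA form might contain
  //   exit:
  //     %sum.lcssa = phi i32 [ %sum.next, %loop ]
  // where %sum.next is defined inside %loop; such LCSSA PHIs are the rewrite
  // candidates collected below.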
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop. Evaluate the value it
        // contains when the loop exits, if possible. We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
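        // For example (an illustrative case, not tied to any testcase): for a
        // canonical induction variable %iv = {0,+,1}<%L> in a loop whose trip
        // count SCEV can compute, the value of %iv after the loop usually
        // folds to a loop-invariant expression in terms of that trip count,
        // and no per-exit evaluation is needed.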
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences). Is there existing code we can reuse?
          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
          if (isa<SCEVCouldNotCompute>(ExitCount))
            continue;
          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
            if (AddRec->getLoop() == L)
              ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
          if (isa<SCEVCouldNotCompute>(ExitValue) ||
              !SE->isLoopInvariant(ExitValue, L) ||
              !isSafeToExpand(ExitValue, *SE))
            continue;
        }

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which can not be optimized
        // away. Avoid doing so unless we know we have a value which computes
        // the ExitValue already. TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) &&
            !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst))
          continue;

        // Check if expansions of this SCEV would count as being high cost.
        bool HighCost = Rewriter.isHighCostExpansion(
            ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);

        // Note that we must not perform expansions until after
        // we have queried *all* the costs: if we perform a temporary
        // expansion in between, one that we might not intend to keep, said
        // expansion *may* affect the cost calculation of the next SCEVs we
        // query, and the next SCEV may erroneously get a smaller cost.

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitValue, Inst, HighCost);
      }
    }
  }
  // TODO: evaluate whether it is beneficial to change how we calculate
  // high-cost: if we have SCEV 'A' which we know we will expand, should we
  // calculate the cost of other SCEVs after expanding SCEV 'A', thus
  // potentially giving a cost bonus to those other SCEVs?

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
  int NumReplaced = 0;

  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite the exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost)
      continue;

    Value *ExitVal = Rewriter.expandCodeFor(
        Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint);

    LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
                      << '\n'
                      << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");

#ifndef NDEBUG
    // If we reuse an instruction from a loop which is neither L nor one of
    // its containing loops, we end up breaking LCSSA form for this loop by
    // creating a new use of its instruction.
    if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
      if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
        if (EVL != L)
          assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

    NumReplaced++;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // It's necessary to tell ScalarEvolution about this explicitly so that
    // it can walk the def-use list and forget all SCEVs, as it may not be
    // watching the PHI itself. Once the new exit value is in place, there
    // may not be a def-use connection between the loop and every instruction
    // which got a SCEVAddRecExpr for that loop.
    SE->forgetValue(PN);

    // If this instruction is dead now, schedule it for deletion. We don't
    // delete it immediately, to avoid invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return NumReplaced;
}
/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
/// \p OrigLoop.
void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
                                        Loop *RemainderLoop, uint64_t UF) {
  assert(UF > 0 && "Zero unrolled factor is not supported");
  assert(UnrolledLoop != RemainderLoop &&
         "Unrolled and Remainder loops are expected to be distinct");

  // Get number of iterations in the original scalar loop.
  unsigned OrigLoopInvocationWeight = 0;
  Optional<unsigned> OrigAverageTripCount =
      getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
  if (!OrigAverageTripCount)
    return;

  // Calculate number of iterations in unrolled loop.
  unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
  // Calculate number of iterations for remainder loop.
  unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
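  // For example (illustrative numbers only): an estimated trip count of 10
  // with UF = 4 gives the unrolled loop 10 / 4 = 2 iterations and the
  // remainder loop 10 % 4 = 2 iterations per invocation.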
  setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount,
                            OrigLoopInvocationWeight);
  setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount,
                            OrigLoopInvocationWeight);
}

/// Utility that implements appending of loops onto a worklist.
/// Loops are added in preorder (analogous to reverse postorder for trees),
/// and the worklist is processed LIFO.
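///
/// For example (a hypothetical nest): appending top-level loops A and B (in
/// program order) via appendLoopsToWorklist, where A contains subloops A1 and
/// A2, makes the LIFO worklist pop A1, A2, A, then B, so each loop's subloops
/// are processed before the loop itself.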
template <typename RangeT>
void llvm::appendReversedLoopsToWorklist(
    RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) {
  // We use an internal worklist to build up the preorder traversal without
  // recursion.
  SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;

  // We walk the initial sequence of loops in reverse because we generally want
  // to visit defs before uses and the worklist is LIFO.
  for (Loop *RootL : Loops) {
    assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
    assert(PreOrderWorklist.empty() &&
           "Must start with an empty preorder walk worklist.");
    PreOrderWorklist.push_back(RootL);
    do {
      Loop *L = PreOrderWorklist.pop_back_val();
      PreOrderWorklist.append(L->begin(), L->end());
      PreOrderLoops.push_back(L);
    } while (!PreOrderWorklist.empty());

    Worklist.insert(std::move(PreOrderLoops));
    PreOrderLoops.clear();
  }
}

template <typename RangeT>
void llvm::appendLoopsToWorklist(RangeT &&Loops,
                                 SmallPriorityWorklist<Loop *, 4> &Worklist) {
  appendReversedLoopsToWorklist(reverse(Loops), Worklist);
}

template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(
    ArrayRef<Loop *> &Loops, SmallPriorityWorklist<Loop *, 4> &Worklist);

template void
llvm::appendLoopsToWorklist<Loop &>(Loop &L,
                                    SmallPriorityWorklist<Loop *, 4> &Worklist);

void llvm::appendLoopsToWorklist(LoopInfo &LI,
                                 SmallPriorityWorklist<Loop *, 4> &Worklist) {
  appendReversedLoopsToWorklist(LI, Worklist);
}

Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
                      LoopInfo *LI, LPPassManager *LPM) {
  Loop &New = *LI->AllocateLoop();
  if (PL)
    PL->addChildLoop(&New);
  else
    LI->addTopLevelLoop(&New);

  if (LPM)
    LPM->addLoop(New);

  // Add all of the blocks in L to the new loop.
  for (BasicBlock *BB : L->blocks())
    if (LI->getLoopFor(BB) == L)
      New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), *LI);

  // Add all of the subloops to the new loop.
  for (Loop *I : *L)
    cloneLoop(I, &New, VM, LI, LPM);

  return &New;
}
/// IR Values for the lower and upper bounds of a pointer evolution. We
/// need to use value-handles because SCEV expansion can invalidate previously
/// expanded values. Thus expansion of a pointer can invalidate the bounds for
/// a previous one.
struct PointerBounds {
  TrackingVH<Value> Start;
  TrackingVH<Value> End;
};

/// Expand code for the lower and upper bound of the pointer group \p CG
/// in \p TheLoop. \return the values for the bounds.
static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
                                  Loop *TheLoop, Instruction *Loc,
                                  SCEVExpander &Exp) {
  LLVMContext &Ctx = Loc->getContext();
  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, CG->AddressSpace);

  Value *Start = nullptr, *End = nullptr;
  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
  Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
  End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
  LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
  return {Start, End};
}

/// Turns a collection of checks into a collection of expanded upper and
/// lower bounds for both pointers in the check.
static SmallVector<std::pair<PointerBounds, PointerBounds>, 4>
expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
             Instruction *Loc, SCEVExpander &Exp) {
  SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;

  // Here we're relying on the SCEV Expander's cache to only emit code for the
  // same bounds once.
  transform(PointerChecks, std::back_inserter(ChecksWithBounds),
            [&](const RuntimePointerCheck &Check) {
              PointerBounds First = expandBounds(Check.first, L, Loc, Exp),
                            Second = expandBounds(Check.second, L, Loc, Exp);
              return std::make_pair(First, Second);
            });

  return ChecksWithBounds;
}
Value *llvm::addRuntimeChecks(
    Instruction *Loc, Loop *TheLoop,
    const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
    SCEVExpander &Exp) {
  // TODO: Move noalias annotation code from LoopVersioning here and share
  // with LV if possible.
  // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if
  // possible.
  auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp);

  LLVMContext &Ctx = Loc->getContext();
  IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
                                           Loc->getModule()->getDataLayout());
  ChkBuilder.SetInsertPoint(Loc);
  // Our instructions might fold to a constant.
  Value *MemoryRuntimeCheck = nullptr;

  for (const auto &Check : ExpandedChecks) {
    const PointerBounds &A = Check.first, &B = Check.second;
    // Check if two pointers (A and B) conflict where conflict is computed as:
    // start(A) <= end(B) && start(B) <= end(A)
    unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
    unsigned AS1 = B.Start->getType()->getPointerAddressSpace();

    assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
           (AS1 == A.End->getType()->getPointerAddressSpace()) &&
           "Trying to bounds check pointers with different address spaces");

    Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
    Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);

    Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
    Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
    Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
    Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");

    // [A|B].Start points to the first accessed byte under base [A|B].
    // [A|B].End points to the last accessed byte, plus one.
    // There is no conflict when the intervals are disjoint:
    // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
    //
    // bound0 = (B.Start < A.End)
    // bound1 = (A.Start < B.End)
    // IsConflict = bound0 & bound1
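    // For example (illustrative addresses): A = [0x100, 0x140) and
    // B = [0x140, 0x180) do not conflict, since B.Start (0x140) >= A.End
    // (0x140); but B = [0x120, 0x180) does conflict, since then both
    // B.Start < A.End and A.Start < B.End hold.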
    Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
    Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
    Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
    if (MemoryRuntimeCheck) {
      IsConflict =
          ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
    }
    MemoryRuntimeCheck = IsConflict;
  }

  return MemoryRuntimeCheck;
}

Optional<IVConditionInfo> llvm::hasPartialIVCondition(Loop &L,
                                                      unsigned MSSAThreshold,
                                                      MemorySSA &MSSA,
                                                      AAResults &AA) {
  auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
  if (!TI || !TI->isConditional())
    return {};

  auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
  // The case with the condition outside the loop should already be handled
  // earlier.
  if (!CondI || !L.contains(CondI))
    return {};
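  // For illustration (hypothetical IR): this matches header conditions such as
  //   %gep = getelementptr inbounds %struct, %struct* %s, i32 0, i32 1
  //   %val = load i32, i32* %gep
  //   %c = icmp eq i32 %val, 0
  //   br i1 %c, label %noop.path, label %work.path
  // where %val is not clobbered along one path through the loop, so the
  // branch is effectively invariant on that path and the loop can be
  // partially unswitched on it.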
  SmallVector<Instruction *> InstToDuplicate;
  InstToDuplicate.push_back(CondI);

  SmallVector<Value *, 4> WorkList;
  WorkList.append(CondI->op_begin(), CondI->op_end());

  SmallVector<MemoryAccess *, 4> AccessesToCheck;
  SmallVector<MemoryLocation, 4> AccessedLocs;
  while (!WorkList.empty()) {
    Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
    if (!I || !L.contains(I))
      continue;

    // TODO: support additional instructions.
    if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
      return {};

    // Do not duplicate volatile and atomic loads.
    if (auto *LI = dyn_cast<LoadInst>(I))
      if (LI->isVolatile() || LI->isAtomic())
        return {};

    InstToDuplicate.push_back(I);
    if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
      if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
        // Queue the defining access for the alias checks below.
        AccessesToCheck.push_back(MemUse->getDefiningAccess());
        AccessedLocs.push_back(MemoryLocation::get(I));
      } else {
        // MemoryDefs may clobber the location or may be atomic memory
        // operations. Bail out.
        return {};
      }
    }
    WorkList.append(I->op_begin(), I->op_end());
  }

  if (InstToDuplicate.empty())
    return {};

  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L.getExitingBlocks(ExitingBlocks);
  auto HasNoClobbersOnPath =
      [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
       MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
                      SmallVector<MemoryAccess *, 4> AccessesToCheck)
      -> Optional<IVConditionInfo> {
    IVConditionInfo Info;
    // First, collect all blocks in the loop that are on a path from Succ
    // to the header.
    SmallVector<BasicBlock *, 4> WorkList;
    WorkList.push_back(Succ);
    WorkList.push_back(Header);
    SmallPtrSet<BasicBlock *, 4> Seen;
    Seen.insert(Header);
    Info.PathIsNoop &=
        all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });

    while (!WorkList.empty()) {
      BasicBlock *Current = WorkList.pop_back_val();
      if (!L.contains(Current))
        continue;
      const auto &SeenIns = Seen.insert(Current);
      if (!SeenIns.second)
        continue;

      Info.PathIsNoop &= all_of(
          *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
      WorkList.append(succ_begin(Current), succ_end(Current));
    }

    // Require at least 2 blocks on a path through the loop. This skips
    // paths that directly exit the loop.
    if (Seen.size() < 2)
      return {};

    // Next, check if there are any MemoryDefs that are on the path through
    // the loop (in the Seen set) and may alias any of the locations in
    // AccessedLocs. If that is the case, they may modify the condition and
    // partial unswitching is not possible.
    SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
    while (!AccessesToCheck.empty()) {
      MemoryAccess *Current = AccessesToCheck.pop_back_val();
      auto SeenI = SeenAccesses.insert(Current);
      if (!SeenI.second || !Seen.contains(Current->getBlock()))
        continue;

      // Bail out if we exceeded the threshold.
      if (SeenAccesses.size() >= MSSAThreshold)
        return {};

      // MemoryUses are read-only accesses.
      if (isa<MemoryUse>(Current))
        continue;

      // For a MemoryDef, check if it aliases any of the locations feeding
      // the original condition.
      if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
        if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {
              return isModSet(
                  AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));
            }))
          return {};
      }

      for (Use &U : Current->uses())
        AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
    }

    // We could also allow loops with known trip counts without mustprogress,
    // but ScalarEvolution may not be available.
    Info.PathIsNoop &= isMustProgress(&L);

    // If the path is considered a no-op so far, check if it reaches a
    // single exit block without any phis. This ensures no values from the
    // loop are used outside of the loop.
    if (Info.PathIsNoop) {
      for (auto *Exiting : ExitingBlocks) {
        if (!Seen.contains(Exiting))
          continue;
        for (auto *Succ : successors(Exiting)) {
          if (L.contains(Succ))
            continue;

          Info.PathIsNoop &= llvm::empty(Succ->phis()) &&
                             (!Info.ExitForPath || Info.ExitForPath == Succ);
          if (!Info.PathIsNoop)
            break;
          assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
                 "cannot have multiple exit blocks");
          Info.ExitForPath = Succ;
        }
      }
    }
    if (!Info.ExitForPath)
      Info.PathIsNoop = false;

    Info.InstToDuplicate = InstToDuplicate;
    return Info;
  };

  // If we branch to the same successor, partial unswitching will not be
  // beneficial.
  if (TI->getSuccessor(0) == TI->getSuccessor(1))
    return {};

  if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
                                      AccessesToCheck)) {
    Info->KnownValue = ConstantInt::getTrue(TI->getContext());
    return Info;
  }
  if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
                                      AccessesToCheck)) {
    Info->KnownValue = ConstantInt::getFalse(TI->getContext());
    return Info;
  }

  return {};
}