  1. //===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains the implementation of the scalar evolution expander,
  10. // which is used to generate the code corresponding to a given scalar evolution
  11. // expression.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
  15. #include "llvm/ADT/STLExtras.h"
  16. #include "llvm/ADT/ScopeExit.h"
  17. #include "llvm/ADT/SmallSet.h"
  18. #include "llvm/Analysis/InstructionSimplify.h"
  19. #include "llvm/Analysis/LoopInfo.h"
  20. #include "llvm/Analysis/TargetTransformInfo.h"
  21. #include "llvm/Analysis/ValueTracking.h"
  22. #include "llvm/IR/DataLayout.h"
  23. #include "llvm/IR/Dominators.h"
  24. #include "llvm/IR/IntrinsicInst.h"
  25. #include "llvm/IR/PatternMatch.h"
  26. #include "llvm/Support/CommandLine.h"
  27. #include "llvm/Support/raw_ostream.h"
  28. #include "llvm/Transforms/Utils/LoopUtils.h"
  29. #ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
  30. #define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
  31. #else
  32. #define SCEV_DEBUG_WITH_TYPE(TYPE, X)
  33. #endif
  34. using namespace llvm;
  35. cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
  36. "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
  37. cl::desc("When performing SCEV expansion only if it is cheap to do, this "
  38. "controls the budget that is considered cheap (default = 4)"));
  39. using namespace PatternMatch;
  40. /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
  41. /// reusing an existing cast if a suitable one (= dominating IP) exists, or
  42. /// creating a new one.
  43. Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
  44. Instruction::CastOps Op,
  45. BasicBlock::iterator IP) {
  46. // This function must be called with the builder having a valid insertion
  47. // point. It doesn't need to be the actual IP where the uses of the returned
  48. // cast will be added, but it must dominate such IP.
  49. // We use this precondition to produce a cast that will dominate all its
  50. // uses. In particular, this is crucial for the case where the builder's
  51. // insertion point *is* the point where we were asked to put the cast.
  52. // Since we don't know the builder's insertion point is actually
  53. // where the uses will be added (only that it dominates it), we are
  54. // not allowed to move it.
  55. BasicBlock::iterator BIP = Builder.GetInsertPoint();
  56. Value *Ret = nullptr;
  57. // Check to see if there is already a cast!
  58. for (User *U : V->users()) {
  59. if (U->getType() != Ty)
  60. continue;
  61. CastInst *CI = dyn_cast<CastInst>(U);
  62. if (!CI || CI->getOpcode() != Op)
  63. continue;
  64. // Found a suitable cast that is at IP or comes before IP. Use it. Note that
  65. // the cast must also properly dominate the Builder's insertion point.
  66. if (IP->getParent() == CI->getParent() && &*BIP != CI &&
  67. (&*IP == CI || CI->comesBefore(&*IP))) {
  68. Ret = CI;
  69. break;
  70. }
  71. }
  72. // Create a new cast.
  73. if (!Ret) {
  74. SCEVInsertPointGuard Guard(Builder, this);
  75. Builder.SetInsertPoint(&*IP);
  76. Ret = Builder.CreateCast(Op, V, Ty, V->getName());
  77. }
  78. // We assert at the end of the function since IP might point to an
  79. // instruction with different dominance properties than a cast
  80. // (an invoke for example) and not dominate BIP (but the cast does).
  81. assert(!isa<Instruction>(Ret) ||
  82. SE.DT.dominates(cast<Instruction>(Ret), &*BIP));
  83. return Ret;
  84. }
  85. BasicBlock::iterator
  86. SCEVExpander::findInsertPointAfter(Instruction *I,
  87. Instruction *MustDominate) const {
  88. BasicBlock::iterator IP = ++I->getIterator();
  89. if (auto *II = dyn_cast<InvokeInst>(I))
  90. IP = II->getNormalDest()->begin();
  91. while (isa<PHINode>(IP))
  92. ++IP;
  93. if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
  94. ++IP;
  95. } else if (isa<CatchSwitchInst>(IP)) {
  96. IP = MustDominate->getParent()->getFirstInsertionPt();
  97. } else {
  98. assert(!IP->isEHPad() && "unexpected eh pad!");
  99. }
  100. // Adjust insert point to be after instructions inserted by the expander, so
  101. // we can re-use already inserted instructions. Avoid skipping past the
  102. // original \p MustDominate, in case it is an inserted instruction.
  103. while (isInsertedInstruction(&*IP) && &*IP != MustDominate)
  104. ++IP;
  105. return IP;
  106. }
  107. BasicBlock::iterator
  108. SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
  109. // Cast the argument at the beginning of the entry block, after
  110. // any bitcasts of other arguments.
  111. if (Argument *A = dyn_cast<Argument>(V)) {
  112. BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
  113. while ((isa<BitCastInst>(IP) &&
  114. isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
  115. cast<BitCastInst>(IP)->getOperand(0) != A) ||
  116. isa<DbgInfoIntrinsic>(IP))
  117. ++IP;
  118. return IP;
  119. }
  120. // Cast the instruction immediately after the instruction.
  121. if (Instruction *I = dyn_cast<Instruction>(V))
  122. return findInsertPointAfter(I, &*Builder.GetInsertPoint());
  123. // Otherwise, this must be some kind of a constant,
  124. // so let's plop this cast into the function's entry block.
  125. assert(isa<Constant>(V) &&
  126. "Expected the cast argument to be a global/constant");
  127. return Builder.GetInsertBlock()
  128. ->getParent()
  129. ->getEntryBlock()
  130. .getFirstInsertionPt();
  131. }
  132. /// InsertNoopCastOfTo - Insert a cast of V to the specified type,
  133. /// which must be possible with a noop cast, doing what we can to share
  134. /// the casts.
  135. Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
  136. Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
  137. assert((Op == Instruction::BitCast ||
  138. Op == Instruction::PtrToInt ||
  139. Op == Instruction::IntToPtr) &&
  140. "InsertNoopCastOfTo cannot perform non-noop casts!");
  141. assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
  142. "InsertNoopCastOfTo cannot change sizes!");
  143. // inttoptr only works for integral pointers. For non-integral pointers, we
  144. // can create a GEP on i8* null with the integral value as index. Note that
  145. // it is safe to use GEP of null instead of inttoptr here, because only
  146. // expressions already based on a GEP of null should be converted to pointers
  147. // during expansion.
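// Illustrative example (hypothetical IR): for a non-integral pointer type in address
// space 1, an i64 value %off would be converted roughly as
//   %uglygep = getelementptr i8, i8 addrspace(1)* null, i64 %off
// followed by a bitcast to the requested pointer type, rather than an inttoptr.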
  148. if (Op == Instruction::IntToPtr) {
  149. auto *PtrTy = cast<PointerType>(Ty);
  150. if (DL.isNonIntegralPointerType(PtrTy)) {
  151. auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace());
  152. assert(DL.getTypeAllocSize(Builder.getInt8Ty()) == 1 &&
  153. "alloc size of i8 must by 1 byte for the GEP to be correct");
  154. auto *GEP = Builder.CreateGEP(
  155. Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "uglygep");
  156. return Builder.CreateBitCast(GEP, Ty);
  157. }
  158. }
  159. // Short-circuit unnecessary bitcasts.
  160. if (Op == Instruction::BitCast) {
  161. if (V->getType() == Ty)
  162. return V;
  163. if (CastInst *CI = dyn_cast<CastInst>(V)) {
  164. if (CI->getOperand(0)->getType() == Ty)
  165. return CI->getOperand(0);
  166. }
  167. }
  168. // Short-circuit unnecessary inttoptr<->ptrtoint casts.
  169. if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
  170. SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
  171. if (CastInst *CI = dyn_cast<CastInst>(V))
  172. if ((CI->getOpcode() == Instruction::PtrToInt ||
  173. CI->getOpcode() == Instruction::IntToPtr) &&
  174. SE.getTypeSizeInBits(CI->getType()) ==
  175. SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
  176. return CI->getOperand(0);
  177. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
  178. if ((CE->getOpcode() == Instruction::PtrToInt ||
  179. CE->getOpcode() == Instruction::IntToPtr) &&
  180. SE.getTypeSizeInBits(CE->getType()) ==
  181. SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
  182. return CE->getOperand(0);
  183. }
  184. // Fold a cast of a constant.
  185. if (Constant *C = dyn_cast<Constant>(V))
  186. return ConstantExpr::getCast(Op, C, Ty);
  187. // Try to reuse existing cast, or insert one.
  188. return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V));
  189. }
  190. /// InsertBinop - Insert the specified binary operator, doing a small amount
  191. /// of work to avoid inserting an obviously redundant operation, and hoisting
  192. /// to an outer loop when the opportunity is there and it is safe.
  193. Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
  194. Value *LHS, Value *RHS,
  195. SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
  196. // Fold a binop with constant operands.
  197. if (Constant *CLHS = dyn_cast<Constant>(LHS))
  198. if (Constant *CRHS = dyn_cast<Constant>(RHS))
  199. if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL))
  200. return Res;
  201. // Do a quick scan to see if we have this binop nearby. If so, reuse it.
  202. unsigned ScanLimit = 6;
  203. BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  204. // Scanning starts from the last instruction before the insertion point.
  205. BasicBlock::iterator IP = Builder.GetInsertPoint();
  206. if (IP != BlockBegin) {
  207. --IP;
  208. for (; ScanLimit; --IP, --ScanLimit) {
  209. // Don't count dbg.value against the ScanLimit, to avoid perturbing the
  210. // generated code.
  211. if (isa<DbgInfoIntrinsic>(IP))
  212. ScanLimit++;
  213. auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
  214. // Ensure that no-wrap flags match.
  215. if (isa<OverflowingBinaryOperator>(I)) {
  216. if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
  217. return true;
  218. if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
  219. return true;
  220. }
  221. // Conservatively, do not reuse any instruction that has the exact
  222. // flag set.
  223. if (isa<PossiblyExactOperator>(I) && I->isExact())
  224. return true;
  225. return false;
  226. };
  227. if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
  228. IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
  229. return &*IP;
  230. if (IP == BlockBegin) break;
  231. }
  232. }
  233. // Save the original insertion point so we can restore it when we're done.
  234. DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
  235. SCEVInsertPointGuard Guard(Builder, this);
  236. if (IsSafeToHoist) {
  237. // Move the insertion point out of as many loops as we can.
  238. while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
  239. if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
  240. BasicBlock *Preheader = L->getLoopPreheader();
  241. if (!Preheader) break;
  242. // Ok, move up a level.
  243. Builder.SetInsertPoint(Preheader->getTerminator());
  244. }
  245. }
  246. // If we haven't found this binop, insert it.
  247. // TODO: Use the Builder, which will make CreateBinOp below fold with
  248. // InstSimplifyFolder.
  249. Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
  250. BO->setDebugLoc(Loc);
  251. if (Flags & SCEV::FlagNUW)
  252. BO->setHasNoUnsignedWrap();
  253. if (Flags & SCEV::FlagNSW)
  254. BO->setHasNoSignedWrap();
  255. return BO;
  256. }
  257. /// FactorOutConstant - Test if S is divisible by Factor, using signed
  258. /// division. If so, update S with Factor divided out and return true.
  259. /// S need not be evenly divisible if a reasonable remainder can be
  260. /// computed.
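/// Illustrative example: with S = {0,+,4} and Factor = 4, both start and step divide
/// evenly, so S is rewritten to {0,+,1} and the function returns true; with S = 6 and
/// Factor = 4, S becomes 1 and the remainder 2 is added to Remainder.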
  261. static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
  262. const SCEV *Factor, ScalarEvolution &SE,
  263. const DataLayout &DL) {
  264. // Everything is divisible by one.
  265. if (Factor->isOne())
  266. return true;
  267. // x/x == 1.
  268. if (S == Factor) {
  269. S = SE.getConstant(S->getType(), 1);
  270. return true;
  271. }
  272. // For a Constant, check for a multiple of the given factor.
  273. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
  274. // 0/x == 0.
  275. if (C->isZero())
  276. return true;
  277. // Check for divisibility.
  278. if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
  279. ConstantInt *CI =
  280. ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt()));
  281. // If the quotient is zero and the remainder is non-zero, reject
  282. // the value at this scale. It will be considered for subsequent
  283. // smaller scales.
  284. if (!CI->isZero()) {
  285. const SCEV *Div = SE.getConstant(CI);
  286. S = Div;
  287. Remainder = SE.getAddExpr(
  288. Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt())));
  289. return true;
  290. }
  291. }
  292. }
  293. // In a Mul, check if there is a constant operand which is a multiple
  294. // of the given factor.
  295. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
  296. // Size is known, check if there is a constant operand which is a multiple
  297. // of the given factor. If so, we can factor it.
  298. if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor))
  299. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
  300. if (!C->getAPInt().srem(FC->getAPInt())) {
  301. SmallVector<const SCEV *, 4> NewMulOps(M->operands());
  302. NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
  303. S = SE.getMulExpr(NewMulOps);
  304. return true;
  305. }
  306. }
  307. // In an AddRec, check if both start and step are divisible.
  308. if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
  309. const SCEV *Step = A->getStepRecurrence(SE);
  310. const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
  311. if (!FactorOutConstant(Step, StepRem, Factor, SE, DL))
  312. return false;
  313. if (!StepRem->isZero())
  314. return false;
  315. const SCEV *Start = A->getStart();
  316. if (!FactorOutConstant(Start, Remainder, Factor, SE, DL))
  317. return false;
  318. S = SE.getAddRecExpr(Start, Step, A->getLoop(),
  319. A->getNoWrapFlags(SCEV::FlagNW));
  320. return true;
  321. }
  322. return false;
  323. }
  324. /// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
  325. /// is the number of SCEVAddRecExprs present, which are kept at the end of
  326. /// the list.
  327. ///
  328. static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
  329. Type *Ty,
  330. ScalarEvolution &SE) {
  331. unsigned NumAddRecs = 0;
  332. for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
  333. ++NumAddRecs;
  334. // Group Ops into non-addrecs and addrecs.
  335. SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
  336. SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
  337. // Let ScalarEvolution sort and simplify the non-addrecs list.
  338. const SCEV *Sum = NoAddRecs.empty() ?
  339. SE.getConstant(Ty, 0) :
  340. SE.getAddExpr(NoAddRecs);
  341. // If it returned an add, use the operands. Otherwise it simplified
  342. // the sum into a single value, so just use that.
  343. Ops.clear();
  344. if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
  345. append_range(Ops, Add->operands());
  346. else if (!Sum->isZero())
  347. Ops.push_back(Sum);
  348. // Then append the addrecs.
  349. Ops.append(AddRecs.begin(), AddRecs.end());
  350. }
  351. /// SplitAddRecs - Flatten a list of add operands, moving addrec start values
  352. /// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
  353. /// This helps expose more opportunities for folding parts of the expressions
  354. /// into GEP indices.
  355. ///
  356. static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
  357. Type *Ty,
  358. ScalarEvolution &SE) {
  359. // Find the addrecs.
  360. SmallVector<const SCEV *, 8> AddRecs;
  361. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  362. while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
  363. const SCEV *Start = A->getStart();
  364. if (Start->isZero()) break;
  365. const SCEV *Zero = SE.getConstant(Ty, 0);
  366. AddRecs.push_back(SE.getAddRecExpr(Zero,
  367. A->getStepRecurrence(SE),
  368. A->getLoop(),
  369. A->getNoWrapFlags(SCEV::FlagNW)));
  370. if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
  371. Ops[i] = Zero;
  372. append_range(Ops, Add->operands());
  373. e += Add->getNumOperands();
  374. } else {
  375. Ops[i] = Start;
  376. }
  377. }
  378. if (!AddRecs.empty()) {
  379. // Add the addrecs onto the end of the list.
  380. Ops.append(AddRecs.begin(), AddRecs.end());
  381. // Resort the operand list, moving any constants to the front.
  382. SimplifyAddOperands(Ops, Ty, SE);
  383. }
  384. }
  385. /// expandAddToGEP - Expand an addition expression with a pointer type into
  386. /// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
  387. /// BasicAliasAnalysis and other passes analyze the result. See the rules
  388. /// for getelementptr vs. inttoptr in
  389. /// http://llvm.org/docs/LangRef.html#pointeraliasing
  390. /// for details.
  391. ///
  392. /// Design note: The correctness of using getelementptr here depends on
  393. /// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
  394. /// they may introduce pointer arithmetic which may not be safely converted
  395. /// into getelementptr.
  396. ///
  397. /// Design note: It might seem desirable for this function to be more
  398. /// loop-aware. If some of the indices are loop-invariant while others
  399. /// aren't, it might seem desirable to emit multiple GEPs, keeping the
  400. /// loop-invariant portions of the overall computation outside the loop.
  401. /// However, there are a few reasons this is not done here. Hoisting simple
  402. /// arithmetic is a low-level optimization that often isn't very
  403. /// important until late in the optimization process. In fact, passes
  404. /// like InstructionCombining will combine GEPs, even if it means
  405. /// pushing loop-invariant computation down into loops, so even if the
  406. /// GEPs were split here, the work would quickly be undone. The
  407. /// LoopStrengthReduction pass, which is usually run quite late (and
  408. /// after the last InstructionCombining pass), takes care of hoisting
  409. /// loop-invariant portions of expressions, after considering what
  410. /// can be folded using target addressing modes.
  411. ///
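/// Illustrative example (hypothetical IR, typed pointers): expanding a byte offset of
/// 4*%i over a base %p of type i32* would ideally become
///   %scevgep = getelementptr i32, i32* %p, i64 %i
/// because FactorOutConstant can divide the element size out of the offset; if no
/// factoring succeeds, the fallback is an i8 "uglygep" over the base.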
  412. Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
  413. const SCEV *const *op_end,
  414. PointerType *PTy,
  415. Type *Ty,
  416. Value *V) {
  417. SmallVector<Value *, 4> GepIndices;
  418. SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
  419. bool AnyNonZeroIndices = false;
  420. // Split AddRecs up into parts as either of the parts may be usable
  421. // without the other.
  422. SplitAddRecs(Ops, Ty, SE);
  423. Type *IntIdxTy = DL.getIndexType(PTy);
  424. // For opaque pointers, always generate i8 GEP.
  425. if (!PTy->isOpaque()) {
  426. // Descend down the pointer's type and attempt to convert the other
  427. // operands into GEP indices, at each level. The first index in a GEP
  428. // indexes into the array implied by the pointer operand; the rest of
  429. // the indices index into the element or field type selected by the
  430. // preceding index.
  431. Type *ElTy = PTy->getNonOpaquePointerElementType();
  432. for (;;) {
  433. // If the scale size is not 0, attempt to factor out a scale for
  434. // array indexing.
  435. SmallVector<const SCEV *, 8> ScaledOps;
  436. if (ElTy->isSized()) {
  437. const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy);
  438. if (!ElSize->isZero()) {
  439. SmallVector<const SCEV *, 8> NewOps;
  440. for (const SCEV *Op : Ops) {
  441. const SCEV *Remainder = SE.getConstant(Ty, 0);
  442. if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
  443. // Op now has ElSize factored out.
  444. ScaledOps.push_back(Op);
  445. if (!Remainder->isZero())
  446. NewOps.push_back(Remainder);
  447. AnyNonZeroIndices = true;
  448. } else {
  449. // The operand was not divisible, so add it to the list of
  450. // operands we'll scan next iteration.
  451. NewOps.push_back(Op);
  452. }
  453. }
  454. // If we made any changes, update Ops.
  455. if (!ScaledOps.empty()) {
  456. Ops = NewOps;
  457. SimplifyAddOperands(Ops, Ty, SE);
  458. }
  459. }
  460. }
  461. // Record the scaled array index for this level of the type. If
  462. // we didn't find any operands that could be factored, tentatively
  463. // assume that element zero was selected (since the zero offset
  464. // would obviously be folded away).
  465. Value *Scaled =
  466. ScaledOps.empty()
  467. ? Constant::getNullValue(Ty)
  468. : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty);
  469. GepIndices.push_back(Scaled);
  470. // Collect struct field index operands.
  471. while (StructType *STy = dyn_cast<StructType>(ElTy)) {
  472. bool FoundFieldNo = false;
  473. // An empty struct has no fields.
  474. if (STy->getNumElements() == 0) break;
  475. // Field offsets are known. See if a constant offset falls within any of
  476. // the struct fields.
  477. if (Ops.empty())
  478. break;
  479. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
  480. if (SE.getTypeSizeInBits(C->getType()) <= 64) {
  481. const StructLayout &SL = *DL.getStructLayout(STy);
  482. uint64_t FullOffset = C->getValue()->getZExtValue();
  483. if (FullOffset < SL.getSizeInBytes()) {
  484. unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
  485. GepIndices.push_back(
  486. ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
  487. ElTy = STy->getTypeAtIndex(ElIdx);
  488. Ops[0] =
  489. SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
  490. AnyNonZeroIndices = true;
  491. FoundFieldNo = true;
  492. }
  493. }
  494. // If no struct field offsets were found, tentatively assume that
  495. // field zero was selected (since the zero offset would obviously
  496. // be folded away).
  497. if (!FoundFieldNo) {
  498. ElTy = STy->getTypeAtIndex(0u);
  499. GepIndices.push_back(
  500. Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
  501. }
  502. }
  503. if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
  504. ElTy = ATy->getElementType();
  505. else
  506. // FIXME: Handle VectorType.
  507. // E.g., if ElTy is a scalable vector, then ElSize is not a compile-time
  508. // constant and therefore cannot be factored out; the generated IR is less
  509. // ideal, with the base 'V' cast to i8* and an ugly getelementptr over it.
  510. break;
  511. }
  512. }
  513. // If none of the operands were convertible to proper GEP indices, cast
  514. // the base to i8* and do an ugly getelementptr with that. It's still
  515. // better than ptrtoint+arithmetic+inttoptr at least.
  516. if (!AnyNonZeroIndices) {
  517. // Cast the base to i8*.
  518. if (!PTy->isOpaque())
  519. V = InsertNoopCastOfTo(V,
  520. Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
  521. assert(!isa<Instruction>(V) ||
  522. SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
  523. // Expand the operands for a plain byte offset.
  524. Value *Idx = expandCodeForImpl(SE.getAddExpr(Ops), Ty);
  525. // Fold a GEP with constant operands.
  526. if (Constant *CLHS = dyn_cast<Constant>(V))
  527. if (Constant *CRHS = dyn_cast<Constant>(Idx))
  528. return Builder.CreateGEP(Builder.getInt8Ty(), CLHS, CRHS);
  529. // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
  530. unsigned ScanLimit = 6;
  531. BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  532. // Scanning starts from the last instruction before the insertion point.
  533. BasicBlock::iterator IP = Builder.GetInsertPoint();
  534. if (IP != BlockBegin) {
  535. --IP;
  536. for (; ScanLimit; --IP, --ScanLimit) {
  537. // Don't count dbg.value against the ScanLimit, to avoid perturbing the
  538. // generated code.
  539. if (isa<DbgInfoIntrinsic>(IP))
  540. ScanLimit++;
  541. if (IP->getOpcode() == Instruction::GetElementPtr &&
  542. IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
  543. cast<GEPOperator>(&*IP)->getSourceElementType() ==
  544. Type::getInt8Ty(Ty->getContext()))
  545. return &*IP;
  546. if (IP == BlockBegin) break;
  547. }
  548. }
  549. // Save the original insertion point so we can restore it when we're done.
  550. SCEVInsertPointGuard Guard(Builder, this);
  551. // Move the insertion point out of as many loops as we can.
  552. while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
  553. if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
  554. BasicBlock *Preheader = L->getLoopPreheader();
  555. if (!Preheader) break;
  556. // Ok, move up a level.
  557. Builder.SetInsertPoint(Preheader->getTerminator());
  558. }
  559. // Emit a GEP.
  560. return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
  561. }
  562. {
  563. SCEVInsertPointGuard Guard(Builder, this);
  564. // Move the insertion point out of as many loops as we can.
  565. while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
  566. if (!L->isLoopInvariant(V)) break;
  567. bool AnyIndexNotLoopInvariant = any_of(
  568. GepIndices, [L](Value *Op) { return !L->isLoopInvariant(Op); });
  569. if (AnyIndexNotLoopInvariant)
  570. break;
  571. BasicBlock *Preheader = L->getLoopPreheader();
  572. if (!Preheader) break;
  573. // Ok, move up a level.
  574. Builder.SetInsertPoint(Preheader->getTerminator());
  575. }
  576. // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
  577. // because ScalarEvolution may have changed the address arithmetic to
  578. // compute a value which is beyond the end of the allocated object.
  579. Value *Casted = V;
  580. if (V->getType() != PTy)
  581. Casted = InsertNoopCastOfTo(Casted, PTy);
  582. Value *GEP = Builder.CreateGEP(PTy->getNonOpaquePointerElementType(),
  583. Casted, GepIndices, "scevgep");
  584. Ops.push_back(SE.getUnknown(GEP));
  585. }
  586. return expand(SE.getAddExpr(Ops));
  587. }
  588. Value *SCEVExpander::expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty,
  589. Value *V) {
  590. const SCEV *const Ops[1] = {Op};
  591. return expandAddToGEP(Ops, Ops + 1, PTy, Ty, V);
  592. }
  593. /// PickMostRelevantLoop - Given two loops, pick the one that's most relevant
  594. /// for SCEV expansion. If they are nested, this is the more deeply nested one.
  595. /// If they are neighboring, pick the later one.
  596. static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
  597. DominatorTree &DT) {
  598. if (!A) return B;
  599. if (!B) return A;
  600. if (A->contains(B)) return B;
  601. if (B->contains(A)) return A;
  602. if (DT.dominates(A->getHeader(), B->getHeader())) return B;
  603. if (DT.dominates(B->getHeader(), A->getHeader())) return A;
  604. return A; // Arbitrarily break the tie.
  605. }
  606. /// getRelevantLoop - Get the most relevant loop associated with the given
  607. /// expression, according to PickMostRelevantLoop.
  608. const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
  609. // Test whether we've already computed the most relevant loop for this SCEV.
  610. auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
  611. if (!Pair.second)
  612. return Pair.first->second;
  613. switch (S->getSCEVType()) {
  614. case scConstant:
  615. return nullptr; // A constant has no relevant loops.
  616. case scTruncate:
  617. case scZeroExtend:
  618. case scSignExtend:
  619. case scPtrToInt:
  620. case scAddExpr:
  621. case scMulExpr:
  622. case scUDivExpr:
  623. case scAddRecExpr:
  624. case scUMaxExpr:
  625. case scSMaxExpr:
  626. case scUMinExpr:
  627. case scSMinExpr:
  628. case scSequentialUMinExpr: {
  629. const Loop *L = nullptr;
  630. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
  631. L = AR->getLoop();
  632. for (const SCEV *Op : S->operands())
  633. L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
  634. return RelevantLoops[S] = L;
  635. }
  636. case scUnknown: {
  637. const SCEVUnknown *U = cast<SCEVUnknown>(S);
  638. if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
  639. return Pair.first->second = SE.LI.getLoopFor(I->getParent());
  640. // A non-instruction has no relevant loops.
  641. return nullptr;
  642. }
  643. case scCouldNotCompute:
  644. llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  645. }
  646. llvm_unreachable("Unexpected SCEV type!");
  647. }
  648. namespace {
  649. /// LoopCompare - Compare loops by PickMostRelevantLoop.
  650. class LoopCompare {
  651. DominatorTree &DT;
  652. public:
  653. explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
  654. bool operator()(std::pair<const Loop *, const SCEV *> LHS,
  655. std::pair<const Loop *, const SCEV *> RHS) const {
  656. // Keep pointer operands sorted toward the front; visitAddExpr relies on
  657. // expanding them first.
  657. if (LHS.second->getType()->isPointerTy() !=
  658. RHS.second->getType()->isPointerTy())
  659. return LHS.second->getType()->isPointerTy();
  660. // Compare loops with PickMostRelevantLoop.
  661. if (LHS.first != RHS.first)
  662. return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
  663. // If one operand is a non-constant negative and the other is not,
  664. // put the non-constant negative on the right so that a sub can
  665. // be used instead of a negate and add.
  666. if (LHS.second->isNonConstantNegative()) {
  667. if (!RHS.second->isNonConstantNegative())
  668. return false;
  669. } else if (RHS.second->isNonConstantNegative())
  670. return true;
  671. // Otherwise they are equivalent according to this comparison.
  672. return false;
  673. }
  674. };
  675. }
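// Illustrative example for visitAddExpr below (hypothetical SCEV): expanding
// (4 + %p + 8 * %i) first expands the pointer operand %p, then folds the remaining
// integer operands into a getelementptr based on %p, and only falls back to add/sub
// instructions when the running sum is not a pointer.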
  676. Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
  677. Type *Ty = SE.getEffectiveSCEVType(S->getType());
  678. // Collect all the add operands in a loop, along with their associated loops.
  679. // Iterate in reverse so that constants are emitted last, all else equal, and
  680. // so that pointer operands are inserted first, which the code below relies on
  681. // to form more involved GEPs.
  682. SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  683. for (const SCEV *Op : reverse(S->operands()))
  684. OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
  685. // Sort by loop. Use a stable sort so that constants follow non-constants and
  686. // pointer operands precede non-pointer operands.
  687. llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
  688. // Emit instructions to add all the operands. Hoist as much as possible
  689. // out of loops, and form meaningful getelementptrs where possible.
  690. Value *Sum = nullptr;
  691. for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
  692. const Loop *CurLoop = I->first;
  693. const SCEV *Op = I->second;
  694. if (!Sum) {
  695. // This is the first operand. Just expand it.
  696. Sum = expand(Op);
  697. ++I;
  698. continue;
  699. }
  700. assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
  701. if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
  702. // The running sum expression is a pointer. Try to form a getelementptr
  703. // at this level with that as the base.
  704. SmallVector<const SCEV *, 4> NewOps;
  705. for (; I != E && I->first == CurLoop; ++I) {
  706. // If the operand is a SCEVUnknown that is not an instruction, peek through
  707. // it, to enable more of it to be folded into the GEP.
  708. const SCEV *X = I->second;
  709. if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
  710. if (!isa<Instruction>(U->getValue()))
  711. X = SE.getSCEV(U->getValue());
  712. NewOps.push_back(X);
  713. }
  714. Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
  715. } else if (Op->isNonConstantNegative()) {
  716. // Instead of doing a negate and add, just do a subtract.
  717. Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty);
  718. Sum = InsertNoopCastOfTo(Sum, Ty);
  719. Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
  720. /*IsSafeToHoist*/ true);
  721. ++I;
  722. } else {
  723. // A simple add.
  724. Value *W = expandCodeForImpl(Op, Ty);
  725. Sum = InsertNoopCastOfTo(Sum, Ty);
  726. // Canonicalize a constant to the RHS.
  727. if (isa<Constant>(Sum)) std::swap(Sum, W);
  728. Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
  729. /*IsSafeToHoist*/ true);
  730. ++I;
  731. }
  732. }
  733. return Sum;
  734. }
  735. Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
  736. Type *Ty = SE.getEffectiveSCEVType(S->getType());
  737. // Collect all the mul operands in a loop, along with their associated loops.
  738. // Iterate in reverse so that constants are emitted last, all else equal.
  739. SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  740. for (const SCEV *Op : reverse(S->operands()))
  741. OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
  742. // Sort by loop. Use a stable sort so that constants follow non-constants.
  743. llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
  744. // Emit instructions to mul all the operands. Hoist as much as possible
  745. // out of loops.
  746. Value *Prod = nullptr;
  747. auto I = OpsAndLoops.begin();
  748. // Expand the calculation of X pow N in the following manner:
  749. // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
  750. // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
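// Illustrative example: for N = 13 = 8 + 4 + 1, the lambda below emits X pow 13 as
// X * (X pow 4) * (X pow 8), using three squarings and two combining multiplies.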
  751. const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() {
  752. auto E = I;
  753. // Calculate how many times the same operand from the same loop is included
  754. // in this power.
  755. uint64_t Exponent = 0;
  756. const uint64_t MaxExponent = UINT64_MAX >> 1;
  757. // No one sane will ever try to calculate such huge exponents, but if we
  758. // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
  759. // below when the power of 2 exceeds our Exponent, and we want it to be
  760. // 1u << 31 at most to not deal with unsigned overflow.
  761. while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
  762. ++Exponent;
  763. ++E;
  764. }
  765. assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");
  766. // Calculate powers with exponents 1, 2, 4, 8, etc., and multiply the ones
  767. // that are needed into the result.
  768. Value *P = expandCodeForImpl(I->second, Ty);
  769. Value *Result = nullptr;
  770. if (Exponent & 1)
  771. Result = P;
  772. for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
  773. P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
  774. /*IsSafeToHoist*/ true);
  775. if (Exponent & BinExp)
  776. Result = Result ? InsertBinop(Instruction::Mul, Result, P,
  777. SCEV::FlagAnyWrap,
  778. /*IsSafeToHoist*/ true)
  779. : P;
  780. }
  781. I = E;
  782. assert(Result && "Nothing was expanded?");
  783. return Result;
  784. };
  785. while (I != OpsAndLoops.end()) {
  786. if (!Prod) {
  787. // This is the first operand. Just expand it.
  788. Prod = ExpandOpBinPowN();
  789. } else if (I->second->isAllOnesValue()) {
  790. // Instead of doing a multiply by negative one, just do a negate.
  791. Prod = InsertNoopCastOfTo(Prod, Ty);
  792. Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
  793. SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
  794. ++I;
  795. } else {
  796. // A simple mul.
  797. Value *W = ExpandOpBinPowN();
  798. Prod = InsertNoopCastOfTo(Prod, Ty);
  799. // Canonicalize a constant to the RHS.
  800. if (isa<Constant>(Prod)) std::swap(Prod, W);
  801. const APInt *RHS;
  802. if (match(W, m_Power2(RHS))) {
  803. // Canonicalize Prod*(1<<C) to Prod<<C.
  804. assert(!Ty->isVectorTy() && "vector types are not SCEVable");
  805. auto NWFlags = S->getNoWrapFlags();
  806. // Clear the nsw flag if the shl would produce a poison value.
  807. if (RHS->logBase2() == RHS->getBitWidth() - 1)
  808. NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
  809. Prod = InsertBinop(Instruction::Shl, Prod,
  810. ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
  811. /*IsSafeToHoist*/ true);
  812. } else {
  813. Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
  814. /*IsSafeToHoist*/ true);
  815. }
  816. }
  817. }
  818. return Prod;
  819. }
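// Note on the udiv expansion below: a udiv by a constant power of two is emitted as a
// logical shift right (illustrative example: x /u 8 becomes x lshr 3); a general udiv
// is only hoisted out of loops when the divisor is known to be non-zero, since hoisting
// could otherwise introduce a division by zero on a path that never executed it.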
  820. Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
  821. Type *Ty = SE.getEffectiveSCEVType(S->getType());
  822. Value *LHS = expandCodeForImpl(S->getLHS(), Ty);
  823. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
  824. const APInt &RHS = SC->getAPInt();
  825. if (RHS.isPowerOf2())
  826. return InsertBinop(Instruction::LShr, LHS,
  827. ConstantInt::get(Ty, RHS.logBase2()),
  828. SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
  829. }
  830. Value *RHS = expandCodeForImpl(S->getRHS(), Ty);
  831. return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
  832. /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
  833. }
  834. /// Determine if this is a well-behaved chain of instructions leading back to
  835. /// the PHI. If so, it may be reused by expanded expressions.
  836. bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
  837. const Loop *L) {
  838. if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
  839. (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
  840. return false;
  841. // If any of the operands don't dominate the insert position, bail.
  842. // Addrec operands are always loop-invariant, so this can only happen
  843. // if there are instructions which haven't been hoisted.
  844. if (L == IVIncInsertLoop) {
  845. for (Use &Op : llvm::drop_begin(IncV->operands()))
  846. if (Instruction *OInst = dyn_cast<Instruction>(Op))
  847. if (!SE.DT.dominates(OInst, IVIncInsertPos))
  848. return false;
  849. }
  850. // Advance to the next instruction.
  851. IncV = dyn_cast<Instruction>(IncV->getOperand(0));
  852. if (!IncV)
  853. return false;
  854. if (IncV->mayHaveSideEffects())
  855. return false;
  856. if (IncV == PN)
  857. return true;
  858. return isNormalAddRecExprPHI(PN, IncV, L);
  859. }
  860. /// getIVIncOperand returns an induction variable increment's induction
  861. /// variable operand.
  862. ///
  863. /// If allowScale is set, any type of GEP is allowed as long as the nonIV
  864. /// operands dominate InsertPos.
  865. ///
  866. /// If allowScale is not set, ensure that a GEP increment conforms to one of the
  867. /// simple patterns generated by getAddRecExprPHILiterally and
  868. /// expandAddToGEP. If the pattern isn't recognized, return nullptr.
  869. Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
  870. Instruction *InsertPos,
  871. bool allowScale) {
  872. if (IncV == InsertPos)
  873. return nullptr;
  874. switch (IncV->getOpcode()) {
  875. default:
  876. return nullptr;
  877. // Check for a simple Add/Sub or GEP of a loop invariant step.
  878. case Instruction::Add:
  879. case Instruction::Sub: {
  880. Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
  881. if (!OInst || SE.DT.dominates(OInst, InsertPos))
  882. return dyn_cast<Instruction>(IncV->getOperand(0));
  883. return nullptr;
  884. }
  885. case Instruction::BitCast:
  886. return dyn_cast<Instruction>(IncV->getOperand(0));
  887. case Instruction::GetElementPtr:
  888. for (Use &U : llvm::drop_begin(IncV->operands())) {
  889. if (isa<Constant>(U))
  890. continue;
  891. if (Instruction *OInst = dyn_cast<Instruction>(U)) {
  892. if (!SE.DT.dominates(OInst, InsertPos))
  893. return nullptr;
  894. }
  895. if (allowScale) {
  896. // allow any kind of GEP as long as it can be hoisted.
  897. continue;
  898. }
  899. // This must be a pointer addition of constants (pretty), which is already
  900. // handled, or some number of address-size elements (ugly). Ugly geps
  901. // have 2 operands. i1* is used by the expander to represent an
  902. // address-size element.
  903. if (IncV->getNumOperands() != 2)
  904. return nullptr;
  905. unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
  906. if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
  907. && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
  908. return nullptr;
  909. break;
  910. }
  911. return dyn_cast<Instruction>(IncV->getOperand(0));
  912. }
  913. }
  914. /// If the insert point of the current builder or any of the builders on the
  915. /// stack of saved builders has 'I' as its insert point, update it to point to
  916. /// the instruction after 'I'. This is intended to be used when the instruction
  917. /// 'I' is being moved. If this fixup is not done and 'I' is moved to a
  918. /// different block, the inconsistent insert point (with a mismatched
  919. /// Instruction and Block) can lead to an instruction being inserted in a block
  920. /// other than its parent.
  921. void SCEVExpander::fixupInsertPoints(Instruction *I) {
  922. BasicBlock::iterator It(*I);
  923. BasicBlock::iterator NewInsertPt = std::next(It);
  924. if (Builder.GetInsertPoint() == It)
  925. Builder.SetInsertPoint(&*NewInsertPt);
  926. for (auto *InsertPtGuard : InsertPointGuards)
  927. if (InsertPtGuard->GetInsertPoint() == It)
  928. InsertPtGuard->SetInsertPoint(NewInsertPt);
  929. }
  930. /// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make
  931. /// it available to other uses in this loop. Recursively hoist any operands,
  932. /// until we reach a value that dominates InsertPos.
  933. bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos,
  934. bool RecomputePoisonFlags) {
  935. auto FixupPoisonFlags = [this](Instruction *I) {
  936. // Drop flags that are potentially inferred from old context and infer flags
  937. // in new context.
  938. I->dropPoisonGeneratingFlags();
  939. if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
  940. if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
  941. auto *BO = cast<BinaryOperator>(I);
  942. BO->setHasNoUnsignedWrap(
  943. ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
  944. BO->setHasNoSignedWrap(
  945. ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
  946. }
  947. };
  948. if (SE.DT.dominates(IncV, InsertPos)) {
  949. if (RecomputePoisonFlags)
  950. FixupPoisonFlags(IncV);
  951. return true;
  952. }
  953. // InsertPos must itself dominate IncV so that IncV's new position satisfies
  954. // its existing users.
  955. if (isa<PHINode>(InsertPos) ||
  956. !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
  957. return false;
  958. if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
  959. return false;
  960. // Check that the chain of IV operands leading back to Phi can be hoisted.
  961. SmallVector<Instruction*, 4> IVIncs;
  962. for(;;) {
  963. Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
  964. if (!Oper)
  965. return false;
  966. // IncV is safe to hoist.
  967. IVIncs.push_back(IncV);
  968. IncV = Oper;
  969. if (SE.DT.dominates(IncV, InsertPos))
  970. break;
  971. }
  972. for (Instruction *I : llvm::reverse(IVIncs)) {
  973. fixupInsertPoints(I);
  974. I->moveBefore(InsertPos);
  975. if (RecomputePoisonFlags)
  976. FixupPoisonFlags(I);
  977. }
  978. return true;
  979. }
  980. /// Determine if this cyclic phi is in a form that would have been generated by
  981. /// LSR. We don't care if the phi was actually expanded in this pass, as long
  982. /// as it is in a low-cost form, for example, no implied multiplication. This
  983. /// should match any patterns generated by getAddRecExprPHILiterally and
  984. /// expandAddToGEP.
  985. bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
  986. const Loop *L) {
  987. for(Instruction *IVOper = IncV;
  988. (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
  989. /*allowScale=*/false));) {
  990. if (IVOper == PN)
  991. return true;
  992. }
  993. return false;
  994. }
  995. /// expandIVInc - Expand an IV increment at Builder's current InsertPos.
  996. /// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
  997. /// need to materialize IV increments elsewhere to handle difficult situations.
  998. Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
  999. Type *ExpandTy, Type *IntTy,
  1000. bool useSubtract) {
  1001. Value *IncV;
  1002. // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
  1003. if (ExpandTy->isPointerTy()) {
  1004. PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
  1005. // If the step isn't constant, don't use an implicitly scaled GEP, because
  1006. // that would require a multiply inside the loop.
  1007. if (!isa<ConstantInt>(StepV))
  1008. GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
  1009. GEPPtrTy->getAddressSpace());
  1010. IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN);
  1011. if (IncV->getType() != PN->getType())
  1012. IncV = Builder.CreateBitCast(IncV, PN->getType());
  1013. } else {
  1014. IncV = useSubtract ?
  1015. Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
  1016. Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
  1017. }
  1018. return IncV;
  1019. }
  1020. /// Check whether we can cheaply express the requested SCEV in terms of
  1021. /// the available PHI SCEV by truncation and/or inversion of the step.
  1022. static bool canBeCheaplyTransformed(ScalarEvolution &SE,
  1023. const SCEVAddRecExpr *Phi,
  1024. const SCEVAddRecExpr *Requested,
  1025. bool &InvertStep) {
  1026. // We can't transform to match a pointer PHI.
  1027. if (Phi->getType()->isPointerTy())
  1028. return false;
  1029. Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
  1030. Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());
  1031. if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
  1032. return false;
  1033. // Try truncate it if necessary.
  1034. Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
  1035. if (!Phi)
  1036. return false;
  1037. // Check whether truncation will help.
  1038. if (Phi == Requested) {
  1039. InvertStep = false;
  1040. return true;
  1041. }
  1042. // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
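// For example, if the loop already has a phi for {0,+,1} and the requested
// (possibly truncated) addrec is {R,+,-1}, the caller can reuse the existing
// phi and materialize the value as R - phi instead of creating a second IV.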
  1043. if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) {
  1044. InvertStep = true;
  1045. return true;
  1046. }
  1047. return false;
  1048. }
  1049. static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
  1050. if (!isa<IntegerType>(AR->getType()))
  1051. return false;
  1052. unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
  1053. Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
  1054. const SCEV *Step = AR->getStepRecurrence(SE);
  1055. const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
  1056. SE.getSignExtendExpr(AR, WideTy));
  1057. const SCEV *ExtendAfterOp =
  1058. SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
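// The increment is known not to signed-wrap if sign-extending before and
// after the add gives the same SCEV in a type twice as wide; e.g. for an i8
// addrec this compares sext(AR + Step) with sext(AR) + sext(Step) as i16,
// which can only agree when the narrow add cannot wrap.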
  1059. return ExtendAfterOp == OpAfterExtend;
  1060. }
  1061. static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
  1062. if (!isa<IntegerType>(AR->getType()))
  1063. return false;
  1064. unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
  1065. Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
  1066. const SCEV *Step = AR->getStepRecurrence(SE);
  1067. const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
  1068. SE.getZeroExtendExpr(AR, WideTy));
  1069. const SCEV *ExtendAfterOp =
  1070. SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
  1071. return ExtendAfterOp == OpAfterExtend;
  1072. }
  1073. /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
  1074. /// the base addrec, which is the addrec without any non-loop-dominating
  1075. /// values, and return the PHI.
  1076. PHINode *
  1077. SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
  1078. const Loop *L,
  1079. Type *ExpandTy,
  1080. Type *IntTy,
  1081. Type *&TruncTy,
  1082. bool &InvertStep) {
  1083. assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
  1084. // Reuse a previously-inserted PHI, if present.
  1085. BasicBlock *LatchBlock = L->getLoopLatch();
  1086. if (LatchBlock) {
  1087. PHINode *AddRecPhiMatch = nullptr;
  1088. Instruction *IncV = nullptr;
  1089. TruncTy = nullptr;
  1090. InvertStep = false;
  1091. // Only try partially matching scevs that need truncation and/or
  1092. // step-inversion if we know this loop is outside the current loop.
  1093. bool TryNonMatchingSCEV =
  1094. IVIncInsertLoop &&
  1095. SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
  1096. for (PHINode &PN : L->getHeader()->phis()) {
  1097. if (!SE.isSCEVable(PN.getType()))
  1098. continue;
// We should not look for an incomplete PHI. Getting the SCEV of an
// incomplete PHI has no meaning at all.
  1101. if (!PN.isComplete()) {
  1102. SCEV_DEBUG_WITH_TYPE(
  1103. DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
  1104. continue;
  1105. }
  1106. const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
  1107. if (!PhiSCEV)
  1108. continue;
  1109. bool IsMatchingSCEV = PhiSCEV == Normalized;
  1110. // We only handle truncation and inversion of phi recurrences for the
  1111. // expanded expression if the expanded expression's loop dominates the
  1112. // loop we insert to. Check now, so we can bail out early.
  1113. if (!IsMatchingSCEV && !TryNonMatchingSCEV)
  1114. continue;
// TODO: this can possibly be reworked to avoid this cast entirely.
  1116. Instruction *TempIncV =
  1117. dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
  1118. if (!TempIncV)
  1119. continue;
  1120. // Check whether we can reuse this PHI node.
  1121. if (LSRMode) {
  1122. if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
  1123. continue;
  1124. } else {
  1125. if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
  1126. continue;
  1127. }
  1128. // Stop if we have found an exact match SCEV.
  1129. if (IsMatchingSCEV) {
  1130. IncV = TempIncV;
  1131. TruncTy = nullptr;
  1132. InvertStep = false;
  1133. AddRecPhiMatch = &PN;
  1134. break;
  1135. }
  1136. // Try whether the phi can be translated into the requested form
  1137. // (truncated and/or offset by a constant).
  1138. if ((!TruncTy || InvertStep) &&
  1139. canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
// Record the phi node. But don't stop; we might find an exact match
  1141. // later.
  1142. AddRecPhiMatch = &PN;
  1143. IncV = TempIncV;
  1144. TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
  1145. }
  1146. }
  1147. if (AddRecPhiMatch) {
  1148. // Ok, the add recurrence looks usable.
  1149. // Remember this PHI, even in post-inc mode.
  1150. InsertedValues.insert(AddRecPhiMatch);
  1151. // Remember the increment.
  1152. rememberInstruction(IncV);
  1153. // Those values were not actually inserted but re-used.
  1154. ReusedValues.insert(AddRecPhiMatch);
  1155. ReusedValues.insert(IncV);
  1156. return AddRecPhiMatch;
  1157. }
  1158. }
  1159. // Save the original insertion point so we can restore it when we're done.
  1160. SCEVInsertPointGuard Guard(Builder, this);
  1161. // Another AddRec may need to be recursively expanded below. For example, if
  1162. // this AddRec is quadratic, the StepV may itself be an AddRec in this
  1163. // loop. Remove this loop from the PostIncLoops set before expanding such
  1164. // AddRecs. Otherwise, we cannot find a valid position for the step
  1165. // (i.e. StepV can never dominate its loop header). Ideally, we could do
  1166. // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
  1167. // so it's not worth implementing SmallPtrSet::swap.
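// For example, expanding the quadratic addrec {0,+,1,+,1}<%L> requires
// expanding its step {1,+,1}<%L>, itself an addrec of %L; if %L were still in
// PostIncLoops, that recursive expansion could not be placed so that it
// dominates the loop header.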
  1168. PostIncLoopSet SavedPostIncLoops = PostIncLoops;
  1169. PostIncLoops.clear();
  1170. // Expand code for the start value into the loop preheader.
  1171. assert(L->getLoopPreheader() &&
  1172. "Can't expand add recurrences without a loop preheader!");
  1173. Value *StartV =
  1174. expandCodeForImpl(Normalized->getStart(), ExpandTy,
  1175. L->getLoopPreheader()->getTerminator());
// StartV must have been inserted into L's preheader to dominate the new
  1177. // phi.
  1178. assert(!isa<Instruction>(StartV) ||
  1179. SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
  1180. L->getHeader()));
  1181. // Expand code for the step value. Do this before creating the PHI so that PHI
  1182. // reuse code doesn't see an incomplete PHI.
  1183. const SCEV *Step = Normalized->getStepRecurrence(SE);
  1184. // If the stride is negative, insert a sub instead of an add for the increment
  1185. // (unless it's a constant, because subtracts of constants are canonicalized
  1186. // to adds).
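// E.g. for a step of (-1 * %n), the step is rewritten to %n below and the
// increment is later emitted as roughly "%iv.next = sub %iv, %n" rather than
// materializing a negated %n inside the loop.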
  1187. bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
  1188. if (useSubtract)
  1189. Step = SE.getNegativeSCEV(Step);
  1190. // Expand the step somewhere that dominates the loop header.
  1191. Value *StepV = expandCodeForImpl(
  1192. Step, IntTy, &*L->getHeader()->getFirstInsertionPt());
  1193. // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
  1194. // we actually do emit an addition. It does not apply if we emit a
  1195. // subtraction.
  1196. bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
  1197. bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
  1198. // Create the PHI.
  1199. BasicBlock *Header = L->getHeader();
  1200. Builder.SetInsertPoint(Header, Header->begin());
  1201. pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
  1202. PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
  1203. Twine(IVName) + ".iv");
  1204. // Create the step instructions and populate the PHI.
  1205. for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
  1206. BasicBlock *Pred = *HPI;
  1207. // Add a start value.
  1208. if (!L->contains(Pred)) {
  1209. PN->addIncoming(StartV, Pred);
  1210. continue;
  1211. }
  1212. // Create a step value and add it to the PHI.
  1213. // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
  1214. // instructions at IVIncInsertPos.
  1215. Instruction *InsertPos = L == IVIncInsertLoop ?
  1216. IVIncInsertPos : Pred->getTerminator();
  1217. Builder.SetInsertPoint(InsertPos);
  1218. Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
  1219. if (isa<OverflowingBinaryOperator>(IncV)) {
  1220. if (IncrementIsNUW)
  1221. cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
  1222. if (IncrementIsNSW)
  1223. cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
  1224. }
  1225. PN->addIncoming(IncV, Pred);
  1226. }
  1227. // After expanding subexpressions, restore the PostIncLoops set so the caller
  1228. // can ensure that IVIncrement dominates the current uses.
  1229. PostIncLoops = SavedPostIncLoops;
  1230. // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
  1231. // effective when we are able to use an IV inserted here, so record it.
  1232. InsertedValues.insert(PN);
  1233. InsertedIVs.push_back(PN);
  1234. return PN;
  1235. }
  1236. Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
  1237. Type *STy = S->getType();
  1238. Type *IntTy = SE.getEffectiveSCEVType(STy);
  1239. const Loop *L = S->getLoop();
  1240. // Determine a normalized form of this expression, which is the expression
  1241. // before any post-inc adjustment is made.
  1242. const SCEVAddRecExpr *Normalized = S;
  1243. if (PostIncLoops.count(L)) {
  1244. PostIncLoopSet Loops;
  1245. Loops.insert(L);
  1246. Normalized = cast<SCEVAddRecExpr>(normalizeForPostIncUse(S, Loops, SE));
  1247. }
  1248. // Strip off any non-loop-dominating component from the addrec start.
  1249. const SCEV *Start = Normalized->getStart();
  1250. const SCEV *PostLoopOffset = nullptr;
  1251. if (!SE.properlyDominates(Start, L->getHeader())) {
  1252. PostLoopOffset = Start;
  1253. Start = SE.getConstant(Normalized->getType(), 0);
  1254. Normalized = cast<SCEVAddRecExpr>(
  1255. SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
  1256. Normalized->getLoop(),
  1257. Normalized->getNoWrapFlags(SCEV::FlagNW)));
  1258. }
  1259. // Strip off any non-loop-dominating component from the addrec step.
  1260. const SCEV *Step = Normalized->getStepRecurrence(SE);
  1261. const SCEV *PostLoopScale = nullptr;
  1262. if (!SE.dominates(Step, L->getHeader())) {
  1263. PostLoopScale = Step;
  1264. Step = SE.getConstant(Normalized->getType(), 1);
  1265. if (!Start->isZero()) {
  1266. // The normalization below assumes that Start is constant zero, so if
// it isn't, re-associate Start to PostLoopOffset.
  1268. assert(!PostLoopOffset && "Start not-null but PostLoopOffset set?");
  1269. PostLoopOffset = Start;
  1270. Start = SE.getConstant(Normalized->getType(), 0);
  1271. }
  1272. Normalized =
  1273. cast<SCEVAddRecExpr>(SE.getAddRecExpr(
  1274. Start, Step, Normalized->getLoop(),
  1275. Normalized->getNoWrapFlags(SCEV::FlagNW)));
  1276. }
  1277. // Expand the core addrec. If we need post-loop scaling, force it to
  1278. // expand to an integer type to avoid the need for additional casting.
  1279. Type *ExpandTy = PostLoopScale ? IntTy : STy;
  1280. // We can't use a pointer type for the addrec if the pointer type is
  1281. // non-integral.
  1282. Type *AddRecPHIExpandTy =
  1283. DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy;
  1284. // In some cases, we decide to reuse an existing phi node but need to truncate
  1285. // it and/or invert the step.
  1286. Type *TruncTy = nullptr;
  1287. bool InvertStep = false;
  1288. PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy,
  1289. IntTy, TruncTy, InvertStep);
  1290. // Accommodate post-inc mode, if necessary.
  1291. Value *Result;
  1292. if (!PostIncLoops.count(L))
  1293. Result = PN;
  1294. else {
  1295. // In PostInc mode, use the post-incremented value.
  1296. BasicBlock *LatchBlock = L->getLoopLatch();
  1297. assert(LatchBlock && "PostInc mode requires a unique loop latch!");
  1298. Result = PN->getIncomingValueForBlock(LatchBlock);
  1299. // We might be introducing a new use of the post-inc IV that is not poison
  1300. // safe, in which case we should drop poison generating flags. Only keep
  1301. // those flags for which SCEV has proven that they always hold.
  1302. if (isa<OverflowingBinaryOperator>(Result)) {
  1303. auto *I = cast<Instruction>(Result);
  1304. if (!S->hasNoUnsignedWrap())
  1305. I->setHasNoUnsignedWrap(false);
  1306. if (!S->hasNoSignedWrap())
  1307. I->setHasNoSignedWrap(false);
  1308. }
  1309. // For an expansion to use the postinc form, the client must call
  1310. // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
  1311. // or dominated by IVIncInsertPos.
  1312. if (isa<Instruction>(Result) &&
  1313. !SE.DT.dominates(cast<Instruction>(Result),
  1314. &*Builder.GetInsertPoint())) {
  1315. // The induction variable's postinc expansion does not dominate this use.
  1316. // IVUsers tries to prevent this case, so it is rare. However, it can
  1317. // happen when an IVUser outside the loop is not dominated by the latch
  1318. // block. Adjusting IVIncInsertPos before expansion begins cannot handle
  1319. // all cases. Consider a phi outside whose operand is replaced during
  1320. // expansion with the value of the postinc user. Without fundamentally
  1321. // changing the way postinc users are tracked, the only remedy is
  1322. // inserting an extra IV increment. StepV might fold into PostLoopOffset,
  1323. // but hopefully expandCodeFor handles that.
  1324. bool useSubtract =
  1325. !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
  1326. if (useSubtract)
  1327. Step = SE.getNegativeSCEV(Step);
  1328. Value *StepV;
  1329. {
  1330. // Expand the step somewhere that dominates the loop header.
  1331. SCEVInsertPointGuard Guard(Builder, this);
  1332. StepV = expandCodeForImpl(
  1333. Step, IntTy, &*L->getHeader()->getFirstInsertionPt());
  1334. }
  1335. Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
  1336. }
  1337. }
  1338. // We have decided to reuse an induction variable of a dominating loop. Apply
  1339. // truncation and/or inversion of the step.
  1340. if (TruncTy) {
  1341. Type *ResTy = Result->getType();
  1342. // Normalize the result type.
  1343. if (ResTy != SE.getEffectiveSCEVType(ResTy))
  1344. Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
  1345. // Truncate the result.
  1346. if (TruncTy != Result->getType())
  1347. Result = Builder.CreateTrunc(Result, TruncTy);
  1348. // Invert the result.
  1349. if (InvertStep)
  1350. Result = Builder.CreateSub(
  1351. expandCodeForImpl(Normalized->getStart(), TruncTy), Result);
  1352. }
  1353. // Re-apply any non-loop-dominating scale.
  1354. if (PostLoopScale) {
  1355. assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
  1356. Result = InsertNoopCastOfTo(Result, IntTy);
  1357. Result = Builder.CreateMul(Result,
  1358. expandCodeForImpl(PostLoopScale, IntTy));
  1359. }
  1360. // Re-apply any non-loop-dominating offset.
  1361. if (PostLoopOffset) {
  1362. if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
  1363. if (Result->getType()->isIntegerTy()) {
  1364. Value *Base = expandCodeForImpl(PostLoopOffset, ExpandTy);
  1365. Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base);
  1366. } else {
  1367. Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result);
  1368. }
  1369. } else {
  1370. Result = InsertNoopCastOfTo(Result, IntTy);
  1371. Result = Builder.CreateAdd(
  1372. Result, expandCodeForImpl(PostLoopOffset, IntTy));
  1373. }
  1374. }
  1375. return Result;
  1376. }
  1377. Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
  1378. // In canonical mode we compute the addrec as an expression of a canonical IV
  1379. // using evaluateAtIteration and expand the resulting SCEV expression. This
  1380. // way we avoid introducing new IVs to carry on the computation of the addrec
  1381. // throughout the loop.
  1382. //
  1383. // For nested addrecs evaluateAtIteration might need a canonical IV of a
  1384. // type wider than the addrec itself. Emitting a canonical IV of the
  1385. // proper type might produce non-legal types, for example expanding an i64
  1386. // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
  1387. // back to non-canonical mode for nested addrecs.
  1388. if (!CanonicalMode || (S->getNumOperands() > 2))
  1389. return expandAddRecExprLiterally(S);
  1390. Type *Ty = SE.getEffectiveSCEVType(S->getType());
  1391. const Loop *L = S->getLoop();
  1392. // First check for an existing canonical IV in a suitable type.
  1393. PHINode *CanonicalIV = nullptr;
  1394. if (PHINode *PN = L->getCanonicalInductionVariable())
  1395. if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
  1396. CanonicalIV = PN;
  1397. // Rewrite an AddRec in terms of the canonical induction variable, if
// its type is narrower.
  1399. if (CanonicalIV &&
  1400. SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
  1401. !S->getType()->isPointerTy()) {
  1402. SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
  1403. for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
  1404. NewOps[i] = SE.getAnyExtendExpr(S->getOperand(i), CanonicalIV->getType());
  1405. Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
  1406. S->getNoWrapFlags(SCEV::FlagNW)));
  1407. BasicBlock::iterator NewInsertPt =
  1408. findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
  1409. V = expandCodeForImpl(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
  1410. &*NewInsertPt);
  1411. return V;
  1412. }
  1413. // {X,+,F} --> X + {0,+,F}
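// E.g. a pointer-typed {%base,+,4} is emitted as a GEP off the expanded
// pointer base, while an integer {7,+,%n} is split into 7 + {0,+,%n} and
// expanded as a plain add of the two pre-expanded halves.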
  1414. if (!S->getStart()->isZero()) {
  1415. if (PointerType *PTy = dyn_cast<PointerType>(S->getType())) {
  1416. Value *StartV = expand(SE.getPointerBase(S));
  1417. assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
  1418. return expandAddToGEP(SE.removePointerBase(S), PTy, Ty, StartV);
  1419. }
  1420. SmallVector<const SCEV *, 4> NewOps(S->operands());
  1421. NewOps[0] = SE.getConstant(Ty, 0);
  1422. const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
  1423. S->getNoWrapFlags(SCEV::FlagNW));
  1424. // Just do a normal add. Pre-expand the operands to suppress folding.
  1425. //
  1426. // The LHS and RHS values are factored out of the expand call to make the
  1427. // output independent of the argument evaluation order.
  1428. const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
  1429. const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
  1430. return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
  1431. }
  1432. // If we don't yet have a canonical IV, create one.
  1433. if (!CanonicalIV) {
  1434. // Create and insert the PHI node for the induction variable in the
  1435. // specified loop.
  1436. BasicBlock *Header = L->getHeader();
  1437. pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
  1438. CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
  1439. &Header->front());
  1440. rememberInstruction(CanonicalIV);
  1441. SmallSet<BasicBlock *, 4> PredSeen;
  1442. Constant *One = ConstantInt::get(Ty, 1);
  1443. for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
  1444. BasicBlock *HP = *HPI;
  1445. if (!PredSeen.insert(HP).second) {
  1446. // There must be an incoming value for each predecessor, even the
  1447. // duplicates!
  1448. CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
  1449. continue;
  1450. }
  1451. if (L->contains(HP)) {
  1452. // Insert a unit add instruction right before the terminator
  1453. // corresponding to the back-edge.
  1454. Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
  1455. "indvar.next",
  1456. HP->getTerminator());
  1457. Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
  1458. rememberInstruction(Add);
  1459. CanonicalIV->addIncoming(Add, HP);
  1460. } else {
  1461. CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
  1462. }
  1463. }
  1464. }
  1465. // {0,+,1} --> Insert a canonical induction variable into the loop!
  1466. if (S->isAffine() && S->getOperand(1)->isOne()) {
  1467. assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
  1468. "IVs with types different from the canonical IV should "
  1469. "already have been handled!");
  1470. return CanonicalIV;
  1471. }
  1472. // {0,+,F} --> {0,+,1} * F
  1473. // If this is a simple linear addrec, emit it now as a special case.
  1474. if (S->isAffine()) // {0,+,F} --> i*F
  1475. return
  1476. expand(SE.getTruncateOrNoop(
  1477. SE.getMulExpr(SE.getUnknown(CanonicalIV),
  1478. SE.getNoopOrAnyExtend(S->getOperand(1),
  1479. CanonicalIV->getType())),
  1480. Ty));
  1481. // If this is a chain of recurrences, turn it into a closed form, using the
  1482. // folders, then expandCodeFor the closed form. This allows the folders to
  1483. // simplify the expression without having to build a bunch of special code
  1484. // into this folder.
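// E.g. an affine {A,+,B} evaluated at the canonical IV i becomes A + B*i, and
// a quadratic {A,+,B,+,C} becomes roughly A + B*i + C*(i*(i-1)/2), letting the
// SCEV folders simplify whatever they can before expansion.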
  1485. const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
  1486. // Promote S up to the canonical IV type, if the cast is foldable.
  1487. const SCEV *NewS = S;
  1488. const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
  1489. if (isa<SCEVAddRecExpr>(Ext))
  1490. NewS = Ext;
  1491. const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
  1492. // Truncate the result down to the original type, if needed.
  1493. const SCEV *T = SE.getTruncateOrNoop(V, Ty);
  1494. return expand(T);
  1495. }
  1496. Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
  1497. Value *V =
  1498. expandCodeForImpl(S->getOperand(), S->getOperand()->getType());
  1499. return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
  1500. GetOptimalInsertionPointForCastOf(V));
  1501. }
  1502. Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
  1503. Type *Ty = SE.getEffectiveSCEVType(S->getType());
Value *V = expandCodeForImpl(
    S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()));
  1507. return Builder.CreateTrunc(V, Ty);
  1508. }
  1509. Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
  1510. Type *Ty = SE.getEffectiveSCEVType(S->getType());
Value *V = expandCodeForImpl(
    S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()));
  1514. return Builder.CreateZExt(V, Ty);
  1515. }
  1516. Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
  1517. Type *Ty = SE.getEffectiveSCEVType(S->getType());
Value *V = expandCodeForImpl(
    S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()));
  1521. return Builder.CreateSExt(V, Ty);
  1522. }
  1523. Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
  1524. Intrinsic::ID IntrinID, Twine Name,
  1525. bool IsSequential) {
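// Expand the operands right to left into a chain of min/max operations. For a
// sequential umin over (a, b, c) this builds roughly
//   umin(umin(freeze(c), freeze(b)), a)
// freezing all but the first operand so that poison in operands the sequential
// form might never evaluate cannot leak into the result.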
  1526. Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
  1527. Type *Ty = LHS->getType();
  1528. if (IsSequential)
  1529. LHS = Builder.CreateFreeze(LHS);
  1530. for (int i = S->getNumOperands() - 2; i >= 0; --i) {
  1531. Value *RHS = expandCodeForImpl(S->getOperand(i), Ty);
  1532. if (IsSequential && i != 0)
  1533. RHS = Builder.CreateFreeze(RHS);
  1534. Value *Sel;
  1535. if (Ty->isIntegerTy())
  1536. Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS},
  1537. /*FMFSource=*/nullptr, Name);
  1538. else {
  1539. Value *ICmp =
  1540. Builder.CreateICmp(MinMaxIntrinsic::getPredicate(IntrinID), LHS, RHS);
  1541. Sel = Builder.CreateSelect(ICmp, LHS, RHS, Name);
  1542. }
  1543. LHS = Sel;
  1544. }
  1545. return LHS;
  1546. }
  1547. Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
  1548. return expandMinMaxExpr(S, Intrinsic::smax, "smax");
  1549. }
  1550. Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
  1551. return expandMinMaxExpr(S, Intrinsic::umax, "umax");
  1552. }
  1553. Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
  1554. return expandMinMaxExpr(S, Intrinsic::smin, "smin");
  1555. }
  1556. Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
  1557. return expandMinMaxExpr(S, Intrinsic::umin, "umin");
  1558. }
  1559. Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
  1560. return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true);
  1561. }
  1562. Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty,
  1563. Instruction *IP) {
  1564. setInsertPoint(IP);
  1565. Value *V = expandCodeForImpl(SH, Ty);
  1566. return V;
  1567. }
  1568. Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty) {
  1569. // Expand the code for this SCEV.
  1570. Value *V = expand(SH);
  1571. if (Ty) {
  1572. assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
  1573. "non-trivial casts should be done with the SCEVs directly!");
  1574. V = InsertNoopCastOfTo(V, Ty);
  1575. }
  1576. return V;
  1577. }
  1578. Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
  1579. const Instruction *InsertPt) {
  1580. // If the expansion is not in CanonicalMode, and the SCEV contains any
// sub-SCEV of scAddRecExpr type, it is required to expand the SCEV literally.
  1582. if (!CanonicalMode && SE.containsAddRecurrence(S))
  1583. return nullptr;
  1584. // If S is a constant, it may be worse to reuse an existing Value.
  1585. if (isa<SCEVConstant>(S))
  1586. return nullptr;
  1587. // Choose a Value from the set which dominates the InsertPt.
  1588. // InsertPt should be inside the Value's parent loop so as not to break
  1589. // the LCSSA form.
  1590. for (Value *V : SE.getSCEVValues(S)) {
  1591. Instruction *EntInst = dyn_cast<Instruction>(V);
  1592. if (!EntInst)
  1593. continue;
  1594. assert(EntInst->getFunction() == InsertPt->getFunction());
  1595. if (S->getType() == V->getType() &&
  1596. SE.DT.dominates(EntInst, InsertPt) &&
  1597. (SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
  1598. SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
  1599. return V;
  1600. }
  1601. return nullptr;
  1602. }
  1603. // The expansion of SCEV will either reuse a previous Value in ExprValueMap,
  1604. // or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
// and the SCEV contains any sub-SCEV of scAddRecExpr type, it will be expanded
  1606. // literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
  1607. // the expansion will try to reuse Value from ExprValueMap, and only when it
  1608. // fails, expand the SCEV literally.
  1609. Value *SCEVExpander::expand(const SCEV *S) {
  1610. // Compute an insertion point for this SCEV object. Hoist the instructions
  1611. // as far out in the loop nest as possible.
  1612. Instruction *InsertPt = &*Builder.GetInsertPoint();
// We can only move the insertion point if there are no div or rem operations;
// otherwise we risk moving it across a check for a zero denominator.
  1615. auto SafeToHoist = [](const SCEV *S) {
  1616. return !SCEVExprContains(S, [](const SCEV *S) {
  1617. if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
  1618. if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
  1619. // Division by non-zero constants can be hoisted.
  1620. return SC->getValue()->isZero();
  1621. // All other divisions should not be moved as they may be
  1622. // divisions by zero and should be kept within the
  1623. // conditions of the surrounding loops that guard their
  1624. // execution (see PR35406).
  1625. return true;
  1626. }
  1627. return false;
  1628. });
  1629. };
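// E.g. a SCEV containing (%a /u %b) with a non-constant %b is never hoisted
// past the enclosing loop's guard, since that guard may be the only thing
// proving %b is non-zero, while (%a /u 4) can be hoisted freely.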
  1630. if (SafeToHoist(S)) {
  1631. for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
  1632. L = L->getParentLoop()) {
  1633. if (SE.isLoopInvariant(S, L)) {
  1634. if (!L) break;
  1635. if (BasicBlock *Preheader = L->getLoopPreheader())
  1636. InsertPt = Preheader->getTerminator();
  1637. else
  1638. // LSR sets the insertion point for AddRec start/step values to the
  1639. // block start to simplify value reuse, even though it's an invalid
  1640. // position. SCEVExpander must correct for this in all cases.
  1641. InsertPt = &*L->getHeader()->getFirstInsertionPt();
  1642. } else {
  1643. // If the SCEV is computable at this level, insert it into the header
  1644. // after the PHIs (and after any other instructions that we've inserted
  1645. // there) so that it is guaranteed to dominate any user inside the loop.
  1646. if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
  1647. InsertPt = &*L->getHeader()->getFirstInsertionPt();
  1648. while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
  1649. (isInsertedInstruction(InsertPt) ||
  1650. isa<DbgInfoIntrinsic>(InsertPt))) {
  1651. InsertPt = &*std::next(InsertPt->getIterator());
  1652. }
  1653. break;
  1654. }
  1655. }
  1656. }
  1657. // Check to see if we already expanded this here.
  1658. auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
  1659. if (I != InsertedExpressions.end())
  1660. return I->second;
  1661. SCEVInsertPointGuard Guard(Builder, this);
  1662. Builder.SetInsertPoint(InsertPt);
  1663. // Expand the expression into instructions.
  1664. Value *V = FindValueInExprValueMap(S, InsertPt);
  1665. if (!V) {
  1666. V = visit(S);
  1667. V = fixupLCSSAFormFor(V);
  1668. } else {
  1669. // If we're reusing an existing instruction, we are effectively CSEing two
  1670. // copies of the instruction (with potentially different flags). As such,
  1671. // we need to drop any poison generating flags unless we can prove that
  1672. // said flags must be valid for all new users.
  1673. if (auto *I = dyn_cast<Instruction>(V))
  1674. if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I))
  1675. I->dropPoisonGeneratingFlags();
  1676. }
  1677. // Remember the expanded value for this SCEV at this location.
  1678. //
  1679. // This is independent of PostIncLoops. The mapped value simply materializes
  1680. // the expression at this insertion point. If the mapped value happened to be
  1681. // a postinc expansion, it could be reused by a non-postinc user, but only if
  1682. // its insertion point was already at the head of the loop.
  1683. InsertedExpressions[std::make_pair(S, InsertPt)] = V;
  1684. return V;
  1685. }
  1686. void SCEVExpander::rememberInstruction(Value *I) {
  1687. auto DoInsert = [this](Value *V) {
  1688. if (!PostIncLoops.empty())
  1689. InsertedPostIncValues.insert(V);
  1690. else
  1691. InsertedValues.insert(V);
  1692. };
  1693. DoInsert(I);
  1694. }
  1695. /// replaceCongruentIVs - Check for congruent phis in this loop header and
  1696. /// replace them with their most canonical representative. Return the number of
  1697. /// phis eliminated.
  1698. ///
  1699. /// This does not depend on any SCEVExpander state but should be used in
  1700. /// the same context that SCEVExpander is used.
  1701. unsigned
  1702. SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
  1703. SmallVectorImpl<WeakTrackingVH> &DeadInsts,
  1704. const TargetTransformInfo *TTI) {
  1705. // Find integer phis in order of increasing width.
  1706. SmallVector<PHINode*, 8> Phis;
  1707. for (PHINode &PN : L->getHeader()->phis())
  1708. Phis.push_back(&PN);
  1709. if (TTI)
  1710. // Use stable_sort to preserve order of equivalent PHIs, so the order
  1711. // of the sorted Phis is the same from run to run on the same loop.
  1712. llvm::stable_sort(Phis, [](Value *LHS, Value *RHS) {
  1713. // Put pointers at the back and make sure pointer < pointer = false.
  1714. if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
  1715. return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
  1716. return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
  1717. LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
  1718. });
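// E.g. with TTI available, phis of types {i32, i64, ptr} are visited as
// [i64, i32, ptr]: integer phis in decreasing width, pointer phis last.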
  1719. unsigned NumElim = 0;
  1720. DenseMap<const SCEV *, PHINode *> ExprToIVMap;
  1721. // Process phis from wide to narrow. Map wide phis to their truncation
  1722. // so narrow phis can reuse them.
  1723. for (PHINode *Phi : Phis) {
  1724. auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
  1725. if (Value *V = simplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
  1726. return V;
  1727. if (!SE.isSCEVable(PN->getType()))
  1728. return nullptr;
  1729. auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
  1730. if (!Const)
  1731. return nullptr;
  1732. return Const->getValue();
  1733. };
  1734. // Fold constant phis. They may be congruent to other constant phis and
  1735. // would confuse the logic below that expects proper IVs.
  1736. if (Value *V = SimplifyPHINode(Phi)) {
  1737. if (V->getType() != Phi->getType())
  1738. continue;
  1739. SE.forgetValue(Phi);
  1740. Phi->replaceAllUsesWith(V);
  1741. DeadInsts.emplace_back(Phi);
  1742. ++NumElim;
  1743. SCEV_DEBUG_WITH_TYPE(DebugType,
  1744. dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
  1745. << '\n');
  1746. continue;
  1747. }
  1748. if (!SE.isSCEVable(Phi->getType()))
  1749. continue;
  1750. PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
  1751. if (!OrigPhiRef) {
  1752. OrigPhiRef = Phi;
  1753. if (Phi->getType()->isIntegerTy() && TTI &&
  1754. TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
  1755. // This phi can be freely truncated to the narrowest phi type. Map the
  1756. // truncated expression to it so it will be reused for narrow types.
  1757. const SCEV *TruncExpr =
  1758. SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType());
  1759. ExprToIVMap[TruncExpr] = Phi;
  1760. }
  1761. continue;
  1762. }
  1763. // Replacing a pointer phi with an integer phi or vice-versa doesn't make
  1764. // sense.
  1765. if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
  1766. continue;
  1767. if (BasicBlock *LatchBlock = L->getLoopLatch()) {
  1768. Instruction *OrigInc = dyn_cast<Instruction>(
  1769. OrigPhiRef->getIncomingValueForBlock(LatchBlock));
  1770. Instruction *IsomorphicInc =
  1771. dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
  1772. if (OrigInc && IsomorphicInc) {
  1773. // If this phi has the same width but is more canonical, replace the
  1774. // original with it. As part of the "more canonical" determination,
  1775. // respect a prior decision to use an IV chain.
  1776. if (OrigPhiRef->getType() == Phi->getType() &&
  1777. !(ChainedPhis.count(Phi) ||
  1778. isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) &&
  1779. (ChainedPhis.count(Phi) ||
  1780. isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
  1781. std::swap(OrigPhiRef, Phi);
  1782. std::swap(OrigInc, IsomorphicInc);
  1783. }
  1784. // Replacing the congruent phi is sufficient because acyclic
  1785. // redundancy elimination, CSE/GVN, should handle the
  1786. // rest. However, once SCEV proves that a phi is congruent,
  1787. // it's often the head of an IV user cycle that is isomorphic
  1788. // with the original phi. It's worth eagerly cleaning up the
  1789. // common case of a single IV increment so that DeleteDeadPHIs
  1790. // can remove cycles that had postinc uses.
  1791. // Because we may potentially introduce a new use of OrigIV that didn't
  1792. // exist before at this point, its poison flags need readjustment.
  1793. const SCEV *TruncExpr =
  1794. SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType());
  1795. if (OrigInc != IsomorphicInc &&
  1796. TruncExpr == SE.getSCEV(IsomorphicInc) &&
  1797. SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) &&
  1798. hoistIVInc(OrigInc, IsomorphicInc, /*RecomputePoisonFlags*/ true)) {
  1799. SCEV_DEBUG_WITH_TYPE(
  1800. DebugType, dbgs() << "INDVARS: Eliminated congruent iv.inc: "
  1801. << *IsomorphicInc << '\n');
  1802. Value *NewInc = OrigInc;
  1803. if (OrigInc->getType() != IsomorphicInc->getType()) {
  1804. Instruction *IP = nullptr;
  1805. if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
  1806. IP = &*PN->getParent()->getFirstInsertionPt();
  1807. else
  1808. IP = OrigInc->getNextNode();
  1809. IRBuilder<> Builder(IP);
  1810. Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
  1811. NewInc = Builder.CreateTruncOrBitCast(
  1812. OrigInc, IsomorphicInc->getType(), IVName);
  1813. }
  1814. IsomorphicInc->replaceAllUsesWith(NewInc);
  1815. DeadInsts.emplace_back(IsomorphicInc);
  1816. }
  1817. }
  1818. }
  1819. SCEV_DEBUG_WITH_TYPE(DebugType,
  1820. dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
  1821. << '\n');
  1822. SCEV_DEBUG_WITH_TYPE(
  1823. DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
  1824. ++NumElim;
  1825. Value *NewIV = OrigPhiRef;
  1826. if (OrigPhiRef->getType() != Phi->getType()) {
  1827. IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt());
  1828. Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
  1829. NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
  1830. }
  1831. Phi->replaceAllUsesWith(NewIV);
  1832. DeadInsts.emplace_back(Phi);
  1833. }
  1834. return NumElim;
  1835. }
  1836. Value *SCEVExpander::getRelatedExistingExpansion(const SCEV *S,
  1837. const Instruction *At,
  1838. Loop *L) {
  1839. using namespace llvm::PatternMatch;
  1840. SmallVector<BasicBlock *, 4> ExitingBlocks;
  1841. L->getExitingBlocks(ExitingBlocks);
  1842. // Look for suitable value in simple conditions at the loop exits.
  1843. for (BasicBlock *BB : ExitingBlocks) {
  1844. ICmpInst::Predicate Pred;
  1845. Instruction *LHS, *RHS;
  1846. if (!match(BB->getTerminator(),
  1847. m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
  1848. m_BasicBlock(), m_BasicBlock())))
  1849. continue;
  1850. if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
  1851. return LHS;
  1852. if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
  1853. return RHS;
  1854. }
  1855. // Use expand's logic which is used for reusing a previous Value in
  1856. // ExprValueMap. Note that we don't currently model the cost of
  1857. // needing to drop poison generating flags on the instruction if we
  1858. // want to reuse it. We effectively assume that has zero cost.
  1859. return FindValueInExprValueMap(S, At);
  1860. }
  1861. template<typename T> static InstructionCost costAndCollectOperands(
  1862. const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
  1863. TargetTransformInfo::TargetCostKind CostKind,
  1864. SmallVectorImpl<SCEVOperand> &Worklist) {
  1865. const T *S = cast<T>(WorkItem.S);
  1866. InstructionCost Cost = 0;
  1867. // Object to help map SCEV operands to expanded IR instructions.
  1868. struct OperationIndices {
  1869. OperationIndices(unsigned Opc, size_t min, size_t max) :
  1870. Opcode(Opc), MinIdx(min), MaxIdx(max) { }
  1871. unsigned Opcode;
  1872. size_t MinIdx;
  1873. size_t MaxIdx;
  1874. };
  1875. // Collect the operations of all the instructions that will be needed to
  1876. // expand the SCEVExpr. This is so that when we come to cost the operands,
  1877. // we know what the generated user(s) will be.
  1878. SmallVector<OperationIndices, 2> Operations;
  1879. auto CastCost = [&](unsigned Opcode) -> InstructionCost {
  1880. Operations.emplace_back(Opcode, 0, 0);
  1881. return TTI.getCastInstrCost(Opcode, S->getType(),
  1882. S->getOperand(0)->getType(),
  1883. TTI::CastContextHint::None, CostKind);
  1884. };
  1885. auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
  1886. unsigned MinIdx = 0,
  1887. unsigned MaxIdx = 1) -> InstructionCost {
  1888. Operations.emplace_back(Opcode, MinIdx, MaxIdx);
  1889. return NumRequired *
  1890. TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind);
  1891. };
  1892. auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
  1893. unsigned MaxIdx) -> InstructionCost {
  1894. Operations.emplace_back(Opcode, MinIdx, MaxIdx);
  1895. Type *OpType = S->getType();
  1896. return NumRequired * TTI.getCmpSelInstrCost(
  1897. Opcode, OpType, CmpInst::makeCmpResultType(OpType),
  1898. CmpInst::BAD_ICMP_PREDICATE, CostKind);
  1899. };
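// Each helper above records an OperationIndices entry; the loop at the end of
// this function then pushes every SCEV operand onto the worklist tagged with
// that opcode and a clamped operand index (e.g. for an add, all operands after
// the first report index 1), which the immediate-cost query for constants in
// isHighCostExpansionHelper later consumes.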
  1900. switch (S->getSCEVType()) {
  1901. case scCouldNotCompute:
  1902. llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  1903. case scUnknown:
  1904. case scConstant:
  1905. return 0;
  1906. case scPtrToInt:
  1907. Cost = CastCost(Instruction::PtrToInt);
  1908. break;
  1909. case scTruncate:
  1910. Cost = CastCost(Instruction::Trunc);
  1911. break;
  1912. case scZeroExtend:
  1913. Cost = CastCost(Instruction::ZExt);
  1914. break;
  1915. case scSignExtend:
  1916. Cost = CastCost(Instruction::SExt);
  1917. break;
  1918. case scUDivExpr: {
  1919. unsigned Opcode = Instruction::UDiv;
  1920. if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
  1921. if (SC->getAPInt().isPowerOf2())
  1922. Opcode = Instruction::LShr;
  1923. Cost = ArithCost(Opcode, 1);
  1924. break;
  1925. }
  1926. case scAddExpr:
  1927. Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
  1928. break;
  1929. case scMulExpr:
  1930. // TODO: this is a very pessimistic cost modelling for Mul,
// because of the binary powering (Bin Pow) algorithm used by the expander,
  1932. // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
  1933. Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
  1934. break;
  1935. case scSMaxExpr:
  1936. case scUMaxExpr:
  1937. case scSMinExpr:
  1938. case scUMinExpr:
  1939. case scSequentialUMinExpr: {
// FIXME: should this ask for the cost of the min/max intrinsics instead?
  1941. // The reduction tree.
  1942. Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
  1943. Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
  1944. switch (S->getSCEVType()) {
  1945. case scSequentialUMinExpr: {
  1946. // The safety net against poison.
  1947. // FIXME: this is broken.
  1948. Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
  1949. Cost += ArithCost(Instruction::Or,
  1950. S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
  1951. Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
  1952. break;
  1953. }
  1954. default:
  1955. assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
  1956. "Unhandled SCEV expression type?");
  1957. break;
  1958. }
  1959. break;
  1960. }
  1961. case scAddRecExpr: {
// In this polynomial, we may have some zero operands, and we shouldn't
  1963. // really charge for those. So how many non-zero coefficients are there?
  1964. int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) {
  1965. return !Op->isZero();
  1966. });
assert(NumTerms >= 1 && "Polynomial should have at least one term.");
  1968. assert(!(*std::prev(S->operands().end()))->isZero() &&
  1969. "Last operand should not be zero");
  1970. // Ignoring constant term (operand 0), how many of the coefficients are u> 1?
  1971. int NumNonZeroDegreeNonOneTerms =
  1972. llvm::count_if(S->operands(), [](const SCEV *Op) {
  1973. auto *SConst = dyn_cast<SCEVConstant>(Op);
  1974. return !SConst || SConst->getAPInt().ugt(1);
  1975. });
// Much like with a normal add expr, the polynomial will require
// one less addition than the number of its terms.
  1978. InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1,
  1979. /*MinIdx*/ 1, /*MaxIdx*/ 1);
  1980. // Here, *each* one of those will require a multiplication.
  1981. InstructionCost MulCost =
  1982. ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
  1983. Cost = AddCost + MulCost;
// What is the degree of this polynomial?
  1985. int PolyDegree = S->getNumOperands() - 1;
  1986. assert(PolyDegree >= 1 && "Should be at least affine.");
  1987. // The final term will be:
  1988. // Op_{PolyDegree} * x ^ {PolyDegree}
  1989. // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
  1990. // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
  1991. // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
  1992. // FIXME: this is conservatively correct, but might be overly pessimistic.
  1993. Cost += MulCost * (PolyDegree - 1);
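// E.g. for {1,+,2,+,3} (degree 2) the model above charges 2 additions, 2
// multiplications for the non-unit coefficients, and 2 more multiplications
// for building the powers of x, deliberately erring on the pessimistic side.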
  1994. break;
  1995. }
  1996. }
  1997. for (auto &CostOp : Operations) {
  1998. for (auto SCEVOp : enumerate(S->operands())) {
  1999. // Clamp the index to account for multiple IR operations being chained.
  2000. size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
  2001. size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
  2002. Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
  2003. }
  2004. }
  2005. return Cost;
  2006. }
  2007. bool SCEVExpander::isHighCostExpansionHelper(
  2008. const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
  2009. InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
  2010. SmallPtrSetImpl<const SCEV *> &Processed,
  2011. SmallVectorImpl<SCEVOperand> &Worklist) {
  2012. if (Cost > Budget)
  2013. return true; // Already run out of budget, give up.
  2014. const SCEV *S = WorkItem.S;
  2015. // Was the cost of expansion of this expression already accounted for?
  2016. if (!isa<SCEVConstant>(S) && !Processed.insert(S).second)
  2017. return false; // We have already accounted for this expression.
  2018. // If we can find an existing value for this scev available at the point "At"
  2019. // then consider the expression cheap.
  2020. if (getRelatedExistingExpansion(S, &At, L))
  2021. return false; // Consider the expression to be free.
  2022. TargetTransformInfo::TargetCostKind CostKind =
  2023. L->getHeader()->getParent()->hasMinSize()
  2024. ? TargetTransformInfo::TCK_CodeSize
  2025. : TargetTransformInfo::TCK_RecipThroughput;
  2026. switch (S->getSCEVType()) {
  2027. case scCouldNotCompute:
  2028. llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  2029. case scUnknown:
  2030. // Assume to be zero-cost.
  2031. return false;
  2032. case scConstant: {
// Only evaluate the costs of constants when optimizing for size.
  2034. if (CostKind != TargetTransformInfo::TCK_CodeSize)
  2035. return false;
  2036. const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
  2037. Type *Ty = S->getType();
  2038. Cost += TTI.getIntImmCostInst(
  2039. WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind);
  2040. return Cost > Budget;
  2041. }
  2042. case scTruncate:
  2043. case scPtrToInt:
  2044. case scZeroExtend:
  2045. case scSignExtend: {
  2046. Cost +=
  2047. costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
  2048. return false; // Will answer upon next entry into this function.
  2049. }
  2050. case scUDivExpr: {
  2051. // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
  2052. // HowManyLessThans produced to compute a precise expression, rather than a
  2053. // UDiv from the user's code. If we can't find a UDiv in the code with some
// simple searching, we need to account for its cost.
  2055. // At the beginning of this function we already tried to find existing
  2056. // value for plain 'S'. Now try to lookup 'S + 1' since it is common
  2057. // pattern involving division. This is just a simple search heuristic.
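// E.g. an exit count of (%n /u 4) frequently coexists in the IR with the trip
// count ((%n /u 4) + 1), so looking for S + 1 often finds a reusable value.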
  2058. if (getRelatedExistingExpansion(
  2059. SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
  2060. return false; // Consider it to be free.
  2061. Cost +=
  2062. costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
  2063. return false; // Will answer upon next entry into this function.
  2064. }
  2065. case scAddExpr:
  2066. case scMulExpr:
  2067. case scUMaxExpr:
  2068. case scSMaxExpr:
  2069. case scUMinExpr:
  2070. case scSMinExpr:
  2071. case scSequentialUMinExpr: {
  2072. assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
  2073. "Nary expr should have more than 1 operand.");
  2074. // The simple nary expr will require one less op (or pair of ops)
// than the number of its terms.
  2076. Cost +=
  2077. costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
  2078. return Cost > Budget;
  2079. }
  2080. case scAddRecExpr: {
  2081. assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
  2082. "Polynomial should be at least linear");
  2083. Cost += costAndCollectOperands<SCEVAddRecExpr>(
  2084. WorkItem, TTI, CostKind, Worklist);
  2085. return Cost > Budget;
  2086. }
  2087. }
  2088. llvm_unreachable("Unknown SCEV kind!");
  2089. }
  2090. Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
  2091. Instruction *IP) {
  2092. assert(IP);
  2093. switch (Pred->getKind()) {
  2094. case SCEVPredicate::P_Union:
  2095. return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
  2096. case SCEVPredicate::P_Compare:
  2097. return expandComparePredicate(cast<SCEVComparePredicate>(Pred), IP);
  2098. case SCEVPredicate::P_Wrap: {
  2099. auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
  2100. return expandWrapPredicate(AddRecPred, IP);
  2101. }
  2102. }
  2103. llvm_unreachable("Unknown SCEV predicate type");
  2104. }
  2105. Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
  2106. Instruction *IP) {
  2107. Value *Expr0 =
  2108. expandCodeForImpl(Pred->getLHS(), Pred->getLHS()->getType(), IP);
  2109. Value *Expr1 =
  2110. expandCodeForImpl(Pred->getRHS(), Pred->getRHS()->getType(), IP);
  2111. Builder.SetInsertPoint(IP);
  2112. auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate());
  2113. auto *I = Builder.CreateICmp(InvPred, Expr0, Expr1, "ident.check");
  2114. return I;
  2115. }
  2116. Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
  2117. Instruction *Loc, bool Signed) {
  2118. assert(AR->isAffine() && "Cannot generate RT check for "
  2119. "non-affine expression");
  2120. // FIXME: It is highly suspicious that we're ignoring the predicates here.
  2121. SmallVector<const SCEVPredicate *, 4> Pred;
  2122. const SCEV *ExitCount =
  2123. SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);
  2124. assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
  2125. const SCEV *Step = AR->getStepRecurrence(SE);
  2126. const SCEV *Start = AR->getStart();
  2127. Type *ARTy = AR->getType();
  2128. unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
  2129. unsigned DstBits = SE.getTypeSizeInBits(ARTy);
  2130. // The expression {Start,+,Step} has nusw/nssw if
  2131. // Step < 0, Start - |Step| * Backedge <= Start
  2132. // Step >= 0, Start + |Step| * Backedge > Start
  2133. // and |Step| * Backedge doesn't unsigned overflow.
  2134. IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits);
  2135. Builder.SetInsertPoint(Loc);
  2136. Value *TripCountVal = expandCodeForImpl(ExitCount, CountTy, Loc);
  2137. IntegerType *Ty =
  2138. IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
  2139. Value *StepValue = expandCodeForImpl(Step, Ty, Loc);
  2140. Value *NegStepValue =
  2141. expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc);
  2142. Value *StartValue = expandCodeForImpl(Start, ARTy, Loc);
  2143. ConstantInt *Zero =
  2144. ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));
  2145. Builder.SetInsertPoint(Loc);
  2146. // Compute |Step|
  2147. Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
  2148. Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
  2149. // Compute |Step| * Backedge
  2150. // Compute:
  2151. // 1. Start + |Step| * Backedge < Start
  2152. // 2. Start - |Step| * Backedge > Start
  2153. //
  2154. // And select either 1. or 2. depending on whether step is positive or
  2155. // negative. If Step is known to be positive or negative, only create
  2156. // either 1. or 2.
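// E.g. with a known-positive step only check 1. is emitted: a single
// "Start + |Step| * Backedge < Start" comparison, OR'ed with the overflow bit
// of the multiply itself.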
  2157. auto ComputeEndCheck = [&]() -> Value * {
  2158. // Checking <u 0 is always false.
  2159. if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
  2160. return ConstantInt::getFalse(Loc->getContext());
// Get the backedge-taken count and truncate or extend it to the AR type.
  2162. Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
  2163. Value *MulV, *OfMul;
  2164. if (Step->isOne()) {
  2165. // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
// needed; there is never an overflow, so to avoid artificially inflating
  2167. // the cost of the check, directly emit the optimized IR.
  2168. MulV = TruncTripCount;
  2169. OfMul = ConstantInt::getFalse(MulV->getContext());
  2170. } else {
  2171. auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
  2172. Intrinsic::umul_with_overflow, Ty);
  2173. CallInst *Mul =
  2174. Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
  2175. MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
  2176. OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
  2177. }
  2178. Value *Add = nullptr, *Sub = nullptr;
  2179. bool NeedPosCheck = !SE.isKnownNegative(Step);
  2180. bool NeedNegCheck = !SE.isKnownPositive(Step);
  2181. if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
  2182. StartValue = InsertNoopCastOfTo(
  2183. StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
  2184. Value *NegMulV = Builder.CreateNeg(MulV);
  2185. if (NeedPosCheck)
  2186. Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
  2187. if (NeedNegCheck)
  2188. Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
  2189. } else {
  2190. if (NeedPosCheck)
  2191. Add = Builder.CreateAdd(StartValue, MulV);
  2192. if (NeedNegCheck)
  2193. Sub = Builder.CreateSub(StartValue, MulV);
  2194. }
  2195. Value *EndCompareLT = nullptr;
  2196. Value *EndCompareGT = nullptr;
  2197. Value *EndCheck = nullptr;
  2198. if (NeedPosCheck)
  2199. EndCheck = EndCompareLT = Builder.CreateICmp(
  2200. Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
  2201. if (NeedNegCheck)
  2202. EndCheck = EndCompareGT = Builder.CreateICmp(
  2203. Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
  2204. if (NeedPosCheck && NeedNegCheck) {
  2205. // Select the answer based on the sign of Step.
  2206. EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
  2207. }
  2208. return Builder.CreateOr(EndCheck, OfMul);
  2209. };
  2210. Value *EndCheck = ComputeEndCheck();
  2211. // If the backedge taken count type is larger than the AR type,
  2212. // check that we don't drop any bits by truncating it. If we are
  2213. // dropping bits, then we have overflow (unless the step is zero).
  2214. if (SE.getTypeSizeInBits(CountTy) > SE.getTypeSizeInBits(Ty)) {
  2215. auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
  2216. auto *BackedgeCheck =
  2217. Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
  2218. ConstantInt::get(Loc->getContext(), MaxVal));
  2219. BackedgeCheck = Builder.CreateAnd(
  2220. BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));
  2221. EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
  2222. }
  2223. return EndCheck;
  2224. }
  2225. Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
  2226. Instruction *IP) {
  2227. const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
  2228. Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
  2229. // Add a check for NUSW
  2230. if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
  2231. NUSWCheck = generateOverflowCheck(A, IP, false);
  2232. // Add a check for NSSW
  2233. if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
  2234. NSSWCheck = generateOverflowCheck(A, IP, true);
  2235. if (NUSWCheck && NSSWCheck)
  2236. return Builder.CreateOr(NUSWCheck, NSSWCheck);
  2237. if (NUSWCheck)
  2238. return NUSWCheck;
  2239. if (NSSWCheck)
  2240. return NSSWCheck;
  2241. return ConstantInt::getFalse(IP->getContext());
  2242. }

Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
                                          Instruction *IP) {
  // Loop over all checks in this set.
  SmallVector<Value *> Checks;
  for (const auto *Pred : Union->getPredicates()) {
    Checks.push_back(expandCodeForPredicate(Pred, IP));
    Builder.SetInsertPoint(IP);
  }

  if (Checks.empty())
    return ConstantInt::getFalse(IP->getContext());
  return Builder.CreateOr(Checks);
}
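
// Illustrative note (value names are placeholders): for a union made of, say,
// a wrap predicate and one other predicate, the loop above expands each member
// and the result is their disjunction, conceptually:
//
//   %c0 = <check for predicate 0>
//   %c1 = <check for predicate 1>
//   %union.check = or i1 %c0, %c1   ; true if any member predicate may fail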

Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
  auto *DefI = dyn_cast<Instruction>(V);
  if (!PreserveLCSSA || !DefI)
    return V;

  Instruction *InsertPt = &*Builder.GetInsertPoint();
  Loop *DefLoop = SE.LI.getLoopFor(DefI->getParent());
  Loop *UseLoop = SE.LI.getLoopFor(InsertPt->getParent());
  if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop))
    return V;

  // Create a temporary instruction at the current insertion point, so we can
  // hand it off to the helper to create LCSSA PHIs if required for the new
  // use.
  // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
  // would accept an insertion point and return an LCSSA phi for that
  // insertion point, so there is no need to insert & remove the temporary
  // instruction.
  Type *ToTy;
  if (DefI->getType()->isIntegerTy())
    ToTy = DefI->getType()->getPointerTo();
  else
    ToTy = Type::getInt32Ty(DefI->getContext());
  Instruction *User =
      CastInst::CreateBitOrPointerCast(DefI, ToTy, "tmp.lcssa.user", InsertPt);
  auto RemoveUserOnExit =
      make_scope_exit([User]() { User->eraseFromParent(); });

  SmallVector<Instruction *, 1> ToUpdate;
  ToUpdate.push_back(DefI);
  SmallVector<PHINode *, 16> PHIsToRemove;
  formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, Builder, &PHIsToRemove);
  for (PHINode *PN : PHIsToRemove) {
    if (!PN->use_empty())
      continue;
    InsertedValues.erase(PN);
    InsertedPostIncValues.erase(PN);
    PN->eraseFromParent();
  }

  return User->getOperand(0);
}
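
// Illustrative sketch (block and value names are placeholders): if %def is
// produced inside a loop and the expander needs it at an insertion point
// outside that loop, formLCSSAForInstructions reroutes the out-of-loop use
// through a phi in the loop's exit block:
//
//   loop:
//     %def = add i32 %iv, 1
//     br i1 %cond, label %loop, label %exit
//   exit:
//     %def.lcssa = phi i32 [ %def, %loop ]
//     ; expanded code now uses %def.lcssa rather than %def directly
//
// The temporary cast created above only exists to give the helper a use at
// the desired insertion point; the scope_exit removes it again before this
// function returns.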

namespace {
// Search for a SCEV subexpression that is not safe to expand. Any expression
// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
// UDiv expressions. We don't know if the UDiv is derived from an IR divide
// instruction, but the important thing is that we prove the denominator is
// nonzero before expansion.
//
// IVUsers already checks that IV-derived expressions are safe. So this check
// is only needed when the expression includes some subexpression that is not
// IV derived.
//
// Currently, we only allow division by a value provably non-zero here.
//
// We cannot generally expand recurrences unless the step dominates the loop
// header. The expander handles the special case of affine recurrences by
// scaling the recurrence outside the loop, but this technique isn't generally
// applicable. Expanding a nested recurrence outside a loop requires computing
// binomial coefficients. This could be done, but the recurrence has to be in a
// perfectly reduced form, which can't be guaranteed.
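//
// For example (an illustrative case; %a and %b are placeholder IR values):
// the expression (%a /u %b) is rejected unless ScalarEvolution can prove
// %b != 0, since the udiv it would expand to has undefined behavior on a zero
// denominator if hoisted to a point that executes unconditionally.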
struct SCEVFindUnsafe {
  ScalarEvolution &SE;
  bool CanonicalMode;
  bool IsUnsafe = false;

  SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
      : SE(SE), CanonicalMode(CanonicalMode) {}

  bool follow(const SCEV *S) {
    if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
      if (!SE.isKnownNonZero(D->getRHS())) {
        IsUnsafe = true;
        return false;
      }
    }
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
      const SCEV *Step = AR->getStepRecurrence(SE);
      if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
        IsUnsafe = true;
        return false;
      }

      // For non-affine addrecs or in non-canonical mode we need a preheader
      // to insert into.
      if (!AR->getLoop()->getLoopPreheader() &&
          (!CanonicalMode || !AR->isAffine())) {
        IsUnsafe = true;
        return false;
      }
    }
    return true;
  }
  bool isDone() const { return IsUnsafe; }
};
} // namespace

bool SCEVExpander::isSafeToExpand(const SCEV *S) const {
  SCEVFindUnsafe Search(SE, CanonicalMode);
  visitAll(S, Search);
  return !Search.IsUnsafe;
}

bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
                                    const Instruction *InsertionPoint) const {
  if (!isSafeToExpand(S))
    return false;
  // We have to prove that the expanded site of S dominates InsertionPoint.
  // This is easy when not in the same block, but hard when S is an instruction
  // to be expanded somewhere inside the same block as our insertion point.
  // What we really need here is something analogous to an OrderedBasicBlock,
  // but for the moment, we paper over the problem by handling two cases that
  // are common and cheap to check.
  if (SE.properlyDominates(S, InsertionPoint->getParent()))
    return true;
  if (SE.dominates(S, InsertionPoint->getParent())) {
    if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
      return true;
    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
      if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue()))
        return true;
  }
  return false;
}
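
// Illustrative example (names are placeholders): if S is the SCEVUnknown for
// a value %x and the insertion point is an instruction in the same block that
// already uses %x as an operand, e.g.
//
//   %y = add i32 %x, 1   ; the insertion point
//
// then %x must dominate the insertion point, which is the second cheap case
// handled above.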

void SCEVExpanderCleaner::cleanup() {
  // Result is used, nothing to remove.
  if (ResultUsed)
    return;

  auto InsertedInstructions = Expander.getAllInsertedInstructions();
#ifndef NDEBUG
  SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(),
                                            InsertedInstructions.end());
  (void)InsertedSet;
#endif
  // Remove sets with value handles.
  Expander.clear();

  // Remove all inserted instructions.
  for (Instruction *I : reverse(InsertedInstructions)) {
#ifndef NDEBUG
    assert(all_of(I->users(),
                  [&InsertedSet](Value *U) {
                    return InsertedSet.contains(cast<Instruction>(U));
                  }) &&
           "removed instruction should only be used by instructions inserted "
           "during expansion");
#endif
    assert(!I->getType()->isVoidTy() &&
           "inserted instruction should have a non-void type");
    I->replaceAllUsesWith(PoisonValue::get(I->getType()));
    I->eraseFromParent();
  }
}
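
// Illustrative usage sketch (variable names, the helper transformSucceeded,
// and the exact constructor signatures are assumptions for this example): a
// client typically pairs the expander with a cleaner so that speculatively
// expanded instructions are erased if the transform bails out:
//
//   SCEVExpander Expander(SE, DL, "scev");
//   SCEVExpanderCleaner Cleaner(Expander);
//   Value *V = Expander.expandCodeFor(S, Ty, InsertPt);
//   if (!transformSucceeded(V))
//     return false;              // ~SCEVExpanderCleaner() runs cleanup()
//   Cleaner.markResultUsed();    // keep the expansion; cleanup() becomes a no-op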