  1. //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements the visit functions for load, store and alloca.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "InstCombineInternal.h"
  13. #include "llvm/ADT/MapVector.h"
  14. #include "llvm/ADT/SmallString.h"
  15. #include "llvm/ADT/Statistic.h"
  16. #include "llvm/Analysis/AliasAnalysis.h"
  17. #include "llvm/Analysis/Loads.h"
  18. #include "llvm/IR/DataLayout.h"
  19. #include "llvm/IR/DebugInfoMetadata.h"
  20. #include "llvm/IR/IntrinsicInst.h"
  21. #include "llvm/IR/LLVMContext.h"
  22. #include "llvm/IR/PatternMatch.h"
  23. #include "llvm/Transforms/InstCombine/InstCombiner.h"
  24. #include "llvm/Transforms/Utils/Local.h"
  25. using namespace llvm;
  26. using namespace PatternMatch;
  27. #define DEBUG_TYPE "instcombine"
  28. STATISTIC(NumDeadStore, "Number of dead stores eliminated");
  29. STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
  30. static cl::opt<unsigned> MaxCopiedFromConstantUsers(
  31. "instcombine-max-copied-from-constant-users", cl::init(128),
  32. cl::desc("Maximum users to visit in copy from constant transform"),
  33. cl::Hidden);
  34. /// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived)
  35. /// pointer to an alloca. Ignore any reads of the pointer, return false if we
  36. /// see any stores or other unknown uses. If we see pointer arithmetic, keep
  37. /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
  38. /// the uses. If we see a memcpy/memmove that targets an un-offset pointer to
  39. /// the alloca, and if the source pointer is a pointer to a constant memory
  40. /// location, we can optimize this.
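/// A rough sketch (hypothetical, simplified IR) of the pattern of interest:
///
///   %buf = alloca [4 x i32], align 4
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %buf.cast, i8* %src,
///                                        i64 16, i1 false)
///   %v = load i32, i32* %elt.ptr   ; only reads of %buf after the copy
///
/// If %src points into constant memory (e.g. a constant global), reads of
/// %buf can be redirected to the source and the alloca plus memcpy removed.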
  41. static bool
  42. isOnlyCopiedFromConstantMemory(AAResults *AA, AllocaInst *V,
  43. MemTransferInst *&TheCopy,
  44. SmallVectorImpl<Instruction *> &ToDelete) {
  45. // We track lifetime intrinsics as we encounter them. If we decide to go
  46. // ahead and replace the value with the memory location, this lets the caller
  47. // quickly eliminate the markers.
  48. using ValueAndIsOffset = PointerIntPair<Value *, 1, bool>;
  49. SmallVector<ValueAndIsOffset, 32> Worklist;
  50. SmallPtrSet<ValueAndIsOffset, 32> Visited;
  51. Worklist.emplace_back(V, false);
  52. while (!Worklist.empty()) {
  53. ValueAndIsOffset Elem = Worklist.pop_back_val();
  54. if (!Visited.insert(Elem).second)
  55. continue;
  56. if (Visited.size() > MaxCopiedFromConstantUsers)
  57. return false;
  58. const auto [Value, IsOffset] = Elem;
  59. for (auto &U : Value->uses()) {
  60. auto *I = cast<Instruction>(U.getUser());
  61. if (auto *LI = dyn_cast<LoadInst>(I)) {
  62. // Non-simple (volatile or atomic) loads are rejected; simple loads are always ok.
  63. if (!LI->isSimple()) return false;
  64. continue;
  65. }
  66. if (isa<PHINode, SelectInst>(I)) {
  67. // We set IsOffset=true to forbid the memcpy from occurring after the
  68. // phi: If one of the phi operands is not based on the alloca, we
  69. // would incorrectly omit a write.
  70. Worklist.emplace_back(I, true);
  71. continue;
  72. }
  73. if (isa<BitCastInst, AddrSpaceCastInst>(I)) {
  74. // If uses of the bitcast are ok, we are ok.
  75. Worklist.emplace_back(I, IsOffset);
  76. continue;
  77. }
  78. if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
  79. // A GEP with all-zero indices doesn't offset the pointer; any other GEP
  80. // does.
  81. Worklist.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
  82. continue;
  83. }
  84. if (auto *Call = dyn_cast<CallBase>(I)) {
  85. // If this is the function being called then we treat it like a load and
  86. // ignore it.
  87. if (Call->isCallee(&U))
  88. continue;
  89. unsigned DataOpNo = Call->getDataOperandNo(&U);
  90. bool IsArgOperand = Call->isArgOperand(&U);
  91. // Inalloca arguments are clobbered by the call.
  92. if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
  93. return false;
  94. // If this call site doesn't modify the memory, then we know it is just
  95. // a load (but one that potentially returns the value itself), so we can
  96. // ignore it if we know that the value isn't captured.
  97. bool NoCapture = Call->doesNotCapture(DataOpNo);
  98. if ((Call->onlyReadsMemory() && (Call->use_empty() || NoCapture)) ||
  99. (Call->onlyReadsMemory(DataOpNo) && NoCapture))
  100. continue;
  101. // If this is being passed as a byval argument, the caller is making a
  102. // copy, so it is only a read of the alloca.
  103. if (IsArgOperand && Call->isByValArgument(DataOpNo))
  104. continue;
  105. }
  106. // Lifetime intrinsics can be handled by the caller.
  107. if (I->isLifetimeStartOrEnd()) {
  108. assert(I->use_empty() && "Lifetime markers have no result to use!");
  109. ToDelete.push_back(I);
  110. continue;
  111. }
  112. // If this isn't our memcpy/memmove, reject it as something we can't
  113. // handle.
  114. MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
  115. if (!MI)
  116. return false;
  117. // If the transfer is volatile, reject it.
  118. if (MI->isVolatile())
  119. return false;
  120. // If the transfer is using the alloca as a source of the transfer, then
  121. // ignore it since it is a load (unless the transfer is volatile).
  122. if (U.getOperandNo() == 1)
  123. continue;
  124. // If we already have seen a copy, reject the second one.
  125. if (TheCopy) return false;
  126. // If the pointer has been offset from the start of the alloca, we can't
  127. // safely handle this.
  128. if (IsOffset) return false;
  129. // If the memintrinsic isn't using the alloca as the dest, reject it.
  130. if (U.getOperandNo() != 0) return false;
  131. // If the source of the memcpy/move is not constant, reject it.
  132. if (isModSet(AA->getModRefInfoMask(MI->getSource())))
  133. return false;
  134. // Otherwise, the transform is safe. Remember the copy instruction.
  135. TheCopy = MI;
  136. }
  137. }
  138. return true;
  139. }
  140. /// isOnlyCopiedFromConstantMemory - Return true if the specified alloca is only
  141. /// modified by a copy from a constant memory location. If we can prove this, we
  142. /// can replace any uses of the alloca with uses of the memory location
  143. /// directly.
  144. static MemTransferInst *
  145. isOnlyCopiedFromConstantMemory(AAResults *AA,
  146. AllocaInst *AI,
  147. SmallVectorImpl<Instruction *> &ToDelete) {
  148. MemTransferInst *TheCopy = nullptr;
  149. if (isOnlyCopiedFromConstantMemory(AA, AI, TheCopy, ToDelete))
  150. return TheCopy;
  151. return nullptr;
  152. }
  153. /// Returns true if V is dereferenceable for the size of the alloca.
  154. static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
  155. const DataLayout &DL) {
  156. if (AI->isArrayAllocation())
  157. return false;
  158. uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
  159. if (!AllocaSize)
  160. return false;
  161. return isDereferenceableAndAlignedPointer(V, AI->getAlign(),
  162. APInt(64, AllocaSize), DL);
  163. }
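// simplifyAllocaArraySize canonicalizes the array-size operand of an alloca.
// A rough sketch (hypothetical IR; names are illustrative) of the constant
// array case handled below:
//
//   %a = alloca i32, i32 4
// becomes
//   %a1 = alloca [4 x i32]
//   %a1.sub = getelementptr inbounds [4 x i32], [4 x i32]* %a1, i64 0, i64 0
//
// with all uses of %a rewritten to use %a1.sub.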
  164. static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
  165. AllocaInst &AI, DominatorTree &DT) {
  166. // Check for array size of 1 (scalar allocation).
  167. if (!AI.isArrayAllocation()) {
  168. // i32 1 is the canonical array size for scalar allocations.
  169. if (AI.getArraySize()->getType()->isIntegerTy(32))
  170. return nullptr;
  171. // Canonicalize it.
  172. return IC.replaceOperand(AI, 0, IC.Builder.getInt32(1));
  173. }
  174. // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
  175. if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
  176. if (C->getValue().getActiveBits() <= 64) {
  177. Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
  178. AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(),
  179. nullptr, AI.getName());
  180. New->setAlignment(AI.getAlign());
  181. replaceAllDbgUsesWith(AI, *New, *New, DT);
  182. // Scan to the end of the allocation instructions to skip over a block of
  183. // allocas if possible; also skip interleaved debug info.
  184. //
  185. BasicBlock::iterator It(New);
  186. while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
  187. ++It;
  188. // Now that It points to the first non-allocation instruction in the block,
  189. // insert our getelementptr instruction...
  190. //
  191. Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
  192. Value *NullIdx = Constant::getNullValue(IdxTy);
  193. Value *Idx[2] = {NullIdx, NullIdx};
  194. Instruction *GEP = GetElementPtrInst::CreateInBounds(
  195. NewTy, New, Idx, New->getName() + ".sub");
  196. IC.InsertNewInstBefore(GEP, *It);
  197. // Now make everything use the getelementptr instead of the original
  198. // allocation.
  199. return IC.replaceInstUsesWith(AI, GEP);
  200. }
  201. }
  202. if (isa<UndefValue>(AI.getArraySize()))
  203. return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
  204. // Ensure that the alloca array size argument has type intptr_t, so that
  205. // any casting is exposed early.
  206. Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
  207. if (AI.getArraySize()->getType() != IntPtrTy) {
  208. Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false);
  209. return IC.replaceOperand(AI, 0, V);
  210. }
  211. return nullptr;
  212. }
  213. namespace {
  214. // If I and V are pointers in different address spaces, it is not allowed to
  215. // use replaceAllUsesWith since I and V have different types. A
  216. // non-target-specific transformation should not use addrspacecast on V since
  217. // the two address spaces may be disjoint depending on the target.
  218. //
  219. // This class chases down uses of the old pointer until reaching the load
  220. // instructions, then replaces the old pointer in the load instructions with
  221. // the new pointer. If during the chasing it sees bitcast or GEP, it will
  222. // create new bitcast or GEP with the new pointer and use them in the load
  223. // instruction.
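// For illustration only (hypothetical IR; the address-space numbers are
// arbitrary), the situation handled here is roughly:
//
//   %a = alloca [4 x float], align 4, addrspace(5)
//   ; %a is only initialized by a copy from a constant global in addrspace(4)
//   %p = getelementptr [4 x float], [4 x float] addrspace(5)* %a, i64 0, i64 1
//   %v = load float, float addrspace(5)* %p
//
// The alloca cannot simply be RAUW'd with the addrspace(4) global because the
// pointer types differ, so the GEP and load are recreated against the new
// pointer instead.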
  224. class PointerReplacer {
  225. public:
  226. PointerReplacer(InstCombinerImpl &IC, Instruction &Root)
  227. : IC(IC), Root(Root) {}
  228. bool collectUsers();
  229. void replacePointer(Value *V);
  230. private:
  231. bool collectUsersRecursive(Instruction &I);
  232. void replace(Instruction *I);
  233. Value *getReplacement(Value *I);
  234. bool isAvailable(Instruction *I) const {
  235. return I == &Root || Worklist.contains(I);
  236. }
  237. SmallPtrSet<Instruction *, 32> ValuesToRevisit;
  238. SmallSetVector<Instruction *, 4> Worklist;
  239. MapVector<Value *, Value *> WorkMap;
  240. InstCombinerImpl &IC;
  241. Instruction &Root;
  242. };
  243. } // end anonymous namespace
  244. bool PointerReplacer::collectUsers() {
  245. if (!collectUsersRecursive(Root))
  246. return false;
  247. // Ensure that all outstanding (indirect) users of the root
  248. // pointer are inserted into the Worklist. Return false
  249. // otherwise.
  250. for (auto *Inst : ValuesToRevisit)
  251. if (!Worklist.contains(Inst))
  252. return false;
  253. return true;
  254. }
  255. bool PointerReplacer::collectUsersRecursive(Instruction &I) {
  256. for (auto *U : I.users()) {
  257. auto *Inst = cast<Instruction>(&*U);
  258. if (auto *Load = dyn_cast<LoadInst>(Inst)) {
  259. if (Load->isVolatile())
  260. return false;
  261. Worklist.insert(Load);
  262. } else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
  263. // All incoming values must be instructions for replaceability.
  264. if (any_of(PHI->incoming_values(),
  265. [](Value *V) { return !isa<Instruction>(V); }))
  266. return false;
  267. // If at least one incoming value of the PHI is not in Worklist,
  268. // store the PHI for revisiting and skip this iteration of the
  269. // loop.
  270. if (any_of(PHI->incoming_values(), [this](Value *V) {
  271. return !isAvailable(cast<Instruction>(V));
  272. })) {
  273. ValuesToRevisit.insert(Inst);
  274. continue;
  275. }
  276. Worklist.insert(PHI);
  277. if (!collectUsersRecursive(*PHI))
  278. return false;
  279. } else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
  280. if (!isa<Instruction>(SI->getTrueValue()) ||
  281. !isa<Instruction>(SI->getFalseValue()))
  282. return false;
  283. if (!isAvailable(cast<Instruction>(SI->getTrueValue())) ||
  284. !isAvailable(cast<Instruction>(SI->getFalseValue()))) {
  285. ValuesToRevisit.insert(Inst);
  286. continue;
  287. }
  288. Worklist.insert(SI);
  289. if (!collectUsersRecursive(*SI))
  290. return false;
  291. } else if (isa<GetElementPtrInst, BitCastInst>(Inst)) {
  292. Worklist.insert(Inst);
  293. if (!collectUsersRecursive(*Inst))
  294. return false;
  295. } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
  296. if (MI->isVolatile())
  297. return false;
  298. Worklist.insert(Inst);
  299. } else if (Inst->isLifetimeStartOrEnd()) {
  300. continue;
  301. } else {
  302. LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
  303. return false;
  304. }
  305. }
  306. return true;
  307. }
  308. Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); }
  309. void PointerReplacer::replace(Instruction *I) {
  310. if (getReplacement(I))
  311. return;
  312. if (auto *LT = dyn_cast<LoadInst>(I)) {
  313. auto *V = getReplacement(LT->getPointerOperand());
  314. assert(V && "Operand not replaced");
  315. auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
  316. LT->getAlign(), LT->getOrdering(),
  317. LT->getSyncScopeID());
  318. NewI->takeName(LT);
  319. copyMetadataForLoad(*NewI, *LT);
  320. IC.InsertNewInstWith(NewI, *LT);
  321. IC.replaceInstUsesWith(*LT, NewI);
  322. WorkMap[LT] = NewI;
  323. } else if (auto *PHI = dyn_cast<PHINode>(I)) {
  324. Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType();
  325. auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
  326. PHI->getName(), PHI);
  327. for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I)
  328. NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(I)),
  329. PHI->getIncomingBlock(I));
  330. WorkMap[PHI] = NewPHI;
  331. } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
  332. auto *V = getReplacement(GEP->getPointerOperand());
  333. assert(V && "Operand not replaced");
  334. SmallVector<Value *, 8> Indices;
  335. Indices.append(GEP->idx_begin(), GEP->idx_end());
  336. auto *NewI =
  337. GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices);
  338. IC.InsertNewInstWith(NewI, *GEP);
  339. NewI->takeName(GEP);
  340. WorkMap[GEP] = NewI;
  341. } else if (auto *BC = dyn_cast<BitCastInst>(I)) {
  342. auto *V = getReplacement(BC->getOperand(0));
  343. assert(V && "Operand not replaced");
  344. auto *NewT = PointerType::getWithSamePointeeType(
  345. cast<PointerType>(BC->getType()),
  346. V->getType()->getPointerAddressSpace());
  347. auto *NewI = new BitCastInst(V, NewT);
  348. IC.InsertNewInstWith(NewI, *BC);
  349. NewI->takeName(BC);
  350. WorkMap[BC] = NewI;
  351. } else if (auto *SI = dyn_cast<SelectInst>(I)) {
  352. auto *NewSI = SelectInst::Create(
  353. SI->getCondition(), getReplacement(SI->getTrueValue()),
  354. getReplacement(SI->getFalseValue()), SI->getName(), nullptr, SI);
  355. IC.InsertNewInstWith(NewSI, *SI);
  356. NewSI->takeName(SI);
  357. WorkMap[SI] = NewSI;
  358. } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
  359. auto *SrcV = getReplacement(MemCpy->getRawSource());
  360. // The pointer may appear in the destination of a copy, but we don't want to
  361. // replace it.
  362. if (!SrcV) {
  363. assert(getReplacement(MemCpy->getRawDest()) &&
  364. "destination not in replace list");
  365. return;
  366. }
  367. IC.Builder.SetInsertPoint(MemCpy);
  368. auto *NewI = IC.Builder.CreateMemTransferInst(
  369. MemCpy->getIntrinsicID(), MemCpy->getRawDest(), MemCpy->getDestAlign(),
  370. SrcV, MemCpy->getSourceAlign(), MemCpy->getLength(),
  371. MemCpy->isVolatile());
  372. AAMDNodes AAMD = MemCpy->getAAMetadata();
  373. if (AAMD)
  374. NewI->setAAMetadata(AAMD);
  375. IC.eraseInstFromFunction(*MemCpy);
  376. WorkMap[MemCpy] = NewI;
  377. } else {
  378. llvm_unreachable("should never reach here");
  379. }
  380. }
  381. void PointerReplacer::replacePointer(Value *V) {
  382. #ifndef NDEBUG
  383. auto *PT = cast<PointerType>(Root.getType());
  384. auto *NT = cast<PointerType>(V->getType());
  385. assert(PT != NT && PT->hasSameElementTypeAs(NT) && "Invalid usage");
  386. #endif
  387. WorkMap[&Root] = V;
  388. for (Instruction *Workitem : Worklist)
  389. replace(Workitem);
  390. }
  391. Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
  392. if (auto *I = simplifyAllocaArraySize(*this, AI, DT))
  393. return I;
  394. if (AI.getAllocatedType()->isSized()) {
  395. // Move all allocas of zero-byte objects to the entry block and merge them
  396. // together. Note that we only do this for allocas, because malloc should
  397. // allocate and return a unique pointer, even for a zero-byte allocation.
  398. if (DL.getTypeAllocSize(AI.getAllocatedType()).getKnownMinValue() == 0) {
  399. // For a zero sized alloca there is no point in doing an array allocation.
  400. // This is helpful if the array size is a complicated expression not used
  401. // elsewhere.
  402. if (AI.isArrayAllocation())
  403. return replaceOperand(AI, 0,
  404. ConstantInt::get(AI.getArraySize()->getType(), 1));
  405. // Get the first instruction in the entry block.
  406. BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
  407. Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
  408. if (FirstInst != &AI) {
  409. // If the entry block doesn't start with a zero-size alloca then move
  410. // this one to the start of the entry block. There is no problem with
  411. // dominance as the array size was forced to a constant earlier already.
  412. AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
  413. if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
  414. DL.getTypeAllocSize(EntryAI->getAllocatedType())
  415. .getKnownMinValue() != 0) {
  416. AI.moveBefore(FirstInst);
  417. return &AI;
  418. }
  419. // Replace this zero-sized alloca with the one at the start of the entry
  420. // block after ensuring that the address will be aligned enough for both
  421. // types.
  422. const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign());
  423. EntryAI->setAlignment(MaxAlign);
  424. if (AI.getType() != EntryAI->getType())
  425. return new BitCastInst(EntryAI, AI.getType());
  426. return replaceInstUsesWith(AI, EntryAI);
  427. }
  428. }
  429. }
  430. // Check to see if this allocation is only modified by a memcpy/memmove from
  431. // a memory location whose alignment is equal to or exceeds that of the
  432. // allocation. If this is the case, we can change all users to use the
  433. // constant memory location instead. This is commonly produced by the CFE by
  434. // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
  435. // is only subsequently read.
  436. SmallVector<Instruction *, 4> ToDelete;
  437. if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) {
  438. Value *TheSrc = Copy->getSource();
  439. Align AllocaAlign = AI.getAlign();
  440. Align SourceAlign = getOrEnforceKnownAlignment(
  441. TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
  442. if (AllocaAlign <= SourceAlign &&
  443. isDereferenceableForAllocaSize(TheSrc, &AI, DL) &&
  444. !isa<Instruction>(TheSrc)) {
  445. // FIXME: Can we sink instructions without violating dominance when TheSrc
  446. // is an instruction instead of a constant or argument?
  447. LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
  448. LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
  449. unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
  450. auto *DestTy = PointerType::get(AI.getAllocatedType(), SrcAddrSpace);
  451. if (AI.getAddressSpace() == SrcAddrSpace) {
  452. for (Instruction *Delete : ToDelete)
  453. eraseInstFromFunction(*Delete);
  454. Value *Cast = Builder.CreateBitCast(TheSrc, DestTy);
  455. Instruction *NewI = replaceInstUsesWith(AI, Cast);
  456. eraseInstFromFunction(*Copy);
  457. ++NumGlobalCopies;
  458. return NewI;
  459. }
  460. PointerReplacer PtrReplacer(*this, AI);
  461. if (PtrReplacer.collectUsers()) {
  462. for (Instruction *Delete : ToDelete)
  463. eraseInstFromFunction(*Delete);
  464. Value *Cast = Builder.CreateBitCast(TheSrc, DestTy);
  465. PtrReplacer.replacePointer(Cast);
  466. ++NumGlobalCopies;
  467. }
  468. }
  469. }
  470. // At last, use the generic allocation site handler to aggressively remove
  471. // unused allocas.
  472. return visitAllocSite(AI);
  473. }
  474. // Are we allowed to form an atomic load or store of this type?
  475. static bool isSupportedAtomicType(Type *Ty) {
  476. return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
  477. }
  478. /// Helper to combine a load to a new type.
  479. ///
  480. /// This just does the work of combining a load to a new type. It handles
  481. /// metadata, etc., and returns the new instruction. The \c NewTy should be the
  482. /// loaded *value* type. This will convert it to a pointer, cast the operand to
  483. /// that pointer type, load it, etc.
  484. ///
  485. /// Note that this will create all of the instructions with whatever insert
  486. /// point the \c InstCombinerImpl currently is using.
  487. LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy,
  488. const Twine &Suffix) {
  489. assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
  490. "can't fold an atomic load to requested type");
  491. Value *Ptr = LI.getPointerOperand();
  492. unsigned AS = LI.getPointerAddressSpace();
  493. Type *NewPtrTy = NewTy->getPointerTo(AS);
  494. Value *NewPtr = nullptr;
  495. if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
  496. NewPtr->getType() == NewPtrTy))
  497. NewPtr = Builder.CreateBitCast(Ptr, NewPtrTy);
  498. LoadInst *NewLoad = Builder.CreateAlignedLoad(
  499. NewTy, NewPtr, LI.getAlign(), LI.isVolatile(), LI.getName() + Suffix);
  500. NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
  501. copyMetadataForLoad(*NewLoad, LI);
  502. return NewLoad;
  503. }
  504. /// Combine a store to a new type.
  505. ///
  506. /// Returns the newly created store instruction.
  507. static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI,
  508. Value *V) {
  509. assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
  510. "can't fold an atomic store of requested type");
  511. Value *Ptr = SI.getPointerOperand();
  512. unsigned AS = SI.getPointerAddressSpace();
  513. SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  514. SI.getAllMetadata(MD);
  515. StoreInst *NewStore = IC.Builder.CreateAlignedStore(
  516. V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
  517. SI.getAlign(), SI.isVolatile());
  518. NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
  519. for (const auto &MDPair : MD) {
  520. unsigned ID = MDPair.first;
  521. MDNode *N = MDPair.second;
  522. // Note, essentially every kind of metadata should be preserved here! This
  523. // routine is supposed to clone a store instruction changing *only its
  524. // type*. The only metadata it makes sense to drop is metadata which is
  525. // invalidated when the pointer type changes. This should essentially
  526. // never be the case in LLVM, but we explicitly switch over only known
  527. // metadata to be conservatively correct. If you are adding metadata to
  528. // LLVM which pertains to stores, you almost certainly want to add it
  529. // here.
  530. switch (ID) {
  531. case LLVMContext::MD_dbg:
  532. case LLVMContext::MD_DIAssignID:
  533. case LLVMContext::MD_tbaa:
  534. case LLVMContext::MD_prof:
  535. case LLVMContext::MD_fpmath:
  536. case LLVMContext::MD_tbaa_struct:
  537. case LLVMContext::MD_alias_scope:
  538. case LLVMContext::MD_noalias:
  539. case LLVMContext::MD_nontemporal:
  540. case LLVMContext::MD_mem_parallel_loop_access:
  541. case LLVMContext::MD_access_group:
  542. // All of these directly apply.
  543. NewStore->setMetadata(ID, N);
  544. break;
  545. case LLVMContext::MD_invariant_load:
  546. case LLVMContext::MD_nonnull:
  547. case LLVMContext::MD_noundef:
  548. case LLVMContext::MD_range:
  549. case LLVMContext::MD_align:
  550. case LLVMContext::MD_dereferenceable:
  551. case LLVMContext::MD_dereferenceable_or_null:
  552. // These don't apply for stores.
  553. break;
  554. }
  555. }
  556. return NewStore;
  557. }
  558. /// Returns true if the instruction represents a minmax pattern like:
  559. /// select ((cmp load V1, load V2), V1, V2).
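/// A sketch of the shape being matched (hypothetical IR; names illustrative):
///   %v1 = load i32, i32* %p1
///   %v2 = load i32, i32* %p2
///   %cmp = icmp slt i32 %v1, %v2
///   %sel = select i1 %cmp, i32* %p1, i32* %p2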
  560. static bool isMinMaxWithLoads(Value *V, Type *&LoadTy) {
  561. assert(V->getType()->isPointerTy() && "Expected pointer type.");
  562. // Ignore possible ty* to ixx* bitcast.
  563. V = InstCombiner::peekThroughBitcast(V);
  564. // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax
  565. // pattern.
  566. CmpInst::Predicate Pred;
  567. Instruction *L1;
  568. Instruction *L2;
  569. Value *LHS;
  570. Value *RHS;
  571. if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)),
  572. m_Value(LHS), m_Value(RHS))))
  573. return false;
  574. LoadTy = L1->getType();
  575. return (match(L1, m_Load(m_Specific(LHS))) &&
  576. match(L2, m_Load(m_Specific(RHS)))) ||
  577. (match(L1, m_Load(m_Specific(RHS))) &&
  578. match(L2, m_Load(m_Specific(LHS))));
  579. }
  580. /// Combine loads to match the type of their uses' value after looking
  581. /// through intervening bitcasts.
  582. ///
  583. /// The core idea here is that if the result of a load is used in an operation,
  584. /// we should load the type most conducive to that operation. For example, when
  585. /// loading an integer and converting that immediately to a pointer, we should
  586. /// instead directly load a pointer.
  587. ///
  588. /// However, this routine must never change the width of a load or the number of
  589. /// loads as that would introduce a semantic change. This combine is expected to
  590. /// be a semantic no-op which just allows loads to more closely model the types
  591. /// of their consuming operations.
  592. ///
  593. /// Currently, we also refuse to change the precise type used for an atomic load
  594. /// or a volatile load. This is debatable, and might be reasonable to change
  595. /// later. However, it is risky in case some backend or other part of LLVM is
  596. /// relying on the exact type loaded to select appropriate atomic operations.
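/// A hedged illustration (hypothetical IR) of the single-use no-op cast case:
///   %x = load i32, i32* %p
///   %f = bitcast i32 %x to float
/// becomes
///   %p.cast = bitcast i32* %p to float*
///   %f = load float, float* %p.cast
/// as long as the cast does not convert between pointer and integer types.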
  597. static Instruction *combineLoadToOperationType(InstCombinerImpl &IC,
  598. LoadInst &Load) {
  599. // FIXME: We could probably with some care handle both volatile and ordered
  600. // atomic loads here but it isn't clear that this is important.
  601. if (!Load.isUnordered())
  602. return nullptr;
  603. if (Load.use_empty())
  604. return nullptr;
  605. // swifterror values can't be bitcasted.
  606. if (Load.getPointerOperand()->isSwiftError())
  607. return nullptr;
  608. // Fold away bit casts of the loaded value by loading the desired type.
  609. // Note that we should not do this for pointer<->integer casts,
  610. // because that would result in type punning.
  611. if (Load.hasOneUse()) {
  612. // Don't transform when the type is x86_amx; it keeps the pass that lowers
  613. // the x86_amx type happy.
  614. Type *LoadTy = Load.getType();
  615. if (auto *BC = dyn_cast<BitCastInst>(Load.user_back())) {
  616. assert(!LoadTy->isX86_AMXTy() && "Load from x86_amx* should not happen!");
  617. if (BC->getType()->isX86_AMXTy())
  618. return nullptr;
  619. }
  620. if (auto *CastUser = dyn_cast<CastInst>(Load.user_back())) {
  621. Type *DestTy = CastUser->getDestTy();
  622. if (CastUser->isNoopCast(IC.getDataLayout()) &&
  623. LoadTy->isPtrOrPtrVectorTy() == DestTy->isPtrOrPtrVectorTy() &&
  624. (!Load.isAtomic() || isSupportedAtomicType(DestTy))) {
  625. LoadInst *NewLoad = IC.combineLoadToNewType(Load, DestTy);
  626. CastUser->replaceAllUsesWith(NewLoad);
  627. IC.eraseInstFromFunction(*CastUser);
  628. return &Load;
  629. }
  630. }
  631. }
  632. // FIXME: We should also canonicalize loads of vectors when their elements are
  633. // cast to other types.
  634. return nullptr;
  635. }
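// unpackLoadToAggregate splits a load of a small aggregate into loads of its
// elements. A rough sketch (hypothetical IR; names are illustrative) for a
// two-element struct:
//
//   %agg = load { i32, float }, { i32, float }* %p
// becomes
//   %e0.ptr = getelementptr inbounds { i32, float }, { i32, float }* %p, i32 0, i32 0
//   %e0 = load i32, i32* %e0.ptr
//   %e1.ptr = getelementptr inbounds { i32, float }, { i32, float }* %p, i32 0, i32 1
//   %e1 = load float, float* %e1.ptr
//   %tmp = insertvalue { i32, float } poison, i32 %e0, 0
//   %agg.new = insertvalue { i32, float } %tmp, float %e1, 1
//
// and %agg is replaced by %agg.new.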
  636. static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
  637. // FIXME: We could probably with some care handle both volatile and atomic
  638. // stores here but it isn't clear that this is important.
  639. if (!LI.isSimple())
  640. return nullptr;
  641. Type *T = LI.getType();
  642. if (!T->isAggregateType())
  643. return nullptr;
  644. StringRef Name = LI.getName();
  645. if (auto *ST = dyn_cast<StructType>(T)) {
  646. // If the struct only has one element, we unpack it.
  647. auto NumElements = ST->getNumElements();
  648. if (NumElements == 1) {
  649. LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
  650. ".unpack");
  651. NewLoad->setAAMetadata(LI.getAAMetadata());
  652. return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
  653. PoisonValue::get(T), NewLoad, 0, Name));
  654. }
  655. // We don't want to break loads with padding here as we'd lose
  656. // the knowledge that padding exists for the rest of the pipeline.
  657. const DataLayout &DL = IC.getDataLayout();
  658. auto *SL = DL.getStructLayout(ST);
  659. if (SL->hasPadding())
  660. return nullptr;
  661. const auto Align = LI.getAlign();
  662. auto *Addr = LI.getPointerOperand();
  663. auto *IdxType = Type::getInt32Ty(T->getContext());
  664. auto *Zero = ConstantInt::get(IdxType, 0);
  665. Value *V = PoisonValue::get(T);
  666. for (unsigned i = 0; i < NumElements; i++) {
  667. Value *Indices[2] = {
  668. Zero,
  669. ConstantInt::get(IdxType, i),
  670. };
  671. auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices),
  672. Name + ".elt");
  673. auto *L = IC.Builder.CreateAlignedLoad(
  674. ST->getElementType(i), Ptr,
  675. commonAlignment(Align, SL->getElementOffset(i)), Name + ".unpack");
  676. // Propagate AA metadata. It'll still be valid on the narrowed load.
  677. L->setAAMetadata(LI.getAAMetadata());
  678. V = IC.Builder.CreateInsertValue(V, L, i);
  679. }
  680. V->setName(Name);
  681. return IC.replaceInstUsesWith(LI, V);
  682. }
  683. if (auto *AT = dyn_cast<ArrayType>(T)) {
  684. auto *ET = AT->getElementType();
  685. auto NumElements = AT->getNumElements();
  686. if (NumElements == 1) {
  687. LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
  688. NewLoad->setAAMetadata(LI.getAAMetadata());
  689. return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
  690. PoisonValue::get(T), NewLoad, 0, Name));
  691. }
  692. // Bail out if the array is too large. Ideally we would like to optimize
  693. // arrays of arbitrary size but this has a terrible impact on compile time.
  694. // The threshold here is chosen arbitrarily and may need a little bit of
  695. // tuning.
  696. if (NumElements > IC.MaxArraySizeForCombine)
  697. return nullptr;
  698. const DataLayout &DL = IC.getDataLayout();
  699. auto EltSize = DL.getTypeAllocSize(ET);
  700. const auto Align = LI.getAlign();
  701. auto *Addr = LI.getPointerOperand();
  702. auto *IdxType = Type::getInt64Ty(T->getContext());
  703. auto *Zero = ConstantInt::get(IdxType, 0);
  704. Value *V = PoisonValue::get(T);
  705. uint64_t Offset = 0;
  706. for (uint64_t i = 0; i < NumElements; i++) {
  707. Value *Indices[2] = {
  708. Zero,
  709. ConstantInt::get(IdxType, i),
  710. };
  711. auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
  712. Name + ".elt");
  713. auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
  714. commonAlignment(Align, Offset),
  715. Name + ".unpack");
  716. L->setAAMetadata(LI.getAAMetadata());
  717. V = IC.Builder.CreateInsertValue(V, L, i);
  718. Offset += EltSize;
  719. }
  720. V->setName(Name);
  721. return IC.replaceInstUsesWith(LI, V);
  722. }
  723. return nullptr;
  724. }
  725. // If we can determine that all possible objects pointed to by the provided
  726. // pointer value are not only dereferenceable but also definitively less than
  727. // or equal to the provided maximum size, then return true; otherwise return
  728. // false. Constant global values and allocas are the objects this can reason about.
  729. //
  730. // FIXME: This should probably live in ValueTracking (or similar).
  731. static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
  732. const DataLayout &DL) {
  733. SmallPtrSet<Value *, 4> Visited;
  734. SmallVector<Value *, 4> Worklist(1, V);
  735. do {
  736. Value *P = Worklist.pop_back_val();
  737. P = P->stripPointerCasts();
  738. if (!Visited.insert(P).second)
  739. continue;
  740. if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
  741. Worklist.push_back(SI->getTrueValue());
  742. Worklist.push_back(SI->getFalseValue());
  743. continue;
  744. }
  745. if (PHINode *PN = dyn_cast<PHINode>(P)) {
  746. append_range(Worklist, PN->incoming_values());
  747. continue;
  748. }
  749. if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
  750. if (GA->isInterposable())
  751. return false;
  752. Worklist.push_back(GA->getAliasee());
  753. continue;
  754. }
  755. // If we know how big this object is, and it is less than MaxSize, continue
  756. // searching. Otherwise, return false.
  757. if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
  758. if (!AI->getAllocatedType()->isSized())
  759. return false;
  760. ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
  761. if (!CS)
  762. return false;
  763. TypeSize TS = DL.getTypeAllocSize(AI->getAllocatedType());
  764. if (TS.isScalable())
  765. return false;
  766. // Make sure that, even if the multiplication below would wrap as an
  767. // uint64_t, we still do the right thing.
  768. if ((CS->getValue().zext(128) * APInt(128, TS.getFixedValue()))
  769. .ugt(MaxSize))
  770. return false;
  771. continue;
  772. }
  773. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
  774. if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
  775. return false;
  776. uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
  777. if (InitSize > MaxSize)
  778. return false;
  779. continue;
  780. }
  781. return false;
  782. } while (!Worklist.empty());
  783. return true;
  784. }
  785. // If we're indexing into an object of a known size, and the outer index is
  786. // not a constant, but having any value but zero would lead to undefined
  787. // behavior, replace it with zero.
  788. //
  789. // For example, if we have:
  790. // @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
  791. // ...
  792. // %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
  793. // ... = load i32* %arrayidx, align 4
  794. // Then we know that we can replace %x in the GEP with i64 0.
  795. //
  796. // FIXME: We could fold any GEP index to zero that would cause UB if it were
  797. // not zero. Currently, we only handle the first such index. We could
  798. // also search through non-zero constant indices if we kept track of the
  799. // offsets those indices implied.
  800. static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
  801. GetElementPtrInst *GEPI, Instruction *MemI,
  802. unsigned &Idx) {
  803. if (GEPI->getNumOperands() < 2)
  804. return false;
  805. // Find the first non-zero index of a GEP. If all indices are zero, return
  806. // one past the last index.
  807. auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
  808. unsigned I = 1;
  809. for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
  810. Value *V = GEPI->getOperand(I);
  811. if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
  812. if (CI->isZero())
  813. continue;
  814. break;
  815. }
  816. return I;
  817. };
  818. // Skip through initial 'zero' indices, and find the corresponding pointer
  819. // type. See if the next index is not a constant.
  820. Idx = FirstNZIdx(GEPI);
  821. if (Idx == GEPI->getNumOperands())
  822. return false;
  823. if (isa<Constant>(GEPI->getOperand(Idx)))
  824. return false;
  825. SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
  826. Type *SourceElementType = GEPI->getSourceElementType();
  827. // Size information about scalable vectors is not available, so we cannot
  828. // deduce whether indexing at n is undefined behaviour or not. Bail out.
  829. if (isa<ScalableVectorType>(SourceElementType))
  830. return false;
  831. Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
  832. if (!AllocTy || !AllocTy->isSized())
  833. return false;
  834. const DataLayout &DL = IC.getDataLayout();
  835. uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedValue();
  836. // If there are more indices after the one we might replace with a zero, make
  837. // sure they're all non-negative. If any of them are negative, the overall
  838. // address being computed might be before the base address determined by the
  839. // first non-zero index.
  840. auto IsAllNonNegative = [&]() {
  841. for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
  842. KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
  843. if (Known.isNonNegative())
  844. continue;
  845. return false;
  846. }
  847. return true;
  848. };
  849. // FIXME: If the GEP is not inbounds, and there are extra indices after the
  850. // one we'll replace, those could cause the address computation to wrap
  851. // (rendering the IsAllNonNegative() check below insufficient). We can do
  852. // better, ignoring zero indices (and other indices we can prove small
  853. // enough not to wrap).
  854. if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
  855. return false;
  856. // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
  857. // also known to be dereferenceable.
  858. return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
  859. IsAllNonNegative();
  860. }
  861. // If we're indexing into an object with a variable index for the memory
  862. // access, but the object has only one element, we can assume that the index
  863. // will always be zero. If we replace the GEP, return it.
  864. template <typename T>
  865. static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
  866. T &MemI) {
  867. if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
  868. unsigned Idx;
  869. if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
  870. Instruction *NewGEPI = GEPI->clone();
  871. NewGEPI->setOperand(Idx,
  872. ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
  873. NewGEPI->insertBefore(GEPI);
  874. MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
  875. return NewGEPI;
  876. }
  877. }
  878. return nullptr;
  879. }
  880. static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
  881. if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
  882. return false;
  883. auto *Ptr = SI.getPointerOperand();
  884. if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
  885. Ptr = GEPI->getOperand(0);
  886. return (isa<ConstantPointerNull>(Ptr) &&
  887. !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
  888. }
  889. static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
  890. if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
  891. const Value *GEPI0 = GEPI->getOperand(0);
  892. if (isa<ConstantPointerNull>(GEPI0) &&
  893. !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
  894. return true;
  895. }
  896. if (isa<UndefValue>(Op) ||
  897. (isa<ConstantPointerNull>(Op) &&
  898. !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
  899. return true;
  900. return false;
  901. }
  902. Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
  903. Value *Op = LI.getOperand(0);
  904. // Try to canonicalize the loaded type.
  905. if (Instruction *Res = combineLoadToOperationType(*this, LI))
  906. return Res;
  907. // Attempt to improve the alignment.
  908. Align KnownAlign = getOrEnforceKnownAlignment(
  909. Op, DL.getPrefTypeAlign(LI.getType()), DL, &LI, &AC, &DT);
  910. if (KnownAlign > LI.getAlign())
  911. LI.setAlignment(KnownAlign);
  912. // Replace GEP indices if possible.
  913. if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
  914. Worklist.push(NewGEPI);
  915. return &LI;
  916. }
  917. if (Instruction *Res = unpackLoadToAggregate(*this, LI))
  918. return Res;
  919. // Do really simple store-to-load forwarding and load CSE, to catch cases
  920. // where there are several consecutive memory accesses to the same location,
  921. // separated by a few arithmetic operations.
  922. bool IsLoadCSE = false;
  923. if (Value *AvailableVal = FindAvailableLoadedValue(&LI, *AA, &IsLoadCSE)) {
  924. if (IsLoadCSE)
  925. combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
  926. return replaceInstUsesWith(
  927. LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
  928. LI.getName() + ".cast"));
  929. }
  930. // None of the following transforms are legal for volatile/ordered atomic
  931. // loads. Most of them do apply for unordered atomics.
  932. if (!LI.isUnordered()) return nullptr;
  933. // load(gep null, ...) -> unreachable
  934. // load null/undef -> unreachable
  935. // TODO: Consider a target hook for valid address spaces for these transforms.
  936. if (canSimplifyNullLoadOrGEP(LI, Op)) {
  937. // Insert a new store to null instruction before the load to indicate
  938. // that this code is not reachable. We do this instead of inserting
  939. // an unreachable instruction directly because we cannot modify the
  940. // CFG.
  941. StoreInst *SI = new StoreInst(PoisonValue::get(LI.getType()),
  942. Constant::getNullValue(Op->getType()), &LI);
  943. SI->setDebugLoc(LI.getDebugLoc());
  944. return replaceInstUsesWith(LI, PoisonValue::get(LI.getType()));
  945. }
  946. if (Op->hasOneUse()) {
  947. // Change select and PHI nodes to select values instead of addresses: this
  948. // helps alias analysis out a lot, allows many other simplifications, and
  949. // exposes redundancy in the code.
  950. //
  951. // Note that we cannot do the transformation unless we know that the
  952. // introduced loads cannot trap! Something like this is valid as long as
  953. // the condition is always false: load (select bool %C, int* null, int* %G),
  954. // but it would not be valid if we transformed it to load from null
  955. // unconditionally.
  956. //
  957. if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
  958. // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
  959. Align Alignment = LI.getAlign();
  960. if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(),
  961. Alignment, DL, SI) &&
  962. isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(),
  963. Alignment, DL, SI)) {
  964. LoadInst *V1 =
  965. Builder.CreateLoad(LI.getType(), SI->getOperand(1),
  966. SI->getOperand(1)->getName() + ".val");
  967. LoadInst *V2 =
  968. Builder.CreateLoad(LI.getType(), SI->getOperand(2),
  969. SI->getOperand(2)->getName() + ".val");
  970. assert(LI.isUnordered() && "implied by above");
  971. V1->setAlignment(Alignment);
  972. V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
  973. V2->setAlignment(Alignment);
  974. V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
  975. return SelectInst::Create(SI->getCondition(), V1, V2);
  976. }
  977. // load (select (cond, null, P)) -> load P
  978. if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
  979. !NullPointerIsDefined(SI->getFunction(),
  980. LI.getPointerAddressSpace()))
  981. return replaceOperand(LI, 0, SI->getOperand(2));
  982. // load (select (cond, P, null)) -> load P
  983. if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
  984. !NullPointerIsDefined(SI->getFunction(),
  985. LI.getPointerAddressSpace()))
  986. return replaceOperand(LI, 0, SI->getOperand(1));
  987. }
  988. }
  989. return nullptr;
  990. }
  991. /// Look for extractelement/insertvalue sequence that acts like a bitcast.
  992. ///
  993. /// \returns underlying value that was "cast", or nullptr otherwise.
  994. ///
  995. /// For example, if we have:
  996. ///
  997. /// %E0 = extractelement <2 x double> %U, i32 0
  998. /// %V0 = insertvalue [2 x double] undef, double %E0, 0
  999. /// %E1 = extractelement <2 x double> %U, i32 1
  1000. /// %V1 = insertvalue [2 x double] %V0, double %E1, 1
  1001. ///
  1002. /// and the layout of a <2 x double> is isomorphic to a [2 x double],
  1003. /// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
  1004. /// Note that %U may contain non-undef values where %V1 has undef.
  1005. static Value *likeBitCastFromVector(InstCombinerImpl &IC, Value *V) {
  1006. Value *U = nullptr;
  1007. while (auto *IV = dyn_cast<InsertValueInst>(V)) {
  1008. auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
  1009. if (!E)
  1010. return nullptr;
  1011. auto *W = E->getVectorOperand();
  1012. if (!U)
  1013. U = W;
  1014. else if (U != W)
  1015. return nullptr;
  1016. auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
  1017. if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
  1018. return nullptr;
  1019. V = IV->getAggregateOperand();
  1020. }
  1021. if (!match(V, m_Undef()) || !U)
  1022. return nullptr;
  1023. auto *UT = cast<VectorType>(U->getType());
  1024. auto *VT = V->getType();
  1025. // Check that types UT and VT are bitwise isomorphic.
  1026. const auto &DL = IC.getDataLayout();
  1027. if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
  1028. return nullptr;
  1029. }
  1030. if (auto *AT = dyn_cast<ArrayType>(VT)) {
  1031. if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
  1032. return nullptr;
  1033. } else {
  1034. auto *ST = cast<StructType>(VT);
  1035. if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
  1036. return nullptr;
  1037. for (const auto *EltT : ST->elements()) {
  1038. if (EltT != UT->getElementType())
  1039. return nullptr;
  1040. }
  1041. }
  1042. return U;
  1043. }
  1044. /// Combine stores to match the type of value being stored.
  1045. ///
  1046. /// The core idea here is that the memory does not have any intrinsic type and,
  1047. /// where we can, we should match the type of a store to the type of value being
  1048. /// stored.
  1049. ///
  1050. /// However, this routine must never change the width of a store or the number of
  1051. /// stores as that would introduce a semantic change. This combine is expected to
  1052. /// be a semantic no-op which just allows stores to more closely model the types
  1053. /// of their incoming values.
  1054. ///
  1055. /// Currently, we also refuse to change the precise type used for an atomic or
  1056. /// volatile store. This is debatable, and might be reasonable to change later.
  1057. /// However, it is risky in case some backend or other part of LLVM is relying
  1058. /// on the exact type stored to select appropriate atomic operations.
  1059. ///
  1060. /// \returns true if the store was successfully combined away. This indicates
  1061. /// the caller must erase the store instruction. We have to let the caller erase
  1062. /// the store instruction as otherwise there is no way to signal whether it was
  1063. /// combined or not: IC.EraseInstFromFunction returns a null pointer.
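/// A minimal sketch (hypothetical IR) of the bitcast case handled below:
///   %c = bitcast float %f to i32
///   store i32 %c, i32* %p
/// becomes
///   %p.cast = bitcast i32* %p to float*
///   store float %f, float* %p.cast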
  1064. static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) {
  1065. // FIXME: We could probably with some care handle both volatile and ordered
  1066. // atomic stores here but it isn't clear that this is important.
  1067. if (!SI.isUnordered())
  1068. return false;
  1069. // swifterror values can't be bitcasted.
  1070. if (SI.getPointerOperand()->isSwiftError())
  1071. return false;
  1072. Value *V = SI.getValueOperand();
  1073. // Fold away bit casts of the stored value by storing the original type.
  1074. if (auto *BC = dyn_cast<BitCastInst>(V)) {
  1075. assert(!BC->getType()->isX86_AMXTy() &&
  1076. "store to x86_amx* should not happen!");
  1077. V = BC->getOperand(0);
  1078. // Don't transform when the type is x86_amx; it keeps the pass that lowers
  1079. // the x86_amx type happy.
  1080. if (V->getType()->isX86_AMXTy())
  1081. return false;
  1082. if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
  1083. combineStoreToNewValue(IC, SI, V);
  1084. return true;
  1085. }
  1086. }
  1087. if (Value *U = likeBitCastFromVector(IC, V))
  1088. if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
  1089. combineStoreToNewValue(IC, SI, U);
  1090. return true;
  1091. }
  1092. // FIXME: We should also canonicalize stores of vectors when their elements
  1093. // are cast to other types.
  1094. return false;
  1095. }
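// unpackStoreToAggregate is the store-side counterpart of unpackLoadToAggregate
// above. A rough sketch (hypothetical IR; names are illustrative) for a
// two-element struct value %v:
//
//   store { i32, float } %v, { i32, float }* %p
// becomes
//   %e0 = extractvalue { i32, float } %v, 0
//   store i32 %e0, i32* %p.elt0
//   %e1 = extractvalue { i32, float } %v, 1
//   store float %e1, float* %p.elt1
//
// where %p.elt0/%p.elt1 are inbounds GEPs to the corresponding fields.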
  1096. static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
  1097. // FIXME: We could probably with some care handle both volatile and atomic
  1098. // stores here but it isn't clear that this is important.
  1099. if (!SI.isSimple())
  1100. return false;
  1101. Value *V = SI.getValueOperand();
  1102. Type *T = V->getType();
  1103. if (!T->isAggregateType())
  1104. return false;
  1105. if (auto *ST = dyn_cast<StructType>(T)) {
  1106. // If the struct only has one element, we unpack it.
  1107. unsigned Count = ST->getNumElements();
  1108. if (Count == 1) {
  1109. V = IC.Builder.CreateExtractValue(V, 0);
  1110. combineStoreToNewValue(IC, SI, V);
  1111. return true;
  1112. }
  1113. // We don't want to break stores with padding here as we'd lose
  1114. // the knowledge that padding exists for the rest of the pipeline.
  1115. const DataLayout &DL = IC.getDataLayout();
  1116. auto *SL = DL.getStructLayout(ST);
  1117. if (SL->hasPadding())
  1118. return false;
  1119. const auto Align = SI.getAlign();
  1120. SmallString<16> EltName = V->getName();
  1121. EltName += ".elt";
  1122. auto *Addr = SI.getPointerOperand();
  1123. SmallString<16> AddrName = Addr->getName();
  1124. AddrName += ".repack";
  1125. auto *IdxType = Type::getInt32Ty(ST->getContext());
  1126. auto *Zero = ConstantInt::get(IdxType, 0);
  1127. for (unsigned i = 0; i < Count; i++) {
  1128. Value *Indices[2] = {
  1129. Zero,
  1130. ConstantInt::get(IdxType, i),
  1131. };
  1132. auto *Ptr =
  1133. IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices), AddrName);
  1134. auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
  1135. auto EltAlign = commonAlignment(Align, SL->getElementOffset(i));
  1136. llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
  1137. NS->setAAMetadata(SI.getAAMetadata());
  1138. }
  1139. return true;
  1140. }
  if (auto *AT = dyn_cast<ArrayType>(T)) {
    // If the array has only one element, just unpack it.
    auto NumElements = AT->getNumElements();
    if (NumElements == 1) {
      V = IC.Builder.CreateExtractValue(V, 0);
      combineStoreToNewValue(IC, SI, V);
      return true;
    }

    // Bail out if the array is too large. Ideally we would like to optimize
    // arrays of arbitrary size but this has a terrible impact on compile time.
    // The threshold here is chosen arbitrarily and may need tuning.
    if (NumElements > IC.MaxArraySizeForCombine)
      return false;

    const DataLayout &DL = IC.getDataLayout();
    auto EltSize = DL.getTypeAllocSize(AT->getElementType());
    const auto Align = SI.getAlign();

    SmallString<16> EltName = V->getName();
    EltName += ".elt";
    auto *Addr = SI.getPointerOperand();
    SmallString<16> AddrName = Addr->getName();
    AddrName += ".repack";

    auto *IdxType = Type::getInt64Ty(T->getContext());
    auto *Zero = ConstantInt::get(IdxType, 0);

    uint64_t Offset = 0;
    for (uint64_t i = 0; i < NumElements; i++) {
      Value *Indices[2] = {
        Zero,
        ConstantInt::get(IdxType, i),
      };
      auto *Ptr =
          IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
      auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
      auto EltAlign = commonAlignment(Align, Offset);
      Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
      NS->setAAMetadata(SI.getAAMetadata());
      Offset += EltSize;
    }

    return true;
  }

  return false;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr \@a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr \@a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

/// Converts store (bitcast (load (bitcast (select ...)))) to
/// store (load (select ...)), where select is minmax:
/// select ((cmp load V1, load V2), V1, V2).
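///
/// For example (typed-pointer syntax; an illustrative sketch, not taken from
/// an actual test case):
///   %cmp  = fcmp olt float %v1.val, %v2.val  ; %vN.val = load float, float* %vN
///   %sel  = select i1 %cmp, float* %v1, float* %v2
///   %selc = bitcast float* %sel to i32*
///   %x    = load i32, i32* %selc
///   %dstc = bitcast float* %dst to i32*
///   store i32 %x, i32* %dstc
/// becomes a float load through %sel feeding a float store to %dst, with no
/// intervening bitcasts.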
static bool removeBitcastsFromLoadStoreOnMinMax(InstCombinerImpl &IC,
                                                StoreInst &SI) {
  // bitcast?
  if (!match(SI.getPointerOperand(), m_BitCast(m_Value())))
    return false;
  // load? integer?
  Value *LoadAddr;
  if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr)))))
    return false;
  auto *LI = cast<LoadInst>(SI.getValueOperand());
  if (!LI->getType()->isIntegerTy())
    return false;
  Type *CmpLoadTy;
  if (!isMinMaxWithLoads(LoadAddr, CmpLoadTy))
    return false;

  // Make sure the type would actually change.
  // This condition can be hit with chains of bitcasts.
  if (LI->getType() == CmpLoadTy)
    return false;

  // Make sure we're not changing the size of the load/store.
  const auto &DL = IC.getDataLayout();
  if (DL.getTypeStoreSizeInBits(LI->getType()) !=
      DL.getTypeStoreSizeInBits(CmpLoadTy))
    return false;

  if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
        auto *SI = dyn_cast<StoreInst>(U);
        return SI && SI->getPointerOperand() != LI &&
               InstCombiner::peekThroughBitcast(SI->getPointerOperand()) !=
                   LoadAddr &&
               !SI->getPointerOperand()->isSwiftError();
      }))
    return false;

  IC.Builder.SetInsertPoint(LI);
  LoadInst *NewLI = IC.combineLoadToNewType(*LI, CmpLoadTy);
  // Replace all the stores with stores of the newly loaded value.
  for (auto *UI : LI->users()) {
    auto *USI = cast<StoreInst>(UI);
    IC.Builder.SetInsertPoint(USI);
    combineStoreToNewValue(IC, *USI, NewLI);
  }
  IC.replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
  IC.eraseInstFromFunction(*LI);
  return true;
}

Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Try to canonicalize the stored type.
  if (combineStoreToValueType(*this, SI))
    return eraseInstFromFunction(SI);

  // Attempt to improve the alignment.
  const Align KnownAlign = getOrEnforceKnownAlignment(
      Ptr, DL.getPrefTypeAlign(Val->getType()), DL, &SI, &AC, &DT);
  if (KnownAlign > SI.getAlign())
    SI.setAlignment(KnownAlign);

  // Try to unpack aggregate stores into stores of their elements.
  if (unpackStoreToAggregate(*this, SI))
    return eraseInstFromFunction(SI);

  if (removeBitcastsFromLoadStoreOnMinMax(*this, SI))
    return eraseInstFromFunction(SI);

  // Replace GEP indices if possible.
  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
    Worklist.push(NewGEPI);
    return &SI;
  }

  // Don't hack volatile/ordered stores.
  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
  if (!SI.isUnordered()) return nullptr;

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return eraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return eraseInstFromFunction(SI);
      }
    }
  }

  // If we have a store to a location which is known constant, we can conclude
  // that the store must be storing the constant value (else the memory
  // wouldn't be constant), and this must be a noop.
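  // E.g. (an illustrative sketch): given `@g = constant i32 42`, the only
  // value that `store i32 %x, i32* @g` could legally be writing is 42, so the
  // store is a no-op and can simply be erased.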
  if (!isModSet(AA->getModRefInfoMask(Ptr)))
    return eraseInstFromFunction(SI);

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations. This
  // situation often occurs with bitfield accesses.
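  // E.g. the first store below is dead and gets erased (illustrative sketch):
  //   store i32 %a, i32* %p
  //   %b = or i32 %a, 1
  //   store i32 %b, i32* %p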
  BasicBlock::iterator BBI(SI);
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen,
    // and skip pointer-to-pointer bitcasts, which are NOPs.
    if (BBI->isDebugOrPseudoInst() ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile/ordered, and stores to the same location?
      if (PrevSI->isUnordered() &&
          equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1)) &&
          PrevSI->getValueOperand()->getType() ==
              SI.getValueOperand()->getType()) {
        ++NumDeadStore;
        // Manually add back the original store to the worklist now, so it will
        // be processed after the operands of the removed store, as this may
        // expose additional DSE opportunities.
        Worklist.push(&SI);
        eraseInstFromFunction(*PrevSI);
        return nullptr;
      }
      break;
    }

    // If this is a load, we have to stop. However, if the value being stored
    // is the result of a load from the very same pointer, then *this* store
    // is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
        assert(SI.isUnordered() && "can't eliminate ordering operation");
        return eraseInstFromFunction(SI);
      }

      // Otherwise, this is a load from some other location. Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads, throws or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
      break;
  }

  // store X, null        -> turns into 'unreachable' in SimplifyCFG
  // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
  if (canSimplifyNullStoreOrGEP(SI)) {
    if (!isa<PoisonValue>(Val))
      return replaceOperand(SI, 0, PoisonValue::get(Val->getType()));
    return nullptr;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  // FIXME: This is technically incorrect because it might overwrite a poison
  // value. Change to PoisonValue once #52930 is resolved.
  if (isa<UndefValue>(Val))
    return eraseInstFromFunction(SI);

  return nullptr;
}

/// Try to transform:
///   if () { *P = v1; } else { *P = v2 }
/// or:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
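///
/// For the if/then/else case the end result looks roughly like this (an
/// illustrative sketch; "storemerge" matches the name used below):
///   join:
///     %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
///     store i32 %storemerge, i32* %P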
bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
  if (!SI.isUnordered())
    return false; // This code has not been audited for volatile/ordered case.

  // Check if the successor block has exactly 2 incoming edges.
  BasicBlock *StoreBB = SI.getParent();
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
  if (!DestBB->hasNPredecessors(2))
    return false;

  // Capture the other block (the block that doesn't contain our store).
  pred_iterator PredIter = pred_begin(DestBB);
  if (*PredIter == StoreBB)
    ++PredIter;
  BasicBlock *OtherBB = *PredIter;

  // Bail out if all of the relevant blocks aren't distinct. This can happen,
  // for example, if SI is in an infinite loop.
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI(OtherBB->getTerminator());
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case. There is an instruction before the branch.
  StoreInst *OtherStore = nullptr;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info and pseudo probes.
    while (BBI->isDebugOrPseudoInst() ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI == OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or isn't the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to DestBB and StoreBB, so this is an
    // if/then triangle. See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the transform.
      if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
          BBI->mayWriteToMemory() || BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBB, we have to make sure nothing
    // reads or overwrites the stored value in StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  // The debug locations of the original instructions might differ. Merge them.
  DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
                                                     OtherStore->getDebugLoc());
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
    PN->setDebugLoc(MergedLoc);
  }

  // Advance to a place where it is safe to insert the new store and insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI =
      new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlign(),
                    SI.getOrdering(), SI.getSyncScopeID());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(MergedLoc);
  NewSI->mergeDIAssignID({&SI, OtherStore});

  // If the two stores had AA tags, merge them.
  AAMDNodes AATags = SI.getAAMetadata();
  if (AATags)
    NewSI->setAAMetadata(AATags.merge(OtherStore->getAAMetadata()));

  // Nuke the old stores.
  eraseInstFromFunction(SI);
  eraseInstFromFunction(*OtherStore);
  return true;
}