  1. //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements the visit functions for load, store and alloca.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "InstCombineInternal.h"
  13. #include "llvm/ADT/MapVector.h"
  14. #include "llvm/ADT/SmallString.h"
  15. #include "llvm/ADT/Statistic.h"
  16. #include "llvm/Analysis/AliasAnalysis.h"
  17. #include "llvm/Analysis/Loads.h"
  18. #include "llvm/IR/ConstantRange.h"
  19. #include "llvm/IR/DataLayout.h"
  20. #include "llvm/IR/DebugInfoMetadata.h"
  21. #include "llvm/IR/IntrinsicInst.h"
  22. #include "llvm/IR/LLVMContext.h"
  23. #include "llvm/IR/MDBuilder.h"
  24. #include "llvm/IR/PatternMatch.h"
  25. #include "llvm/Transforms/InstCombine/InstCombiner.h"
  26. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  27. #include "llvm/Transforms/Utils/Local.h"
  28. using namespace llvm;
  29. using namespace PatternMatch;
  30. #define DEBUG_TYPE "instcombine"
  31. STATISTIC(NumDeadStore, "Number of dead stores eliminated");
  32. STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
  33. /// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived)
  34. /// pointer to an alloca. Ignore any reads of the pointer, return false if we
  35. /// see any stores or other unknown uses. If we see pointer arithmetic, keep
  36. /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
  37. /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
  38. /// the alloca, and if the source pointer is a pointer to a constant global, we
  39. /// can optimize this.
  40. static bool
  41. isOnlyCopiedFromConstantMemory(AAResults *AA,
  42. Value *V, MemTransferInst *&TheCopy,
  43. SmallVectorImpl<Instruction *> &ToDelete) {
  44. // We track lifetime intrinsics as we encounter them. If we decide to go
  45. // ahead and replace the value with the global, this lets the caller quickly
  46. // eliminate the markers.
  47. SmallVector<std::pair<Value *, bool>, 4> ValuesToInspect;
  48. ValuesToInspect.emplace_back(V, false);
  49. while (!ValuesToInspect.empty()) {
  50. auto ValuePair = ValuesToInspect.pop_back_val();
  51. const bool IsOffset = ValuePair.second;
  52. for (auto &U : ValuePair.first->uses()) {
  53. auto *I = cast<Instruction>(U.getUser());
  54. if (auto *LI = dyn_cast<LoadInst>(I)) {
  55. // Ignore non-volatile loads, they are always ok.
  56. if (!LI->isSimple()) return false;
  57. continue;
  58. }
  59. if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
  60. // If uses of the bitcast are ok, we are ok.
  61. ValuesToInspect.emplace_back(I, IsOffset);
  62. continue;
  63. }
  64. if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
  65. // If the GEP has all zero indices, it doesn't offset the pointer. If it
  66. // doesn't, it does.
  67. ValuesToInspect.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
  68. continue;
  69. }
  70. if (auto *Call = dyn_cast<CallBase>(I)) {
  71. // If this is the function being called then we treat it like a load and
  72. // ignore it.
  73. if (Call->isCallee(&U))
  74. continue;
  75. unsigned DataOpNo = Call->getDataOperandNo(&U);
  76. bool IsArgOperand = Call->isArgOperand(&U);
  77. // Inalloca arguments are clobbered by the call.
  78. if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
  79. return false;
  80. // If this is a readonly/readnone call site, then we know it is just a
  81. // load (but one that potentially returns the value itself), so we can
  82. // ignore it if we know that the value isn't captured.
  83. if (Call->onlyReadsMemory() &&
  84. (Call->use_empty() || Call->doesNotCapture(DataOpNo)))
  85. continue;
  86. // If this is being passed as a byval argument, the caller is making a
  87. // copy, so it is only a read of the alloca.
  88. if (IsArgOperand && Call->isByValArgument(DataOpNo))
  89. continue;
  90. }
  91. // Lifetime intrinsics can be handled by the caller.
  92. if (I->isLifetimeStartOrEnd()) {
  93. assert(I->use_empty() && "Lifetime markers have no result to use!");
  94. ToDelete.push_back(I);
  95. continue;
  96. }
  97. // If this isn't our memcpy/memmove, reject it as something we can't
  98. // handle.
  99. MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
  100. if (!MI)
  101. return false;
  102. // If the transfer is using the alloca as a source of the transfer, then
  103. // ignore it since it is a load (unless the transfer is volatile).
  104. if (U.getOperandNo() == 1) {
  105. if (MI->isVolatile()) return false;
  106. continue;
  107. }
  108. // If we already have seen a copy, reject the second one.
  109. if (TheCopy) return false;
  110. // If the pointer has been offset from the start of the alloca, we can't
  111. // safely handle this.
  112. if (IsOffset) return false;
  113. // If the memintrinsic isn't using the alloca as the dest, reject it.
  114. if (U.getOperandNo() != 0) return false;
  115. // If the source of the memcpy/move is not a constant global, reject it.
  116. if (!AA->pointsToConstantMemory(MI->getSource()))
  117. return false;
  118. // Otherwise, the transform is safe. Remember the copy instruction.
  119. TheCopy = MI;
  120. }
  121. }
  122. return true;
  123. }
  124. /// isOnlyCopiedFromConstantMemory - Return true if the specified alloca is only
  125. /// modified by a copy from a constant global. If we can prove this, we can
  126. /// replace any uses of the alloca with uses of the global directly.
  127. static MemTransferInst *
  128. isOnlyCopiedFromConstantMemory(AAResults *AA,
  129. AllocaInst *AI,
  130. SmallVectorImpl<Instruction *> &ToDelete) {
  131. MemTransferInst *TheCopy = nullptr;
  132. if (isOnlyCopiedFromConstantMemory(AA, AI, TheCopy, ToDelete))
  133. return TheCopy;
  134. return nullptr;
  135. }
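// Illustrative IR (names and types invented) for the pattern the walk above
// accepts: the alloca is written exactly once by a memcpy from a constant
// global and is otherwise only read:
//
//   @cst = private unnamed_addr constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
//   ...
//   %a = alloca [4 x i32], align 4
//   %a.i8 = bitcast [4 x i32]* %a to i8*
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a.i8,
//       i8* align 4 bitcast ([4 x i32]* @cst to i8*), i64 16, i1 false)
//
// Any store into %a, a volatile transfer, or a copy that targets an offset
// pointer into %a makes the helpers above give up (return false / nullptr).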
  136. /// Returns true if V is dereferenceable for the size of the alloca.
  137. static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
  138. const DataLayout &DL) {
  139. if (AI->isArrayAllocation())
  140. return false;
  141. uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
  142. if (!AllocaSize)
  143. return false;
  144. return isDereferenceableAndAlignedPointer(V, AI->getAlign(),
  145. APInt(64, AllocaSize), DL);
  146. }
  147. static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
  148. AllocaInst &AI) {
  149. // Check for array size of 1 (scalar allocation).
  150. if (!AI.isArrayAllocation()) {
  151. // i32 1 is the canonical array size for scalar allocations.
  152. if (AI.getArraySize()->getType()->isIntegerTy(32))
  153. return nullptr;
  154. // Canonicalize it.
  155. return IC.replaceOperand(AI, 0, IC.Builder.getInt32(1));
  156. }
  157. // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
  158. if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
  159. if (C->getValue().getActiveBits() <= 64) {
  160. Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
  161. AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(),
  162. nullptr, AI.getName());
  163. New->setAlignment(AI.getAlign());
  164. // Scan to the end of the allocation instructions, to skip over a block of
  165. // allocas if possible...also skip interleaved debug info
  166. //
  167. BasicBlock::iterator It(New);
  168. while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
  169. ++It;
  170. // Now that It is pointing to the first non-allocation-inst in the block,
  171. // insert our getelementptr instruction...
  172. //
  173. Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
  174. Value *NullIdx = Constant::getNullValue(IdxTy);
  175. Value *Idx[2] = {NullIdx, NullIdx};
  176. Instruction *GEP = GetElementPtrInst::CreateInBounds(
  177. NewTy, New, Idx, New->getName() + ".sub");
  178. IC.InsertNewInstBefore(GEP, *It);
  179. // Now make everything use the getelementptr instead of the original
  180. // allocation.
  181. return IC.replaceInstUsesWith(AI, GEP);
  182. }
  183. }
  184. if (isa<UndefValue>(AI.getArraySize()))
  185. return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
  186. // Ensure that the alloca array size argument has type intptr_t, so that
  187. // any casting is exposed early.
  188. Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
  189. if (AI.getArraySize()->getType() != IntPtrTy) {
  190. Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false);
  191. return IC.replaceOperand(AI, 0, V);
  192. }
  193. return nullptr;
  194. }
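// Illustrative effect (hypothetical IR, names invented) of the array-size
// canonicalization above:
//
//   %a = alloca i32, i32 4, align 4
// becomes
//   %a1 = alloca [4 x i32], align 4
//   %a1.sub = getelementptr inbounds [4 x i32], [4 x i32]* %a1, i64 0, i64 0
//
// and every user of the original %a is rewritten to use the i32* result
// %a1.sub instead.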
  195. namespace {
  196. // If I and V are pointers in different address spaces, we cannot use
  197. // replaceAllUsesWith since I and V have different types. A
  198. // non-target-specific transformation should not use addrspacecast on V since
  199. // the two address spaces may be disjoint depending on the target.
  200. //
  201. // This class chases down uses of the old pointer until reaching the load
  202. // instructions, then replaces the old pointer in the load instructions with
  203. // the new pointer. If during the chase it sees a bitcast or GEP, it creates a
  204. // new bitcast or GEP with the new pointer and uses it in the load
  205. // instruction.
  206. class PointerReplacer {
  207. public:
  208. PointerReplacer(InstCombinerImpl &IC) : IC(IC) {}
  209. bool collectUsers(Instruction &I);
  210. void replacePointer(Instruction &I, Value *V);
  211. private:
  212. void replace(Instruction *I);
  213. Value *getReplacement(Value *I);
  214. SmallSetVector<Instruction *, 4> Worklist;
  215. MapVector<Value *, Value *> WorkMap;
  216. InstCombinerImpl &IC;
  217. };
  218. } // end anonymous namespace
  219. bool PointerReplacer::collectUsers(Instruction &I) {
  220. for (auto U : I.users()) {
  221. auto *Inst = cast<Instruction>(&*U);
  222. if (auto *Load = dyn_cast<LoadInst>(Inst)) {
  223. if (Load->isVolatile())
  224. return false;
  225. Worklist.insert(Load);
  226. } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
  227. Worklist.insert(Inst);
  228. if (!collectUsers(*Inst))
  229. return false;
  230. } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
  231. if (MI->isVolatile())
  232. return false;
  233. Worklist.insert(Inst);
  234. } else if (Inst->isLifetimeStartOrEnd()) {
  235. continue;
  236. } else {
  237. LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
  238. return false;
  239. }
  240. }
  241. return true;
  242. }
  243. Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); }
  244. void PointerReplacer::replace(Instruction *I) {
  245. if (getReplacement(I))
  246. return;
  247. if (auto *LT = dyn_cast<LoadInst>(I)) {
  248. auto *V = getReplacement(LT->getPointerOperand());
  249. assert(V && "Operand not replaced");
  250. auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
  251. LT->getAlign(), LT->getOrdering(),
  252. LT->getSyncScopeID());
  253. NewI->takeName(LT);
  254. copyMetadataForLoad(*NewI, *LT);
  255. IC.InsertNewInstWith(NewI, *LT);
  256. IC.replaceInstUsesWith(*LT, NewI);
  257. WorkMap[LT] = NewI;
  258. } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
  259. auto *V = getReplacement(GEP->getPointerOperand());
  260. assert(V && "Operand not replaced");
  261. SmallVector<Value *, 8> Indices;
  262. Indices.append(GEP->idx_begin(), GEP->idx_end());
  263. auto *NewI =
  264. GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices);
  265. IC.InsertNewInstWith(NewI, *GEP);
  266. NewI->takeName(GEP);
  267. WorkMap[GEP] = NewI;
  268. } else if (auto *BC = dyn_cast<BitCastInst>(I)) {
  269. auto *V = getReplacement(BC->getOperand(0));
  270. assert(V && "Operand not replaced");
  271. auto *NewT = PointerType::getWithSamePointeeType(
  272. cast<PointerType>(BC->getType()),
  273. V->getType()->getPointerAddressSpace());
  274. auto *NewI = new BitCastInst(V, NewT);
  275. IC.InsertNewInstWith(NewI, *BC);
  276. NewI->takeName(BC);
  277. WorkMap[BC] = NewI;
  278. } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
  279. auto *SrcV = getReplacement(MemCpy->getRawSource());
  280. // The pointer may appear in the destination of a copy, but we don't want to
  281. // replace it.
  282. if (!SrcV) {
  283. assert(getReplacement(MemCpy->getRawDest()) &&
  284. "destination not in replace list");
  285. return;
  286. }
  287. IC.Builder.SetInsertPoint(MemCpy);
  288. auto *NewI = IC.Builder.CreateMemTransferInst(
  289. MemCpy->getIntrinsicID(), MemCpy->getRawDest(), MemCpy->getDestAlign(),
  290. SrcV, MemCpy->getSourceAlign(), MemCpy->getLength(),
  291. MemCpy->isVolatile());
  292. AAMDNodes AAMD = MemCpy->getAAMetadata();
  293. if (AAMD)
  294. NewI->setAAMetadata(AAMD);
  295. IC.eraseInstFromFunction(*MemCpy);
  296. WorkMap[MemCpy] = NewI;
  297. } else {
  298. llvm_unreachable("should never reach here");
  299. }
  300. }
  301. void PointerReplacer::replacePointer(Instruction &I, Value *V) {
  302. #ifndef NDEBUG
  303. auto *PT = cast<PointerType>(I.getType());
  304. auto *NT = cast<PointerType>(V->getType());
  305. assert(PT != NT && PT->hasSameElementTypeAs(NT) && "Invalid usage");
  306. #endif
  307. WorkMap[&I] = V;
  308. for (Instruction *Workitem : Worklist)
  309. replace(Workitem);
  310. }
  311. Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
  312. if (auto *I = simplifyAllocaArraySize(*this, AI))
  313. return I;
  314. if (AI.getAllocatedType()->isSized()) {
  315. // Move all allocas of zero-byte objects to the entry block and merge them
  316. // together. Note that we only do this for allocas, because malloc should
  317. // allocate and return a unique pointer, even for a zero byte allocation.
  318. if (DL.getTypeAllocSize(AI.getAllocatedType()).getKnownMinSize() == 0) {
  319. // For a zero sized alloca there is no point in doing an array allocation.
  320. // This is helpful if the array size is a complicated expression not used
  321. // elsewhere.
  322. if (AI.isArrayAllocation())
  323. return replaceOperand(AI, 0,
  324. ConstantInt::get(AI.getArraySize()->getType(), 1));
  325. // Get the first instruction in the entry block.
  326. BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
  327. Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
  328. if (FirstInst != &AI) {
  329. // If the entry block doesn't start with a zero-size alloca then move
  330. // this one to the start of the entry block. There is no problem with
  331. // dominance as the array size was forced to a constant earlier already.
  332. AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
  333. if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
  334. DL.getTypeAllocSize(EntryAI->getAllocatedType())
  335. .getKnownMinSize() != 0) {
  336. AI.moveBefore(FirstInst);
  337. return &AI;
  338. }
  339. // Replace this zero-sized alloca with the one at the start of the entry
  340. // block after ensuring that the address will be aligned enough for both
  341. // types.
  342. const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign());
  343. EntryAI->setAlignment(MaxAlign);
  344. if (AI.getType() != EntryAI->getType())
  345. return new BitCastInst(EntryAI, AI.getType());
  346. return replaceInstUsesWith(AI, EntryAI);
  347. }
  348. }
  349. }
  350. // Check to see if this allocation is only modified by a memcpy/memmove from
  351. // a constant whose alignment is equal to or exceeds that of the allocation.
  352. // If this is the case, we can change all users to use the constant global
  353. // instead. This is commonly produced by the CFE by constructs like "void
  354. // foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' is only subsequently
  355. // read.
  356. SmallVector<Instruction *, 4> ToDelete;
  357. if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) {
  358. Value *TheSrc = Copy->getSource();
  359. Align AllocaAlign = AI.getAlign();
  360. Align SourceAlign = getOrEnforceKnownAlignment(
  361. TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
  362. if (AllocaAlign <= SourceAlign &&
  363. isDereferenceableForAllocaSize(TheSrc, &AI, DL) &&
  364. !isa<Instruction>(TheSrc)) {
  365. // FIXME: Can we sink instructions without violating dominance when TheSrc
  366. // is an instruction instead of a constant or argument?
  367. LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
  368. LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
  369. unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
  370. auto *DestTy = PointerType::get(AI.getAllocatedType(), SrcAddrSpace);
  371. if (AI.getType()->getAddressSpace() == SrcAddrSpace) {
  372. for (Instruction *Delete : ToDelete)
  373. eraseInstFromFunction(*Delete);
  374. Value *Cast = Builder.CreateBitCast(TheSrc, DestTy);
  375. Instruction *NewI = replaceInstUsesWith(AI, Cast);
  376. eraseInstFromFunction(*Copy);
  377. ++NumGlobalCopies;
  378. return NewI;
  379. }
  380. PointerReplacer PtrReplacer(*this);
  381. if (PtrReplacer.collectUsers(AI)) {
  382. for (Instruction *Delete : ToDelete)
  383. eraseInstFromFunction(*Delete);
  384. Value *Cast = Builder.CreateBitCast(TheSrc, DestTy);
  385. PtrReplacer.replacePointer(AI, Cast);
  386. ++NumGlobalCopies;
  387. }
  388. }
  389. }
  390. // At last, use the generic allocation site handler to aggressively remove
  391. // unused allocas.
  392. return visitAllocSite(AI);
  393. }
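// Illustrative before/after (hypothetical IR) for the constant-copy case
// above, assuming @cst is a constant global of sufficient size and alignment
// in the same address space as the alloca:
//
//   %a = alloca [4 x i32], align 4
//   call void @llvm.memcpy...(i8* %a.i8, i8* @cst.i8, i64 16, i1 false)
//   %p = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 2
//   %v = load i32, i32* %p
// becomes
//   %p = getelementptr inbounds [4 x i32], [4 x i32]* @cst, i64 0, i64 2
//   %v = load i32, i32* %p
//
// with the memcpy and the now-dead alloca removed (NumGlobalCopies counts
// these rewrites).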
  394. // Are we allowed to form an atomic load or store of this type?
  395. static bool isSupportedAtomicType(Type *Ty) {
  396. return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
  397. }
  398. /// Helper to combine a load to a new type.
  399. ///
  400. /// This just does the work of combining a load to a new type. It handles
  401. /// metadata, etc., and returns the new instruction. The \c NewTy should be the
  402. /// loaded *value* type. This will convert it to a pointer, cast the operand to
  403. /// that pointer type, load it, etc.
  404. ///
  405. /// Note that this will create all of the instructions with whatever insert
  406. /// point the \c InstCombinerImpl currently is using.
  407. LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy,
  408. const Twine &Suffix) {
  409. assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
  410. "can't fold an atomic load to requested type");
  411. Value *Ptr = LI.getPointerOperand();
  412. unsigned AS = LI.getPointerAddressSpace();
  413. Type *NewPtrTy = NewTy->getPointerTo(AS);
  414. Value *NewPtr = nullptr;
  415. if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
  416. NewPtr->getType() == NewPtrTy))
  417. NewPtr = Builder.CreateBitCast(Ptr, NewPtrTy);
  418. LoadInst *NewLoad = Builder.CreateAlignedLoad(
  419. NewTy, NewPtr, LI.getAlign(), LI.isVolatile(), LI.getName() + Suffix);
  420. NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
  421. copyMetadataForLoad(*NewLoad, LI);
  422. return NewLoad;
  423. }
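// Sketch of what this helper produces (hypothetical IR, hypothetical names),
// rewriting a load of type float into a load of type i32:
//
//   %v = load float, float* %p, align 4
// becomes
//   %p.bc = bitcast float* %p to i32*
//   %v1 = load i32, i32* %p.bc, align 4
//
// Alignment, atomic ordering, and load metadata are carried over from the
// original instruction.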
  424. /// Combine a store to a new type.
  425. ///
  426. /// Returns the newly created store instruction.
  427. static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI,
  428. Value *V) {
  429. assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
  430. "can't fold an atomic store of requested type");
  431. Value *Ptr = SI.getPointerOperand();
  432. unsigned AS = SI.getPointerAddressSpace();
  433. SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  434. SI.getAllMetadata(MD);
  435. StoreInst *NewStore = IC.Builder.CreateAlignedStore(
  436. V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
  437. SI.getAlign(), SI.isVolatile());
  438. NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
  439. for (const auto &MDPair : MD) {
  440. unsigned ID = MDPair.first;
  441. MDNode *N = MDPair.second;
  442. // Note, essentially every kind of metadata should be preserved here! This
  443. // routine is supposed to clone a store instruction changing *only its
  444. // type*. The only metadata it makes sense to drop is metadata which is
  445. // invalidated when the pointer type changes. This should essentially
  446. // never be the case in LLVM, but we explicitly switch over only known
  447. // metadata to be conservatively correct. If you are adding metadata to
  448. // LLVM which pertains to stores, you almost certainly want to add it
  449. // here.
  450. switch (ID) {
  451. case LLVMContext::MD_dbg:
  452. case LLVMContext::MD_tbaa:
  453. case LLVMContext::MD_prof:
  454. case LLVMContext::MD_fpmath:
  455. case LLVMContext::MD_tbaa_struct:
  456. case LLVMContext::MD_alias_scope:
  457. case LLVMContext::MD_noalias:
  458. case LLVMContext::MD_nontemporal:
  459. case LLVMContext::MD_mem_parallel_loop_access:
  460. case LLVMContext::MD_access_group:
  461. // All of these directly apply.
  462. NewStore->setMetadata(ID, N);
  463. break;
  464. case LLVMContext::MD_invariant_load:
  465. case LLVMContext::MD_nonnull:
  466. case LLVMContext::MD_noundef:
  467. case LLVMContext::MD_range:
  468. case LLVMContext::MD_align:
  469. case LLVMContext::MD_dereferenceable:
  470. case LLVMContext::MD_dereferenceable_or_null:
  471. // These don't apply for stores.
  472. break;
  473. }
  474. }
  475. return NewStore;
  476. }
  477. /// Returns true if the instruction represents a minmax pattern like:
  478. /// select ((cmp load V1, load V2), V1, V2).
  479. static bool isMinMaxWithLoads(Value *V, Type *&LoadTy) {
  480. assert(V->getType()->isPointerTy() && "Expected pointer type.");
  481. // Ignore possible ty* to ixx* bitcast.
  482. V = InstCombiner::peekThroughBitcast(V);
  483. // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax
  484. // pattern.
  485. CmpInst::Predicate Pred;
  486. Instruction *L1;
  487. Instruction *L2;
  488. Value *LHS;
  489. Value *RHS;
  490. if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)),
  491. m_Value(LHS), m_Value(RHS))))
  492. return false;
  493. LoadTy = L1->getType();
  494. return (match(L1, m_Load(m_Specific(LHS))) &&
  495. match(L2, m_Load(m_Specific(RHS)))) ||
  496. (match(L1, m_Load(m_Specific(RHS))) &&
  497. match(L2, m_Load(m_Specific(LHS))));
  498. }
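// Example (illustrative IR, names invented) of a pointer V that matches the
// pattern above; LoadTy is set to i32:
//
//   %v1 = load i32, i32* %p1
//   %v2 = load i32, i32* %p2
//   %c  = icmp slt i32 %v1, %v2
//   %V  = select i1 %c, i32* %p1, i32* %p2   ; min/max over the two addresses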
  499. /// Combine loads to match the type of their uses' value after looking
  500. /// through intervening bitcasts.
  501. ///
  502. /// The core idea here is that if the result of a load is used in an operation,
  503. /// we should load the type most conducive to that operation. For example, when
  504. /// loading an integer and converting that immediately to a pointer, we should
  505. /// instead directly load a pointer.
  506. ///
  507. /// However, this routine must never change the width of a load or the number of
  508. /// loads as that would introduce a semantic change. This combine is expected to
  509. /// be a semantic no-op which just allows loads to more closely model the types
  510. /// of their consuming operations.
  511. ///
  512. /// Currently, we also refuse to change the precise type used for an atomic load
  513. /// or a volatile load. This is debatable, and might be reasonable to change
  514. /// later. However, it is risky in case some backend or other part of LLVM is
  515. /// relying on the exact type loaded to select appropriate atomic operations.
  516. static Instruction *combineLoadToOperationType(InstCombinerImpl &IC,
  517. LoadInst &LI) {
  518. // FIXME: We could probably with some care handle both volatile and ordered
  519. // atomic loads here but it isn't clear that this is important.
  520. if (!LI.isUnordered())
  521. return nullptr;
  522. if (LI.use_empty())
  523. return nullptr;
  524. // swifterror values can't be bitcasted.
  525. if (LI.getPointerOperand()->isSwiftError())
  526. return nullptr;
  527. const DataLayout &DL = IC.getDataLayout();
  528. // Fold away bit casts of the loaded value by loading the desired type.
  529. // Note that we should not do this for pointer<->integer casts,
  530. // because that would result in type punning.
  531. if (LI.hasOneUse()) {
  532. // Don't transform when the type is x86_amx; this keeps the pass that lowers
  533. // the x86_amx type happy.
  534. if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) {
  535. assert(!LI.getType()->isX86_AMXTy() &&
  536. "load from x86_amx* should not happen!");
  537. if (BC->getType()->isX86_AMXTy())
  538. return nullptr;
  539. }
  540. if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
  541. if (CI->isNoopCast(DL) && LI.getType()->isPtrOrPtrVectorTy() ==
  542. CI->getDestTy()->isPtrOrPtrVectorTy())
  543. if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
  544. LoadInst *NewLoad = IC.combineLoadToNewType(LI, CI->getDestTy());
  545. CI->replaceAllUsesWith(NewLoad);
  546. IC.eraseInstFromFunction(*CI);
  547. return &LI;
  548. }
  549. }
  550. // FIXME: We should also canonicalize loads of vectors when their elements are
  551. // cast to other types.
  552. return nullptr;
  553. }
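// Illustrative fold (hypothetical IR) performed above when the load's single
// user is a no-op cast that does not mix pointer and non-pointer types:
//
//   %f = load float, float* %p, align 4
//   %i = bitcast float %f to i32
// becomes
//   %p.bc = bitcast float* %p to i32*
//   %i = load i32, i32* %p.bc, align 4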
  554. static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
  555. // FIXME: We could probably, with some care, handle both volatile and atomic
  556. // loads here, but it isn't clear that this is important.
  557. if (!LI.isSimple())
  558. return nullptr;
  559. Type *T = LI.getType();
  560. if (!T->isAggregateType())
  561. return nullptr;
  562. StringRef Name = LI.getName();
  563. if (auto *ST = dyn_cast<StructType>(T)) {
  564. // If the struct has only one element, we unpack it.
  565. auto NumElements = ST->getNumElements();
  566. if (NumElements == 1) {
  567. LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
  568. ".unpack");
  569. NewLoad->setAAMetadata(LI.getAAMetadata());
  570. return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
  571. UndefValue::get(T), NewLoad, 0, Name));
  572. }
  573. // We don't want to break loads with padding here, as we'd lose
  574. // the knowledge that padding exists for the rest of the pipeline.
  575. const DataLayout &DL = IC.getDataLayout();
  576. auto *SL = DL.getStructLayout(ST);
  577. if (SL->hasPadding())
  578. return nullptr;
  579. const auto Align = LI.getAlign();
  580. auto *Addr = LI.getPointerOperand();
  581. auto *IdxType = Type::getInt32Ty(T->getContext());
  582. auto *Zero = ConstantInt::get(IdxType, 0);
  583. Value *V = UndefValue::get(T);
  584. for (unsigned i = 0; i < NumElements; i++) {
  585. Value *Indices[2] = {
  586. Zero,
  587. ConstantInt::get(IdxType, i),
  588. };
  589. auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
  590. Name + ".elt");
  591. auto *L = IC.Builder.CreateAlignedLoad(
  592. ST->getElementType(i), Ptr,
  593. commonAlignment(Align, SL->getElementOffset(i)), Name + ".unpack");
  594. // Propagate AA metadata. It'll still be valid on the narrowed load.
  595. L->setAAMetadata(LI.getAAMetadata());
  596. V = IC.Builder.CreateInsertValue(V, L, i);
  597. }
  598. V->setName(Name);
  599. return IC.replaceInstUsesWith(LI, V);
  600. }
  601. if (auto *AT = dyn_cast<ArrayType>(T)) {
  602. auto *ET = AT->getElementType();
  603. auto NumElements = AT->getNumElements();
  604. if (NumElements == 1) {
  605. LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
  606. NewLoad->setAAMetadata(LI.getAAMetadata());
  607. return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
  608. UndefValue::get(T), NewLoad, 0, Name));
  609. }
  610. // Bail out if the array is too large. Ideally we would like to optimize
  611. // arrays of arbitrary size but this has a terrible impact on compile time.
  612. // The threshold here is chosen arbitrarily, maybe needs a little bit of
  613. // tuning.
  614. if (NumElements > IC.MaxArraySizeForCombine)
  615. return nullptr;
  616. const DataLayout &DL = IC.getDataLayout();
  617. auto EltSize = DL.getTypeAllocSize(ET);
  618. const auto Align = LI.getAlign();
  619. auto *Addr = LI.getPointerOperand();
  620. auto *IdxType = Type::getInt64Ty(T->getContext());
  621. auto *Zero = ConstantInt::get(IdxType, 0);
  622. Value *V = UndefValue::get(T);
  623. uint64_t Offset = 0;
  624. for (uint64_t i = 0; i < NumElements; i++) {
  625. Value *Indices[2] = {
  626. Zero,
  627. ConstantInt::get(IdxType, i),
  628. };
  629. auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
  630. Name + ".elt");
  631. auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
  632. commonAlignment(Align, Offset),
  633. Name + ".unpack");
  634. L->setAAMetadata(LI.getAAMetadata());
  635. V = IC.Builder.CreateInsertValue(V, L, i);
  636. Offset += EltSize;
  637. }
  638. V->setName(Name);
  639. return IC.replaceInstUsesWith(LI, V);
  640. }
  641. return nullptr;
  642. }
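// Illustrative unpacking (hypothetical IR, names invented) of an aggregate
// load, as done above for a padding-free struct:
//
//   %s = load { i32, i32 }, { i32, i32 }* %p, align 4
// becomes
//   %p.elt0 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %p, i32 0, i32 0
//   %e0 = load i32, i32* %p.elt0, align 4
//   %p.elt1 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %p, i32 0, i32 1
//   %e1 = load i32, i32* %p.elt1, align 4
//   %s0 = insertvalue { i32, i32 } undef, i32 %e0, 0
//   %s  = insertvalue { i32, i32 } %s0, i32 %e1, 1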
  643. // If we can determine that all possible objects pointed to by the provided
  644. // pointer value are not only dereferenceable but also definitively less than
  645. // or equal to the provided maximum size, then return true (constant global
  646. // values and allocas are the objects we can prove this for). Otherwise, return false.
  647. //
  648. // FIXME: This should probably live in ValueTracking (or similar).
  649. static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
  650. const DataLayout &DL) {
  651. SmallPtrSet<Value *, 4> Visited;
  652. SmallVector<Value *, 4> Worklist(1, V);
  653. do {
  654. Value *P = Worklist.pop_back_val();
  655. P = P->stripPointerCasts();
  656. if (!Visited.insert(P).second)
  657. continue;
  658. if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
  659. Worklist.push_back(SI->getTrueValue());
  660. Worklist.push_back(SI->getFalseValue());
  661. continue;
  662. }
  663. if (PHINode *PN = dyn_cast<PHINode>(P)) {
  664. append_range(Worklist, PN->incoming_values());
  665. continue;
  666. }
  667. if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
  668. if (GA->isInterposable())
  669. return false;
  670. Worklist.push_back(GA->getAliasee());
  671. continue;
  672. }
  673. // If we know how big this object is, and it is less than MaxSize, continue
  674. // searching. Otherwise, return false.
  675. if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
  676. if (!AI->getAllocatedType()->isSized())
  677. return false;
  678. ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
  679. if (!CS)
  680. return false;
  681. uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
  682. // Make sure that, even if the multiplication below would wrap as a
  683. // uint64_t, we still do the right thing.
  684. if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
  685. return false;
  686. continue;
  687. }
  688. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
  689. if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
  690. return false;
  691. uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
  692. if (InitSize > MaxSize)
  693. return false;
  694. continue;
  695. }
  696. return false;
  697. } while (!Worklist.empty());
  698. return true;
  699. }
  700. // If we're indexing into an object of a known size, and the outer index is
  701. // not a constant, but having any value but zero would lead to undefined
  702. // behavior, replace it with zero.
  703. //
  704. // For example, if we have:
  705. // @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
  706. // ...
  707. // %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
  708. // ... = load i32* %arrayidx, align 4
  709. // Then we know that we can replace %x in the GEP with i64 0.
  710. //
  711. // FIXME: We could fold any GEP index to zero that would cause UB if it were
  712. // not zero. Currently, we only handle the first such index. We could
  713. // also search through non-zero constant indices if we kept track of the
  714. // offsets those indices implied.
  715. static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
  716. GetElementPtrInst *GEPI, Instruction *MemI,
  717. unsigned &Idx) {
  718. if (GEPI->getNumOperands() < 2)
  719. return false;
  720. // Find the first non-zero index of a GEP. If all indices are zero, return
  721. // one past the last index.
  722. auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
  723. unsigned I = 1;
  724. for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
  725. Value *V = GEPI->getOperand(I);
  726. if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
  727. if (CI->isZero())
  728. continue;
  729. break;
  730. }
  731. return I;
  732. };
  733. // Skip through initial 'zero' indices, and find the corresponding pointer
  734. // type. See if the next index is not a constant.
  735. Idx = FirstNZIdx(GEPI);
  736. if (Idx == GEPI->getNumOperands())
  737. return false;
  738. if (isa<Constant>(GEPI->getOperand(Idx)))
  739. return false;
  740. SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
  741. Type *SourceElementType = GEPI->getSourceElementType();
  742. // Size information about scalable vectors is not available, so we cannot
  743. // deduce whether indexing at n is undefined behaviour or not. Bail out.
  744. if (isa<ScalableVectorType>(SourceElementType))
  745. return false;
  746. Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
  747. if (!AllocTy || !AllocTy->isSized())
  748. return false;
  749. const DataLayout &DL = IC.getDataLayout();
  750. uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedSize();
  751. // If there are more indices after the one we might replace with a zero, make
  752. // sure they're all non-negative. If any of them are negative, the overall
  753. // address being computed might be before the base address determined by the
  754. // first non-zero index.
  755. auto IsAllNonNegative = [&]() {
  756. for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
  757. KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
  758. if (Known.isNonNegative())
  759. continue;
  760. return false;
  761. }
  762. return true;
  763. };
  764. // FIXME: If the GEP is not inbounds, and there are extra indices after the
  765. // one we'll replace, those could cause the address computation to wrap
  766. // (rendering the IsAllNonNegative() check below insufficient). We can do
  767. // better, ignoring zero indices (and other indices we can prove small
  768. // enough not to wrap).
  769. if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
  770. return false;
  771. // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
  772. // also known to be dereferenceable.
  773. return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
  774. IsAllNonNegative();
  775. }
  776. // If we're indexing into an object with a variable index for the memory
  777. // access, but the object has only one element, we can assume that the index
  778. // will always be zero. If we replace the GEP, return it.
  779. template <typename T>
  780. static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
  781. T &MemI) {
  782. if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
  783. unsigned Idx;
  784. if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
  785. Instruction *NewGEPI = GEPI->clone();
  786. NewGEPI->setOperand(Idx,
  787. ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
  788. NewGEPI->insertBefore(GEPI);
  789. MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
  790. return NewGEPI;
  791. }
  792. }
  793. return nullptr;
  794. }
  795. static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
  796. if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
  797. return false;
  798. auto *Ptr = SI.getPointerOperand();
  799. if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
  800. Ptr = GEPI->getOperand(0);
  801. return (isa<ConstantPointerNull>(Ptr) &&
  802. !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
  803. }
  804. static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
  805. if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
  806. const Value *GEPI0 = GEPI->getOperand(0);
  807. if (isa<ConstantPointerNull>(GEPI0) &&
  808. !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
  809. return true;
  810. }
  811. if (isa<UndefValue>(Op) ||
  812. (isa<ConstantPointerNull>(Op) &&
  813. !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
  814. return true;
  815. return false;
  816. }
  817. Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
  818. Value *Op = LI.getOperand(0);
  819. // Try to canonicalize the loaded type.
  820. if (Instruction *Res = combineLoadToOperationType(*this, LI))
  821. return Res;
  822. // Attempt to improve the alignment.
  823. Align KnownAlign = getOrEnforceKnownAlignment(
  824. Op, DL.getPrefTypeAlign(LI.getType()), DL, &LI, &AC, &DT);
  825. if (KnownAlign > LI.getAlign())
  826. LI.setAlignment(KnownAlign);
  827. // Replace GEP indices if possible.
  828. if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
  829. Worklist.push(NewGEPI);
  830. return &LI;
  831. }
  832. if (Instruction *Res = unpackLoadToAggregate(*this, LI))
  833. return Res;
  834. // Do really simple store-to-load forwarding and load CSE, to catch cases
  835. // where there are several consecutive memory accesses to the same location,
  836. // separated by a few arithmetic operations.
  837. bool IsLoadCSE = false;
  838. if (Value *AvailableVal = FindAvailableLoadedValue(&LI, *AA, &IsLoadCSE)) {
  839. if (IsLoadCSE)
  840. combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
  841. return replaceInstUsesWith(
  842. LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
  843. LI.getName() + ".cast"));
  844. }
  845. // None of the following transforms are legal for volatile/ordered atomic
  846. // loads. Most of them do apply for unordered atomics.
  847. if (!LI.isUnordered()) return nullptr;
  848. // load(gep null, ...) -> unreachable
  849. // load null/undef -> unreachable
  850. // TODO: Consider a target hook for valid address spaces for these xforms.
  851. if (canSimplifyNullLoadOrGEP(LI, Op)) {
  852. // Insert a new store to null instruction before the load to indicate
  853. // that this code is not reachable. We do this instead of inserting
  854. // an unreachable instruction directly because we cannot modify the
  855. // CFG.
  856. StoreInst *SI = new StoreInst(PoisonValue::get(LI.getType()),
  857. Constant::getNullValue(Op->getType()), &LI);
  858. SI->setDebugLoc(LI.getDebugLoc());
  859. return replaceInstUsesWith(LI, PoisonValue::get(LI.getType()));
  860. }
  861. if (Op->hasOneUse()) {
  862. // Change select and PHI nodes to select values instead of addresses: this
  863. // helps alias analysis out a lot, allows many other simplifications, and
  864. // exposes redundancy in the code.
  865. //
  866. // Note that we cannot do the transformation unless we know that the
  867. // introduced loads cannot trap! Something like this is valid as long as
  868. // the condition is always false: load (select bool %C, int* null, int* %G),
  869. // but it would not be valid if we transformed it to load from null
  870. // unconditionally.
  871. //
  872. if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
  873. // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
  874. Align Alignment = LI.getAlign();
  875. if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(),
  876. Alignment, DL, SI) &&
  877. isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(),
  878. Alignment, DL, SI)) {
  879. LoadInst *V1 =
  880. Builder.CreateLoad(LI.getType(), SI->getOperand(1),
  881. SI->getOperand(1)->getName() + ".val");
  882. LoadInst *V2 =
  883. Builder.CreateLoad(LI.getType(), SI->getOperand(2),
  884. SI->getOperand(2)->getName() + ".val");
  885. assert(LI.isUnordered() && "implied by above");
  886. V1->setAlignment(Alignment);
  887. V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
  888. V2->setAlignment(Alignment);
  889. V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
  890. return SelectInst::Create(SI->getCondition(), V1, V2);
  891. }
  892. // load (select (cond, null, P)) -> load P
  893. if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
  894. !NullPointerIsDefined(SI->getFunction(),
  895. LI.getPointerAddressSpace()))
  896. return replaceOperand(LI, 0, SI->getOperand(2));
  897. // load (select (cond, P, null)) -> load P
  898. if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
  899. !NullPointerIsDefined(SI->getFunction(),
  900. LI.getPointerAddressSpace()))
  901. return replaceOperand(LI, 0, SI->getOperand(1));
  902. }
  903. }
  904. return nullptr;
  905. }
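// Illustrative select transform (hypothetical IR) from the code above,
// assuming both arms are known safe to load from unconditionally:
//
//   %p = select i1 %c, i32* %p1, i32* %p2
//   %v = load i32, i32* %p, align 4
// becomes
//   %p1.val = load i32, i32* %p1, align 4
//   %p2.val = load i32, i32* %p2, align 4
//   %v = select i1 %c, i32 %p1.val, i32 %p2.val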
  906. /// Look for extractelement/insertvalue sequence that acts like a bitcast.
  907. ///
  908. /// \returns underlying value that was "cast", or nullptr otherwise.
  909. ///
  910. /// For example, if we have:
  911. ///
  912. /// %E0 = extractelement <2 x double> %U, i32 0
  913. /// %V0 = insertvalue [2 x double] undef, double %E0, 0
  914. /// %E1 = extractelement <2 x double> %U, i32 1
  915. /// %V1 = insertvalue [2 x double] %V0, double %E1, 1
  916. ///
  917. /// and the layout of a <2 x double> is isomorphic to a [2 x double],
  918. /// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
  919. /// Note that %U may contain non-undef values where %V1 has undef.
  920. static Value *likeBitCastFromVector(InstCombinerImpl &IC, Value *V) {
  921. Value *U = nullptr;
  922. while (auto *IV = dyn_cast<InsertValueInst>(V)) {
  923. auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
  924. if (!E)
  925. return nullptr;
  926. auto *W = E->getVectorOperand();
  927. if (!U)
  928. U = W;
  929. else if (U != W)
  930. return nullptr;
  931. auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
  932. if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
  933. return nullptr;
  934. V = IV->getAggregateOperand();
  935. }
  936. if (!match(V, m_Undef()) || !U)
  937. return nullptr;
  938. auto *UT = cast<VectorType>(U->getType());
  939. auto *VT = V->getType();
  940. // Check that types UT and VT are bitwise isomorphic.
  941. const auto &DL = IC.getDataLayout();
  942. if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
  943. return nullptr;
  944. }
  945. if (auto *AT = dyn_cast<ArrayType>(VT)) {
  946. if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
  947. return nullptr;
  948. } else {
  949. auto *ST = cast<StructType>(VT);
  950. if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
  951. return nullptr;
  952. for (const auto *EltT : ST->elements()) {
  953. if (EltT != UT->getElementType())
  954. return nullptr;
  955. }
  956. }
  957. return U;
  958. }
  959. /// Combine stores to match the type of value being stored.
  960. ///
  961. /// The core idea here is that the memory does not have any intrinsic type and,
  962. /// where we can, we should match the type of a store to the type of the value
  963. /// being stored.
  964. ///
  965. /// However, this routine must never change the width of a store or the number of
  966. /// stores as that would introduce a semantic change. This combine is expected to
  967. /// be a semantic no-op which just allows stores to more closely model the types
  968. /// of their incoming values.
  969. ///
  970. /// Currently, we also refuse to change the precise type used for an atomic or
  971. /// volatile store. This is debatable, and might be reasonable to change later.
  972. /// However, it is risky in case some backend or other part of LLVM is relying
  973. /// on the exact type stored to select appropriate atomic operations.
  974. ///
  975. /// \returns true if the store was successfully combined away. This indicates
  976. /// the caller must erase the store instruction. We have to let the caller erase
  977. /// the store instruction as otherwise there is no way to signal whether it was
  978. /// combined or not: IC.EraseInstFromFunction returns a null pointer.
  979. static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) {
  980. // FIXME: We could probably with some care handle both volatile and ordered
  981. // atomic stores here but it isn't clear that this is important.
  982. if (!SI.isUnordered())
  983. return false;
  984. // swifterror values can't be bitcasted.
  985. if (SI.getPointerOperand()->isSwiftError())
  986. return false;
  987. Value *V = SI.getValueOperand();
  988. // Fold away bit casts of the stored value by storing the original type.
  989. if (auto *BC = dyn_cast<BitCastInst>(V)) {
  990. assert(!BC->getType()->isX86_AMXTy() &&
  991. "store to x86_amx* should not happen!");
  992. V = BC->getOperand(0);
  993. // Don't transform when the type is x86_amx; this keeps the pass that lowers
  994. // the x86_amx type happy.
  995. if (V->getType()->isX86_AMXTy())
  996. return false;
  997. if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
  998. combineStoreToNewValue(IC, SI, V);
  999. return true;
  1000. }
  1001. }
  1002. if (Value *U = likeBitCastFromVector(IC, V))
  1003. if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
  1004. combineStoreToNewValue(IC, SI, U);
  1005. return true;
  1006. }
  1007. // FIXME: We should also canonicalize stores of vectors when their elements
  1008. // are cast to other types.
  1009. return false;
  1010. }
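// Illustrative fold (hypothetical IR) of a stored bitcast, as performed above:
//
//   %i = bitcast float %f to i32
//   store i32 %i, i32* %p, align 4
// becomes
//   %p.bc = bitcast i32* %p to float*
//   store float %f, float* %p.bc, align 4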
  1011. static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
  1012. // FIXME: We could probably with some care handle both volatile and atomic
  1013. // stores here but it isn't clear that this is important.
  1014. if (!SI.isSimple())
  1015. return false;
  1016. Value *V = SI.getValueOperand();
  1017. Type *T = V->getType();
  1018. if (!T->isAggregateType())
  1019. return false;
  1020. if (auto *ST = dyn_cast<StructType>(T)) {
  1021. // If the struct has only one element, we unpack it.
  1022. unsigned Count = ST->getNumElements();
  1023. if (Count == 1) {
  1024. V = IC.Builder.CreateExtractValue(V, 0);
  1025. combineStoreToNewValue(IC, SI, V);
  1026. return true;
  1027. }
  1028. // We don't want to break stores with padding here, as we'd lose
  1029. // the knowledge that padding exists for the rest of the pipeline.
  1030. const DataLayout &DL = IC.getDataLayout();
  1031. auto *SL = DL.getStructLayout(ST);
  1032. if (SL->hasPadding())
  1033. return false;
  1034. const auto Align = SI.getAlign();
  1035. SmallString<16> EltName = V->getName();
  1036. EltName += ".elt";
  1037. auto *Addr = SI.getPointerOperand();
  1038. SmallString<16> AddrName = Addr->getName();
  1039. AddrName += ".repack";
  1040. auto *IdxType = Type::getInt32Ty(ST->getContext());
  1041. auto *Zero = ConstantInt::get(IdxType, 0);
  1042. for (unsigned i = 0; i < Count; i++) {
  1043. Value *Indices[2] = {
  1044. Zero,
  1045. ConstantInt::get(IdxType, i),
  1046. };
  1047. auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
  1048. AddrName);
  1049. auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
  1050. auto EltAlign = commonAlignment(Align, SL->getElementOffset(i));
  1051. llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
  1052. NS->setAAMetadata(SI.getAAMetadata());
  1053. }
  1054. return true;
  1055. }
  1056. if (auto *AT = dyn_cast<ArrayType>(T)) {
  1057. // If the array has only one element, we unpack it.
  1058. auto NumElements = AT->getNumElements();
  1059. if (NumElements == 1) {
  1060. V = IC.Builder.CreateExtractValue(V, 0);
  1061. combineStoreToNewValue(IC, SI, V);
  1062. return true;
  1063. }
  1064. // Bail out if the array is too large. Ideally we would like to optimize
  1065. // arrays of arbitrary size but this has a terrible impact on compile time.
  1066. // The threshold here is chosen arbitrarily, maybe needs a little bit of
  1067. // tuning.
  1068. if (NumElements > IC.MaxArraySizeForCombine)
  1069. return false;
  1070. const DataLayout &DL = IC.getDataLayout();
  1071. auto EltSize = DL.getTypeAllocSize(AT->getElementType());
  1072. const auto Align = SI.getAlign();
  1073. SmallString<16> EltName = V->getName();
  1074. EltName += ".elt";
  1075. auto *Addr = SI.getPointerOperand();
  1076. SmallString<16> AddrName = Addr->getName();
  1077. AddrName += ".repack";
  1078. auto *IdxType = Type::getInt64Ty(T->getContext());
  1079. auto *Zero = ConstantInt::get(IdxType, 0);
  1080. uint64_t Offset = 0;
  1081. for (uint64_t i = 0; i < NumElements; i++) {
  1082. Value *Indices[2] = {
  1083. Zero,
  1084. ConstantInt::get(IdxType, i),
  1085. };
  1086. auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
  1087. AddrName);
  1088. auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
  1089. auto EltAlign = commonAlignment(Align, Offset);
  1090. Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
  1091. NS->setAAMetadata(SI.getAAMetadata());
  1092. Offset += EltSize;
  1093. }
  1094. return true;
  1095. }
  1096. return false;
  1097. }
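// Illustrative unpacking (hypothetical IR, names invented) of an aggregate
// store, as done above for a padding-free struct:
//
//   store { i32, i32 } %agg, { i32, i32 }* %p, align 4
// becomes
//   %e0 = extractvalue { i32, i32 } %agg, 0
//   %p0 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %p, i32 0, i32 0
//   store i32 %e0, i32* %p0, align 4
//   %e1 = extractvalue { i32, i32 } %agg, 1
//   %p1 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %p, i32 0, i32 1
//   store i32 %e1, i32* %p1, align 4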
  1098. /// equivalentAddressValues - Test if A and B will obviously have the same
  1099. /// value. This includes recognizing that %t0 and %t1 will have the same
  1100. /// value in code like this:
  1101. /// %t0 = getelementptr \@a, 0, 3
  1102. /// store i32 0, i32* %t0
  1103. /// %t1 = getelementptr \@a, 0, 3
  1104. /// %t2 = load i32* %t1
  1105. ///
  1106. static bool equivalentAddressValues(Value *A, Value *B) {
  1107. // Test if the values are trivially equivalent.
  1108. if (A == B) return true;
  1109. // Test if the values come from identical arithmetic instructions.
  1110. // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  1111. // it's only used to compare two uses within the same basic block, which
  1112. // means that they'll always either have the same value or one of them
  1113. // will have an undefined value.
  1114. if (isa<BinaryOperator>(A) ||
  1115. isa<CastInst>(A) ||
  1116. isa<PHINode>(A) ||
  1117. isa<GetElementPtrInst>(A))
  1118. if (Instruction *BI = dyn_cast<Instruction>(B))
  1119. if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
  1120. return true;
  1121. // Otherwise they may not be equivalent.
  1122. return false;
  1123. }
  1124. /// Converts store (bitcast (load (bitcast (select ...)))) to
  1125. /// store (load (select ...)), where select is minmax:
  1126. /// select ((cmp load V1, load V2), V1, V2).
static bool removeBitcastsFromLoadStoreOnMinMax(InstCombinerImpl &IC,
                                                StoreInst &SI) {
  // bitcast?
  if (!match(SI.getPointerOperand(), m_BitCast(m_Value())))
    return false;
  // load? integer?
  Value *LoadAddr;
  if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr)))))
    return false;
  auto *LI = cast<LoadInst>(SI.getValueOperand());
  if (!LI->getType()->isIntegerTy())
    return false;
  Type *CmpLoadTy;
  if (!isMinMaxWithLoads(LoadAddr, CmpLoadTy))
    return false;

  // Make sure the type would actually change.
  // This condition can be hit with chains of bitcasts.
  if (LI->getType() == CmpLoadTy)
    return false;

  // Make sure we're not changing the size of the load/store.
  const auto &DL = IC.getDataLayout();
  if (DL.getTypeStoreSizeInBits(LI->getType()) !=
      DL.getTypeStoreSizeInBits(CmpLoadTy))
    return false;

  if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
        auto *SI = dyn_cast<StoreInst>(U);
        return SI && SI->getPointerOperand() != LI &&
               InstCombiner::peekThroughBitcast(SI->getPointerOperand()) !=
                   LoadAddr &&
               !SI->getPointerOperand()->isSwiftError();
      }))
    return false;

  IC.Builder.SetInsertPoint(LI);
  LoadInst *NewLI = IC.combineLoadToNewType(*LI, CmpLoadTy);
  // Replace all the stores with stores of the newly loaded value.
  for (auto *UI : LI->users()) {
    auto *USI = cast<StoreInst>(UI);
    IC.Builder.SetInsertPoint(USI);
    combineStoreToNewValue(IC, *USI, NewLI);
  }
  IC.replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
  IC.eraseInstFromFunction(*LI);
  return true;
}
Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Try to canonicalize the stored type.
  if (combineStoreToValueType(*this, SI))
    return eraseInstFromFunction(SI);

  // Attempt to improve the alignment.
  const Align KnownAlign = getOrEnforceKnownAlignment(
      Ptr, DL.getPrefTypeAlign(Val->getType()), DL, &SI, &AC, &DT);
  if (KnownAlign > SI.getAlign())
    SI.setAlignment(KnownAlign);
  // Try to unpack aggregate stores into stores of their elements.
  if (unpackStoreToAggregate(*this, SI))
    return eraseInstFromFunction(SI);

  if (removeBitcastsFromLoadStoreOnMinMax(*this, SI))
    return eraseInstFromFunction(SI);

  // Replace GEP indices if possible.
  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
    Worklist.push(NewGEPI);
    return &SI;
  }

  // Don't hack volatile/ordered stores.
  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
  if (!SI.isUnordered()) return nullptr;

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
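  // For example (illustrative IR), in
  //   %a = alloca i32
  //   store i32 0, i32* %a
  // the store is the alloca's only use, so the store can be removed here and
  // the now-dead alloca cleaned up later.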
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return eraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return eraseInstFromFunction(SI);
      }
    }
  }

  // If we have a store to a location which is known constant, we can conclude
  // that the store must be storing the constant value (else the memory
  // wouldn't be constant), and this must be a noop.
  if (AA->pointsToConstantMemory(Ptr))
    return eraseInstFromFunction(SI);

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations. This
  // situation often occurs with bitfield accesses.
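  // For example (illustrative IR), in
  //   store i32 %x, i32* %p
  //   %y = or i32 %x, 1
  //   store i32 %y, i32* %p
  // the first store is dead: the backwards scan below skips the 'or', erases
  // that store, and re-adds the current store to the worklist.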
  BasicBlock::iterator BBI(SI);
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen, and skip
    // pointer-to-pointer bitcasts, which are NOPs.
    if (BBI->isDebugOrPseudoInst() ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }
    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isUnordered() &&
          equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1))) {
        ++NumDeadStore;
        // Manually add back the original store to the worklist now, so it will
        // be processed after the operands of the removed store, as this may
        // expose additional DSE opportunities.
        Worklist.push(&SI);
        eraseInstFromFunction(*PrevSI);
        return nullptr;
      }
      break;
    }
    // If this is a load, we have to stop. However, if the loaded value is the
    // value we are storing and it was loaded from the same pointer we are
    // storing to, then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
        assert(SI.isUnordered() && "can't eliminate ordering operation");
        return eraseInstFromFunction(SI);
      }

      // Otherwise, this is a load from some other location. Stores before it
      // may not be dead.
      break;
    }
    // Don't skip over loads, throws or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
      break;
  }
  // store X, null        -> turns into 'unreachable' in SimplifyCFG
  // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
  if (canSimplifyNullStoreOrGEP(SI)) {
    if (!isa<PoisonValue>(Val))
      return replaceOperand(SI, 0, PoisonValue::get(Val->getType()));
    return nullptr; // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return eraseInstFromFunction(SI);

  return nullptr;
}
/// Try to transform:
///   if () { *P = v1; } else { *P = v2 }
/// or:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
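///
/// A sketch of the result (block and value names are hypothetical): both
/// original stores go away and the common successor gets
///   %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
///   store i32 %storemerge, i32* %P
/// The phi is only created when the two stored values differ.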
bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
  if (!SI.isUnordered())
    return false; // This code has not been audited for volatile/ordered case.

  // Check if the successor block has exactly 2 incoming edges.
  BasicBlock *StoreBB = SI.getParent();
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
  if (!DestBB->hasNPredecessors(2))
    return false;

  // Capture the other block (the block that doesn't contain our store).
  pred_iterator PredIter = pred_begin(DestBB);
  if (*PredIter == StoreBB)
    ++PredIter;
  BasicBlock *OtherBB = *PredIter;

  // Bail out if all of the relevant blocks aren't distinct. This can happen,
  // for example, if SI is in an infinite loop.
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI(OtherBB->getTerminator());
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case. There is an instruction before the branch.
  StoreInst *OtherStore = nullptr;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info and pseudo probes.
    while (BBI->isDebugOrPseudoInst() ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI == OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle. See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the transform.
      if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
          BBI->mayWriteToMemory() || BBI == OtherBB->begin())
        return false;
    }
    // In order to eliminate the store in OtherBB, we have to make sure nothing
    // reads or overwrites the stored value in StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
        return false;
    }
  }
  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  // The debug locations of the original instructions might differ. Merge them.
  DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
                                                     OtherStore->getDebugLoc());
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
    PN->setDebugLoc(MergedLoc);
  }

  // Advance to a place where it is safe to insert the new store and insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI =
      new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlign(),
                    SI.getOrdering(), SI.getSyncScopeID());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(MergedLoc);

  // If the two stores had AA tags, merge them.
  AAMDNodes AATags = SI.getAAMetadata();
  if (AATags)
    NewSI->setAAMetadata(AATags.merge(OtherStore->getAAMetadata()));

  // Nuke the old stores.
  eraseInstFromFunction(SI);
  eraseInstFromFunction(*OtherStore);
  return true;
}