//===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the LoadStoreOpt optimization pass.
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>

#define DEBUG_TYPE "loadstore-opt"

using namespace llvm;
using namespace ore;
using namespace MIPatternMatch;

STATISTIC(NumStoresMerged, "Number of stores merged");

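// The maximum size, in bits, of a merged store this pass will try to form.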
const unsigned MaxStoreSizeToForm = 128;

char LoadStoreOpt::ID = 0;
INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
                      false, false)
INITIALIZE_PASS_END(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
                    false, false)

LoadStoreOpt::LoadStoreOpt(std::function<bool(const MachineFunction &)> F)
    : MachineFunctionPass(ID), DoNotRunPass(F) {}

LoadStoreOpt::LoadStoreOpt()
    : LoadStoreOpt([](const MachineFunction &) { return false; }) {}

void LoadStoreOpt::init(MachineFunction &MF) {
  this->MF = &MF;
  MRI = &MF.getRegInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  TLI = MF.getSubtarget().getTargetLowering();
  LI = MF.getSubtarget().getLegalizerInfo();
  Builder.setMF(MF);
  IsPreLegalizer = !MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::Legalized);
  InstsToErase.clear();
}

void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AAResultsWrapperPass>();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}

BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
                                                MachineRegisterInfo &MRI) {
  BaseIndexOffset Info;
  Register PtrAddRHS;
  if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
    Info.BaseReg = Ptr;
    Info.IndexReg = Register();
    Info.IsIndexSignExt = false;
    return Info;
  }

  auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
  if (RHSCst)
    Info.Offset = RHSCst->Value.getSExtValue();

  // Just recognize a simple case for now. In future we'll need to match
  // indexing patterns for base + index + constant.
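  // For example (illustrative), given %ptr = G_PTR_ADD %base, %cst where
  // %cst = G_CONSTANT i64 16, we record BaseReg = %base, IndexReg = %cst and
  // Offset = 16.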
  Info.IndexReg = PtrAddRHS;
  Info.IsIndexSignExt = false;
  return Info;
}

bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
                                               const MachineInstr &MI2,
                                               bool &IsAlias,
                                               MachineRegisterInfo &MRI) {
  auto *LdSt1 = dyn_cast<GLoadStore>(&MI1);
  auto *LdSt2 = dyn_cast<GLoadStore>(&MI2);
  if (!LdSt1 || !LdSt2)
    return false;

  BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
  BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);

  if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
    return false;

  int64_t Size1 = LdSt1->getMemSize();
  int64_t Size2 = LdSt2->getMemSize();

  int64_t PtrDiff;
  if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
    PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
    // If the size of a memory access is unknown, do not use it for analysis.
    // One example of an unknown-size access is a load/store of a scalable
    // vector object on the stack.
    // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
    // following situations arise:
    if (PtrDiff >= 0 &&
        Size1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
      // [----BasePtr0----]
      //                         [---BasePtr1--]
      // ========PtrDiff========>
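      // For example, if BasePtr0 accesses 4 bytes at offset 0 and BasePtr1
      // accesses 4 bytes at offset 4, then PtrDiff = 4 and Size1 <= PtrDiff,
      // so the accesses are disjoint and IsAlias is set to false.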
      IsAlias = !(Size1 <= PtrDiff);
      return true;
    }
    if (PtrDiff < 0 &&
        Size2 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
      //                     [----BasePtr0----]
      // [---BasePtr1--]
      // =====(-PtrDiff)====>
      IsAlias = !((PtrDiff + Size2) <= 0);
      return true;
    }
    return false;
  }

  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
  // able to calculate their relative offset if at least one arises
  // from an alloca. However, these allocas cannot overlap and we
  // can infer there is no alias.
  auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
  auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
  if (!Base0Def || !Base1Def)
    return false; // Couldn't tell anything.

  if (Base0Def->getOpcode() != Base1Def->getOpcode())
    return false;

  if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo();
    // Distinct frame indices cannot overlap unless both are fixed objects,
    // so only infer no-alias when at least one of them is a non-fixed object.
    if (Base0Def != Base1Def &&
        (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) ||
         !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) {
      IsAlias = false;
      return true;
    }
  }

  // This implementation is a lot more primitive than the SDAG one for now.
  // FIXME: what about constant pools?
  if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
    auto GV0 = Base0Def->getOperand(1).getGlobal();
    auto GV1 = Base1Def->getOperand(1).getGlobal();
    if (GV0 != GV1) {
      IsAlias = false;
      return true;
    }
  }

  // Can't tell anything about aliasing.
  return false;
}

bool GISelAddressing::instMayAlias(const MachineInstr &MI,
                                   const MachineInstr &Other,
                                   MachineRegisterInfo &MRI,
                                   AliasAnalysis *AA) {
  struct MemUseCharacteristics {
    bool IsVolatile;
    bool IsAtomic;
    Register BasePtr;
    int64_t Offset;
    uint64_t NumBytes;
    MachineMemOperand *MMO;
  };

  auto getCharacteristics =
      [&](const MachineInstr *MI) -> MemUseCharacteristics {
    if (const auto *LS = dyn_cast<GLoadStore>(MI)) {
      Register BaseReg;
      int64_t Offset = 0;
      // No pre/post-inc addressing modes are considered here, unlike in SDAG.
      if (!mi_match(LS->getPointerReg(), MRI,
                    m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) {
        BaseReg = LS->getPointerReg();
        Offset = 0;
      }

      uint64_t Size = MemoryLocation::getSizeOrUnknown(
          LS->getMMO().getMemoryType().getSizeInBytes());
      return {LS->isVolatile(), LS->isAtomic(), BaseReg,
              Offset /*base offset*/, Size, &LS->getMMO()};
    }
    // FIXME: support recognizing lifetime instructions.
    // Default.
    return {false /*isvolatile*/,
            /*isAtomic*/ false, Register(),
            (int64_t)0 /*offset*/, 0 /*size*/,
            (MachineMemOperand *)nullptr};
  };

  MemUseCharacteristics MUC0 = getCharacteristics(&MI),
                        MUC1 = getCharacteristics(&Other);

  // If they are to the same address, then they must be aliases.
  if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
      MUC0.Offset == MUC1.Offset)
    return true;

  // If they are both volatile then they cannot be reordered.
  if (MUC0.IsVolatile && MUC1.IsVolatile)
    return true;

  // Be conservative about atomics for the moment.
  // TODO: This is way overconservative for unordered atomics (see D66309)
  if (MUC0.IsAtomic && MUC1.IsAtomic)
    return true;

  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias.
  if (MUC0.MMO && MUC1.MMO) {
    if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
        (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
      return false;
  }

  // Try to prove that there is aliasing, or that there is no aliasing. Either
  // way, we can return now. If nothing can be proved, proceed with more tests.
  bool IsAlias;
  if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
    return IsAlias;

  // The following all rely on MMO0 and MMO1 being valid.
  if (!MUC0.MMO || !MUC1.MMO)
    return true;

  // FIXME: port the alignment based alias analysis from SDAG's isAlias().
  int64_t SrcValOffset0 = MUC0.MMO->getOffset();
  int64_t SrcValOffset1 = MUC1.MMO->getOffset();
  uint64_t Size0 = MUC0.NumBytes;
  uint64_t Size1 = MUC1.NumBytes;
  if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
      Size0 != MemoryLocation::UnknownSize &&
      Size1 != MemoryLocation::UnknownSize) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset;
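    // Both sizes are measured from the common MinOffset, so e.g. two 4-byte
    // accesses at offsets 8 and 12 give Overlap0 = 4 and Overlap1 = 8, which
    // keeps the no-alias query below conservative for both accesses.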
    if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0,
                                     MUC0.MMO->getAAInfo()),
                      MemoryLocation(MUC1.MMO->getValue(), Overlap1,
                                     MUC1.MMO->getAAInfo())))
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}

/// Returns true if the instruction creates an unavoidable hazard that
/// forces a boundary between store merge candidates.
static bool isInstHardMergeHazard(MachineInstr &MI) {
  return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef();
}

bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
  // Try to merge all the stores in the vector, splitting into separate segments
  // as necessary.
  assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge");
  LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg());
  LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg());
  unsigned AS = PtrTy.getAddressSpace();
  // Ensure the legal store info is computed for this address space.
  initializeStoreMergeTargetInfo(AS);
  const auto &LegalSizes = LegalStoreSizes[AS];

#ifndef NDEBUG
  for (auto StoreMI : StoresToMerge)
    assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
#endif

  const auto &DL = MF->getFunction().getParent()->getDataLayout();
  bool AnyMerged = false;
  do {
    unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size());
    unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedSize();
    // Compute the biggest store we can generate to handle the number of stores.
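    // For example, merging three s32 stores gives NumPow2 = 2 and
    // MaxSizeBits = 64; if a 64-bit store is legal, the first two stores are
    // merged into one s64 store and the loop continues with the remainder.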
    unsigned MergeSizeBits;
    for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) {
      LLT StoreTy = LLT::scalar(MergeSizeBits);
      EVT StoreEVT =
          getApproximateEVTForLLT(StoreTy, DL, MF->getFunction().getContext());
      if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] &&
          TLI->canMergeStoresTo(AS, StoreEVT, *MF) &&
          (TLI->isTypeLegal(StoreEVT)))
        break; // We can generate a MergeSize bits store.
    }
    if (MergeSizeBits <= OrigTy.getSizeInBits())
      return AnyMerged; // No greater merge.

    unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits();
    // Perform the actual merging.
    SmallVector<GStore *, 8> SingleMergeStores(
        StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
    AnyMerged |= doSingleStoreMerge(SingleMergeStores);
    StoresToMerge.erase(StoresToMerge.begin(),
                        StoresToMerge.begin() + NumStoresToMerge);
  } while (StoresToMerge.size() > 1);
  return AnyMerged;
}

bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query,
                                            MachineFunction &MF) const {
  auto Action = LI->getAction(Query).Action;
  // If the instruction is unsupported, it can't be legalized at all.
  if (Action == LegalizeActions::Unsupported)
    return false;
  return IsPreLegalizer || Action == LegalizeAction::Legal;
}

bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
  assert(Stores.size() > 1);
  // We know that all the stores are consecutive and there are no aliasing
  // operations in the range. However, the values that are being stored may be
  // generated anywhere before each store. To ensure we have the values
  // available, we materialize the wide value and new store at the place of the
  // final store in the merge sequence.
  GStore *FirstStore = Stores[0];
  const unsigned NumStores = Stores.size();
  LLT SmallTy = MRI->getType(FirstStore->getValueReg());
  LLT WideValueTy =
      LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedSize());

  // For each store, compute pairwise merged debug locs.
  DebugLoc MergedLoc;
  for (unsigned AIdx = 0, BIdx = 1; BIdx < NumStores; ++AIdx, ++BIdx)
    MergedLoc = DILocation::getMergedLocation(Stores[AIdx]->getDebugLoc(),
                                              Stores[BIdx]->getDebugLoc());
  Builder.setInstr(*Stores.back());
  Builder.setDebugLoc(MergedLoc);

  // If all of the store values are constants, then create a wide constant
  // directly. Otherwise, we need to generate some instructions to merge the
  // existing values together into a wider type.
  SmallVector<APInt, 8> ConstantVals;
  for (auto Store : Stores) {
    auto MaybeCst =
        getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI);
    if (!MaybeCst) {
      ConstantVals.clear();
      break;
    }
    ConstantVals.emplace_back(MaybeCst->Value);
  }

  Register WideReg;
  auto *WideMMO =
      MF->getMachineMemOperand(&FirstStore->getMMO(), 0, WideValueTy);
  if (ConstantVals.empty()) {
    // Mimic the SDAG behaviour here and don't try to do anything for unknown
    // values. In future, we should also support the cases of loads and
    // extracted vector elements.
    return false;
  }

  assert(ConstantVals.size() == NumStores);
  // Check if our wide constant is legal.
  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF))
    return false;
  APInt WideConst(WideValueTy.getSizeInBits(), 0);
  for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) {
    // Insert the smaller constant into the corresponding position in the
    // wider one.
    WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits());
  }
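  // Layout example (illustrative): merging two s16 stores of 0x1234 (the
  // lowest-addressed store, Stores[0]) and 0x5678 yields WideConst =
  // 0x56781234, i.e. each value lands at bit offset Idx * 16, which matches
  // the original byte layout on a little-endian target.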
  WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0);
  auto NewStore =
      Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO);
  (void) NewStore;
  LLVM_DEBUG(dbgs() << "Created merged store: " << *NewStore);
  NumStoresMerged += Stores.size();

  MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
  MORE.emit([&]() {
    MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore",
                                FirstStore->getDebugLoc(),
                                FirstStore->getParent());
    R << "Merged " << NV("NumMerged", Stores.size()) << " stores of "
      << NV("OrigWidth", SmallTy.getSizeInBytes())
      << " bytes into a single store of "
      << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes";
    return R;
  });

  for (auto MI : Stores)
    InstsToErase.insert(MI);
  return true;
}

bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
  if (C.Stores.size() < 2) {
    C.reset();
    return false;
  }

  LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size()
                    << " stores, starting with " << *C.Stores[0]);
  // We know that the stores in the candidate are adjacent.
  // Now we need to check if any potential aliasing instructions recorded
  // during the search alias with load/stores added to the candidate after.
  // For example, if we have the candidate:
  //   C.Stores = [ST1, ST2, ST3, ST4]
  // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or
  // ST2, then we would have recorded it into the PotentialAliases structure
  // with the associated index value of "1". Then we see ST3 and ST4 and add
  // them to the candidate group. We know that LD1 does not alias with ST1 or
  // ST2, since we already did that check. However we don't yet know if it
  // may alias ST3 and ST4, so we perform those checks now.
  SmallVector<GStore *> StoresToMerge;

  auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) {
    for (auto AliasInfo : reverse(C.PotentialAliases)) {
      MachineInstr *PotentialAliasOp = AliasInfo.first;
      unsigned PreCheckedIdx = AliasInfo.second;
      if (static_cast<unsigned>(Idx) > PreCheckedIdx) {
        // Need to check this alias.
        if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
                                          AA)) {
          LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
                            << " detected\n");
          return true;
        }
      } else {
        // Once our store index is lower than the index associated with the
        // potential alias, we know that we've already checked for this alias
        // and all of the earlier potential aliases too.
        return false;
      }
    }
    return false;
  };
  // Start from the last store in the group, and check if it aliases with any
  // of the potential aliasing operations in the list.
  for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) {
    auto *CheckStore = C.Stores[StoreIdx];
    if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore))
      continue;
    StoresToMerge.emplace_back(CheckStore);
  }

  LLVM_DEBUG(dbgs() << StoresToMerge.size()
                    << " stores remaining after alias checks. Merging...\n");

  // Now we've checked for aliasing hazards, merge any stores left.
  C.reset();
  if (StoresToMerge.size() < 2)
    return false;
  return mergeStores(StoresToMerge);
}

bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI,
                                                 StoreMergeCandidate &C) {
  if (C.Stores.empty())
    return false;
  return llvm::any_of(C.Stores, [&](MachineInstr *OtherMI) {
    return instMayAlias(MI, *OtherMI, *MRI, AA);
  });
}

void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) {
  PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1));
}

bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
                                       StoreMergeCandidate &C) {
  // Check if the given store writes to an adjacent address, and other
  // requirements.
  LLT ValueTy = MRI->getType(StoreMI.getValueReg());
  LLT PtrTy = MRI->getType(StoreMI.getPointerReg());

  // Only handle scalars.
  if (!ValueTy.isScalar())
    return false;

  // Don't allow truncating stores for now.
  if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
    return false;

  Register StoreAddr = StoreMI.getPointerReg();
  auto BIO = getPointerInfo(StoreAddr, *MRI);
  Register StoreBase = BIO.BaseReg;
  uint64_t StoreOffCst = BIO.Offset;
  if (C.Stores.empty()) {
    // This is the first store of the candidate.
    // If the offset can't possibly allow for a lower addressed store with the
    // same base, don't bother adding it.
    if (StoreOffCst < ValueTy.getSizeInBytes())
      return false;
    C.BasePtr = StoreBase;
    C.CurrentLowestOffset = StoreOffCst;
    C.Stores.emplace_back(&StoreMI);
    LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
                      << StoreMI);
    return true;
  }

  // Check the store is the same size as the existing ones in the candidate.
  if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() !=
      ValueTy.getSizeInBits())
    return false;

  if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() !=
      PtrTy.getAddressSpace())
    return false;

  // There are other stores in the candidate. Check that the store address
  // writes to the next lowest adjacent address.
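  // For example, with 4-byte stores and C.CurrentLowestOffset == 8, only a
  // store to [C.BasePtr + 4] can extend the candidate.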
  if (C.BasePtr != StoreBase)
    return false;
  if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
      static_cast<uint64_t>(StoreOffCst))
    return false;

  // This writes to an adjacent address. Allow it.
  C.Stores.emplace_back(&StoreMI);
  C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
  LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
  return true;
}

bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
  bool Changed = false;
  // Walk through the block bottom-up, looking for merging candidates.
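  // Visiting bottom-up means the first store of a candidate is the one at the
  // highest address, and each store added afterwards must be at the next lower
  // adjacent address; this matches stores emitted in increasing address order.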
  StoreMergeCandidate Candidate;
  for (MachineInstr &MI : llvm::reverse(MBB)) {
    if (InstsToErase.contains(&MI))
      continue;

    if (auto *StoreMI = dyn_cast<GStore>(&MI)) {
      // We have a G_STORE. Add it to the candidate if it writes to an adjacent
      // address.
      if (!addStoreToCandidate(*StoreMI, Candidate)) {
        // Store wasn't eligible to be added. May need to record it as a
        // potential alias.
        if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
          Changed |= processMergeCandidate(Candidate);
          continue;
        }
        Candidate.addPotentialAlias(*StoreMI);
      }
      continue;
    }

    // If we don't have any stores yet, this instruction can't pose a problem.
    if (Candidate.Stores.empty())
      continue;

    // We're dealing with some other kind of instruction.
    if (isInstHardMergeHazard(MI)) {
      Changed |= processMergeCandidate(Candidate);
      Candidate.Stores.clear();
      continue;
    }

    if (!MI.mayLoadOrStore())
      continue;

    if (operationAliasesWithCandidate(MI, Candidate)) {
      // We have a potential alias, so process the current candidate if we can
      // and then continue looking for a new candidate.
      Changed |= processMergeCandidate(Candidate);
      continue;
    }

    // Record this instruction as a potential alias for future stores that are
    // added to the candidate.
    Candidate.addPotentialAlias(MI);
  }

  // Process any candidate left after finishing searching the entire block.
  Changed |= processMergeCandidate(Candidate);

  // Erase instructions now that we're no longer iterating over the block.
  for (auto *MI : InstsToErase)
    MI->eraseFromParent();
  InstsToErase.clear();
  return Changed;
}

bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
  bool Changed = false;
  for (auto &BB : MF) {
    Changed |= mergeBlockStores(BB);
  }
  return Changed;
}

void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
  // Query the legalizer info to record what store types are legal.
  // We record this because we don't want to bother trying to merge stores into
  // illegal ones, which would just result in them being split again.
  if (LegalStoreSizes.count(AddrSpace)) {
    assert(LegalStoreSizes[AddrSpace].any());
    return; // Already cached sizes for this address space.
  }

  // Need to reserve at least MaxStoreSizeToForm + 1 bits.
  BitVector LegalSizes(MaxStoreSizeToForm * 2);
  const auto &LI = *MF->getSubtarget().getLegalizerInfo();
  const auto &DL = MF->getFunction().getParent()->getDataLayout();
  Type *IntPtrIRTy =
      DL.getIntPtrType(MF->getFunction().getContext(), AddrSpace);
  LLT PtrTy = getLLTForType(*IntPtrIRTy->getPointerTo(AddrSpace), DL);
  // We assume that we're not going to be generating any stores wider than
  // MaxStoreSizeToForm bits for now.
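  // On a typical 64-bit target this ends up marking s8, s16, s32 and s64 (and
  // s128 where that is a legal store type) as sizes we are willing to merge to.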
  for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
    LLT Ty = LLT::scalar(Size);
    SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
        {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}});
    SmallVector<LLT> StoreTys({Ty, PtrTy});
    LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
    LegalizeActionStep ActionStep = LI.getAction(Q);
    if (ActionStep.Action == LegalizeActions::Legal)
      LegalSizes.set(Size);
  }
  assert(LegalSizes.any() && "Expected some store sizes to be legal!");
  LegalStoreSizes[AddrSpace] = LegalSizes;
}

bool LoadStoreOpt::runOnMachineFunction(MachineFunction &MF) {
  // If the ISel pipeline failed, do not bother running this pass.
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;

  LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
                    << '\n');

  init(MF);
  bool Changed = false;
  Changed |= mergeFunctionStores(MF);

  LegalStoreSizes.clear();
  return Changed;
}