//===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the LoadStoreOpt optimization pass.
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>

#define DEBUG_TYPE "loadstore-opt"

using namespace llvm;
using namespace ore;
using namespace MIPatternMatch;

STATISTIC(NumStoresMerged, "Number of stores merged");
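
// Maximum store width, in bits, that this pass will try to form when merging
// adjacent stores (see initializeStoreMergeTargetInfo below).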
const unsigned MaxStoreSizeToForm = 128;

char LoadStoreOpt::ID = 0;
INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
                      false, false)
INITIALIZE_PASS_END(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
                    false, false)

LoadStoreOpt::LoadStoreOpt(std::function<bool(const MachineFunction &)> F)
    : MachineFunctionPass(ID), DoNotRunPass(F) {}

LoadStoreOpt::LoadStoreOpt()
    : LoadStoreOpt([](const MachineFunction &) { return false; }) {}

void LoadStoreOpt::init(MachineFunction &MF) {
  this->MF = &MF;
  MRI = &MF.getRegInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  TLI = MF.getSubtarget().getTargetLowering();
  LI = MF.getSubtarget().getLegalizerInfo();
  Builder.setMF(MF);
  IsPreLegalizer = !MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::Legalized);
  InstsToErase.clear();
}

void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AAResultsWrapperPass>();
  AU.setPreservesAll();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}

BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
                                                MachineRegisterInfo &MRI) {
  BaseIndexOffset Info;
  Register PtrAddRHS;
  if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
    Info.BaseReg = Ptr;
    Info.IndexReg = Register();
    Info.IsIndexSignExt = false;
    return Info;
  }

  auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
  if (RHSCst)
    Info.Offset = RHSCst->Value.getSExtValue();

  // Just recognize a simple case for now. In future we'll need to match
  // indexing patterns for base + index + constant.
  Info.IndexReg = PtrAddRHS;
  Info.IsIndexSignExt = false;
  return Info;
}

bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
                                               const MachineInstr &MI2,
                                               bool &IsAlias,
                                               MachineRegisterInfo &MRI) {
  auto *LdSt1 = dyn_cast<GLoadStore>(&MI1);
  auto *LdSt2 = dyn_cast<GLoadStore>(&MI2);
  if (!LdSt1 || !LdSt2)
    return false;

  BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
  BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);

  if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
    return false;

  int64_t Size1 = LdSt1->getMemSize();
  int64_t Size2 = LdSt2->getMemSize();

  int64_t PtrDiff;
  if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
    PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
    // If the size of a memory access is unknown, do not use it in the
    // analysis. One example of an unknown-size access is a load/store of a
    // scalable vector object on the stack.
    // BasePtr1 is PtrDiff away from BasePtr0. The accesses cannot alias if
    // their ranges do not overlap, as in the layouts below:
    if (PtrDiff >= 0 &&
        Size1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
      // [----BasePtr0----]
      //                         [---BasePtr1--]
      // ========PtrDiff========>
      IsAlias = !(Size1 <= PtrDiff);
      return true;
    }
    if (PtrDiff < 0 &&
        Size2 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
      //                     [----BasePtr0----]
      // [---BasePtr1--]
      // =====(-PtrDiff)====>
      IsAlias = !((PtrDiff + Size2) <= 0);
      return true;
    }
    return false;
  }

  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
  // able to calculate their relative offset if at least one arises
  // from an alloca. However, these allocas cannot overlap and we
  // can infer there is no alias.
  auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
  auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
  if (!Base0Def || !Base1Def)
    return false; // Couldn't tell anything.

  if (Base0Def->getOpcode() != Base1Def->getOpcode())
    return false;

  if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo();
    // The bases are different frame indices. If at least one of them is a
    // normal (non-fixed) stack object, the slots cannot overlap, so there is
    // no alias. If both are fixed objects, stay conservative.
    if (Base0Def != Base1Def &&
        (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) ||
         !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) {
      IsAlias = false;
      return true;
    }
  }

  // This implementation is a lot more primitive than the SDAG one for now.
  // FIXME: what about constant pools?
  if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
    auto GV0 = Base0Def->getOperand(1).getGlobal();
    auto GV1 = Base1Def->getOperand(1).getGlobal();
    if (GV0 != GV1) {
      IsAlias = false;
      return true;
    }
  }

  // Can't tell anything about aliasing.
  return false;
}

bool GISelAddressing::instMayAlias(const MachineInstr &MI,
                                   const MachineInstr &Other,
                                   MachineRegisterInfo &MRI,
                                   AliasAnalysis *AA) {
  struct MemUseCharacteristics {
    bool IsVolatile;
    bool IsAtomic;
    Register BasePtr;
    int64_t Offset;
    uint64_t NumBytes;
    MachineMemOperand *MMO;
  };

  auto getCharacteristics =
      [&](const MachineInstr *MI) -> MemUseCharacteristics {
    if (const auto *LS = dyn_cast<GLoadStore>(MI)) {
      Register BaseReg;
      int64_t Offset = 0;
      // No pre/post-inc addressing modes are considered here, unlike in SDAG.
      if (!mi_match(LS->getPointerReg(), MRI,
                    m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) {
        BaseReg = LS->getPointerReg();
        Offset = 0;
      }

      uint64_t Size = MemoryLocation::getSizeOrUnknown(
          LS->getMMO().getMemoryType().getSizeInBytes());
      return {LS->isVolatile(), LS->isAtomic(), BaseReg,
              Offset /*base offset*/, Size, &LS->getMMO()};
    }
    // FIXME: support recognizing lifetime instructions.
    // Default.
    return {false /*isvolatile*/,
            /*isAtomic*/ false, Register(),
            (int64_t)0 /*offset*/, 0 /*size*/,
            (MachineMemOperand *)nullptr};
  };

  MemUseCharacteristics MUC0 = getCharacteristics(&MI),
                        MUC1 = getCharacteristics(&Other);

  // If they access the same base address with the same offset, they trivially
  // alias.
  if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
      MUC0.Offset == MUC1.Offset)
    return true;

  // If they are both volatile then they cannot be reordered.
  if (MUC0.IsVolatile && MUC1.IsVolatile)
    return true;

  // Be conservative about atomics for the moment.
  // TODO: This is way overconservative for unordered atomics (see D66309).
  if (MUC0.IsAtomic && MUC1.IsAtomic)
    return true;

  // If one operation reads from invariant memory, and the other may store,
  // they cannot alias.
  if (MUC0.MMO && MUC1.MMO) {
    if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
        (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
      return false;
  }

  // Try to prove that there is aliasing, or that there is no aliasing. Either
  // way, we can return now. If nothing can be proved, proceed with more tests.
  bool IsAlias;
  if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
    return IsAlias;

  // The following all rely on MMO0 and MMO1 being valid.
  if (!MUC0.MMO || !MUC1.MMO)
    return true;

  // FIXME: port the alignment based alias analysis from SDAG's isAlias().
  int64_t SrcValOffset0 = MUC0.MMO->getOffset();
  int64_t SrcValOffset1 = MUC1.MMO->getOffset();
  uint64_t Size0 = MUC0.NumBytes;
  uint64_t Size1 = MUC1.NumBytes;
  if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
      Size0 != MemoryLocation::UnknownSize &&
      Size1 != MemoryLocation::UnknownSize) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset;
    if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0,
                                     MUC0.MMO->getAAInfo()),
                      MemoryLocation(MUC1.MMO->getValue(), Overlap1,
                                     MUC1.MMO->getAAInfo())))
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}

/// Returns true if the instruction creates an unavoidable hazard that
/// forces a boundary between store merge candidates.
static bool isInstHardMergeHazard(MachineInstr &MI) {
  return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef();
}

bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
  // Try to merge all the stores in the vector, splitting into separate segments
  // as necessary.
  assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge");
  LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg());
  LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg());
  unsigned AS = PtrTy.getAddressSpace();
  // Ensure the legal store info is computed for this address space.
  initializeStoreMergeTargetInfo(AS);
  const auto &LegalSizes = LegalStoreSizes[AS];

#ifndef NDEBUG
  for (auto *StoreMI : StoresToMerge)
    assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
#endif

  const auto &DL = MF->getFunction().getParent()->getDataLayout();
  bool AnyMerged = false;
  do {
    unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size());
    unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedValue();
    // Compute the biggest store we can generate to handle the number of stores.
    unsigned MergeSizeBits;
    for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) {
      LLT StoreTy = LLT::scalar(MergeSizeBits);
      EVT StoreEVT =
          getApproximateEVTForLLT(StoreTy, DL, MF->getFunction().getContext());
      if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] &&
          TLI->canMergeStoresTo(AS, StoreEVT, *MF) &&
          (TLI->isTypeLegal(StoreEVT)))
        break; // We can generate a MergeSize bits store.
    }
    if (MergeSizeBits <= OrigTy.getSizeInBits())
      return AnyMerged; // No greater merge.

    unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits();
    // Perform the actual merging.
    SmallVector<GStore *, 8> SingleMergeStores(
        StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
    AnyMerged |= doSingleStoreMerge(SingleMergeStores);
    StoresToMerge.erase(StoresToMerge.begin(),
                        StoresToMerge.begin() + NumStoresToMerge);
  } while (StoresToMerge.size() > 1);
  return AnyMerged;
}
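
// Before legalization we optimistically allow any action other than
// Unsupported, since the legalizer can still break the operation down later.
// After legalization, only actions reported as Legal are acceptable.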
bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query,
                                            MachineFunction &MF) const {
  auto Action = LI->getAction(Query).Action;
  // If the instruction is unsupported, it can't be legalized at all.
  if (Action == LegalizeActions::Unsupported)
    return false;
  return IsPreLegalizer || Action == LegalizeAction::Legal;
}

bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
  assert(Stores.size() > 1);
  // We know that all the stores are consecutive and there are no aliasing
  // operations in the range. However, the values that are being stored may be
  // generated anywhere before each store. To ensure we have the values
  // available, we materialize the wide value and new store at the place of the
  // final store in the merge sequence.
  GStore *FirstStore = Stores[0];
  const unsigned NumStores = Stores.size();
  LLT SmallTy = MRI->getType(FirstStore->getValueReg());
  LLT WideValueTy =
      LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedValue());

  // For each store, compute pairwise merged debug locs.
  DebugLoc MergedLoc = Stores.front()->getDebugLoc();
  for (auto *Store : drop_begin(Stores))
    MergedLoc = DILocation::getMergedLocation(MergedLoc, Store->getDebugLoc());

  Builder.setInstr(*Stores.back());
  Builder.setDebugLoc(MergedLoc);

  // If all of the store values are constants, then create a wide constant
  // directly. Otherwise, we need to generate some instructions to merge the
  // existing values together into a wider type.
  SmallVector<APInt, 8> ConstantVals;
  for (auto *Store : Stores) {
    auto MaybeCst =
        getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI);
    if (!MaybeCst) {
      ConstantVals.clear();
      break;
    }
    ConstantVals.emplace_back(MaybeCst->Value);
  }

  Register WideReg;
  auto *WideMMO =
      MF->getMachineMemOperand(&FirstStore->getMMO(), 0, WideValueTy);
  if (ConstantVals.empty()) {
    // Mimic the SDAG behaviour here and don't try to do anything for unknown
    // values. In future, we should also support the cases of loads and
    // extracted vector elements.
    return false;
  }

  assert(ConstantVals.size() == NumStores);
  // Check if our wide constant is legal.
  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF))
    return false;
  APInt WideConst(WideValueTy.getSizeInBits(), 0);
  for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) {
    // Insert the smaller constant into the corresponding position in the
    // wider one.
    WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits());
  }
  WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0);
  auto NewStore =
      Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO);
  (void)NewStore;
  LLVM_DEBUG(dbgs() << "Created merged store: " << *NewStore);
  NumStoresMerged += Stores.size();

  MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
  MORE.emit([&]() {
    MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore",
                                FirstStore->getDebugLoc(),
                                FirstStore->getParent());
    R << "Merged " << NV("NumMerged", Stores.size()) << " stores of "
      << NV("OrigWidth", SmallTy.getSizeInBytes())
      << " bytes into a single store of "
      << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes";
    return R;
  });

  for (auto *MI : Stores)
    InstsToErase.insert(MI);
  return true;
}

bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
  if (C.Stores.size() < 2) {
    C.reset();
    return false;
  }

  LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size()
                    << " stores, starting with " << *C.Stores[0]);
  // We know that the stores in the candidate are adjacent.
  // Now we need to check if any potential aliasing instructions recorded
  // during the search alias with the loads/stores added to the candidate
  // afterwards. For example, if we have the candidate:
  //   C.Stores = [ST1, ST2, ST3, ST4]
  // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or
  // ST2, then we would have recorded it into the PotentialAliases structure
  // with the associated index value of "1". Then we see ST3 and ST4 and add
  // them to the candidate group. We know that LD1 does not alias with ST1 or
  // ST2, since we already did that check. However we don't yet know if it
  // may alias ST3 and ST4, so we perform those checks now.
  SmallVector<GStore *> StoresToMerge;

  auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) {
    for (auto AliasInfo : reverse(C.PotentialAliases)) {
      MachineInstr *PotentialAliasOp = AliasInfo.first;
      unsigned PreCheckedIdx = AliasInfo.second;
      if (static_cast<unsigned>(Idx) > PreCheckedIdx) {
        // Need to check this alias.
        if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
                                          AA)) {
          LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
                            << " detected\n");
          return true;
        }
      } else {
        // Once our store index is lower than the index associated with the
        // potential alias, we know that we've already checked for this alias
        // and all of the earlier potential aliases too.
        return false;
      }
    }
    return false;
  };
  // Start from the last store in the group, and check if it aliases with any
  // of the potential aliasing operations in the list.
  for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) {
    auto *CheckStore = C.Stores[StoreIdx];
    if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore))
      continue;
    StoresToMerge.emplace_back(CheckStore);
  }

  LLVM_DEBUG(dbgs() << StoresToMerge.size()
                    << " stores remaining after alias checks. Merging...\n");

  // Now we've checked for aliasing hazards, merge any stores left.
  C.reset();
  if (StoresToMerge.size() < 2)
    return false;
  return mergeStores(StoresToMerge);
}

bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI,
                                                 StoreMergeCandidate &C) {
  if (C.Stores.empty())
    return false;
  return llvm::any_of(C.Stores, [&](MachineInstr *OtherMI) {
    return instMayAlias(MI, *OtherMI, *MRI, AA);
  });
}
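
// Record an instruction that may alias with stores added to the candidate
// later. The stored index is the index of the last store already in the
// candidate, i.e. the stores that were already checked against MI when it was
// recorded.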
void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) {
  PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1));
}

bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
                                       StoreMergeCandidate &C) {
  // Check if the given store writes to an adjacent address, and other
  // requirements.
  LLT ValueTy = MRI->getType(StoreMI.getValueReg());
  LLT PtrTy = MRI->getType(StoreMI.getPointerReg());

  // Only handle scalars.
  if (!ValueTy.isScalar())
    return false;

  // Don't allow truncating stores for now.
  if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
    return false;

  // Avoid adding volatile or ordered stores to the candidate. We already have
  // a check for this in instMayAlias(), but that is only called later, when
  // checking potential aliasing hazards.
  if (!StoreMI.isSimple())
    return false;

  Register StoreAddr = StoreMI.getPointerReg();
  auto BIO = getPointerInfo(StoreAddr, *MRI);
  Register StoreBase = BIO.BaseReg;
  uint64_t StoreOffCst = BIO.Offset;
  if (C.Stores.empty()) {
    // This is the first store of the candidate.
    // If the offset can't possibly allow for a lower-addressed store with the
    // same base, don't bother adding it.
    if (StoreOffCst < ValueTy.getSizeInBytes())
      return false;
    C.BasePtr = StoreBase;
    C.CurrentLowestOffset = StoreOffCst;
    C.Stores.emplace_back(&StoreMI);
    LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
                      << StoreMI);
    return true;
  }

  // Check the store is the same size as the existing ones in the candidate.
  if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() !=
      ValueTy.getSizeInBits())
    return false;

  if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() !=
      PtrTy.getAddressSpace())
    return false;

  // There are other stores in the candidate. Check that the store address
  // writes to the next lowest adjacent address.
  if (C.BasePtr != StoreBase)
    return false;
  if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
      static_cast<uint64_t>(StoreOffCst))
    return false;

  // This writes to an adjacent address. Allow it.
  C.Stores.emplace_back(&StoreMI);
  C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
  LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
  return true;
}

bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
  bool Changed = false;
  // Walk through the block bottom-up, looking for merging candidates.
  StoreMergeCandidate Candidate;
  for (MachineInstr &MI : llvm::reverse(MBB)) {
    if (InstsToErase.contains(&MI))
      continue;

    if (auto *StoreMI = dyn_cast<GStore>(&MI)) {
      // We have a G_STORE. Add it to the candidate if it writes to an adjacent
      // address.
      if (!addStoreToCandidate(*StoreMI, Candidate)) {
        // Store wasn't eligible to be added. May need to record it as a
        // potential alias.
        if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
          Changed |= processMergeCandidate(Candidate);
          continue;
        }
        Candidate.addPotentialAlias(*StoreMI);
      }
      continue;
    }

    // If we don't have any stores yet, this instruction can't pose a problem.
    if (Candidate.Stores.empty())
      continue;

    // We're dealing with some other kind of instruction.
    if (isInstHardMergeHazard(MI)) {
      Changed |= processMergeCandidate(Candidate);
      Candidate.Stores.clear();
      continue;
    }

    if (!MI.mayLoadOrStore())
      continue;

    if (operationAliasesWithCandidate(MI, Candidate)) {
      // We have a potential alias, so process the current candidate if we can
      // and then continue looking for a new candidate.
      Changed |= processMergeCandidate(Candidate);
      continue;
    }

    // Record this instruction as a potential alias for future stores that are
    // added to the candidate.
    Candidate.addPotentialAlias(MI);
  }

  // Process any candidate left over after we finish searching the entire block.
  Changed |= processMergeCandidate(Candidate);

  // Erase instructions now that we're no longer iterating over the block.
  for (auto *MI : InstsToErase)
    MI->eraseFromParent();
  InstsToErase.clear();
  return Changed;
}

bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
  bool Changed = false;
  for (auto &BB : MF) {
    Changed |= mergeBlockStores(BB);
  }
  return Changed;
}

void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
  // Query the legalizer info to record what store types are legal.
  // We record this because we don't want to bother trying to merge stores into
  // illegal ones, which would just be split apart again.
  if (LegalStoreSizes.count(AddrSpace)) {
    assert(LegalStoreSizes[AddrSpace].any());
    return; // Already cached sizes for this address space.
  }

  // Need to reserve at least MaxStoreSizeToForm + 1 bits.
  BitVector LegalSizes(MaxStoreSizeToForm * 2);
  const auto &LI = *MF->getSubtarget().getLegalizerInfo();
  const auto &DL = MF->getFunction().getParent()->getDataLayout();
  Type *IntPtrIRTy =
      DL.getIntPtrType(MF->getFunction().getContext(), AddrSpace);
  LLT PtrTy = getLLTForType(*IntPtrIRTy->getPointerTo(AddrSpace), DL);
  // We assume that we're not going to be generating any stores wider than
  // MaxStoreSizeToForm bits for now.
  for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
    LLT Ty = LLT::scalar(Size);
    SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
        {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}});
    SmallVector<LLT> StoreTys({Ty, PtrTy});
    LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
    LegalizeActionStep ActionStep = LI.getAction(Q);
    if (ActionStep.Action == LegalizeActions::Legal)
      LegalSizes.set(Size);
  }
  assert(LegalSizes.any() && "Expected some store sizes to be legal!");
  LegalStoreSizes[AddrSpace] = LegalSizes;
}

bool LoadStoreOpt::runOnMachineFunction(MachineFunction &MF) {
  // If the ISel pipeline failed, do not bother running this pass.
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;

  LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
                    << '\n');

  init(MF);
  bool Changed = false;
  Changed |= mergeFunctionStores(MF);

  LegalStoreSizes.clear();
  return Changed;
}