//===- InterleavedAccessPass.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Interleaved Access pass, which identifies
// interleaved memory accesses and transforms them into target specific
// intrinsics.
//
// An interleaved load reads data from memory into several vectors,
// DE-interleaving the data by a factor. An interleaved store writes several
// vectors to memory, RE-interleaving the data by a factor.
//
// As interleaved accesses are difficult to identify in CodeGen (mainly
// because the VECTOR_SHUFFLE DAG node is quite different from the
// shufflevector IR), we identify and transform them to intrinsics in this
// pass so the intrinsics can be easily matched into target specific
// instructions later in CodeGen.
//
// E.g. An interleaved load (Factor = 2):
//        %wide.vec = load <8 x i32>, <8 x i32>* %ptr
//        %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6>
//        %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <1, 3, 5, 7>
//
// It could be transformed into a ld2 intrinsic in the AArch64 backend or a
// vld2 intrinsic in the ARM backend.
//
// On X86, this can be further optimized into a set of target specific loads
// followed by an optimized sequence of shuffles.
//
// E.g. An interleaved store (Factor = 3):
//        %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
//                 <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
//        store <12 x i32> %i.vec, <12 x i32>* %ptr
//
// It could be transformed into a st3 intrinsic in the AArch64 backend or a
// vst3 intrinsic in the ARM backend.
//
// Similarly, a set of interleaved stores can be transformed into an optimized
// sequence of shuffles followed by a set of target specific stores for X86.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "interleaved-access"

static cl::opt<bool> LowerInterleavedAccesses(
    "lower-interleaved-accesses",
    cl::desc("Enable lowering interleaved accesses to intrinsics"),
    cl::init(true), cl::Hidden);

namespace {

class InterleavedAccess : public FunctionPass {
public:
  static char ID;

  InterleavedAccess() : FunctionPass(ID) {
    initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override { return "Interleaved Access Pass"; }

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.setPreservesCFG();
  }

private:
  DominatorTree *DT = nullptr;
  const TargetLowering *TLI = nullptr;

  /// The maximum supported interleave factor.
  unsigned MaxFactor;

  /// Transform an interleaved load into target specific intrinsics.
  bool lowerInterleavedLoad(LoadInst *LI,
                            SmallVector<Instruction *, 32> &DeadInsts);

  /// Transform an interleaved store into target specific intrinsics.
  bool lowerInterleavedStore(StoreInst *SI,
                             SmallVector<Instruction *, 32> &DeadInsts);

  /// Returns true if the uses of an interleaved load by the
  /// extractelement instructions in \p Extracts can be replaced by uses of the
  /// shufflevector instructions in \p Shuffles instead. If so, the necessary
  /// replacements are also performed.
  bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
                          ArrayRef<ShuffleVectorInst *> Shuffles);

  /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them
  /// to binop(shuffle(x), shuffle(y)) to allow the formation of an
  /// interleaving load. Any newly created shuffles that operate on \p LI will
  /// be added to \p Shuffles. Returns true if any changes to the IR have been
  /// made.
  bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
                            SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
                            LoadInst *LI);
};

} // end anonymous namespace.

char InterleavedAccess::ID = 0;

INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE,
    "Lower interleaved memory accesses to target specific intrinsics", false,
    false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(InterleavedAccess, DEBUG_TYPE,
    "Lower interleaved memory accesses to target specific intrinsics", false,
    false)

FunctionPass *llvm::createInterleavedAccessPass() {
  return new InterleavedAccess();
}

/// Check if the mask is a DE-interleave mask of the given factor
/// \p Factor like:
///     <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>
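///
/// E.g. with Factor = 4 and a 4-element mask, <1, 5, 9, 13> matches at
/// Index = 1, and <0, 4, undef, 12> matches at Index = 0 (undef elements are
/// ignored).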
static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
                                       unsigned &Index) {
  // Check all potential start indices from 0 to (Factor - 1).
  for (Index = 0; Index < Factor; Index++) {
    unsigned i = 0;

    // Check that elements are in ascending order by Factor. Ignore undef
    // elements.
    for (; i < Mask.size(); i++)
      if (Mask[i] >= 0 && static_cast<unsigned>(Mask[i]) != Index + i * Factor)
        break;

    if (i == Mask.size())
      return true;
  }

  return false;
}

/// Check if the mask is a DE-interleave mask for an interleaved load.
///
/// E.g. DE-interleave masks (Factor = 2) could be:
///     <0, 2, 4, 6>    (mask of index 0 to extract even elements)
///     <1, 3, 5, 7>    (mask of index 1 to extract odd elements)
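///
/// A mask narrower than the load is also accepted, as long as
/// Mask.size() * Factor does not exceed the number of loaded elements.
/// E.g. the mask <0, 2> of an <8 x i32> load DE-interleaves with Factor = 2.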
static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
                               unsigned &Index, unsigned MaxFactor,
                               unsigned NumLoadElements) {
  if (Mask.size() < 2)
    return false;

  // Check potential Factors.
  for (Factor = 2; Factor <= MaxFactor; Factor++) {
    // Make sure we don't produce a load wider than the input load.
    if (Mask.size() * Factor > NumLoadElements)
      return false;
    if (isDeInterleaveMaskOfFactor(Mask, Factor, Index))
      return true;
  }

  return false;
}

/// Check if the mask can be used in an interleaved store.
///
/// It checks for a more general pattern than the RE-interleave mask.
/// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...>
/// E.g. For a Factor of 2 (LaneLen=4): <4, 32, 5, 33, 6, 34, 7, 35>
/// E.g. For a Factor of 3 (LaneLen=4): <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
/// E.g. For a Factor of 4 (LaneLen=2): <8, 2, 12, 4, 9, 3, 13, 5>
///
/// The particular case of an RE-interleave mask is:
/// <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
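///
/// Undef elements are tolerated as long as the defined elements in each lane
/// remain consecutive. E.g. for a Factor of 2 (LaneLen=4),
/// <0, 4, undef, 5, 2, 6, 3, 7> is still accepted.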
static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
                               unsigned MaxFactor, unsigned OpNumElts) {
  unsigned NumElts = Mask.size();
  if (NumElts < 4)
    return false;

  // Check potential Factors.
  for (Factor = 2; Factor <= MaxFactor; Factor++) {
    if (NumElts % Factor)
      continue;

    unsigned LaneLen = NumElts / Factor;
    if (!isPowerOf2_32(LaneLen))
      continue;

    // Check whether each element matches the general interleaved rule.
    // Ignore undef elements, as long as the defined elements match the rule.
    // Outer loop processes all factors (x, y, z in the above example).
    unsigned I = 0, J;
    for (; I < Factor; I++) {
      unsigned SavedLaneValue;
      unsigned SavedNoUndefs = 0;

      // Inner loop processes consecutive accesses (x, x+1... in the example).
      for (J = 0; J < LaneLen - 1; J++) {
        // Lane computes x's position in the Mask.
        unsigned Lane = J * Factor + I;
        unsigned NextLane = Lane + Factor;
        int LaneValue = Mask[Lane];
        int NextLaneValue = Mask[NextLane];

        // If both are defined, values must be sequential.
        if (LaneValue >= 0 && NextLaneValue >= 0 &&
            LaneValue + 1 != NextLaneValue)
          break;

        // If the next value is undef, save the current one as reference.
        if (LaneValue >= 0 && NextLaneValue < 0) {
          SavedLaneValue = LaneValue;
          SavedNoUndefs = 1;
        }

        // Undefs are allowed, but defined elements must still be consecutive:
        // i.e.: x, ..., undef, ..., x + 2, ..., undef, ..., undef, ..., x + 5, ....
        // Verify this by remembering the last non-undef value followed by an
        // undef, and checking that later non-undef values are incremented by
        // the corresponding distance.
        if (SavedNoUndefs > 0 && LaneValue < 0) {
          SavedNoUndefs++;
          if (NextLaneValue >= 0 &&
              SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
            break;
        }
      }

      if (J < LaneLen - 1)
        break;

      int StartMask = 0;
      if (Mask[I] >= 0) {
        // The first element of the lane (J = 0) is defined; use it directly.
        StartMask = Mask[I];
      } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
        // StartMask defined by the last value in the lane.
        StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
      } else if (SavedNoUndefs > 0) {
        // StartMask defined by some non-undef value in the J loop.
        StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
      }
      // else StartMask remains set to 0, i.e. all elements are undefs.

      if (StartMask < 0)
        break;
      // We must stay within the vectors; this case can happen with undefs.
      if (StartMask + LaneLen > OpNumElts * 2)
        break;
    }

    // Found an interleaved mask of current factor.
    if (I == Factor)
      return true;
  }

  return false;
}

bool InterleavedAccess::lowerInterleavedLoad(
    LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts) {
  if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType()))
    return false;

  // Check if all users of this load are shufflevectors. If we encounter any
  // users that are extractelement instructions or binary operators, we save
  // them to later check if they can be modified to extract from one of the
  // shufflevectors instead of the load.
  SmallVector<ShuffleVectorInst *, 4> Shuffles;
  SmallVector<ExtractElementInst *, 4> Extracts;
  // BinOpShuffles need to be handled a single time in case both operands of
  // the binop are the same load.
  SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles;

  for (auto *User : LI->users()) {
    auto *Extract = dyn_cast<ExtractElementInst>(User);
    if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
      Extracts.push_back(Extract);
      continue;
    }
    auto *BI = dyn_cast<BinaryOperator>(User);
    if (BI && BI->hasOneUse()) {
      if (auto *SVI = dyn_cast<ShuffleVectorInst>(*BI->user_begin())) {
        BinOpShuffles.insert(SVI);
        continue;
      }
    }
    auto *SVI = dyn_cast<ShuffleVectorInst>(User);
    if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
      return false;

    Shuffles.push_back(SVI);
  }

  if (Shuffles.empty() && BinOpShuffles.empty())
    return false;

  unsigned Factor, Index;

  unsigned NumLoadElements =
      cast<FixedVectorType>(LI->getType())->getNumElements();
  auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
  // Check if the first shufflevector is a DE-interleave shuffle.
  if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor,
                          NumLoadElements))
    return false;

  // Holds the corresponding index for each DE-interleave shuffle.
  SmallVector<unsigned, 4> Indices;

  Type *VecTy = FirstSVI->getType();

  // Check if other shufflevectors are also DE-interleave shuffles of the same
  // type and factor as the first shufflevector.
  for (auto *Shuffle : Shuffles) {
    if (Shuffle->getType() != VecTy)
      return false;
    if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
                                    Index))
      return false;

    assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
    Indices.push_back(Index);
  }
  for (auto *Shuffle : BinOpShuffles) {
    if (Shuffle->getType() != VecTy)
      return false;
    if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
                                    Index))
      return false;

    assert(Shuffle->getShuffleMask().size() <= NumLoadElements);

    if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
      Indices.push_back(Index);
    if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
      Indices.push_back(Index);
  }

  // Try and modify users of the load that are extractelement instructions to
  // use the shufflevector instructions instead of the load.
  if (!tryReplaceExtracts(Extracts, Shuffles))
    return false;

  bool BinOpShuffleChanged =
      replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);

  LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");

  // Try to create target specific intrinsics to replace the load and shuffles.
  if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
    // If Extracts is not empty, tryReplaceExtracts made changes earlier.
    return !Extracts.empty() || BinOpShuffleChanged;
  }

  append_range(DeadInsts, Shuffles);

  DeadInsts.push_back(LI);
  return true;
}
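
// Illustrative sketch of the rewrite performed below (value names are
// hypothetical, not taken from the original source):
//   %l = load <8 x i32>, <8 x i32>* %ptr
//   %b = add <8 x i32> %l, %y
//   %s = shufflevector <8 x i32> %b, <8 x i32> poison, <0, 2, 4, 6>
// becomes
//   %s0 = shufflevector <8 x i32> %l, <8 x i32> poison, <0, 2, 4, 6>
//   %s1 = shufflevector <8 x i32> %y, <8 x i32> poison, <0, 2, 4, 6>
//   %b.new = add <4 x i32> %s0, %s1
// so that %s0, which now shuffles the load directly, can take part in
// interleaved-load formation.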
bool InterleavedAccess::replaceBinOpShuffles(
    ArrayRef<ShuffleVectorInst *> BinOpShuffles,
    SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) {
  for (auto *SVI : BinOpShuffles) {
    BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
    Type *BIOp0Ty = BI->getOperand(0)->getType();
    ArrayRef<int> Mask = SVI->getShuffleMask();
    assert(all_of(Mask, [&](int Idx) {
      return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
    }));

    auto *NewSVI1 =
        new ShuffleVectorInst(BI->getOperand(0), PoisonValue::get(BIOp0Ty),
                              Mask, SVI->getName(), SVI);
    auto *NewSVI2 = new ShuffleVectorInst(
        BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
        SVI->getName(), SVI);
    BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags(
        BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), SVI);
    SVI->replaceAllUsesWith(NewBI);
    LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI
                      << "\n With : " << *NewSVI1 << "\n And : " << *NewSVI2
                      << "\n And : " << *NewBI << "\n");
    RecursivelyDeleteTriviallyDeadInstructions(SVI);
    if (NewSVI1->getOperand(0) == LI)
      Shuffles.push_back(NewSVI1);
    if (NewSVI2->getOperand(0) == LI)
      Shuffles.push_back(NewSVI2);
  }

  return !BinOpShuffles.empty();
}
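
// Illustrative example, reusing the Factor = 2 load from the file header:
// an `extractelement <8 x i32> %wide.vec, i64 2` can be rewritten as
// `extractelement <4 x i32> %v0, i64 1`, because the DE-interleave mask
// <0, 2, 4, 6> of %v0 selects element 2 of the wide load at position 1.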
bool InterleavedAccess::tryReplaceExtracts(
    ArrayRef<ExtractElementInst *> Extracts,
    ArrayRef<ShuffleVectorInst *> Shuffles) {
  // If there aren't any extractelement instructions to modify, there's nothing
  // to do.
  if (Extracts.empty())
    return true;

  // Maps extractelement instructions to vector-index pairs. The extractelement
  // instructions will be modified to use the new vector and index operands.
  DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;

  for (auto *Extract : Extracts) {
    // The vector index that is extracted.
    auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
    auto Index = IndexOperand->getSExtValue();

    // Look for a suitable shufflevector instruction. The goal is to modify the
    // extractelement instruction (which uses an interleaved load) to use one
    // of the shufflevector instructions instead of the load.
    for (auto *Shuffle : Shuffles) {
      // If the shufflevector instruction doesn't dominate the extract, we
      // can't create a use of it.
      if (!DT->dominates(Shuffle, Extract))
        continue;

      // Inspect the indices of the shufflevector instruction. If the shuffle
      // selects the same index that is extracted, we can modify the
      // extractelement instruction.
      SmallVector<int, 4> Indices;
      Shuffle->getShuffleMask(Indices);
      for (unsigned I = 0; I < Indices.size(); ++I)
        if (Indices[I] == Index) {
          assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
                 "Vector operations do not match");
          ReplacementMap[Extract] = std::make_pair(Shuffle, I);
          break;
        }

      // If we found a suitable shufflevector instruction, stop looking.
      if (ReplacementMap.count(Extract))
        break;
    }

    // If we did not find a suitable shufflevector instruction, the
    // extractelement instruction cannot be modified, so we must give up.
    if (!ReplacementMap.count(Extract))
      return false;
  }

  // Finally, perform the replacements.
  IRBuilder<> Builder(Extracts[0]->getContext());
  for (auto &Replacement : ReplacementMap) {
    auto *Extract = Replacement.first;
    auto *Vector = Replacement.second.first;
    auto Index = Replacement.second.second;
    Builder.SetInsertPoint(Extract);
    Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index));
    Extract->eraseFromParent();
  }

  return true;
}

bool InterleavedAccess::lowerInterleavedStore(
    StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) {
  if (!SI->isSimple())
    return false;

  auto *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
  if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
    return false;

  // Check if the shufflevector is a RE-interleave shuffle.
  unsigned Factor;
  unsigned OpNumElts =
      cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
  if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
    return false;

  LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");

  // Try to create target specific intrinsics to replace the store and shuffle.
  if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
    return false;

  // Already have a new target specific interleaved store. Erase the old store
  // and shuffle.
  DeadInsts.push_back(SI);
  DeadInsts.push_back(SVI);
  return true;
}

bool InterleavedAccess::runOnFunction(Function &F) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC || !LowerInterleavedAccesses)
    return false;

  LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");

  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &TM = TPC->getTM<TargetMachine>();
  TLI = TM.getSubtargetImpl(F)->getTargetLowering();
  MaxFactor = TLI->getMaxSupportedInterleaveFactor();

  // Holds dead instructions that will be erased later.
  SmallVector<Instruction *, 32> DeadInsts;
  bool Changed = false;

  for (auto &I : instructions(F)) {
    if (auto *LI = dyn_cast<LoadInst>(&I))
      Changed |= lowerInterleavedLoad(LI, DeadInsts);

    if (auto *SI = dyn_cast<StoreInst>(&I))
      Changed |= lowerInterleavedStore(SI, DeadInsts);
  }

  for (auto *I : DeadInsts)
    I->eraseFromParent();

  return Changed;
}