  1. //===- InterleavedAccessPass.cpp ------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements the Interleaved Access pass, which identifies
  10. // interleaved memory accesses and transforms them into target specific
  11. // intrinsics.
  12. //
  13. // An interleaved load reads data from memory into several vectors, with
  14. // DE-interleaving the data on a factor. An interleaved store writes several
  15. // vectors to memory with RE-interleaving the data on a factor.
  16. //
  17. // As interleaved accesses are difficult to identify in CodeGen (mainly
  18. // because the VECTOR_SHUFFLE DAG node is quite different from the shufflevector
  19. // IR), we identify and transform them to intrinsics in this pass so the
  20. // intrinsics can be easily matched into target specific instructions later in
  21. // CodeGen.
  22. //
  23. // E.g. An interleaved load (Factor = 2):
  24. // %wide.vec = load <8 x i32>, <8 x i32>* %ptr
  25. // %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6>
  26. // %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <1, 3, 5, 7>
  27. //
  28. // It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
  29. // intrinsic in ARM backend.
  30. //
  31. // In X86, this can be further optimized into a set of target
  32. // specific loads followed by an optimized sequence of shuffles.
  33. //
  34. // E.g. An interleaved store (Factor = 3):
  35. // %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
  36. // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
  37. // store <12 x i32> %i.vec, <12 x i32>* %ptr
  38. //
  39. // It could be transformed into a st3 intrinsic in AArch64 backend or a vst3
  40. // intrinsic in ARM backend.
  41. //
  42. // Similarly, a set of interleaved stores can be transformed into an optimized
  43. // sequence of shuffles followed by a set of target specific stores for X86.
  44. //
  45. //===----------------------------------------------------------------------===//
  46. #include "llvm/ADT/ArrayRef.h"
  47. #include "llvm/ADT/DenseMap.h"
  48. #include "llvm/ADT/SetVector.h"
  49. #include "llvm/ADT/SmallVector.h"
  50. #include "llvm/CodeGen/TargetLowering.h"
  51. #include "llvm/CodeGen/TargetPassConfig.h"
  52. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  53. #include "llvm/IR/Constants.h"
  54. #include "llvm/IR/Dominators.h"
  55. #include "llvm/IR/Function.h"
  56. #include "llvm/IR/IRBuilder.h"
  57. #include "llvm/IR/InstIterator.h"
  58. #include "llvm/IR/Instruction.h"
  59. #include "llvm/IR/Instructions.h"
  60. #include "llvm/InitializePasses.h"
  61. #include "llvm/Pass.h"
  62. #include "llvm/Support/Casting.h"
  63. #include "llvm/Support/CommandLine.h"
  64. #include "llvm/Support/Debug.h"
  65. #include "llvm/Support/MathExtras.h"
  66. #include "llvm/Support/raw_ostream.h"
  67. #include "llvm/Target/TargetMachine.h"
  68. #include "llvm/Transforms/Utils/Local.h"
  69. #include <cassert>
  70. #include <utility>
  71. using namespace llvm;
  72. #define DEBUG_TYPE "interleaved-access"
  73. static cl::opt<bool> LowerInterleavedAccesses(
  74. "lower-interleaved-accesses",
  75. cl::desc("Enable lowering interleaved accesses to intrinsics"),
  76. cl::init(true), cl::Hidden);
  77. namespace {
  78. class InterleavedAccess : public FunctionPass {
  79. public:
  80. static char ID;
  81. InterleavedAccess() : FunctionPass(ID) {
  82. initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
  83. }
  84. StringRef getPassName() const override { return "Interleaved Access Pass"; }
  85. bool runOnFunction(Function &F) override;
  86. void getAnalysisUsage(AnalysisUsage &AU) const override {
  87. AU.addRequired<DominatorTreeWrapperPass>();
  88. AU.setPreservesCFG();
  89. }
  90. private:
  91. DominatorTree *DT = nullptr;
  92. const TargetLowering *TLI = nullptr;
  93. /// The maximum supported interleave factor.
  94. unsigned MaxFactor;
  95. /// Transform an interleaved load into target specific intrinsics.
  96. bool lowerInterleavedLoad(LoadInst *LI,
  97. SmallVector<Instruction *, 32> &DeadInsts);
  98. /// Transform an interleaved store into target specific intrinsics.
  99. bool lowerInterleavedStore(StoreInst *SI,
  100. SmallVector<Instruction *, 32> &DeadInsts);
  101. /// Returns true if the uses of an interleaved load by the
  102. /// extractelement instructions in \p Extracts can be replaced by uses of the
  103. /// shufflevector instructions in \p Shuffles instead. If so, the necessary
  104. /// replacements are also performed.
  105. bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
  106. ArrayRef<ShuffleVectorInst *> Shuffles);
  107. /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them
  108. /// to binop(shuffle(x), shuffle(y)) to allow the formation of an
  109. /// interleaving load. Any newly created shuffles that operate on \p LI will
  110. /// be added to \p Shuffles. Returns true, if any changes to the IR have been
  111. /// made.
  112. bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
  113. SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
  114. LoadInst *LI);
  115. };
  116. } // end anonymous namespace.
  117. char InterleavedAccess::ID = 0;
  118. INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE,
  119. "Lower interleaved memory accesses to target specific intrinsics", false,
  120. false)
  121. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  122. INITIALIZE_PASS_END(InterleavedAccess, DEBUG_TYPE,
  123. "Lower interleaved memory accesses to target specific intrinsics", false,
  124. false)
  125. FunctionPass *llvm::createInterleavedAccessPass() {
  126. return new InterleavedAccess();
  127. }
  128. /// Check if the mask is a DE-interleave mask of the given factor
  129. /// \p Factor like:
  130. /// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>
  131. static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
  132. unsigned &Index) {
  133. // Check all potential start indices from 0 to (Factor - 1).
  134. for (Index = 0; Index < Factor; Index++) {
  135. unsigned i = 0;
  136. // Check that elements are in ascending order by Factor. Ignore undef
  137. // elements.
  138. for (; i < Mask.size(); i++)
  139. if (Mask[i] >= 0 && static_cast<unsigned>(Mask[i]) != Index + i * Factor)
  140. break;
  141. if (i == Mask.size())
  142. return true;
  143. }
  144. return false;
  145. }
  146. /// Check if the mask is a DE-interleave mask for an interleaved load.
  147. ///
  148. /// E.g. DE-interleave masks (Factor = 2) could be:
  149. /// <0, 2, 4, 6> (mask of index 0 to extract even elements)
  150. /// <1, 3, 5, 7> (mask of index 1 to extract odd elements)
  151. static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
  152. unsigned &Index, unsigned MaxFactor,
  153. unsigned NumLoadElements) {
  154. if (Mask.size() < 2)
  155. return false;
  156. // Check potential Factors.
  157. for (Factor = 2; Factor <= MaxFactor; Factor++) {
  158. // Make sure we don't produce a load wider than the input load.
  159. if (Mask.size() * Factor > NumLoadElements)
  160. return false;
  161. if (isDeInterleaveMaskOfFactor(Mask, Factor, Index))
  162. return true;
  163. }
  164. return false;
  165. }
  166. /// Check if the mask can be used in an interleaved store.
  167. //
  168. /// It checks for a more general pattern than the RE-interleave mask.
  169. /// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...>
  170. /// E.g. For a Factor of 2 (LaneLen=4): <4, 32, 5, 33, 6, 34, 7, 35>
  171. /// E.g. For a Factor of 3 (LaneLen=4): <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
  172. /// E.g. For a Factor of 4 (LaneLen=2): <8, 2, 12, 4, 9, 3, 13, 5>
  173. ///
  174. /// The particular case of an RE-interleave mask is:
  175. /// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
  176. /// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
  177. static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
  178. unsigned MaxFactor, unsigned OpNumElts) {
  179. unsigned NumElts = Mask.size();
  180. if (NumElts < 4)
  181. return false;
  182. // Check potential Factors.
  183. for (Factor = 2; Factor <= MaxFactor; Factor++) {
  184. if (NumElts % Factor)
  185. continue;
  186. unsigned LaneLen = NumElts / Factor;
  187. if (!isPowerOf2_32(LaneLen))
  188. continue;
  189. // Check whether each element matches the general interleaved rule.
  190. // Ignore undef elements, as long as the defined elements match the rule.
  191. // Outer loop processes all factors (x, y, z in the above example)
  192. unsigned I = 0, J;
  193. for (; I < Factor; I++) {
  194. unsigned SavedLaneValue;
  195. unsigned SavedNoUndefs = 0;
  196. // Inner loop processes consecutive accesses (x, x+1... in the example)
  197. for (J = 0; J < LaneLen - 1; J++) {
  198. // Lane computes x's position in the Mask
  199. unsigned Lane = J * Factor + I;
  200. unsigned NextLane = Lane + Factor;
  201. int LaneValue = Mask[Lane];
  202. int NextLaneValue = Mask[NextLane];
  203. // If both are defined, values must be sequential
  204. if (LaneValue >= 0 && NextLaneValue >= 0 &&
  205. LaneValue + 1 != NextLaneValue)
  206. break;
  207. // If the next value is undef, save the current one as reference
  208. if (LaneValue >= 0 && NextLaneValue < 0) {
  209. SavedLaneValue = LaneValue;
  210. SavedNoUndefs = 1;
  211. }
  212. // Undefs are allowed, but defined elements must still be consecutive:
  213. // i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
  214. // Verify this by storing the last non-undef followed by an undef
  215. // Check that following non-undef masks are incremented with the
  216. // corresponding distance.
  217. if (SavedNoUndefs > 0 && LaneValue < 0) {
  218. SavedNoUndefs++;
  219. if (NextLaneValue >= 0 &&
  220. SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
  221. break;
  222. }
  223. }
  224. if (J < LaneLen - 1)
  225. break;
  226. int StartMask = 0;
  227. if (Mask[I] >= 0) {
  228. // Check that the start of the I range (J=0) is greater than 0
  229. StartMask = Mask[I];
  230. } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
  231. // StartMask defined by the last value in lane
  232. StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
  233. } else if (SavedNoUndefs > 0) {
  234. // StartMask defined by some non-zero value in the j loop
  235. StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
  236. }
  237. // else StartMask remains set to 0, i.e. all elements are undefs
  238. if (StartMask < 0)
  239. break;
  240. // We must stay within the vectors; This case can happen with undefs.
  241. if (StartMask + LaneLen > OpNumElts*2)
  242. break;
  243. }
  244. // Found an interleaved mask of current factor.
  245. if (I == Factor)
  246. return true;
  247. }
  248. return false;
  249. }
  250. bool InterleavedAccess::lowerInterleavedLoad(
  251. LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts) {
  252. if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType()))
  253. return false;
  254. // Check if all users of this load are shufflevectors. If we encounter any
  255. // users that are extractelement instructions or binary operators, we save
  256. // them to later check if they can be modified to extract from one of the
  257. // shufflevectors instead of the load.
  258. SmallVector<ShuffleVectorInst *, 4> Shuffles;
  259. SmallVector<ExtractElementInst *, 4> Extracts;
  260. // BinOpShuffles need to be handled a single time in case both operands of the
  261. // binop are the same load.
  262. SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles;
  263. for (auto *User : LI->users()) {
  264. auto *Extract = dyn_cast<ExtractElementInst>(User);
  265. if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
  266. Extracts.push_back(Extract);
  267. continue;
  268. }
  269. if (auto *BI = dyn_cast<BinaryOperator>(User)) {
  270. if (all_of(BI->users(),
  271. [](auto *U) { return isa<ShuffleVectorInst>(U); })) {
  272. for (auto *SVI : BI->users())
  273. BinOpShuffles.insert(cast<ShuffleVectorInst>(SVI));
  274. continue;
  275. }
  276. }
  277. auto *SVI = dyn_cast<ShuffleVectorInst>(User);
  278. if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
  279. return false;
  280. Shuffles.push_back(SVI);
  281. }
  282. if (Shuffles.empty() && BinOpShuffles.empty())
  283. return false;
  284. unsigned Factor, Index;
  285. unsigned NumLoadElements =
  286. cast<FixedVectorType>(LI->getType())->getNumElements();
  287. auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
  288. // Check if the first shufflevector is DE-interleave shuffle.
  289. if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor,
  290. NumLoadElements))
  291. return false;
  292. // Holds the corresponding index for each DE-interleave shuffle.
  293. SmallVector<unsigned, 4> Indices;
  294. Type *VecTy = FirstSVI->getType();
  295. // Check if other shufflevectors are also DE-interleaved of the same type
  296. // and factor as the first shufflevector.
  297. for (auto *Shuffle : Shuffles) {
  298. if (Shuffle->getType() != VecTy)
  299. return false;
  300. if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
  301. Index))
  302. return false;
  303. assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
  304. Indices.push_back(Index);
  305. }
  306. for (auto *Shuffle : BinOpShuffles) {
  307. if (Shuffle->getType() != VecTy)
  308. return false;
  309. if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
  310. Index))
  311. return false;
  312. assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
  313. if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
  314. Indices.push_back(Index);
  315. if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
  316. Indices.push_back(Index);
  317. }
  318. // Try and modify users of the load that are extractelement instructions to
  319. // use the shufflevector instructions instead of the load.
  320. if (!tryReplaceExtracts(Extracts, Shuffles))
  321. return false;
  322. bool BinOpShuffleChanged =
  323. replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
  324. LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
  325. // Try to create target specific intrinsics to replace the load and shuffles.
  326. if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
  327. // If Extracts is not empty, tryReplaceExtracts made changes earlier.
  328. return !Extracts.empty() || BinOpShuffleChanged;
  329. }
  330. append_range(DeadInsts, Shuffles);
  331. DeadInsts.push_back(LI);
  332. return true;
  333. }
  334. bool InterleavedAccess::replaceBinOpShuffles(
  335. ArrayRef<ShuffleVectorInst *> BinOpShuffles,
  336. SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) {
  337. for (auto *SVI : BinOpShuffles) {
  338. BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
  339. Type *BIOp0Ty = BI->getOperand(0)->getType();
  340. ArrayRef<int> Mask = SVI->getShuffleMask();
  341. assert(all_of(Mask, [&](int Idx) {
  342. return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
  343. }));
  344. auto *NewSVI1 =
  345. new ShuffleVectorInst(BI->getOperand(0), PoisonValue::get(BIOp0Ty),
  346. Mask, SVI->getName(), SVI);
  347. auto *NewSVI2 = new ShuffleVectorInst(
  348. BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
  349. SVI->getName(), SVI);
  350. BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags(
  351. BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), SVI);
  352. SVI->replaceAllUsesWith(NewBI);
  353. LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI
  354. << "\n With : " << *NewSVI1 << "\n And : "
  355. << *NewSVI2 << "\n And : " << *NewBI << "\n");
  356. RecursivelyDeleteTriviallyDeadInstructions(SVI);
  357. if (NewSVI1->getOperand(0) == LI)
  358. Shuffles.push_back(NewSVI1);
  359. if (NewSVI2->getOperand(0) == LI)
  360. Shuffles.push_back(NewSVI2);
  361. }
  362. return !BinOpShuffles.empty();
  363. }
  364. bool InterleavedAccess::tryReplaceExtracts(
  365. ArrayRef<ExtractElementInst *> Extracts,
  366. ArrayRef<ShuffleVectorInst *> Shuffles) {
  367. // If there aren't any extractelement instructions to modify, there's nothing
  368. // to do.
  369. if (Extracts.empty())
  370. return true;
  371. // Maps extractelement instructions to vector-index pairs. The extractlement
  372. // instructions will be modified to use the new vector and index operands.
  373. DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;
  374. for (auto *Extract : Extracts) {
  375. // The vector index that is extracted.
  376. auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
  377. auto Index = IndexOperand->getSExtValue();
  378. // Look for a suitable shufflevector instruction. The goal is to modify the
  379. // extractelement instruction (which uses an interleaved load) to use one
  380. // of the shufflevector instructions instead of the load.
  381. for (auto *Shuffle : Shuffles) {
  382. // If the shufflevector instruction doesn't dominate the extract, we
  383. // can't create a use of it.
  384. if (!DT->dominates(Shuffle, Extract))
  385. continue;
  386. // Inspect the indices of the shufflevector instruction. If the shuffle
  387. // selects the same index that is extracted, we can modify the
  388. // extractelement instruction.
  389. SmallVector<int, 4> Indices;
  390. Shuffle->getShuffleMask(Indices);
  391. for (unsigned I = 0; I < Indices.size(); ++I)
  392. if (Indices[I] == Index) {
  393. assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
  394. "Vector operations do not match");
  395. ReplacementMap[Extract] = std::make_pair(Shuffle, I);
  396. break;
  397. }
  398. // If we found a suitable shufflevector instruction, stop looking.
  399. if (ReplacementMap.count(Extract))
  400. break;
  401. }
  402. // If we did not find a suitable shufflevector instruction, the
  403. // extractelement instruction cannot be modified, so we must give up.
  404. if (!ReplacementMap.count(Extract))
  405. return false;
  406. }
  407. // Finally, perform the replacements.
  408. IRBuilder<> Builder(Extracts[0]->getContext());
  409. for (auto &Replacement : ReplacementMap) {
  410. auto *Extract = Replacement.first;
  411. auto *Vector = Replacement.second.first;
  412. auto Index = Replacement.second.second;
  413. Builder.SetInsertPoint(Extract);
  414. Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index));
  415. Extract->eraseFromParent();
  416. }
  417. return true;
  418. }
  419. bool InterleavedAccess::lowerInterleavedStore(
  420. StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) {
  421. if (!SI->isSimple())
  422. return false;
  423. auto *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
  424. if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
  425. return false;
  426. // Check if the shufflevector is RE-interleave shuffle.
  427. unsigned Factor;
  428. unsigned OpNumElts =
  429. cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
  430. if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
  431. return false;
  432. LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
  433. // Try to create target specific intrinsics to replace the store and shuffle.
  434. if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
  435. return false;
  436. // Already have a new target specific interleaved store. Erase the old store.
  437. DeadInsts.push_back(SI);
  438. DeadInsts.push_back(SVI);
  439. return true;
  440. }
  441. bool InterleavedAccess::runOnFunction(Function &F) {
  442. auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  443. if (!TPC || !LowerInterleavedAccesses)
  444. return false;
  445. LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
  446. DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  447. auto &TM = TPC->getTM<TargetMachine>();
  448. TLI = TM.getSubtargetImpl(F)->getTargetLowering();
  449. MaxFactor = TLI->getMaxSupportedInterleaveFactor();
  450. // Holds dead instructions that will be erased later.
  451. SmallVector<Instruction *, 32> DeadInsts;
  452. bool Changed = false;
  453. for (auto &I : instructions(F)) {
  454. if (auto *LI = dyn_cast<LoadInst>(&I))
  455. Changed |= lowerInterleavedLoad(LI, DeadInsts);
  456. if (auto *SI = dyn_cast<StoreInst>(&I))
  457. Changed |= lowerInterleavedStore(SI, DeadInsts);
  458. }
  459. for (auto *I : DeadInsts)
  460. I->eraseFromParent();
  461. return Changed;
  462. }