ScalarizeMaskedMemIntrin.cpp

//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
//                                    intrinsics
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass replaces masked memory intrinsics - when unsupported by the target
// - with a chain of basic blocks that deals with the elements one-by-one if
// the appropriate mask bit is set.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <cassert>

using namespace llvm;

#define DEBUG_TYPE "scalarize-masked-mem-intrin"

namespace {

class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) {
    initializeScalarizeMaskedMemIntrinLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override {
    return "Scalarize Masked Memory Intrinsics";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
  }
};

} // end anonymous namespace

static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
                          const TargetTransformInfo &TTI, const DataLayout &DL,
                          DomTreeUpdater *DTU);
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
                             const TargetTransformInfo &TTI,
                             const DataLayout &DL, DomTreeUpdater *DTU);

char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
                      "Scalarize unsupported masked memory intrinsics", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
                    "Scalarize unsupported masked memory intrinsics", false,
                    false)

FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() {
  return new ScalarizeMaskedMemIntrinLegacyPass();
}

static bool isConstantIntVector(Value *Mask) {
  Constant *C = dyn_cast<Constant>(Mask);
  if (!C)
    return false;

  unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
  for (unsigned i = 0; i != NumElts; ++i) {
    Constant *CElt = C->getAggregateElement(i);
    if (!CElt || !isa<ConstantInt>(CElt))
      return false;
  }

  return true;
}

static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
                                unsigned Idx) {
  return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx;
}

// Translate a masked load intrinsic like
// <16 x i32> @llvm.masked.load(<16 x i32>* %addr, i32 align,
//                              <16 x i1> %mask, <16 x i32> %passthru)
// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
//
//  %1 = bitcast i8* %addr to i32*
//  %2 = extractelement <16 x i1> %mask, i32 0
//  br i1 %2, label %cond.load, label %else
//
// cond.load:                                       ; preds = %0
//  %3 = getelementptr i32* %1, i32 0
//  %4 = load i32* %3
//  %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
//  br label %else
//
// else:                                            ; preds = %0, %cond.load
//  %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
//  %6 = extractelement <16 x i1> %mask, i32 1
//  br i1 %6, label %cond.load1, label %else2
//
// cond.load1:                                      ; preds = %else
//  %7 = getelementptr i32* %1, i32 1
//  %8 = load i32* %7
//  %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
//  br label %else2
//
// else2:                                           ; preds = %else, %cond.load1
//  %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
//  %10 = extractelement <16 x i1> %mask, i32 2
//  br i1 %10, label %cond.load4, label %else5
//
static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI,
                                DomTreeUpdater *DTU, bool &ModifiedDT) {
  Value *Ptr = CI->getArgOperand(0);
  Value *Alignment = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);
  Value *Src0 = CI->getArgOperand(3);

  const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
  VectorType *VecType = cast<FixedVectorType>(CI->getType());

  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();

  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // Short-cut if the mask is all-true.
  if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
    Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
    CI->replaceAllUsesWith(NewI);
    CI->eraseFromParent();
    return;
  }

  // Adjust alignment for the scalar instruction.
  const Align AdjustedAlignVal =
      commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
  // Bitcast %addr from i8* to EltTy*
  Type *NewPtrType =
      EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
  unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();

  // The result vector
  Value *VResult = Src0;

  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
      LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
      VResult = Builder.CreateInsertElement(VResult, Load, Idx);
    }
    CI->replaceAllUsesWith(VResult);
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
    //  %mask_1 = and i16 %scalar_mask, i32 1 << Idx
    //  %cond = icmp ne i16 %mask_1, 0
    //  br i1 %mask_1, label %cond.load, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx);
    }

    // Create "cond" block
    //
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %Elt = load i32* %EltAddr
    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.load");

    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
    LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
    Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    // Create the phi to join the new and previous value.
    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
    PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
    Phi->addIncoming(NewVResult, CondBlock);
    Phi->addIncoming(VResult, PrevIfBlock);
    VResult = Phi;
  }

  CI->replaceAllUsesWith(VResult);
  CI->eraseFromParent();

  ModifiedDT = true;
}

// Translate a masked store intrinsic, like
// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
//                         <16 x i1> %mask)
// to a chain of basic blocks, that stores element one-by-one if
// the appropriate mask bit is set
//
//  %1 = bitcast i8* %addr to i32*
//  %2 = extractelement <16 x i1> %mask, i32 0
//  br i1 %2, label %cond.store, label %else
//
// cond.store:                                      ; preds = %0
//  %3 = extractelement <16 x i32> %val, i32 0
//  %4 = getelementptr i32* %1, i32 0
//  store i32 %3, i32* %4
//  br label %else
//
// else:                                            ; preds = %0, %cond.store
//  %5 = extractelement <16 x i1> %mask, i32 1
//  br i1 %5, label %cond.store1, label %else2
//
// cond.store1:                                     ; preds = %else
//  %6 = extractelement <16 x i32> %val, i32 1
//  %7 = getelementptr i32* %1, i32 1
//  store i32 %6, i32* %7
//  br label %else2
//  . . .
static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI,
                                 DomTreeUpdater *DTU, bool &ModifiedDT) {
  Value *Src = CI->getArgOperand(0);
  Value *Ptr = CI->getArgOperand(1);
  Value *Alignment = CI->getArgOperand(2);
  Value *Mask = CI->getArgOperand(3);

  const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
  auto *VecType = cast<VectorType>(Src->getType());

  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // Short-cut if the mask is all-true.
  if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
    Builder.CreateAlignedStore(Src, Ptr, AlignVal);
    CI->eraseFromParent();
    return;
  }

  // Adjust alignment for the scalar instruction.
  const Align AdjustedAlignVal =
      commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
  // Bitcast %addr from i8* to EltTy*
  Type *NewPtrType =
      EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
  unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();

  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *OneElt = Builder.CreateExtractElement(Src, Idx);
      Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
      Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
    }
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %mask_1 = and i16 %scalar_mask, i32 1 << Idx
    //  %cond = icmp ne i16 %mask_1, 0
    //  br i1 %mask_1, label %cond.store, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx);
    }

    // Create "cond" block
    //
    //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  store i32 %OneElt, i32* %EltAddr
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.store");

    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *OneElt = Builder.CreateExtractElement(Src, Idx);
    Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
    Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");

    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
  }
  CI->eraseFromParent();

  ModifiedDT = true;
}

// Translate a masked gather intrinsic like
// <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %Ptrs, i32 4,
//                                       <16 x i1> %Mask, <16 x i32> %Src)
// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
//
//  %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
//  %Mask0 = extractelement <16 x i1> %Mask, i32 0
//  br i1 %Mask0, label %cond.load, label %else
//
// cond.load:
//  %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
//  %Load0 = load i32, i32* %Ptr0, align 4
//  %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
//  br label %else
//
// else:
//  %res.phi.else = phi <16 x i32> [ %Res0, %cond.load ], [ undef, %0 ]
//  %Mask1 = extractelement <16 x i1> %Mask, i32 1
//  br i1 %Mask1, label %cond.load1, label %else2
//
// cond.load1:
//  %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
//  %Load1 = load i32, i32* %Ptr1, align 4
//  %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
//  br label %else2
//  . . .
//  %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
//  ret <16 x i32> %Result
static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI,
                                  DomTreeUpdater *DTU, bool &ModifiedDT) {
  Value *Ptrs = CI->getArgOperand(0);
  Value *Alignment = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);
  Value *Src0 = CI->getArgOperand(3);

  auto *VecType = cast<FixedVectorType>(CI->getType());
  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();
  Builder.SetInsertPoint(InsertPt);
  MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();

  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // The result vector
  Value *VResult = Src0;
  unsigned VectorWidth = VecType->getNumElements();

  // Shorten the way if the mask is a vector of constants.
  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
      LoadInst *Load =
          Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
      VResult =
          Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
    }
    CI->replaceAllUsesWith(VResult);
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %Mask1 = and i16 %scalar_mask, i32 1 << Idx
    //  %cond = icmp ne i16 %mask_1, 0
    //  br i1 %Mask1, label %cond.load, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    }

    // Create "cond" block
    //
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %Elt = load i32* %EltAddr
    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.load");

    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
    LoadInst *Load =
        Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
    Value *NewVResult =
        Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    // Create the phi to join the new and previous value.
    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
    PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
    Phi->addIncoming(NewVResult, CondBlock);
    Phi->addIncoming(VResult, PrevIfBlock);
    VResult = Phi;
  }

  CI->replaceAllUsesWith(VResult);
  CI->eraseFromParent();

  ModifiedDT = true;
}

// Translate a masked scatter intrinsic, like
// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*> %Ptrs, i32 4,
//                                  <16 x i1> %Mask)
// to a chain of basic blocks, that stores element one-by-one if
// the appropriate mask bit is set.
//
//  %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
//  %Mask0 = extractelement <16 x i1> %Mask, i32 0
//  br i1 %Mask0, label %cond.store, label %else
//
// cond.store:
//  %Elt0 = extractelement <16 x i32> %Src, i32 0
//  %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
//  store i32 %Elt0, i32* %Ptr0, align 4
//  br label %else
//
// else:
//  %Mask1 = extractelement <16 x i1> %Mask, i32 1
//  br i1 %Mask1, label %cond.store1, label %else2
//
// cond.store1:
//  %Elt1 = extractelement <16 x i32> %Src, i32 1
//  %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
//  store i32 %Elt1, i32* %Ptr1, align 4
//  br label %else2
//  . . .
static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI,
                                   DomTreeUpdater *DTU, bool &ModifiedDT) {
  Value *Src = CI->getArgOperand(0);
  Value *Ptrs = CI->getArgOperand(1);
  Value *Alignment = CI->getArgOperand(2);
  Value *Mask = CI->getArgOperand(3);

  auto *SrcFVTy = cast<FixedVectorType>(Src->getType());

  assert(
      isa<VectorType>(Ptrs->getType()) &&
      isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
      "Vector of pointers is expected in masked scatter intrinsic");

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
  unsigned VectorWidth = SrcFVTy->getNumElements();

  // Shorten the way if the mask is a vector of constants.
  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *OneElt =
          Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
      Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
      Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
    }
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %Mask1 = and i16 %scalar_mask, i32 1 << Idx
    //  %cond = icmp ne i16 %mask_1, 0
    //  br i1 %Mask1, label %cond.store, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    }

    // Create "cond" block
    //
    //  %Elt1 = extractelement <16 x i32> %Src, i32 1
    //  %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
    //  store i32 %Elt1, i32* %Ptr1
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.store");

    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
    Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
    Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");

    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
  }
  CI->eraseFromParent();

  ModifiedDT = true;
}

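// Translate a masked expandload intrinsic, like
//   <16 x i32> @llvm.masked.expandload.v16i32(i32* %ptr, <16 x i1> %mask,
//                                             <16 x i32> %passthru)
// into either a build_vector/shuffle blend (constant mask) or a chain of
// basic blocks that reads consecutive elements starting at %ptr, placing each
// loaded value into the next enabled lane of the result. The pointer is only
// advanced past lanes whose mask bit is set; disabled lanes keep %passthru.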
static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI,
                                      DomTreeUpdater *DTU, bool &ModifiedDT) {
  Value *Ptr = CI->getArgOperand(0);
  Value *Mask = CI->getArgOperand(1);
  Value *PassThru = CI->getArgOperand(2);

  auto *VecType = cast<FixedVectorType>(CI->getType());

  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();

  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  unsigned VectorWidth = VecType->getNumElements();

  // The result vector
  Value *VResult = PassThru;

  // Shorten the way if the mask is a vector of constants.
  // Create a build_vector pattern, with loads/undefs as necessary and then
  // shuffle blend with the pass through value.
  if (isConstantIntVector(Mask)) {
    unsigned MemIndex = 0;
    VResult = UndefValue::get(VecType);
    SmallVector<int, 16> ShuffleMask(VectorWidth, UndefMaskElem);
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      Value *InsertElt;
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
        InsertElt = UndefValue::get(EltTy);
        ShuffleMask[Idx] = Idx + VectorWidth;
      } else {
        Value *NewPtr =
            Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
        InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1),
                                              "Load" + Twine(Idx));
        ShuffleMask[Idx] = Idx;
        ++MemIndex;
      }
      VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx,
                                            "Res" + Twine(Idx));
    }
    VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
    CI->replaceAllUsesWith(VResult);
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
    //  br i1 %mask_1, label %cond.load, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    }

    // Create "cond" block
    //
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %Elt = load i32* %EltAddr
    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.load");

    Builder.SetInsertPoint(CondBlock->getTerminator());
    LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1));
    Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);

    // Move the pointer if there are more blocks to come.
    Value *NewPtr;
    if ((Idx + 1) != VectorWidth)
      NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    // Create the phi to join the new and previous value.
    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
    PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
    ResultPhi->addIncoming(NewVResult, CondBlock);
    ResultPhi->addIncoming(VResult, PrevIfBlock);
    VResult = ResultPhi;

    // Add a PHI for the pointer if this isn't the last iteration.
    if ((Idx + 1) != VectorWidth) {
      PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
      PtrPhi->addIncoming(NewPtr, CondBlock);
      PtrPhi->addIncoming(Ptr, PrevIfBlock);
      Ptr = PtrPhi;
    }
  }

  CI->replaceAllUsesWith(VResult);
  CI->eraseFromParent();

  ModifiedDT = true;
}

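// Translate a masked compressstore intrinsic, like
//   void @llvm.masked.compressstore.v16i32(<16 x i32> %value, i32* %ptr,
//                                          <16 x i1> %mask)
// into a chain of basic blocks that packs the enabled elements of %value into
// consecutive memory locations starting at %ptr; the pointer is only advanced
// after a lane whose mask bit is set has been stored.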
static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI,
                                         DomTreeUpdater *DTU,
                                         bool &ModifiedDT) {
  Value *Src = CI->getArgOperand(0);
  Value *Ptr = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);

  auto *VecType = cast<FixedVectorType>(Src->getType());

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();

  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  Type *EltTy = VecType->getElementType();

  unsigned VectorWidth = VecType->getNumElements();

  // Shorten the way if the mask is a vector of constants.
  if (isConstantIntVector(Mask)) {
    unsigned MemIndex = 0;
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *OneElt =
          Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
      Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
      Builder.CreateAlignedStore(OneElt, NewPtr, Align(1));
      ++MemIndex;
    }
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
    //  br i1 %mask_1, label %cond.store, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    }

    // Create "cond" block
    //
    //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  store i32 %OneElt, i32* %EltAddr
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.store");

    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *OneElt = Builder.CreateExtractElement(Src, Idx);
    Builder.CreateAlignedStore(OneElt, Ptr, Align(1));

    // Move the pointer if there are more blocks to come.
    Value *NewPtr;
    if ((Idx + 1) != VectorWidth)
      NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());

    // Add a PHI for the pointer if this isn't the last iteration.
    if ((Idx + 1) != VectorWidth) {
      PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
      PtrPhi->addIncoming(NewPtr, CondBlock);
      PtrPhi->addIncoming(Ptr, PrevIfBlock);
      Ptr = PtrPhi;
    }
  }
  CI->eraseFromParent();

  ModifiedDT = true;
}

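// Shared driver for both pass-manager entry points: repeatedly scan the
// function and scalarize any unsupported masked memory intrinsic, restarting
// the block walk whenever a scalarization split blocks and thereby changed
// the dominator tree.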
static bool runImpl(Function &F, const TargetTransformInfo &TTI,
                    DominatorTree *DT) {
  Optional<DomTreeUpdater> DTU;
  if (DT)
    DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);

  bool EverMadeChange = false;
  bool MadeChange = true;
  auto &DL = F.getParent()->getDataLayout();
  while (MadeChange) {
    MadeChange = false;
    for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
      bool ModifiedDTOnIteration = false;
      MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL,
                                  DTU.hasValue() ? DTU.getPointer() : nullptr);

      // Restart BB iteration if the dominator tree of the Function was changed
      if (ModifiedDTOnIteration)
        break;
    }

    EverMadeChange |= MadeChange;
  }
  return EverMadeChange;
}

bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  DominatorTree *DT = nullptr;
  if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
    DT = &DTWP->getDomTree();
  return runImpl(F, TTI, DT);
}

PreservedAnalyses
ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) {
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
  if (!runImpl(F, TTI, DT))
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserve<TargetIRAnalysis>();
  PA.preserve<DominatorTreeAnalysis>();
  return PA;
}

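// Walk the instructions of a basic block and hand every call to
// optimizeCallInst; stop early once a scalarization has modified the CFG so
// the caller can restart with fresh iterators.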
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
                          const TargetTransformInfo &TTI, const DataLayout &DL,
                          DomTreeUpdater *DTU) {
  bool MadeChange = false;

  BasicBlock::iterator CurInstIterator = BB.begin();
  while (CurInstIterator != BB.end()) {
    if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
      MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL, DTU);
    if (ModifiedDT)
      return true;
  }

  return MadeChange;
}

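// Dispatch on the masked memory intrinsics handled by this pass. Each
// intrinsic is scalarized only if the target reports it as unsupported (or,
// for gather/scatter, if the target explicitly requests scalarization);
// scalable vectors are always left untouched.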
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
                             const TargetTransformInfo &TTI,
                             const DataLayout &DL, DomTreeUpdater *DTU) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II) {
    // The scalarization code below does not work for scalable vectors.
    if (isa<ScalableVectorType>(II->getType()) ||
        any_of(II->args(),
               [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
      return false;

    switch (II->getIntrinsicID()) {
    default:
      break;
    case Intrinsic::masked_load:
      // Scalarize unsupported vector masked load
      if (TTI.isLegalMaskedLoad(
              CI->getType(),
              cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
        return false;
      scalarizeMaskedLoad(DL, CI, DTU, ModifiedDT);
      return true;
    case Intrinsic::masked_store:
      if (TTI.isLegalMaskedStore(
              CI->getArgOperand(0)->getType(),
              cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
        return false;
      scalarizeMaskedStore(DL, CI, DTU, ModifiedDT);
      return true;
    case Intrinsic::masked_gather: {
      MaybeAlign MA =
          cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue();
      Type *LoadTy = CI->getType();
      Align Alignment =
          DL.getValueOrABITypeAlignment(MA, LoadTy->getScalarType());
      if (TTI.isLegalMaskedGather(LoadTy, Alignment) &&
          !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment))
        return false;
      scalarizeMaskedGather(DL, CI, DTU, ModifiedDT);
      return true;
    }
    case Intrinsic::masked_scatter: {
      MaybeAlign MA =
          cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue();
      Type *StoreTy = CI->getArgOperand(0)->getType();
      Align Alignment =
          DL.getValueOrABITypeAlignment(MA, StoreTy->getScalarType());
      if (TTI.isLegalMaskedScatter(StoreTy, Alignment) &&
          !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy),
                                           Alignment))
        return false;
      scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT);
      return true;
    }
    case Intrinsic::masked_expandload:
      if (TTI.isLegalMaskedExpandLoad(CI->getType()))
        return false;
      scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT);
      return true;
    case Intrinsic::masked_compressstore:
      if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
        return false;
      scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT);
      return true;
    }
  }

  return false;
}
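
// Usage note: the scalarization can typically be exercised in isolation with
// something like `opt -passes=scalarize-masked-mem-intrin -S input.ll` (new
// pass manager), assuming the target's TTI reports the masked intrinsics in
// input.ll as unsupported; otherwise the calls are left untouched.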