//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
//                                    intrinsics
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass replaces masked memory intrinsics - when unsupported by the target
// - with a chain of basic blocks, that deal with the elements one-by-one if the
// appropriate mask bit is set.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <cassert>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "scalarize-masked-mem-intrin"

namespace {

class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) {
    initializeScalarizeMaskedMemIntrinLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override {
    return "Scalarize Masked Memory Intrinsics";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
  }
};

} // end anonymous namespace

static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
                          const TargetTransformInfo &TTI, const DataLayout &DL,
                          DomTreeUpdater *DTU);
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
                             const TargetTransformInfo &TTI,
                             const DataLayout &DL, DomTreeUpdater *DTU);

char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
                      "Scalarize unsupported masked memory intrinsics", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
                    "Scalarize unsupported masked memory intrinsics", false,
                    false)

FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() {
  return new ScalarizeMaskedMemIntrinLegacyPass();
}
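
// Return true if Mask is a fixed-width vector whose lanes are all
// compile-time constant integers, i.e. every bit of the mask is known.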
static bool isConstantIntVector(Value *Mask) {
  Constant *C = dyn_cast<Constant>(Mask);
  if (!C)
    return false;

  unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
  for (unsigned i = 0; i != NumElts; ++i) {
    Constant *CElt = C->getAggregateElement(i);
    if (!CElt || !isa<ConstantInt>(CElt))
      return false;
  }

  return true;
}
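
// Map a lane index to its bit position once the <N x i1> mask has been
// bitcast to an iN scalar: lane Idx is bit Idx on little-endian targets and
// bit (N - 1 - Idx) on big-endian targets.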
static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
                                unsigned Idx) {
  return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx;
}

// Translate a masked load intrinsic like
// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
//                                <16 x i1> %mask, <16 x i32> %passthru)
// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
//
//  %1 = bitcast i8* %addr to i32*
//  %2 = extractelement <16 x i1> %mask, i32 0
//  br i1 %2, label %cond.load, label %else
//
// cond.load:                                   ; preds = %0
//  %3 = getelementptr i32* %1, i32 0
//  %4 = load i32* %3
//  %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
//  br label %else
//
// else:                                        ; preds = %0, %cond.load
//  %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
//  %6 = extractelement <16 x i1> %mask, i32 1
//  br i1 %6, label %cond.load1, label %else2
//
// cond.load1:                                  ; preds = %else
//  %7 = getelementptr i32* %1, i32 1
//  %8 = load i32* %7
//  %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
//  br label %else2
//
// else2:                                       ; preds = %else, %cond.load1
//  %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
//  %10 = extractelement <16 x i1> %mask, i32 2
//  br i1 %10, label %cond.load4, label %else5
//
static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI,
                                DomTreeUpdater *DTU, bool &ModifiedDT) {
  Value *Ptr = CI->getArgOperand(0);
  Value *Alignment = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);
  Value *Src0 = CI->getArgOperand(3);

  const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
  VectorType *VecType = cast<FixedVectorType>(CI->getType());

  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();

  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // Short-cut if the mask is all-true.
  if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
    Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
    CI->replaceAllUsesWith(NewI);
    CI->eraseFromParent();
    return;
  }

  // Adjust alignment for the scalar instruction.
  const Align AdjustedAlignVal =
      commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
  // Bitcast %addr from i8* to EltTy*
  Type *NewPtrType =
      EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
  unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();

  // The result vector
  Value *VResult = Src0;

  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
      LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
      VResult = Builder.CreateInsertElement(VResult, Load, Idx);
    }
    CI->replaceAllUsesWith(VResult);
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
    //  %mask_1 = and i16 %scalar_mask, i32 1 << Idx
    //  %cond = icmp ne i16 %mask_1, 0
    //  br i1 %mask_1, label %cond.load, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx);
    }

    // Create "cond" block
    //
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %Elt = load i32* %EltAddr
    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.load");
    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
    LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
    Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    // Create the phi to join the new and previous value.
    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
    PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
    Phi->addIncoming(NewVResult, CondBlock);
    Phi->addIncoming(VResult, PrevIfBlock);
    VResult = Phi;
  }

  CI->replaceAllUsesWith(VResult);
  CI->eraseFromParent();

  ModifiedDT = true;
}

// Translate a masked store intrinsic, like
// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
//                         <16 x i1> %mask)
// to a chain of basic blocks, that stores element one-by-one if
// the appropriate mask bit is set
//
//  %1 = bitcast i8* %addr to i32*
//  %2 = extractelement <16 x i1> %mask, i32 0
//  br i1 %2, label %cond.store, label %else
//
// cond.store:                                  ; preds = %0
//  %3 = extractelement <16 x i32> %val, i32 0
//  %4 = getelementptr i32* %1, i32 0
//  store i32 %3, i32* %4
//  br label %else
//
// else:                                        ; preds = %0, %cond.store
//  %5 = extractelement <16 x i1> %mask, i32 1
//  br i1 %5, label %cond.store1, label %else2
//
// cond.store1:                                 ; preds = %else
//  %6 = extractelement <16 x i32> %val, i32 1
//  %7 = getelementptr i32* %1, i32 1
//  store i32 %6, i32* %7
//  br label %else2
//  . . .
static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI,
                                 DomTreeUpdater *DTU, bool &ModifiedDT) {
  Value *Src = CI->getArgOperand(0);
  Value *Ptr = CI->getArgOperand(1);
  Value *Alignment = CI->getArgOperand(2);
  Value *Mask = CI->getArgOperand(3);

  const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
  auto *VecType = cast<VectorType>(Src->getType());

  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // Short-cut if the mask is all-true.
  if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
    Builder.CreateAlignedStore(Src, Ptr, AlignVal);
    CI->eraseFromParent();
    return;
  }

  // Adjust alignment for the scalar instruction.
  const Align AdjustedAlignVal =
      commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
  // Bitcast %addr from i8* to EltTy*
  Type *NewPtrType =
      EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
  unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();

  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *OneElt = Builder.CreateExtractElement(Src, Idx);
      Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
      Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
    }
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %mask_1 = and i16 %scalar_mask, i32 1 << Idx
    //  %cond = icmp ne i16 %mask_1, 0
    //  br i1 %mask_1, label %cond.store, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx);
    }

    // Create "cond" block
    //
    //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  store i32 %OneElt, i32* %EltAddr
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.store");
    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *OneElt = Builder.CreateExtractElement(Src, Idx);
    Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
    Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");

    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
  }
  CI->eraseFromParent();

  ModifiedDT = true;
}

// Translate a masked gather intrinsic like
// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
//                                         <16 x i1> %Mask, <16 x i32> %Src)
// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
//
// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
// %Mask0 = extractelement <16 x i1> %Mask, i32 0
// br i1 %Mask0, label %cond.load, label %else
//
// cond.load:
// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
// %Load0 = load i32, i32* %Ptr0, align 4
// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
// br label %else
//
// else:
// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
// %Mask1 = extractelement <16 x i1> %Mask, i32 1
// br i1 %Mask1, label %cond.load1, label %else2
//
// cond.load1:
// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
// %Load1 = load i32, i32* %Ptr1, align 4
// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
// br label %else2
// . . .
// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
// ret <16 x i32> %Result
static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI,
                                  DomTreeUpdater *DTU, bool &ModifiedDT) {
  Value *Ptrs = CI->getArgOperand(0);
  Value *Alignment = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);
  Value *Src0 = CI->getArgOperand(3);

  auto *VecType = cast<FixedVectorType>(CI->getType());
  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();
  Builder.SetInsertPoint(InsertPt);
  MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();

  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // The result vector
  Value *VResult = Src0;
  unsigned VectorWidth = VecType->getNumElements();

  // Shorten the way if the mask is a vector of constants.
  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
      LoadInst *Load =
          Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
      VResult =
          Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
    }
    CI->replaceAllUsesWith(VResult);
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %Mask1 = and i16 %scalar_mask, i32 1 << Idx
    //  %cond = icmp ne i16 %mask_1, 0
    //  br i1 %Mask1, label %cond.load, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    }

    // Create "cond" block
    //
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %Elt = load i32* %EltAddr
    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.load");
    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
    LoadInst *Load =
        Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
    Value *NewVResult =
        Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    // Create the phi to join the new and previous value.
    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
    PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
    Phi->addIncoming(NewVResult, CondBlock);
    Phi->addIncoming(VResult, PrevIfBlock);
    VResult = Phi;
  }

  CI->replaceAllUsesWith(VResult);
  CI->eraseFromParent();

  ModifiedDT = true;
}

// Translate a masked scatter intrinsic, like
// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
//                                  <16 x i1> %Mask)
// to a chain of basic blocks, that stores element one-by-one if
// the appropriate mask bit is set.
//
// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
// %Mask0 = extractelement <16 x i1> %Mask, i32 0
// br i1 %Mask0, label %cond.store, label %else
//
// cond.store:
// %Elt0 = extractelement <16 x i32> %Src, i32 0
// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
// store i32 %Elt0, i32* %Ptr0, align 4
// br label %else
//
// else:
// %Mask1 = extractelement <16 x i1> %Mask, i32 1
// br i1 %Mask1, label %cond.store1, label %else2
//
// cond.store1:
// %Elt1 = extractelement <16 x i32> %Src, i32 1
// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
// store i32 %Elt1, i32* %Ptr1, align 4
// br label %else2
// . . .
static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI,
                                   DomTreeUpdater *DTU, bool &ModifiedDT) {
  Value *Src = CI->getArgOperand(0);
  Value *Ptrs = CI->getArgOperand(1);
  Value *Alignment = CI->getArgOperand(2);
  Value *Mask = CI->getArgOperand(3);

  auto *SrcFVTy = cast<FixedVectorType>(Src->getType());

  assert(
      isa<VectorType>(Ptrs->getType()) &&
      isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
      "Vector of pointers is expected in masked scatter intrinsic");

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
  unsigned VectorWidth = SrcFVTy->getNumElements();

  // Shorten the way if the mask is a vector of constants.
  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *OneElt =
          Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
      Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
      Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
    }
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %Mask1 = and i16 %scalar_mask, i32 1 << Idx
    //  %cond = icmp ne i16 %mask_1, 0
    //  br i1 %Mask1, label %cond.store, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    }

    // Create "cond" block
    //
    //  %Elt1 = extractelement <16 x i32> %Src, i32 1
    //  %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
    //  store i32 %Elt1, i32* %Ptr1
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.store");
    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
    Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
    Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");

    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
  }
  CI->eraseFromParent();

  ModifiedDT = true;
}
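
// Translate a masked expandload intrinsic, like
// <16 x i32> @llvm.masked.expandload(i32* %ptr, <16 x i1> %mask,
//                                    <16 x i32> %passthru)
// to a chain of basic blocks. Enabled lanes are loaded from consecutive
// memory locations starting at %ptr, and the pointer advances only past
// lanes that were actually loaded; disabled lanes take their value from
// %passthru.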
static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI,
                                      DomTreeUpdater *DTU, bool &ModifiedDT) {
  Value *Ptr = CI->getArgOperand(0);
  Value *Mask = CI->getArgOperand(1);
  Value *PassThru = CI->getArgOperand(2);

  auto *VecType = cast<FixedVectorType>(CI->getType());

  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();

  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  unsigned VectorWidth = VecType->getNumElements();

  // The result vector
  Value *VResult = PassThru;

  // Shorten the way if the mask is a vector of constants.
  // Create a build_vector pattern, with loads/undefs as necessary and then
  // shuffle blend with the pass through value.
  if (isConstantIntVector(Mask)) {
    unsigned MemIndex = 0;
    VResult = PoisonValue::get(VecType);
    SmallVector<int, 16> ShuffleMask(VectorWidth, UndefMaskElem);
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      Value *InsertElt;
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
        InsertElt = UndefValue::get(EltTy);
        ShuffleMask[Idx] = Idx + VectorWidth;
      } else {
        Value *NewPtr =
            Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
        InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1),
                                              "Load" + Twine(Idx));
        ShuffleMask[Idx] = Idx;
        ++MemIndex;
      }
      VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx,
                                            "Res" + Twine(Idx));
    }
    VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
    CI->replaceAllUsesWith(VResult);
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
    //  br i1 %mask_1, label %cond.load, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    }

    // Create "cond" block
    //
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %Elt = load i32* %EltAddr
    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.load");
    Builder.SetInsertPoint(CondBlock->getTerminator());
    LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1));
    Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);

    // Move the pointer if there are more blocks to come.
    Value *NewPtr;
    if ((Idx + 1) != VectorWidth)
      NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    // Create the phi to join the new and previous value.
    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
    PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
    ResultPhi->addIncoming(NewVResult, CondBlock);
    ResultPhi->addIncoming(VResult, PrevIfBlock);
    VResult = ResultPhi;

    // Add a PHI for the pointer if this isn't the last iteration.
    if ((Idx + 1) != VectorWidth) {
      PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
      PtrPhi->addIncoming(NewPtr, CondBlock);
      PtrPhi->addIncoming(Ptr, PrevIfBlock);
      Ptr = PtrPhi;
    }
  }

  CI->replaceAllUsesWith(VResult);
  CI->eraseFromParent();

  ModifiedDT = true;
}
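
// Translate a masked compressstore intrinsic, like
// void @llvm.masked.compressstore(<16 x i32> %src, i32* %ptr, <16 x i1> %mask)
// to a chain of basic blocks. Enabled lanes of %src are stored to consecutive
// memory locations starting at %ptr, advancing the pointer only after each
// lane that is actually stored; disabled lanes are skipped entirely.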
static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI,
                                         DomTreeUpdater *DTU,
                                         bool &ModifiedDT) {
  Value *Src = CI->getArgOperand(0);
  Value *Ptr = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);

  auto *VecType = cast<FixedVectorType>(Src->getType());

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();

  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  Type *EltTy = VecType->getElementType();

  unsigned VectorWidth = VecType->getNumElements();

  // Shorten the way if the mask is a vector of constants.
  if (isConstantIntVector(Mask)) {
    unsigned MemIndex = 0;
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *OneElt =
          Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
      Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
      Builder.CreateAlignedStore(OneElt, NewPtr, Align(1));
      ++MemIndex;
    }
    CI->eraseFromParent();
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
    //  br i1 %mask_1, label %cond.store, label %else
    //
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(
          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    }

    // Create "cond" block
    //
    //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  store i32 %OneElt, i32* %EltAddr
    //
    Instruction *ThenTerm =
        SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU);

    BasicBlock *CondBlock = ThenTerm->getParent();
    CondBlock->setName("cond.store");
    Builder.SetInsertPoint(CondBlock->getTerminator());
    Value *OneElt = Builder.CreateExtractElement(Src, Idx);
    Builder.CreateAlignedStore(OneElt, Ptr, Align(1));

    // Move the pointer if there are more blocks to come.
    Value *NewPtr;
    if ((Idx + 1) != VectorWidth)
      NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
    NewIfBlock->setName("else");
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());

    // Add a PHI for the pointer if this isn't the last iteration.
    if ((Idx + 1) != VectorWidth) {
      PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
      PtrPhi->addIncoming(NewPtr, CondBlock);
      PtrPhi->addIncoming(Ptr, PrevIfBlock);
      Ptr = PtrPhi;
    }
  }
  CI->eraseFromParent();

  ModifiedDT = true;
}
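
// Scalarize all unsupported masked memory intrinsics in F. The block scan is
// restarted whenever a scalarization rewrites the CFG, since the block list
// (and the dominator tree, if one is being updated) changes underneath the
// iteration.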
static bool runImpl(Function &F, const TargetTransformInfo &TTI,
                    DominatorTree *DT) {
  std::optional<DomTreeUpdater> DTU;
  if (DT)
    DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);

  bool EverMadeChange = false;
  bool MadeChange = true;
  auto &DL = F.getParent()->getDataLayout();
  while (MadeChange) {
    MadeChange = false;
    for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
      bool ModifiedDTOnIteration = false;
      MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL,
                                  DTU ? &*DTU : nullptr);

      // Restart BB iteration if the dominator tree of the Function was changed
      if (ModifiedDTOnIteration)
        break;
    }

    EverMadeChange |= MadeChange;
  }
  return EverMadeChange;
}

bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  DominatorTree *DT = nullptr;
  if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
    DT = &DTWP->getDomTree();
  return runImpl(F, TTI, DT);
}

PreservedAnalyses
ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) {
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
  if (!runImpl(F, TTI, DT))
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserve<TargetIRAnalysis>();
  PA.preserve<DominatorTreeAnalysis>();
  return PA;
}
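
// Walk BB and scalarize any masked memory intrinsic calls the target cannot
// handle natively. Returns early, reporting a change, once a transformation
// has modified the CFG so the caller can restart its block iteration.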
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
                          const TargetTransformInfo &TTI, const DataLayout &DL,
                          DomTreeUpdater *DTU) {
  bool MadeChange = false;

  BasicBlock::iterator CurInstIterator = BB.begin();
  while (CurInstIterator != BB.end()) {
    if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
      MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL, DTU);
    if (ModifiedDT)
      return true;
  }

  return MadeChange;
}
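
// If CI is a masked load/store, gather/scatter, expandload, or compressstore
// that the target reports as illegal (or forces to be scalarized) for the
// given type and alignment, lower it with the matching scalarization routine
// above. ModifiedDT is set by the callees when they introduce new basic
// blocks. Scalable vectors are left untouched.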
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
                             const TargetTransformInfo &TTI,
                             const DataLayout &DL, DomTreeUpdater *DTU) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II) {
    // The scalarization code below does not work for scalable vectors.
    if (isa<ScalableVectorType>(II->getType()) ||
        any_of(II->args(),
               [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
      return false;

    switch (II->getIntrinsicID()) {
    default:
      break;
    case Intrinsic::masked_load:
      // Scalarize unsupported vector masked load
      if (TTI.isLegalMaskedLoad(
              CI->getType(),
              cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
        return false;
      scalarizeMaskedLoad(DL, CI, DTU, ModifiedDT);
      return true;
    case Intrinsic::masked_store:
      if (TTI.isLegalMaskedStore(
              CI->getArgOperand(0)->getType(),
              cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
        return false;
      scalarizeMaskedStore(DL, CI, DTU, ModifiedDT);
      return true;
    case Intrinsic::masked_gather: {
      MaybeAlign MA =
          cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue();
      Type *LoadTy = CI->getType();
      Align Alignment =
          DL.getValueOrABITypeAlignment(MA, LoadTy->getScalarType());
      if (TTI.isLegalMaskedGather(LoadTy, Alignment) &&
          !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment))
        return false;
      scalarizeMaskedGather(DL, CI, DTU, ModifiedDT);
      return true;
    }
    case Intrinsic::masked_scatter: {
      MaybeAlign MA =
          cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue();
      Type *StoreTy = CI->getArgOperand(0)->getType();
      Align Alignment =
          DL.getValueOrABITypeAlignment(MA, StoreTy->getScalarType());
      if (TTI.isLegalMaskedScatter(StoreTy, Alignment) &&
          !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy),
                                           Alignment))
        return false;
      scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT);
      return true;
    }
    case Intrinsic::masked_expandload:
      if (TTI.isLegalMaskedExpandLoad(CI->getType()))
        return false;
      scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT);
      return true;
    case Intrinsic::masked_compressstore:
      if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
        return false;
      scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT);
      return true;
    }
  }

  return false;
}