//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <optional>

using namespace llvm;
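
// Lowers a memcpy with a compile-time constant length to an explicit
// load/store loop over a target-chosen operand type, followed by a short
// straight-line sequence of narrower loads and stores for the remaining
// bytes. As a rough orientation, the shape is sketched below in C (an
// illustrative sketch only; the operand type T and the residual operand
// types come from TargetTransformInfo, and the names are invented for this
// sketch):
//
//   void memcpy_known(T *d, const T *s) {           // length N is constant
//     for (size_t i = 0; i < N / sizeof(T); ++i)    // "load-store-loop"
//       d[i] = s[i];
//     // remaining N % sizeof(T) bytes: unrolled loads/stores of the
//     // residual types returned by getMemcpyLoopResidualLoweringType()
//   }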
void llvm::createMemCpyLoopKnownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  // No need to expand zero length copies.
  if (CopyLen->isZero())
    return;

  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB = nullptr;
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *TypeOfCopyLen = CopyLen->getType();
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;

  if (LoopEndCount != 0) {
    // Split the block at InsertBefore; the copy loop goes between the halves.
    PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
    BasicBlock *LoopBB =
        BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
    PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);

    IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

    // Cast the Src and Dst pointers to pointers to the loop operand type (if
    // needed).
    PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
    PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
    if (SrcAddr->getType() != SrcOpType) {
      SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
    }
    if (DstAddr->getType() != DstOpType) {
      DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
    }

    Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
    Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));

    IRBuilder<> LoopBuilder(LoopBB);
    PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
    LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
    // Loop Body
    Value *SrcGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                   PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
    StoreInst *Store = LoopBuilder.CreateAlignedStore(
        Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *NewIndex =
        LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
    LoopIndex->addIncoming(NewIndex, LoopBB);

    // Create the loop branch condition.
    Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
                             LoopBB, PostLoopBB);
  }

  uint64_t BytesCopied = LoopEndCount * LoopOpSize;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes) {
    IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
                                    : InsertBefore);

    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, SrcAlign.value(),
                                          DstAlign.value(), AtomicElementSize);

    for (auto *OpTy : RemainingOps) {
      Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
      Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));

      // Calculate the new index.
      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
      assert(
          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");

      uint64_t GepIndex = BytesCopied / OperandSize;
      assert(GepIndex * OperandSize == BytesCopied &&
             "Division should have no Remainder!");

      // Cast source to operand type and load.
      PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
      Value *CastedSrc = SrcAddr->getType() == SrcPtrType
                             ? SrcAddr
                             : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
      Value *SrcGEP = RBuilder.CreateInBoundsGEP(
          OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
      LoadInst *Load =
          RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
      if (!CanOverlap) {
        // Set alias scope for loads.
        Load->setMetadata(LLVMContext::MD_alias_scope,
                          MDNode::get(Ctx, NewScope));
      }

      // Cast destination to operand type and store.
      PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
      Value *CastedDst = DstAddr->getType() == DstPtrType
                             ? DstAddr
                             : RBuilder.CreateBitCast(DstAddr, DstPtrType);
      Value *DstGEP = RBuilder.CreateInBoundsGEP(
          OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
      StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
      if (!CanOverlap) {
        // Indicate that stores don't overlap loads.
        Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
      }
      if (AtomicElementSize) {
        Load->setAtomic(AtomicOrdering::Unordered);
        Store->setAtomic(AtomicOrdering::Unordered);
      }
      BytesCopied += OperandSize;
    }
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}
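
// Lowers a memcpy whose length is only known at run time. The shape of the
// emitted control flow is sketched below in C (an illustrative sketch only;
// the operand type T comes from TargetTransformInfo, and for element-atomic
// memcpy the residual loop copies atomic elements rather than bytes):
//
//   void memcpy_unknown(T *d, const T *s, size_t n) {
//     size_t count = n / sizeof(T);
//     for (size_t i = 0; i < count; ++i)             // "loop-memcpy-expansion"
//       d[i] = s[i];
//     for (size_t i = count * sizeof(T); i < n; ++i) // "loop-memcpy-residual"
//       ((unsigned char *)d)[i] = ((const unsigned char *)s)[i];
//   }
//
// When T is a single byte (or, for the atomic form, when the loop operand is
// exactly one atomic element), no residual loop is needed.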
void llvm::createMemCpyLoopUnknownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
    bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");

  Function *ParentFunc = PreLoopBB->getParent();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  LLVMContext &Ctx = PreLoopBB->getContext();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

  PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
  PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
  if (SrcAddr->getType() != SrcOpType) {
    SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
  }
  if (DstAddr->getType() != DstOpType) {
    DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
  }

  // Calculate the loop trip count, and remaining bytes to copy after the loop.
  Type *CopyLenType = CopyLen->getType();
  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
  assert(ILengthType &&
         "expected size argument to memcpy to be an integer type!");
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
  Value *RuntimeLoopCount =
      LoopOpIsInt8 ? CopyLen : PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);

  BasicBlock *LoopBB =
      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
  IRBuilder<> LoopBuilder(LoopBB);

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);

  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                 PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
  StoreInst *Store =
      LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }
  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  bool requiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  if (requiresResidual) {
    Type *ResLoopOpType = AtomicElementSize
                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                              : Int8Type;
    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
    assert(ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
           "Store size is expected to match type size");

    // Compute the residual byte count and the number of bytes copied by the
    // main loop.
    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);

    // Loop body for the residual copy.
    BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
                                               PreLoopBB->getParent(),
                                               PostLoopBB);
    // Residual loop header.
    BasicBlock *ResHeaderBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);

    // Need to update the pre-loop basic block to branch to the correct place:
    // branch to the main loop if the count is non-zero, branch to the residual
    // loop if the copy size is smaller than one iteration of the main loop but
    // non-zero, and finally branch to after the residual loop if the memcpy
    // size is zero.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, ResHeaderBB);
    PreLoopBB->getTerminator()->eraseFromParent();

    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        ResHeaderBB);

    // Determine if we need to branch to the residual loop or bypass it.
    IRBuilder<> RHBuilder(ResHeaderBB);
    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
                           ResLoopBB, PostLoopBB);

    // Copy the residual one ResLoopOpType element (a byte, or one atomic
    // element) at a time.
    IRBuilder<> ResBuilder(ResLoopBB);
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
    ResidualIndex->addIncoming(Zero, ResHeaderBB);

    Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast(
        SrcAddr, PointerType::get(ResLoopOpType, SrcAS));
    Value *DstAsResLoopOpType = ResBuilder.CreateBitCast(
        DstAddr, PointerType::get(ResLoopOpType, DstAS));
    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
    Value *SrcGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, SrcAsResLoopOpType, FullOffset);
    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
                                                  PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, DstAsResLoopOpType, FullOffset);
    StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);

    // Create the loop branch condition.
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
        PostLoopBB);
  } else {
    // In this case the loop operand type was a byte, and there is no need for
    // a residual loop to copy the remaining memory after the main loop.
    // We do however need to patch up the control flow by creating the
    // terminators for the preloop block and the memcpy loop.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, PostLoopBB);
    PreLoopBB->getTerminator()->eraseFromParent();
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        PostLoopBB);
  }
}

// Lower memmove to IR. memmove is required to correctly copy overlapping
// memory regions; therefore, it has to check the relative positions of the
// source and destination pointers and choose the copy direction accordingly.
//
// The code below is an IR rendition of this C function:
//
// void* memmove(void* dst, const void* src, size_t n) {
//   unsigned char* d = dst;
//   const unsigned char* s = src;
//   if (s < d) {
//     // copy backwards
//     while (n--) {
//       d[n] = s[n];
//     }
//   } else {
//     // copy forward
//     for (size_t i = 0; i < n; ++i) {
//       d[i] = s[i];
//     }
//   }
//   return dst;
// }
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
                              Value *DstAddr, Value *CopyLen, Align SrcAlign,
                              Align DstAlign, bool SrcIsVolatile,
                              bool DstIsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  // TODO: Use different element type if possible?
  IRBuilder<> CastBuilder(InsertBefore);
  Type *EltTy = CastBuilder.getInt8Ty();
  Type *PtrTy =
      CastBuilder.getInt8PtrTy(SrcAddr->getType()->getPointerAddressSpace());
  SrcAddr = CastBuilder.CreateBitCast(SrcAddr, PtrTy);
  DstAddr = CastBuilder.CreateBitCast(DstAddr, PtrTy);

  // Create a comparison of src and dst, based on which we jump to either
  // the forward-copy part of the function (if src >= dst) or the
  // backwards-copy part (if src < dst).
  // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
  // structure. Its block terminators (unconditional branches) are replaced by
  // the appropriate conditional branches when the loop is built.
  ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
                                      SrcAddr, DstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
                                &ElseTerm);

  // Each part of the function consists of two blocks:
  //   copy_backwards:      used to skip the loop when n == 0
  //   copy_backwards_loop: the actual backwards loop BB
  //   copy_forward:        used to skip the loop when n == 0
  //   copy_forward_loop:   the actual forward loop BB
  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  CopyBackwardsBB->setName("copy_backwards");
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  CopyForwardBB->setName("copy_forward");
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  unsigned PartSize = DL.getTypeStoreSize(EltTy);
  Align PartSrcAlign(commonAlignment(SrcAlign, PartSize));
  Align PartDstAlign(commonAlignment(DstAlign, PartSize));

  // Initial comparison of n == 0 that lets us skip the loops altogether.
  // Shared between both backwards and forward copy clauses.
  ICmpInst *CompareN =
      new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
                   ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");

  // Copying backwards.
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "copy_backwards_loop", F,
                         CopyForwardBB);
  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  Value *IndexPtr = LoopBuilder.CreateSub(
      LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
  Value *Element = LoopBuilder.CreateAlignedLoad(
      EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
      PartSrcAlign, "element");
  LoopBuilder.CreateAlignedStore(
      Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr),
      PartDstAlign);
  LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
      ExitBB, LoopBB);
  LoopPhi->addIncoming(IndexPtr, LoopBB);
  LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
  BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
  ThenTerm->eraseFromParent();

  // Copying forward.
  BasicBlock *FwdLoopBB =
      BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
  IRBuilder<> FwdLoopBuilder(FwdLoopBB);
  PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
  Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi);
  Value *FwdElement =
      FwdLoopBuilder.CreateAlignedLoad(EltTy, SrcGEP, PartSrcAlign, "element");
  Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi);
  FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign);
  Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
      FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
  FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
                              ExitBB, FwdLoopBB);
  FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
  FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);
  BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
  ElseTerm->eraseFromParent();
}
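
// Lowers memset to a simple store loop guarded by an n == 0 check, roughly
// (an illustrative sketch; SetValue is an i8 when coming from the memset
// intrinsic, but the loop below works for any store type):
//
//   void memset_loop(unsigned char *d, unsigned char v, size_t n) {
//     for (size_t i = 0; i < n; ++i)  // "loadstoreloop", skipped when n == 0
//       d[i] = v;
//   }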
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             bool IsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  BasicBlock *NewBB = OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);

  IRBuilder<> Builder(OrigBB->getTerminator());

  // Cast pointer to the type of value getting stored.
  unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
  DstAddr = Builder.CreateBitCast(DstAddr,
                                  PointerType::get(SetValue->getType(), dstAS));

  Builder.CreateCondBr(
      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
      LoopBB);
  OrigBB->getTerminator()->eraseFromParent();

  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  Align PartAlign(commonAlignment(DstAlign, PartSize));

  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);

  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
                           NewBB);
}
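
// Conservatively returns true ("may overlap") unless ScalarEvolution can
// prove at the call site that the source and destination pointers are
// unequal; since memcpy requires its operands not to overlap unless they are
// equal, unequal pointers rule out any overlap. A false result lets the
// expansion decorate the generated loads and stores with alias.scope/noalias
// metadata.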
template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}
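
// Dispatch on the length operand: a constant length takes the fully static
// expansion above, while a runtime length takes the loop-plus-residual
// expansion. ScalarEvolution, when available, is used only to prove that
// source and destination cannot alias.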
void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                              const TargetTransformInfo &TTI,
                              ScalarEvolution *SE) {
  bool CanOverlap = canOverlap(Memcpy, SE);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ Memcpy->getLength(),
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  }
}

void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
  createMemMoveLoop(/* InsertBefore */ Memmove,
                    /* SrcAddr */ Memmove->getRawSource(),
                    /* DstAddr */ Memmove->getRawDest(),
                    /* CopyLen */ Memmove->getLength(),
                    /* SrcAlign */ Memmove->getSourceAlign().valueOrOne(),
                    /* DestAlign */ Memmove->getDestAlign().valueOrOne(),
                    /* SrcIsVolatile */ Memmove->isVolatile(),
                    /* DstIsVolatile */ Memmove->isVolatile());
}

void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
  createMemSetLoop(/* InsertBefore */ Memset,
                   /* DstAddr */ Memset->getRawDest(),
                   /* CopyLen */ Memset->getLength(),
                   /* SetValue */ Memset->getValue(),
                   /* Alignment */ Memset->getDestAlign().valueOrOne(),
                   Memset->isVolatile());
}

void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
                                    const TargetTransformInfo &TTI,
                                    ScalarEvolution *SE) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ AtomicMemcpy->getLength(),
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  }
}
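
// Typical use of these entry points: a target pass that cannot (or prefers
// not to) emit a libcall expands the intrinsic in place and then erases it.
// A minimal sketch of such a caller (hypothetical code, not part of this
// file):
//
//   for (Instruction &I : llvm::make_early_inc_range(instructions(F)))
//     if (auto *Memcpy = dyn_cast<MemCpyInst>(&I)) {
//       expandMemCpyAsLoop(Memcpy, TTI, /*SE=*/nullptr);
//       Memcpy->eraseFromParent();
//     }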