//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI) { // No need to expand zero length copies. if (CopyLen->isZero()) return; BasicBlock *PreLoopBB = InsertBefore->getParent(); BasicBlock *PostLoopBB = nullptr; Function *ParentFunc = PreLoopBB->getParent(); LLVMContext &Ctx = PreLoopBB->getContext(); const DataLayout &DL = ParentFunc->getParent()->getDataLayout(); unsigned SrcAS = cast(SrcAddr->getType())->getAddressSpace(); unsigned DstAS = cast(DstAddr->getType())->getAddressSpace(); Type *TypeOfCopyLen = CopyLen->getType(); Type *LoopOpType = TTI.getMemcpyLoopLoweringType( Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value()); unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType); uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize; if (LoopEndCount != 0) { // Split PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split"); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB); PreLoopBB->getTerminator()->setSuccessor(0, LoopBB); IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); // Cast the Src and Dst pointers to pointers to the loop operand type (if // needed). PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); if (SrcAddr->getType() != SrcOpType) { SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); } if (DstAddr->getType() != DstOpType) { DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); } Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize)); Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize)); IRBuilder<> LoopBuilder(LoopBB); PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index"); LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB); // Loop Body Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile); Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); Value *NewIndex = LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); LoopIndex->addIncoming(NewIndex, LoopBB); // Create the loop branch condition. Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount); LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI), LoopBB, PostLoopBB); } uint64_t BytesCopied = LoopEndCount * LoopOpSize; uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied; if (RemainingBytes) { IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI() : InsertBefore); SmallVector RemainingOps; TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes, SrcAS, DstAS, SrcAlign.value(), DstAlign.value()); for (auto OpTy : RemainingOps) { Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied)); Align PartDstAlign(commonAlignment(DstAlign, BytesCopied)); // Calaculate the new index unsigned OperandSize = DL.getTypeStoreSize(OpTy); uint64_t GepIndex = BytesCopied / OperandSize; assert(GepIndex * OperandSize == BytesCopied && "Division should have no Remainder!"); // Cast source to operand type and load PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS); Value *CastedSrc = SrcAddr->getType() == SrcPtrType ? SrcAddr : RBuilder.CreateBitCast(SrcAddr, SrcPtrType); Value *SrcGEP = RBuilder.CreateInBoundsGEP( OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex)); Value *Load = RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile); // Cast destination to operand type and store. PointerType *DstPtrType = PointerType::get(OpTy, DstAS); Value *CastedDst = DstAddr->getType() == DstPtrType ? DstAddr : RBuilder.CreateBitCast(DstAddr, DstPtrType); Value *DstGEP = RBuilder.CreateInBoundsGEP( OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex)); RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); BytesCopied += OperandSize; } } assert(BytesCopied == CopyLen->getZExtValue() && "Bytes copied should match size in the call!"); } void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI) { BasicBlock *PreLoopBB = InsertBefore->getParent(); BasicBlock *PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion"); Function *ParentFunc = PreLoopBB->getParent(); const DataLayout &DL = ParentFunc->getParent()->getDataLayout(); LLVMContext &Ctx = PreLoopBB->getContext(); unsigned SrcAS = cast(SrcAddr->getType())->getAddressSpace(); unsigned DstAS = cast(DstAddr->getType())->getAddressSpace(); Type *LoopOpType = TTI.getMemcpyLoopLoweringType( Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value()); unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType); IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); if (SrcAddr->getType() != SrcOpType) { SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); } if (DstAddr->getType() != DstOpType) { DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); } // Calculate the loop trip count, and remaining bytes to copy after the loop. Type *CopyLenType = CopyLen->getType(); IntegerType *ILengthType = dyn_cast(CopyLenType); assert(ILengthType && "expected size argument to memcpy to be an integer type!"); Type *Int8Type = Type::getInt8Ty(Ctx); bool LoopOpIsInt8 = LoopOpType == Int8Type; ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize); Value *RuntimeLoopCount = LoopOpIsInt8 ? CopyLen : PLBuilder.CreateUDiv(CopyLen, CILoopOpSize); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB); IRBuilder<> LoopBuilder(LoopBB); Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize)); Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize)); PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index"); LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB); Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile); Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); Value *NewIndex = LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U)); LoopIndex->addIncoming(NewIndex, LoopBB); if (!LoopOpIsInt8) { // Add in the Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize); Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual); // Loop body for the residual copy. BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual", PreLoopBB->getParent(), PostLoopBB); // Residual loop header. BasicBlock *ResHeaderBB = BasicBlock::Create( Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr); // Need to update the pre-loop basic block to branch to the correct place. // branch to the main loop if the count is non-zero, branch to the residual // loop if the copy size is smaller then 1 iteration of the main loop but // non-zero and finally branch to after the residual loop if the memcpy // size is zero. ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), LoopBB, ResHeaderBB); PreLoopBB->getTerminator()->eraseFromParent(); LoopBuilder.CreateCondBr( LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, ResHeaderBB); // Determine if we need to branch to the residual loop or bypass it. IRBuilder<> RHBuilder(ResHeaderBB); RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero), ResLoopBB, PostLoopBB); // Copy the residual with single byte load/store loop. IRBuilder<> ResBuilder(ResLoopBB); PHINode *ResidualIndex = ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index"); ResidualIndex->addIncoming(Zero, ResHeaderBB); Value *SrcAsInt8 = ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS)); Value *DstAsInt8 = ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS)); Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex); Value *SrcGEP = ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset); Value *Load = ResBuilder.CreateAlignedLoad(Int8Type, SrcGEP, PartSrcAlign, SrcIsVolatile); Value *DstGEP = ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset); ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile); Value *ResNewIndex = ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U)); ResidualIndex->addIncoming(ResNewIndex, ResLoopBB); // Create the loop branch condition. ResBuilder.CreateCondBr( ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB, PostLoopBB); } else { // In this case the loop operand type was a byte, and there is no need for a // residual loop to copy the remaining memory after the main loop. // We do however need to patch up the control flow by creating the // terminators for the preloop block and the memcpy loop. ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), LoopBB, PostLoopBB); PreLoopBB->getTerminator()->eraseFromParent(); LoopBuilder.CreateCondBr( LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, PostLoopBB); } } // Lower memmove to IR. memmove is required to correctly copy overlapping memory // regions; therefore, it has to check the relative positions of the source and // destination pointers and choose the copy direction accordingly. // // The code below is an IR rendition of this C function: // // void* memmove(void* dst, const void* src, size_t n) { // unsigned char* d = dst; // const unsigned char* s = src; // if (s < d) { // // copy backwards // while (n--) { // d[n] = s[n]; // } // } else { // // copy forward // for (size_t i = 0; i < n; ++i) { // d[i] = s[i]; // } // } // return dst; // } static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile) { Type *TypeOfCopyLen = CopyLen->getType(); BasicBlock *OrigBB = InsertBefore->getParent(); Function *F = OrigBB->getParent(); const DataLayout &DL = F->getParent()->getDataLayout(); Type *EltTy = SrcAddr->getType()->getPointerElementType(); // Create the a comparison of src and dst, based on which we jump to either // the forward-copy part of the function (if src >= dst) or the backwards-copy // part (if src < dst). // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else // structure. Its block terminators (unconditional branches) are replaced by // the appropriate conditional branches when the loop is built. ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT, SrcAddr, DstAddr, "compare_src_dst"); Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm, &ElseTerm); // Each part of the function consists of two blocks: // copy_backwards: used to skip the loop when n == 0 // copy_backwards_loop: the actual backwards loop BB // copy_forward: used to skip the loop when n == 0 // copy_forward_loop: the actual forward loop BB BasicBlock *CopyBackwardsBB = ThenTerm->getParent(); CopyBackwardsBB->setName("copy_backwards"); BasicBlock *CopyForwardBB = ElseTerm->getParent(); CopyForwardBB->setName("copy_forward"); BasicBlock *ExitBB = InsertBefore->getParent(); ExitBB->setName("memmove_done"); unsigned PartSize = DL.getTypeStoreSize(EltTy); Align PartSrcAlign(commonAlignment(SrcAlign, PartSize)); Align PartDstAlign(commonAlignment(DstAlign, PartSize)); // Initial comparison of n == 0 that lets us skip the loops altogether. Shared // between both backwards and forward copy clauses. ICmpInst *CompareN = new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen, ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0"); // Copying backwards. BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "copy_backwards_loop", F, CopyForwardBB); IRBuilder<> LoopBuilder(LoopBB); PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); Value *IndexPtr = LoopBuilder.CreateSub( LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr"); Value *Element = LoopBuilder.CreateAlignedLoad( EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr), PartSrcAlign, "element"); LoopBuilder.CreateAlignedStore( Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr), PartDstAlign); LoopBuilder.CreateCondBr( LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)), ExitBB, LoopBB); LoopPhi->addIncoming(IndexPtr, LoopBB); LoopPhi->addIncoming(CopyLen, CopyBackwardsBB); BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm); ThenTerm->eraseFromParent(); // Copying forward. BasicBlock *FwdLoopBB = BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB); IRBuilder<> FwdLoopBuilder(FwdLoopBB); PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr"); Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi); Value *FwdElement = FwdLoopBuilder.CreateAlignedLoad(EltTy, SrcGEP, PartSrcAlign, "element"); Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi); FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign); Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd( FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment"); FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), ExitBB, FwdLoopBB); FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB); FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB); BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm); ElseTerm->eraseFromParent(); } static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr, Value *CopyLen, Value *SetValue, Align DstAlign, bool IsVolatile) { Type *TypeOfCopyLen = CopyLen->getType(); BasicBlock *OrigBB = InsertBefore->getParent(); Function *F = OrigBB->getParent(); const DataLayout &DL = F->getParent()->getDataLayout(); BasicBlock *NewBB = OrigBB->splitBasicBlock(InsertBefore, "split"); BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB); IRBuilder<> Builder(OrigBB->getTerminator()); // Cast pointer to the type of value getting stored unsigned dstAS = cast(DstAddr->getType())->getAddressSpace(); DstAddr = Builder.CreateBitCast(DstAddr, PointerType::get(SetValue->getType(), dstAS)); Builder.CreateCondBr( Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB, LoopBB); OrigBB->getTerminator()->eraseFromParent(); unsigned PartSize = DL.getTypeStoreSize(SetValue->getType()); Align PartAlign(commonAlignment(DstAlign, PartSize)); IRBuilder<> LoopBuilder(LoopBB); PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); LoopBuilder.CreateAlignedStore( SetValue, LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex), PartAlign, IsVolatile); Value *NewIndex = LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); LoopIndex->addIncoming(NewIndex, LoopBB); LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, NewBB); } void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, const TargetTransformInfo &TTI) { if (ConstantInt *CI = dyn_cast(Memcpy->getLength())) { createMemCpyLoopKnownSize( /* InsertBefore */ Memcpy, /* SrcAddr */ Memcpy->getRawSource(), /* DstAddr */ Memcpy->getRawDest(), /* CopyLen */ CI, /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(), /* DestAlign */ Memcpy->getDestAlign().valueOrOne(), /* SrcIsVolatile */ Memcpy->isVolatile(), /* DstIsVolatile */ Memcpy->isVolatile(), /* TargetTransformInfo */ TTI); } else { createMemCpyLoopUnknownSize( /* InsertBefore */ Memcpy, /* SrcAddr */ Memcpy->getRawSource(), /* DstAddr */ Memcpy->getRawDest(), /* CopyLen */ Memcpy->getLength(), /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(), /* DestAlign */ Memcpy->getDestAlign().valueOrOne(), /* SrcIsVolatile */ Memcpy->isVolatile(), /* DstIsVolatile */ Memcpy->isVolatile(), /* TargetTransformInfo */ TTI); } } void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) { createMemMoveLoop(/* InsertBefore */ Memmove, /* SrcAddr */ Memmove->getRawSource(), /* DstAddr */ Memmove->getRawDest(), /* CopyLen */ Memmove->getLength(), /* SrcAlign */ Memmove->getSourceAlign().valueOrOne(), /* DestAlign */ Memmove->getDestAlign().valueOrOne(), /* SrcIsVolatile */ Memmove->isVolatile(), /* DstIsVolatile */ Memmove->isVolatile()); } void llvm::expandMemSetAsLoop(MemSetInst *Memset) { createMemSetLoop(/* InsertBefore */ Memset, /* DstAddr */ Memset->getRawDest(), /* CopyLen */ Memset->getLength(), /* SetValue */ Memset->getValue(), /* Alignment */ Memset->getDestAlign().valueOrOne(), Memset->isVolatile()); }