- //===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- /// \file
- /// This file defines a TargetTransformInfo::Concept conforming object specific
- /// to the ARM target machine. It uses the target's detailed information to
- /// provide more precise answers to certain TTI queries, while letting the
- /// target independent and default TTI implementations handle the rest.
- //
- //===----------------------------------------------------------------------===//
- #ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
- #define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
- #include "ARM.h"
- #include "ARMSubtarget.h"
- #include "ARMTargetMachine.h"
- #include "llvm/ADT/ArrayRef.h"
- #include "llvm/Analysis/TargetTransformInfo.h"
- #include "llvm/CodeGen/BasicTTIImpl.h"
- #include "llvm/IR/Constant.h"
- #include "llvm/IR/Function.h"
- #include "llvm/MC/SubtargetFeature.h"
- namespace llvm {
- class APInt;
- class ARMTargetLowering;
- class Instruction;
- class Loop;
- class SCEV;
- class ScalarEvolution;
- class Type;
- class Value;
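- // Modes controlling how aggressively loops may be converted into MVE
- // tail-predicated loops. Roughly: the *NoReductions variants enable tail
- // predication but leave loops containing reductions alone, and the Force*
- // variants skip the usual profitability checks.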
- namespace TailPredication {
- enum Mode {
- Disabled = 0,
- EnabledNoReductions,
- Enabled,
- ForceEnabledNoReductions,
- ForceEnabled
- };
- }
- // Controls conversion of memcpy into a tail-predicated loop.
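- // Note: ForceDisabled and ForceEnabled are intended to override the normal
- // profitability heuristics for this conversion, while Allow defers to them.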
- namespace TPLoop {
- enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
- }
- class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
- using BaseT = BasicTTIImplBase<ARMTTIImpl>;
- using TTI = TargetTransformInfo;
- friend BaseT;
- const ARMSubtarget *ST;
- const ARMTargetLowering *TLI;
- // The following features are currently excluded from InlineFeaturesAllowed:
- // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32.
- // Depending on whether they are set or unset, different
- // instructions/registers are available. For example, inlining a callee with
- // -thumb-mode into a caller with +thumb-mode may cause the assembler to
- // fail if the callee uses ARM-only instructions, e.g. in inline asm.
- const FeatureBitset InlineFeaturesAllowed = {
- ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
- ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
- ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
- ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
- ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
- ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
- ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
- ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
- ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
- ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
- ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
- ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
- ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
- ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
- ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
- ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
- ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
- ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
- ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
- ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
- ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
- ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
- ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
- ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
- };
- const ARMSubtarget *getST() const { return ST; }
- const ARMTargetLowering *getTLI() const { return TLI; }
- public:
- explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
- : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
- TLI(ST->getTargetLowering()) {}
- bool areInlineCompatible(const Function *Caller,
- const Function *Callee) const;
- bool enableInterleavedAccessVectorization() { return true; }
- TTI::AddressingModeKind
- getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;
- /// Floating-point computation using ARMv8 AArch32 Advanced SIMD instructions
- /// remains unchanged from ARMv7 and is not fully IEEE-754 compliant; only
- /// AArch64 Advanced SIMD and Arm MVE are. Vectorizing floating point with
- /// AArch32 NEON is therefore potentially unsafe.
- bool isFPVectorizationPotentiallyUnsafe() {
- return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
- }
- Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
- IntrinsicInst &II) const;
- Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
- InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
- APInt &UndefElts2, APInt &UndefElts3,
- std::function<void(Instruction *, unsigned, APInt, APInt &)>
- SimplifyAndSetOp) const;
- /// \name Scalar TTI Implementations
- /// @{
- InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty);
- using BaseT::getIntImmCost;
- InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind);
- InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind,
- Instruction *Inst = nullptr);
- /// @}
- /// \name Vector TTI Implementations
- /// @{
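- // ClassID 0 is the scalar (GPR) class: 13 allocatable core registers, or 8
- // low registers when targeting Thumb1. ClassID 1 is the vector class: 16
- // quad registers with NEON, 8 with MVE, and none otherwise.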
- unsigned getNumberOfRegisters(unsigned ClassID) const {
- bool Vector = (ClassID == 1);
- if (Vector) {
- if (ST->hasNEON())
- return 16;
- if (ST->hasMVEIntegerOps())
- return 8;
- return 0;
- }
- if (ST->isThumb1Only())
- return 8;
- return 13;
- }
- TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
- switch (K) {
- case TargetTransformInfo::RGK_Scalar:
- return TypeSize::getFixed(32);
- case TargetTransformInfo::RGK_FixedWidthVector:
- if (ST->hasNEON())
- return TypeSize::getFixed(128);
- if (ST->hasMVEIntegerOps())
- return TypeSize::getFixed(128);
- return TypeSize::getFixed(0);
- case TargetTransformInfo::RGK_ScalableVector:
- return TypeSize::getScalable(0);
- }
- llvm_unreachable("Unsupported register kind");
- }
- unsigned getMaxInterleaveFactor(unsigned VF) {
- return ST->getMaxInterleaveFactor();
- }
- bool isProfitableLSRChainElement(Instruction *I);
- bool isLegalMaskedLoad(Type *DataTy, Align Alignment);
- bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
- return isLegalMaskedLoad(DataTy, Alignment);
- }
- bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
- // For MVE, a custom lowering pass (MVEGatherScatterLowering) will already
- // have legalised any gathers that we can lower to MVE intrinsics, and we
- // want to scalarize all the rest. That pass runs before the masked
- // intrinsic lowering pass.
- return true;
- }
- bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
- return forceScalarizeMaskedGather(VTy, Alignment);
- }
- bool isLegalMaskedGather(Type *Ty, Align Alignment);
- bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
- return isLegalMaskedGather(Ty, Alignment);
- }
- InstructionCost getMemcpyCost(const Instruction *I);
- int getNumMemOps(const IntrinsicInst *I) const;
- InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
- ArrayRef<int> Mask, int Index,
- VectorType *SubTp);
- bool preferInLoopReduction(unsigned Opcode, Type *Ty,
- TTI::ReductionFlags Flags) const;
- bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
- TTI::ReductionFlags Flags) const;
- bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
- InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr);
- InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::CastContextHint CCH,
- TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr);
- InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
- CmpInst::Predicate VecPred,
- TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr);
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index);
- InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
- const SCEV *Ptr);
- InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
- TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
- TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
- TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
- TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
- const Instruction *CxtI = nullptr);
- InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
- MaybeAlign Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr);
- InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
- Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind);
- InstructionCost getInterleavedMemoryOpCost(
- unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
- bool UseMaskForCond = false, bool UseMaskForGaps = false);
- InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
- const Value *Ptr, bool VariableMask,
- Align Alignment,
- TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr);
- InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
- Optional<FastMathFlags> FMF,
- TTI::TargetCostKind CostKind);
- InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
- Type *ResTy, VectorType *ValTy,
- TTI::TargetCostKind CostKind);
- InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
- TTI::TargetCostKind CostKind);
- bool maybeLoweredToCall(Instruction &I);
- bool isLoweredToCall(const Function *F);
- bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
- AssumptionCache &AC,
- TargetLibraryInfo *LibInfo,
- HardwareLoopInfo &HWLoopInfo);
- bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
- ScalarEvolution &SE,
- AssumptionCache &AC,
- TargetLibraryInfo *TLI,
- DominatorTree *DT,
- const LoopAccessInfo *LAI);
- void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP,
- OptimizationRemarkEmitter *ORE);
- bool emitGetActiveLaneMask() const;
- void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::PeelingPreferences &PP);
- bool shouldBuildLookupTablesForConstant(Constant *C) const {
- // In the ROPI and RWPI relocation models we can't have pointers to global
- // variables or functions in constant data, so don't convert switches to
- // lookup tables if any of the values would need relocation.
- if (ST->isROPI() || ST->isRWPI())
- return !C->needsDynamicRelocation();
- return true;
- }
- /// @}
- };
- /// isVREVMask - Check if a vector shuffle corresponds to a VREV
- /// instruction with the specified blocksize. (The order of the elements
- /// within each block of the vector is reversed.)
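- /// For example, on a vector of eight i8 elements the mask
- /// <3,2,1,0,7,6,5,4> corresponds to VREV32 (each 32-bit block of four
- /// bytes is reversed) and <1,0,3,2,5,4,7,6> corresponds to VREV16.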
- inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
- assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
- "Only possible block sizes for VREV are: 16, 32, 64");
- unsigned EltSz = VT.getScalarSizeInBits();
- if (EltSz != 8 && EltSz != 16 && EltSz != 32)
- return false;
- unsigned BlockElts = M[0] + 1;
- // If the first shuffle index is UNDEF, be optimistic.
- if (M[0] < 0)
- BlockElts = BlockSize / EltSz;
- if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
- return false;
- for (unsigned i = 0, e = M.size(); i < e; ++i) {
- if (M[i] < 0)
- continue; // ignore UNDEF indices
- if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
- return false;
- }
- return true;
- }
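- // A minimal usage sketch (illustrative only; the values below are just an
- // example): checking whether a concrete mask is a 32-bit VREV over 8-bit
- // elements.
- //   int Mask[] = {3, 2, 1, 0, 7, 6, 5, 4};
- //   bool IsRev32 = isVREVMask(Mask, MVT::v8i8, 32); // expected: true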
- } // end namespace llvm
- #endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H