//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include <cstdint>
#include <optional>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }
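
  // These tags are assumed to describe the NEON structured load/store
  // intrinsics (aarch64.neon.ld2/st2 through ld4/st4) recognized by
  // getTgtMemIntrinsic, keyed by the number of interleaved elements
  // per structure.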
  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };

  bool isWideningInstruction(Type *Ty, unsigned Opcode,
                             ArrayRef<const Value *> Args);

  // A helper function called by 'getVectorInstrCost'.
  //
  // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
  // indicates whether the vector instruction is available in the input IR or
  // just imaginary in vectorizer passes.
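  //
  // For example (an assumption about the call sites, not spelled out here):
  // the overload of getVectorInstrCost taking an existing Instruction would
  // pass HasRealUse == true, while the opcode-based overload used when a
  // vectorizer costs a not-yet-created extract/insert would pass false.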
  InstructionCost getVectorInstrCostHelper(Type *Val, unsigned Index,
                                           bool HasRealUse);

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(int64_t Val);
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() { return true; }
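
  // A brief note on the counts below (based on the AArch64 register files):
  // ClassID 1 selects the vector register class, where NEON provides the 32
  // registers v0-v31; otherwise this returns the 31 allocatable
  // general-purpose registers x0-x30.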
  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;

  unsigned getMinVectorRegisterBitWidth() const {
    return ST->getMinVectorRegisterBitWidth();
  }

  std::optional<unsigned> getVScaleForTuning() const {
    return ST->getVScaleForTuning();
  }

  bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;

  /// Try to return an estimate cost factor that can be used as a multiplier
  /// when scalarizing an operation for a vector with ElementCount \p VF.
  /// For scalable vectors this currently takes the most pessimistic view based
  /// upon the maximum possible value for vscale.
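  /// For example, with \p VF = vscale x 4 and a tuning vscale of 2, this
  /// returns 4 * 2 = 8 scalarized elements.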
  unsigned getMaxNumElements(ElementCount VF) const {
    if (!VF.isScalable())
      return VF.getFixedValue();

    return VF.getKnownMinValue() * ST->getVScaleForTuning();
  }

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool prefersVectorizedAddressing() const;

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index);

  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                         bool IsUnsigned,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
                                                VectorType *ValTy,
                                                TTI::TargetCostKind CostKind);

  InstructionCost getSpliceCost(VectorType *Tp, int Index);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;

  bool useNeonVector(const Type *Ty) const;

  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue,
                                                  TTI::OP_None},
                  const Instruction *I = nullptr);

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);

  bool isElementTypeLegalForScalableVector(Type *Ty) const {
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() && ST->hasBF16())
      return true;

    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }

  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, avoid scalarization if using SVE for them.
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
      return false; // Fall back to scalarization of masked operations.

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (!ST->hasSVE() || ST->forceStreamingCompatibleSVE())
      return false;

    // For fixed vectors, scalarize if not using SVE for them.
    auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
    if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
                         DataTypeFVTy->getNumElements() < 2))
      return false;

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    // Return true if we can generate a `ld1r` splat load instruction.
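    // For example, <8 x i8> or <2 x i32> (64-bit totals) qualify, while a
    // 32-bit total such as <2 x i16> does not.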
    if (!ST->hasNEON() || NumElements.isScalable())
      return false;

    switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
    case 8:
    case 16:
    case 32:
    case 64: {
      // We accept total vector widths >= 64 bits and element widths of
      // {8, 16, 32, 64} bits.
      unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
      return VectorBits >= 64;
    }
    }
    return false;
  }

  bool isLegalNTStoreLoad(Type *DataType, Align Alignment) {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    //       vectors with 2 elements. We might want to improve that, if other
    //       users show up.
    // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP,
    // if the vector can be halved so that each half fits into a register.
    // That's the case if the element type fits into a register and the number
    // of elements is a power of 2 > 1.
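    // For example, a <4 x i32> (128 bits) halves into two 64-bit registers
    // and is legal here, while <3 x i32> is rejected because 3 is not a
    // power of 2.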
    if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) {
      unsigned NumElements = DataTypeTy->getNumElements();
      unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) {
    return isLegalNTStoreLoad(DataType, Alignment);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) {
    // Only supports little-endian targets.
    if (ST->isLittleEndian())
      return isLegalNTStoreLoad(DataType, Alignment);

    return BaseT::isLegalNTLoad(DataType, Alignment);
  }

  bool enableOrderedReductions() const { return true; }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader);

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  unsigned getGISelRematGlobalCost() const { return 2; }

  unsigned getMinTripCountTailFoldingThreshold() const {
    return ST->hasSVE() ? 5 : 0;
  }

  PredicationStyle emitGetActiveLaneMask() const {
    if (ST->hasSVE())
      return PredicationStyle::DataAndControlFlow;
    return PredicationStyle::None;
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   LoopVectorizationLegality *LVL,
                                   InterleavedAccessInfo *IAI);

  bool supportsScalableVectors() const { return ST->hasSVE(); }

  bool enableScalableVectorization() const { return ST->hasSVE(); }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return ST->hasSVE();
  }

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt);

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
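  /// For example (an illustration, assuming AArch64's scaled register
  /// addressing): an i64 load from [base + index, lsl #3] uses scale 8,
  /// which matches the access size and is supported directly, so such a
  /// factor is expected to be free or cheap.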
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;
  /// @}

  bool enableSelectOptimize() { return ST->enableSelectOptimize(); }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H