- #pragma once
- #ifdef __GNUC__
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wunused-parameter"
- #endif
- //===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file defines some vectorizer utilities.
- //
- //===----------------------------------------------------------------------===//
- #ifndef LLVM_ANALYSIS_VECTORUTILS_H
- #define LLVM_ANALYSIS_VECTORUTILS_H
- #include "llvm/ADT/MapVector.h"
- #include "llvm/ADT/SmallVector.h"
- #include "llvm/Analysis/LoopAccessAnalysis.h"
- #include "llvm/Support/CheckedArithmetic.h"
- namespace llvm {
- class TargetLibraryInfo;
- /// Describes the type of Parameters
- enum class VFParamKind {
- Vector, // No semantic information.
- OMP_Linear, // declare simd linear(i)
- OMP_LinearRef, // declare simd linear(ref(i))
- OMP_LinearVal, // declare simd linear(val(i))
- OMP_LinearUVal, // declare simd linear(uval(i))
- OMP_LinearPos, // declare simd linear(i:c) uniform(c)
- OMP_LinearValPos, // declare simd linear(val(i:c)) uniform(c)
- OMP_LinearRefPos, // declare simd linear(ref(i:c)) uniform(c)
- OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
- OMP_Uniform, // declare simd uniform(i)
- GlobalPredicate, // Global logical predicate that acts on all lanes
- // of the input and output mask concurrently. For
- // example, it is implied by the `M` token in the
- // Vector Function ABI mangled name.
- Unknown
- };
- /// Describes the type of Instruction Set Architecture
- enum class VFISAKind {
- AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
- SVE, // AArch64 Scalable Vector Extension
- SSE, // x86 SSE
- AVX, // x86 AVX
- AVX2, // x86 AVX2
- AVX512, // x86 AVX512
- LLVM, // LLVM internal ISA for functions that are not
- // attached to an existing ABI via name mangling.
- Unknown // Unknown ISA
- };
- /// Encapsulates information needed to describe a parameter.
- ///
- /// The description of the parameter is not linked directly to
- /// OpenMP or any other vector function description. This structure
- /// is extendible to handle other paradigms that describe vector
- /// functions and their parameters.
- struct VFParameter {
- unsigned ParamPos; // Parameter Position in Scalar Function.
- VFParamKind ParamKind; // Kind of Parameter.
- int LinearStepOrPos = 0; // Step or Position of the Parameter.
- Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.
- // Comparison operator.
- bool operator==(const VFParameter &Other) const {
- return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
- std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
- Other.Alignment);
- }
- };
- /// Contains the information about the kind of vectorization
- /// available.
- ///
- /// This object is independent of the paradigm used to
- /// represent vector functions. In particular, it is not attached to
- /// any target-specific ABI.
- struct VFShape {
- ElementCount VF; // Vectorization factor.
- SmallVector<VFParameter, 8> Parameters; // List of parameter information.
- // Comparison operator.
- bool operator==(const VFShape &Other) const {
- return std::tie(VF, Parameters) == std::tie(Other.VF, Other.Parameters);
- }
- /// Update the parameter in position P.ParamPos to P.
- void updateParam(VFParameter P) {
- assert(P.ParamPos < Parameters.size() && "Invalid parameter position.");
- Parameters[P.ParamPos] = P;
- assert(hasValidParameterList() && "Invalid parameter list");
- }
- // Retrieve the VFShape that can be used to map a (scalar) function to itself,
- // with VF = 1.
- static VFShape getScalarShape(const CallInst &CI) {
- return VFShape::get(CI, ElementCount::getFixed(1),
- /*HasGlobalPredicate*/ false);
- }
- // Retrieve the basic vectorization shape of the function, where all
- // parameters are mapped to VFParamKind::Vector with \p EC
- // lanes. Specifies whether the function has a Global Predicate
- // argument via \p HasGlobalPred.
- static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred) {
- SmallVector<VFParameter, 8> Parameters;
- for (unsigned I = 0; I < CI.arg_size(); ++I)
- Parameters.push_back(VFParameter({I, VFParamKind::Vector}));
- if (HasGlobalPred)
- Parameters.push_back(
- VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));
- return {EC, Parameters};
- }
- /// Validation check on the Parameters in the VFShape.
- bool hasValidParameterList() const;
- };
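- // Illustrative sketch (not part of the interface): given a scalar call such as
- // `%r = call double @foo(double %a, i64 %b)` reachable through `CI`,
- // VFShape::get with a fixed VF of 4 and a global predicate yields a shape
- // with three parameters: {0, Vector}, {1, Vector}, {2, GlobalPredicate}.
- //
- //   VFShape Shape = VFShape::get(CI, ElementCount::getFixed(4),
- //                                /*HasGlobalPred=*/true);
- //   assert(Shape.Parameters.size() == CI.arg_size() + 1);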
- /// Holds the VFShape for a specific scalar to vector function mapping.
- struct VFInfo {
- VFShape Shape; /// Classification of the vector function.
- std::string ScalarName; /// Scalar Function Name.
- std::string VectorName; /// Vector Function Name associated to this VFInfo.
- VFISAKind ISA; /// Instruction Set Architecture.
- };
- namespace VFABI {
- /// LLVM Internal VFABI ISA token for vector functions.
- static constexpr char const *_LLVM_ = "_LLVM_";
- /// Prefix for internal name redirection for vector function that
- /// tells the compiler to scalarize the call using the scalar name
- /// of the function. For example, a mangled name like
- /// `_ZGV_LLVM_N2v_foo(_LLVM_Scalarize_foo)` would tell the
- /// vectorizer to vectorize the scalar call `foo`, and to scalarize
- /// it once vectorization is done.
- static constexpr char const *_LLVM_Scalarize_ = "_LLVM_Scalarize_";
- /// Function to construct a VFInfo out of a mangled name in the
- /// following format:
- ///
- /// <VFABI_name>{(<redirection>)}
- ///
- /// where <VFABI_name> is the name of the vector function, mangled according
- /// to the rules described in the Vector Function ABI of the target vector
- /// extension (or <isa> from now on). The <VFABI_name> is in the following
- /// format:
- ///
- /// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
- ///
- /// This method supports demangling rules for the following <isa>:
- ///
- /// * AArch64: https://developer.arm.com/docs/101129/latest
- ///
- /// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
- /// https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
- ///
- /// \param MangledName -> input string in the format
- /// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
- /// \param M -> Module used to retrieve information about the vector
- /// function that cannot be retrieved from the mangled
- /// name. At the moment, this parameter is needed only to retrieve the
- /// Vectorization Factor of scalable vector functions from their
- /// respective IR declarations.
- std::optional<VFInfo> tryDemangleForVFABI(StringRef MangledName,
- const Module &M);
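- // Illustrative sketch of the demangler, assuming a Module `Mod` that declares
- // the vector variant named in the redirection:
- //
- //   std::optional<VFInfo> Info =
- //       VFABI::tryDemangleForVFABI("_ZGV_LLVM_N2v_foo(vector_foo)", Mod);
- //   // On success: Info->ScalarName == "foo", Info->VectorName == "vector_foo",
- //   // Info->Shape.VF is a fixed VF of 2 with one VFParamKind::Vector
- //   // parameter, and Info->ISA == VFISAKind::LLVM.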
- /// This routine mangles the given VectorName according to the LangRef
- /// specification for vector-function-abi-variant attribute and is specific to
- /// the TLI mappings. It is the responsibility of the caller to make sure that
- /// this is only used if all parameters in the vector function are vector type.
- /// The returned string holds the scalar-to-vector mapping:
- /// _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
- ///
- /// where:
- ///
- /// <isa> = "_LLVM_"
- /// <mask> = "N". Note: TLI does not support masked interfaces.
- /// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
- /// field of the `VecDesc` struct. If the number of lanes is scalable
- /// then 'x' is printed instead.
- /// <vparams> = "v", repeated numArgs times (one per argument).
- /// <scalarname> = the name of the scalar function.
- /// <vectorname> = the name of the vector function.
- std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
- unsigned numArgs, ElementCount VF);
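- // Illustrative sketch: following the format above, mangling a hypothetical
- // 1-argument TLI mapping from `sin` to `vec_sin` with a fixed VF of 4 is
- // expected to produce "_ZGV_LLVM_N4v_sin(vec_sin)":
- //
- //   std::string Mangled = VFABI::mangleTLIVectorName(
- //       "vec_sin", "sin", /*numArgs=*/1, ElementCount::getFixed(4));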
- /// Retrieve the `VFParamKind` from a string token.
- VFParamKind getVFParamKindFromString(const StringRef Token);
- // Name of the attribute where the variant mappings are stored.
- static constexpr char const *MappingsAttrName = "vector-function-abi-variant";
- /// Populates a set of strings representing the Vector Function ABI variants
- /// associated to the CallInst CI. If the CI does not contain the
- /// vector-function-abi-variant attribute, we return without populating
- /// VariantMappings, i.e. callers of getVectorVariantNames need not check for
- /// the presence of the attribute (see InjectTLIMappings).
- void getVectorVariantNames(const CallInst &CI,
- SmallVectorImpl<std::string> &VariantMappings);
- } // end namespace VFABI
- /// The Vector Function Database.
- ///
- /// Helper class used to find the vector functions associated to a
- /// scalar CallInst.
- class VFDatabase {
- /// The Module of the CallInst CI.
- const Module *M;
- /// The CallInst instance being queried for scalar to vector mappings.
- const CallInst &CI;
- /// List of vector function descriptors associated to the call
- /// instruction.
- const SmallVector<VFInfo, 8> ScalarToVectorMappings;
- /// Retrieve the scalar-to-vector mappings associated to the rules of
- /// a Vector Function ABI.
- static void getVFABIMappings(const CallInst &CI,
- SmallVectorImpl<VFInfo> &Mappings) {
- if (!CI.getCalledFunction())
- return;
- const StringRef ScalarName = CI.getCalledFunction()->getName();
- SmallVector<std::string, 8> ListOfStrings;
- // The check for the vector-function-abi-variant attribute is done when
- // retrieving the vector variant names here.
- VFABI::getVectorVariantNames(CI, ListOfStrings);
- if (ListOfStrings.empty())
- return;
- for (const auto &MangledName : ListOfStrings) {
- const std::optional<VFInfo> Shape =
- VFABI::tryDemangleForVFABI(MangledName, *(CI.getModule()));
- // A match is found via scalar and vector names, and also by
- // ensuring that the variant described in the attribute has a
- // corresponding definition or declaration of the vector
- // function in the Module M.
- if (Shape && (Shape->ScalarName == ScalarName)) {
- assert(CI.getModule()->getFunction(Shape->VectorName) &&
- "Vector function is missing.");
- Mappings.push_back(*Shape);
- }
- }
- }
- public:
- /// Retrieve all the VFInfo instances associated to the CallInst CI.
- static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
- SmallVector<VFInfo, 8> Ret;
- // Get mappings from the Vector Function ABI variants.
- getVFABIMappings(CI, Ret);
- // Other non-VFABI variants should be retrieved here.
- return Ret;
- }
- /// Constructor, requires a CallInst instance.
- VFDatabase(CallInst &CI)
- : M(CI.getModule()), CI(CI),
- ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}
- /// \defgroup VFDatabase query interface.
- ///
- /// @{
- /// Retrieve the Function with VFShape \p Shape.
- Function *getVectorizedFunction(const VFShape &Shape) const {
- if (Shape == VFShape::getScalarShape(CI))
- return CI.getCalledFunction();
- for (const auto &Info : ScalarToVectorMappings)
- if (Info.Shape == Shape)
- return M->getFunction(Info.VectorName);
- return nullptr;
- }
- /// @}
- };
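- // Illustrative usage sketch, assuming `CI` is a CallInst whose callee carries
- // the "vector-function-abi-variant" attribute:
- //
- //   VFDatabase DB(CI);
- //   VFShape Shape = VFShape::get(CI, ElementCount::getFixed(4),
- //                                /*HasGlobalPred=*/false);
- //   if (Function *VecF = DB.getVectorizedFunction(Shape))
- //     ; // A 4-lane, unpredicated variant of the callee exists in the module.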
- template <typename T> class ArrayRef;
- class DemandedBits;
- class GetElementPtrInst;
- template <typename InstTy> class InterleaveGroup;
- class IRBuilderBase;
- class Loop;
- class ScalarEvolution;
- class TargetTransformInfo;
- class Type;
- class Value;
- namespace Intrinsic {
- typedef unsigned ID;
- }
- /// A helper function for converting Scalar types to vector types. If
- /// the incoming type is void, we return void. If the EC represents a
- /// scalar, we return the scalar type.
- inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
- if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar())
- return Scalar;
- return VectorType::get(Scalar, EC);
- }
- inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
- return ToVectorTy(Scalar, ElementCount::getFixed(VF));
- }
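- // For example (sketch; `Ctx` is an LLVMContext):
- //   ToVectorTy(Type::getInt32Ty(Ctx), 4)  --> <4 x i32>
- //   ToVectorTy(Type::getVoidTy(Ctx), 4)   --> void
- //   ToVectorTy(Type::getFloatTy(Ctx), ElementCount::getScalable(2))
- //                                         --> <vscale x 2 x float>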
- /// Identify if the intrinsic is trivially vectorizable.
- /// This method returns true if the intrinsic's argument types are all scalars
- /// for the scalar form of the intrinsic and all vectors (or scalars handled by
- /// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.
- bool isTriviallyVectorizable(Intrinsic::ID ID);
- /// Identifies if the vector form of the intrinsic has a scalar operand.
- bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
- unsigned ScalarOpdIdx);
- /// Identifies if the vector form of the intrinsic has an operand that has
- /// an overloaded type.
- bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx);
- /// Returns the intrinsic ID for the call.
- /// For the input call instruction it finds the mapped intrinsic and returns
- /// its intrinsic ID; if no mapping is found, it returns not_intrinsic.
- Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
- const TargetLibraryInfo *TLI);
- /// Find the operand of the GEP that should be checked for consecutive
- /// stores. This ignores trailing indices that have no effect on the final
- /// pointer.
- unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);
- /// If the argument is a GEP, then returns the operand identified by
- /// getGEPInductionOperand. However, if there is some other non-loop-invariant
- /// operand, it returns that instead.
- Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
- /// If a value has only one user that is a CastInst, return it.
- Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);
- /// Get the stride of a pointer access in a loop. Looks for symbolic
- /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
- Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
- /// Given a vector and an element number, see if the scalar value is
- /// already available as a register, for example if it was inserted and then
- /// extracted from the vector.
- Value *findScalarElement(Value *V, unsigned EltNo);
- /// If all non-negative \p Mask elements are the same value, return that value.
- /// If all elements are negative (undefined) or \p Mask contains different
- /// non-negative values, return -1.
- int getSplatIndex(ArrayRef<int> Mask);
- /// Get splat value if the input is a splat vector or return nullptr.
- /// The value may be extracted from a splat constants vector or from
- /// a sequence of instructions that broadcast a single value into a vector.
- Value *getSplatValue(const Value *V);
- /// Return true if each element of the vector value \p V is poisoned or equal to
- /// every other non-poisoned element. If an index element is specified, either
- /// every element of the vector is poisoned or the element at that index is not
- /// poisoned and equal to every other non-poisoned element.
- /// This may be more powerful than the related getSplatValue() because it is
- /// not limited by finding a scalar source value to a splatted vector.
- bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);
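- // For example, the canonical IR splat idiom is recognized by both
- // getSplatValue() and isSplatValue():
- //   %ins   = insertelement <4 x i32> poison, i32 %x, i64 0
- //   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison,
- //                          <4 x i32> zeroinitializer
- // getSplatValue(%splat) returns %x, and isSplatValue(%splat) returns true.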
- /// Transform a shuffle mask's output demanded element mask into demanded
- /// element masks for the 2 operands; returns false if the mask isn't valid.
- /// Both \p DemandedLHS and \p DemandedRHS are initialised to [SrcWidth].
- /// \p AllowUndefElts permits "-1" indices to be treated as undef.
- bool getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
- const APInt &DemandedElts, APInt &DemandedLHS,
- APInt &DemandedRHS, bool AllowUndefElts = false);
- /// Replace each shuffle mask index with the scaled sequential indices for an
- /// equivalent mask of narrowed elements. Mask elements that are less than 0
- /// (sentinel values) are repeated in the output mask.
- ///
- /// Example with Scale = 4:
- /// <4 x i32> <3, 2, 0, -1> -->
- /// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
- ///
- /// This is the reverse process of widening shuffle mask elements, but it always
- /// succeeds because the indexes can always be multiplied (scaled up) to map to
- /// narrower vector elements.
- void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
- SmallVectorImpl<int> &ScaledMask);
- /// Try to transform a shuffle mask by replacing elements with the scaled index
- /// for an equivalent mask of widened elements. If all mask elements that would
- /// map to a wider element of the new mask are the same negative number
- /// (sentinel value), that element of the new mask is the same value. If any
- /// element in a given slice is negative and some other element in that slice is
- /// not the same value, return false (partial matches with sentinel values are
- /// not allowed).
- ///
- /// Example with Scale = 4:
- /// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
- /// <4 x i32> <3, 2, 0, -1>
- ///
- /// This is the reverse process of narrowing shuffle mask elements if it
- /// succeeds. This transform is not always possible because indexes may not
- /// divide evenly (scale down) to map to wider vector elements.
- bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
- SmallVectorImpl<int> &ScaledMask);
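- // Round-trip sketch using the examples above:
- //
- //   SmallVector<int, 16> Narrow, Wide;
- //   narrowShuffleMaskElts(4, {3, 2, 0, -1}, Narrow);
- //   // Narrow == <12,13,14,15, 8,9,10,11, 0,1,2,3, -1,-1,-1,-1>
- //   bool Ok = widenShuffleMaskElts(4, Narrow, Wide);
- //   // Ok == true and Wide == <3, 2, 0, -1>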
- /// Repetitively apply `widenShuffleMaskElts()` for as long as it succeeds,
- /// to get the shuffle mask with widest possible elements.
- void getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
- SmallVectorImpl<int> &ScaledMask);
- /// Splits and processes a shuffle mask depending on the number of input and
- /// output registers. The function does 2 main things: 1) splits the
- /// source/destination vectors into real registers; 2) does the mask analysis to
- /// identify which real registers are permuted. Then the function processes the
- /// resulting register masks using the provided action items. If no input
- /// register is defined, the \p NoInputAction action is used. If only 1 input
- /// register is used, \p SingleInputAction is used; otherwise \p ManyInputsAction
- /// is used to process 2 or more input registers and masks.
- /// \param Mask Original shuffle mask.
- /// \param NumOfSrcRegs Number of source registers.
- /// \param NumOfDestRegs Number of destination registers.
- /// \param NumOfUsedRegs Number of actually used destination registers.
- void processShuffleMasks(
- ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
- unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
- function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
- function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction);
- /// Compute a map of integer instructions to their minimum legal type
- /// size.
- ///
- /// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
- /// type (e.g. i32) whenever arithmetic is performed on them.
- ///
- /// For targets with native i8 or i16 operations, usually InstCombine can shrink
- /// the arithmetic type down again. However, InstCombine refuses to create
- /// illegal types, so for targets without i8 or i16 registers, the lengthening
- /// and shrinking remain.
- ///
- /// Most SIMD ISAs (e.g. NEON), however, support vectors of i8 or i16 even when
- /// their scalar equivalents do not, so during vectorization it is important to
- /// remove these extensions and truncations when deciding the profitability of
- /// vectorization.
- ///
- /// This function analyzes the given range of instructions and determines the
- /// minimum type size each can be converted to. It attempts to remove or
- /// minimize type size changes across each def-use chain, so for example in the
- /// following code:
- ///
- /// %1 = load i8, i8*
- /// %2 = add i8 %1, 2
- /// %3 = load i16, i16*
- /// %4 = zext i8 %2 to i32
- /// %5 = zext i16 %3 to i32
- /// %6 = add i32 %4, %5
- /// %7 = trunc i32 %6 to i16
- ///
- /// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
- /// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
- ///
- /// If the optional TargetTransformInfo is provided, this function tries harder
- /// to do less work by only looking at illegal types.
- MapVector<Instruction*, uint64_t>
- computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
- DemandedBits &DB,
- const TargetTransformInfo *TTI=nullptr);
- /// Compute the union of two access-group lists.
- ///
- /// If the list contains just one access group, it is returned directly. If the
- /// list is empty, returns nullptr.
- MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);
- /// Compute the access-group list of access groups that @p Inst1 and @p Inst2
- /// are both in. If either instruction does not access memory at all, it is
- /// considered to be in every list.
- ///
- /// If the list contains just one access group, it is returned directly. If the
- /// list is empty, returns nullptr.
- MDNode *intersectAccessGroups(const Instruction *Inst1,
- const Instruction *Inst2);
- /// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
- /// MD_nontemporal, MD_access_group].
- /// For K in Kinds, we get the MDNode for K from each of the
- /// elements of VL, compute their "intersection" (i.e., the most generic
- /// metadata value that covers all of the individual values), and set I's
- /// metadata for M equal to the intersection value.
- ///
- /// This function always sets a (possibly null) value for each K in Kinds.
- Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
- /// Create a mask that filters the members of an interleave group where there
- /// are gaps.
- ///
- /// For example, the mask for \p Group with interleave-factor 3
- /// and \p VF 4, that has only its first member present is:
- ///
- /// <1,0,0,1,0,0,1,0,0,1,0,0>
- ///
- /// Note: The result is a mask of 0's and 1's, as opposed to the other
- /// create[*]Mask() utilities which create a shuffle mask (mask that
- /// consists of indices).
- Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
- const InterleaveGroup<Instruction> &Group);
- /// Create a mask with replicated elements.
- ///
- /// This function creates a shuffle mask for replicating each of the \p VF
- /// elements in a vector \p ReplicationFactor times. It can be used to
- /// transform a mask of \p VF elements into a mask of
- /// \p VF * \p ReplicationFactor elements used by a predicated
- /// interleaved group of loads/stores whose interleave factor equals
- /// \p ReplicationFactor.
- ///
- /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
- ///
- /// <0,0,0,1,1,1,2,2,2,3,3,3>
- llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor,
- unsigned VF);
- /// Create an interleave shuffle mask.
- ///
- /// This function creates a shuffle mask for interleaving \p NumVecs vectors of
- /// vectorization factor \p VF into a single wide vector. The mask is of the
- /// form:
- ///
- /// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
- ///
- /// For example, the mask for VF = 4 and NumVecs = 2 is:
- ///
- /// <0, 4, 1, 5, 2, 6, 3, 7>.
- llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs);
- /// Create a stride shuffle mask.
- ///
- /// This function creates a shuffle mask whose elements begin at \p Start and
- /// are incremented by \p Stride. The mask can be used to deinterleave an
- /// interleaved vector into separate vectors of vectorization factor \p VF. The
- /// mask is of the form:
- ///
- /// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
- ///
- /// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
- ///
- /// <0, 2, 4, 6>
- llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
- unsigned VF);
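- // Illustrative sketch, assuming an IRBuilder `Builder` and an 8-element
- // vector value `Wide`: deinterleaving the even lanes with a stride mask.
- //
- //   Value *Even = Builder.CreateShuffleVector(
- //       Wide, createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4));
- //   // Even == <Wide[0], Wide[2], Wide[4], Wide[6]>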
- /// Create a sequential shuffle mask.
- ///
- /// This function creates a shuffle mask whose elements are sequential and begin
- /// at \p Start. The mask contains \p NumInts integers and is padded with \p
- /// NumUndefs undef values. The mask is of the form:
- ///
- /// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
- ///
- /// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
- ///
- /// <0, 1, 2, 3, undef, undef, undef, undef>
- llvm::SmallVector<int, 16>
- createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);
- /// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
- /// mask assuming both operands are identical. This assumes that the unary
- /// shuffle will use elements from operand 0 (operand 1 will be unused).
- llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,
- unsigned NumElts);
- /// Concatenate a list of vectors.
- ///
- /// This function generates code that concatenates the vectors in \p Vecs into a
- /// single large vector. The number of vectors should be greater than one, and
- /// their element types should be the same. The number of elements in the
- /// vectors should also be the same; however, if the last vector has fewer
- /// elements, it will be padded with undefs.
- Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs);
- /// Given a mask vector of i1, return true if all of the elements of this
- /// predicate mask are known to be false or undef. That is, return true if all
- /// lanes can be assumed inactive.
- bool maskIsAllZeroOrUndef(Value *Mask);
- /// Given a mask vector of i1, return true if all of the elements of this
- /// predicate mask are known to be true or undef. That is, return true if all
- /// lanes can be assumed active.
- bool maskIsAllOneOrUndef(Value *Mask);
- /// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
- /// for each lane which may be active.
- APInt possiblyDemandedEltsInMask(Value *Mask);
- /// The group of interleaved loads/stores sharing the same stride and
- /// close to each other.
- ///
- /// Each member in this group has an index starting from 0, and the largest
- /// index should be less than the interleave factor, which is equal to the absolute
- /// value of the access's stride.
- ///
- /// E.g. An interleaved load group of factor 4:
- /// for (unsigned i = 0; i < 1024; i+=4) {
- /// a = A[i]; // Member of index 0
- /// b = A[i+1]; // Member of index 1
- /// d = A[i+3]; // Member of index 3
- /// ...
- /// }
- ///
- /// An interleaved store group of factor 4:
- /// for (unsigned i = 0; i < 1024; i+=4) {
- /// ...
- /// A[i] = a; // Member of index 0
- /// A[i+1] = b; // Member of index 1
- /// A[i+2] = c; // Member of index 2
- /// A[i+3] = d; // Member of index 3
- /// }
- ///
- /// Note: the interleaved load group could have gaps (missing members), but
- /// the interleaved store group doesn't allow gaps.
- template <typename InstTy> class InterleaveGroup {
- public:
- InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
- : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
- InsertPos(nullptr) {}
- InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
- : Alignment(Alignment), InsertPos(Instr) {
- Factor = std::abs(Stride);
- assert(Factor > 1 && "Invalid interleave factor");
- Reverse = Stride < 0;
- Members[0] = Instr;
- }
- bool isReverse() const { return Reverse; }
- uint32_t getFactor() const { return Factor; }
- Align getAlign() const { return Alignment; }
- uint32_t getNumMembers() const { return Members.size(); }
- /// Try to insert a new member \p Instr with index \p Index and
- /// alignment \p NewAlign. The index is related to the leader and it could be
- /// negative if it is the new leader.
- ///
- /// \returns false if the instruction doesn't belong to the group.
- bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
- // Make sure the key fits in an int32_t.
- std::optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
- if (!MaybeKey)
- return false;
- int32_t Key = *MaybeKey;
- // Skip if the key is used for either the tombstone or empty special values.
- if (DenseMapInfo<int32_t>::getTombstoneKey() == Key ||
- DenseMapInfo<int32_t>::getEmptyKey() == Key)
- return false;
- // Skip if there is already a member with the same index.
- if (Members.find(Key) != Members.end())
- return false;
- if (Key > LargestKey) {
- // The largest index is always less than the interleave factor.
- if (Index >= static_cast<int32_t>(Factor))
- return false;
- LargestKey = Key;
- } else if (Key < SmallestKey) {
- // Make sure the largest index fits in an int32_t.
- std::optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
- if (!MaybeLargestIndex)
- return false;
- // The largest index is always less than the interleave factor.
- if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
- return false;
- SmallestKey = Key;
- }
- // It's always safe to select the minimum alignment.
- Alignment = std::min(Alignment, NewAlign);
- Members[Key] = Instr;
- return true;
- }
- /// Get the member with the given index \p Index
- ///
- /// \returns nullptr if the group contains no such member.
- InstTy *getMember(uint32_t Index) const {
- int32_t Key = SmallestKey + Index;
- return Members.lookup(Key);
- }
- /// Get the index for the given member. Unlike the key in the member
- /// map, the index starts from 0.
- uint32_t getIndex(const InstTy *Instr) const {
- for (auto I : Members) {
- if (I.second == Instr)
- return I.first - SmallestKey;
- }
- llvm_unreachable("InterleaveGroup contains no such member");
- }
- InstTy *getInsertPos() const { return InsertPos; }
- void setInsertPos(InstTy *Inst) { InsertPos = Inst; }
- /// Add metadata (e.g. alias info) from the instructions in this group to \p
- /// NewInst.
- ///
- /// FIXME: this function currently does not add noalias metadata à la
- /// addNewMetadata. To do that we need to compute the intersection of the
- /// noalias info from all members.
- void addMetadata(InstTy *NewInst) const;
- /// Returns true if this Group requires a scalar iteration to handle gaps.
- bool requiresScalarEpilogue() const {
- // If the last member of the Group exists, then a scalar epilog is not
- // needed for this group.
- if (getMember(getFactor() - 1))
- return false;
- // We have a group with gaps. It therefore can't be a reversed access,
- // because such groups get invalidated (TODO).
- assert(!isReverse() && "Group should have been invalidated");
- // This is a group of loads, with gaps, and without a last-member
- return true;
- }
- private:
- uint32_t Factor; // Interleave Factor.
- bool Reverse;
- Align Alignment;
- DenseMap<int32_t, InstTy *> Members;
- int32_t SmallestKey = 0;
- int32_t LargestKey = 0;
- // To avoid breaking dependences, vectorized instructions of an interleave
- // group should be inserted at either the first load or the last store in
- // program order.
- //
- // E.g. %even = load i32 // Insert Position
- // %add = add i32 %even // Use of %even
- // %odd = load i32
- //
- // store i32 %even
- // %odd = add i32 // Def of %odd
- // store i32 %odd // Insert Position
- InstTy *InsertPos;
- };
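- // Illustrative sketch: building a factor-2 load group by hand, where `LoadA`
- // (the leader, at offset 0) and `LoadB` (at offset +1) are hypothetical
- // adjacent strided loads.
- //
- //   InterleaveGroup<Instruction> Group(LoadA, /*Stride=*/2, Align(4));
- //   bool Inserted = Group.insertMember(LoadB, /*Index=*/1, Align(4));
- //   // Inserted == true; Group.getFactor() == 2, Group.getMember(1) == LoadB.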
- /// Drive the analysis of interleaved memory accesses in the loop.
- ///
- /// Use this class to analyze interleaved accesses only when we can vectorize
- /// a loop. Otherwise it's meaningless to do analysis as the vectorization
- /// on interleaved accesses is unsafe.
- ///
- /// The analysis collects interleave groups and records the relationships
- /// between the member and the group in a map.
- class InterleavedAccessInfo {
- public:
- InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
- DominatorTree *DT, LoopInfo *LI,
- const LoopAccessInfo *LAI)
- : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
- ~InterleavedAccessInfo() { invalidateGroups(); }
- /// Analyze the interleaved accesses and collect them in interleave
- /// groups. Substitute symbolic strides using \p Strides.
- /// Consider also predicated loads/stores in the analysis if
- /// \p EnableMaskedInterleavedGroup is true.
- void analyzeInterleaving(bool EnableMaskedInterleavedGroup);
- /// Invalidate groups, e.g., in case all blocks in the loop will be predicated,
- /// contrary to the original assumption. Although we currently prevent group
- /// formation for predicated accesses, we may be able to relax this limitation
- /// in the future once we handle more complicated blocks. Returns true if any
- /// groups were invalidated.
- bool invalidateGroups() {
- if (InterleaveGroups.empty()) {
- assert(
- !RequiresScalarEpilogue &&
- "RequiresScalarEpilog should not be set without interleave groups");
- return false;
- }
- InterleaveGroupMap.clear();
- for (auto *Ptr : InterleaveGroups)
- delete Ptr;
- InterleaveGroups.clear();
- RequiresScalarEpilogue = false;
- return true;
- }
- /// Check if \p Instr belongs to any interleave group.
- bool isInterleaved(Instruction *Instr) const {
- return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
- }
- /// Get the interleave group that \p Instr belongs to.
- ///
- /// \returns nullptr if \p Instr does not belong to any group.
- InterleaveGroup<Instruction> *
- getInterleaveGroup(const Instruction *Instr) const {
- return InterleaveGroupMap.lookup(Instr);
- }
- iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
- getInterleaveGroups() {
- return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
- }
- /// Returns true if an interleaved group that may access memory
- /// out-of-bounds requires a scalar epilogue iteration for correctness.
- bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
- /// Invalidate groups that require a scalar epilogue (due to gaps). This can
- /// happen when optimizing for size forbids a scalar epilogue, and the gap
- /// cannot be filtered by masking the load/store.
- void invalidateGroupsRequiringScalarEpilogue();
- /// Returns true if we have any interleave groups.
- bool hasGroups() const { return !InterleaveGroups.empty(); }
- private:
- /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
- /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
- /// The interleaved access analysis can also add new predicates (for example
- /// by versioning strides of pointers).
- PredicatedScalarEvolution &PSE;
- Loop *TheLoop;
- DominatorTree *DT;
- LoopInfo *LI;
- const LoopAccessInfo *LAI;
- /// True if the loop may contain non-reversed interleaved groups with
- /// out-of-bounds accesses. We ensure we don't speculatively access memory
- /// out-of-bounds by executing at least one scalar epilogue iteration.
- bool RequiresScalarEpilogue = false;
- /// Holds the relationships between the members and the interleave group.
- DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;
- SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;
- /// Holds dependences among the memory accesses in the loop. It maps a source
- /// access to a set of dependent sink accesses.
- DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
- /// The descriptor for a strided memory access.
- struct StrideDescriptor {
- StrideDescriptor() = default;
- StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
- Align Alignment)
- : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}
- // The access's stride. It is negative for a reverse access.
- int64_t Stride = 0;
- // The scalar expression of this access.
- const SCEV *Scev = nullptr;
- // The size of the memory object.
- uint64_t Size = 0;
- // The alignment of this access.
- Align Alignment;
- };
- /// A type for holding instructions and their stride descriptors.
- using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
- /// Create a new interleave group with the given instruction \p Instr,
- /// stride \p Stride and alignment \p Align.
- ///
- /// \returns the newly created interleave group.
- InterleaveGroup<Instruction> *
- createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
- assert(!InterleaveGroupMap.count(Instr) &&
- "Already in an interleaved access group");
- InterleaveGroupMap[Instr] =
- new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
- InterleaveGroups.insert(InterleaveGroupMap[Instr]);
- return InterleaveGroupMap[Instr];
- }
- /// Release the group and remove all the relationships.
- void releaseGroup(InterleaveGroup<Instruction> *Group) {
- for (unsigned i = 0; i < Group->getFactor(); i++)
- if (Instruction *Member = Group->getMember(i))
- InterleaveGroupMap.erase(Member);
- InterleaveGroups.erase(Group);
- delete Group;
- }
- /// Collect all the accesses with a constant stride in program order.
- void collectConstStrideAccesses(
- MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
- const ValueToValueMap &Strides);
- /// Returns true if \p Stride is allowed in an interleaved group.
- static bool isStrided(int Stride);
- /// Returns true if \p BB is a predicated block.
- bool isPredicated(BasicBlock *BB) const {
- return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
- }
- /// Returns true if LoopAccessInfo can be used for dependence queries.
- bool areDependencesValid() const {
- return LAI && LAI->getDepChecker().getDependences();
- }
- /// Returns true if memory accesses \p A and \p B can be reordered, if
- /// necessary, when constructing interleaved groups.
- ///
- /// \p A must precede \p B in program order. We return false if reordering is
- /// not necessary or is prevented because \p A and \p B may be dependent.
- bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
- StrideEntry *B) const {
- // Code motion for interleaved accesses can potentially hoist strided loads
- // and sink strided stores. The code below checks the legality of the
- // following two conditions:
- //
- // 1. Potentially moving a strided load (B) before any store (A) that
- // precedes B, or
- //
- // 2. Potentially moving a strided store (A) after any load or store (B)
- // that A precedes.
- //
- // It's legal to reorder A and B if we know there isn't a dependence from A
- // to B. Note that this determination is conservative since some
- // dependences could potentially be reordered safely.
- // A is potentially the source of a dependence.
- auto *Src = A->first;
- auto SrcDes = A->second;
- // B is potentially the sink of a dependence.
- auto *Sink = B->first;
- auto SinkDes = B->second;
- // Code motion for interleaved accesses can't violate WAR dependences.
- // Thus, reordering is legal if the source isn't a write.
- if (!Src->mayWriteToMemory())
- return true;
- // At least one of the accesses must be strided.
- if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
- return true;
- // If dependence information is not available from LoopAccessInfo,
- // conservatively assume the instructions can't be reordered.
- if (!areDependencesValid())
- return false;
- // If we know there is a dependence from source to sink, assume the
- // instructions can't be reordered. Otherwise, reordering is legal.
- return Dependences.find(Src) == Dependences.end() ||
- !Dependences.lookup(Src).count(Sink);
- }
- /// Collect the dependences from LoopAccessInfo.
- ///
- /// We process the dependences once during the interleaved access analysis to
- /// enable constant-time dependence queries.
- void collectDependences() {
- if (!areDependencesValid())
- return;
- auto *Deps = LAI->getDepChecker().getDependences();
- for (auto Dep : *Deps)
- Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
- }
- };
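- // Illustrative usage sketch, assuming the caller's analyses (PSE, L, DT, LI,
- // LAI) for a vectorizable loop are available and `MemInst` is a load or store
- // in that loop:
- //
- //   InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
- //   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
- //   if (auto *Group = IAI.getInterleaveGroup(MemInst))
- //     ; // MemInst belongs to a group of Group->getFactor() accesses.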
- } // llvm namespace
- #endif
- #ifdef __GNUC__
- #pragma GCC diagnostic pop
- #endif