123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364 |
- //===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file implements a CFL-based, summary-based alias analysis algorithm. It
- // does not depend on types. The algorithm is a mixture of the one described in
- // "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast
- // algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by
- // Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a
- // graph of the uses of a variable, where each node is a memory location, and
- // each edge is an action that happened on that memory location. The "actions"
- // can be one of Dereference, Reference, or Assign. The precision of this
- // analysis is roughly the same as that of a one-level context-sensitive
- // Steensgaard's algorithm.
- //
- // Two variables are considered as aliasing iff you can reach one value's node
- // from the other value's node and the language formed by concatenating all of
- // the edge labels (actions) conforms to a context-free grammar.
- //
- // Because this algorithm requires a graph search on each query, we execute the
- // algorithm outlined in "Fast algorithms..." (mentioned above)
- // in order to transform the graph into sets of variables that may alias in
- // ~nlogn time (n = number of variables), which makes queries take constant
- // time.
- //===----------------------------------------------------------------------===//
- // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
- // CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because
- // FunctionPasses are only allowed to inspect the Function that they're being
- // run on. Realistically, this likely isn't a problem until we allow
- // FunctionPasses to run concurrently.
- #include "llvm/Analysis/CFLSteensAliasAnalysis.h"
- #include "AliasAnalysisSummary.h"
- #include "CFLGraph.h"
- #include "StratifiedSets.h"
- #include "llvm/ADT/DenseMap.h"
- #include "llvm/ADT/Optional.h"
- #include "llvm/ADT/SmallVector.h"
- #include "llvm/Analysis/TargetLibraryInfo.h"
- #include "llvm/IR/Constants.h"
- #include "llvm/IR/Function.h"
- #include "llvm/IR/Type.h"
- #include "llvm/IR/Value.h"
- #include "llvm/InitializePasses.h"
- #include "llvm/Pass.h"
- #include "llvm/Support/Debug.h"
- #include "llvm/Support/raw_ostream.h"
- #include <algorithm>
- #include <cassert>
- #include <limits>
- #include <memory>
- #include <utility>
- using namespace llvm;
- using namespace llvm::cflaa;
- #define DEBUG_TYPE "cfl-steens-aa"
- CFLSteensAAResult::CFLSteensAAResult(
- std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
- : GetTLI(std::move(GetTLI)) {}
- CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg)
- : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {}
- CFLSteensAAResult::~CFLSteensAAResult() = default;
- /// Information we have about a function and would like to keep around.
- class CFLSteensAAResult::FunctionInfo {
- StratifiedSets<InstantiatedValue> Sets;
- AliasSummary Summary;
- public:
- FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals,
- StratifiedSets<InstantiatedValue> S);
- const StratifiedSets<InstantiatedValue> &getStratifiedSets() const {
- return Sets;
- }
- const AliasSummary &getAliasSummary() const { return Summary; }
- };
- const StratifiedIndex StratifiedLink::SetSentinel =
- std::numeric_limits<StratifiedIndex>::max();
- //===----------------------------------------------------------------------===//
- // Function declarations that require types defined in the namespace above
- //===----------------------------------------------------------------------===//
- /// Determines whether it would be pointless to add the given Value to our sets.
- static bool canSkipAddingToSets(Value *Val) {
- // Constants can share instances, which may falsely unify multiple
- // sets, e.g. in
- // store i32* null, i32** %ptr1
- // store i32* null, i32** %ptr2
- // clearly ptr1 and ptr2 should not be unified into the same set, so
- // we should filter out the (potentially shared) instance to
- // i32* null.
- if (isa<Constant>(Val)) {
- // TODO: Because all of these things are constant, we can determine whether
- // the data is *actually* mutable at graph building time. This will probably
- // come for free/cheap with offset awareness.
- bool CanStoreMutableData = isa<GlobalValue>(Val) ||
- isa<ConstantExpr>(Val) ||
- isa<ConstantAggregate>(Val);
- return !CanStoreMutableData;
- }
- return false;
- }
- CFLSteensAAResult::FunctionInfo::FunctionInfo(
- Function &Fn, const SmallVectorImpl<Value *> &RetVals,
- StratifiedSets<InstantiatedValue> S)
- : Sets(std::move(S)) {
- // Historically, an arbitrary upper-bound of 50 args was selected. We may want
- // to remove this if it doesn't really matter in practice.
- if (Fn.arg_size() > MaxSupportedArgsInSummary)
- return;
- DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap;
- // Our intention here is to record all InterfaceValues that share the same
- // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we
- // have its StratifiedIndex scanned here and check if the index is presented
- // in InterfaceMap: if it is not, we add the correspondence to the map;
- // otherwise, an aliasing relation is found and we add it to
- // RetParamRelations.
- auto AddToRetParamRelations = [&](unsigned InterfaceIndex,
- StratifiedIndex SetIndex) {
- unsigned Level = 0;
- while (true) {
- InterfaceValue CurrValue{InterfaceIndex, Level};
- auto Itr = InterfaceMap.find(SetIndex);
- if (Itr != InterfaceMap.end()) {
- if (CurrValue != Itr->second)
- Summary.RetParamRelations.push_back(
- ExternalRelation{CurrValue, Itr->second, UnknownOffset});
- break;
- }
- auto &Link = Sets.getLink(SetIndex);
- InterfaceMap.insert(std::make_pair(SetIndex, CurrValue));
- auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs);
- if (ExternalAttrs.any())
- Summary.RetParamAttributes.push_back(
- ExternalAttribute{CurrValue, ExternalAttrs});
- if (!Link.hasBelow())
- break;
- ++Level;
- SetIndex = Link.Below;
- }
- };
- // Populate RetParamRelations for return values
- for (auto *RetVal : RetVals) {
- assert(RetVal != nullptr);
- assert(RetVal->getType()->isPointerTy());
- auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0});
- if (RetInfo.hasValue())
- AddToRetParamRelations(0, RetInfo->Index);
- }
- // Populate RetParamRelations for parameters
- unsigned I = 0;
- for (auto &Param : Fn.args()) {
- if (Param.getType()->isPointerTy()) {
- auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0});
- if (ParamInfo.hasValue())
- AddToRetParamRelations(I + 1, ParamInfo->Index);
- }
- ++I;
- }
- }
- // Builds the graph + StratifiedSets for a function.
- CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) {
- CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, GetTLI(*Fn), *Fn);
- StratifiedSetsBuilder<InstantiatedValue> SetBuilder;
- // Add all CFLGraph nodes and all Dereference edges to StratifiedSets
- auto &Graph = GraphBuilder.getCFLGraph();
- for (const auto &Mapping : Graph.value_mappings()) {
- auto Val = Mapping.first;
- if (canSkipAddingToSets(Val))
- continue;
- auto &ValueInfo = Mapping.second;
- assert(ValueInfo.getNumLevels() > 0);
- SetBuilder.add(InstantiatedValue{Val, 0});
- SetBuilder.noteAttributes(InstantiatedValue{Val, 0},
- ValueInfo.getNodeInfoAtLevel(0).Attr);
- for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) {
- SetBuilder.add(InstantiatedValue{Val, I + 1});
- SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1},
- ValueInfo.getNodeInfoAtLevel(I + 1).Attr);
- SetBuilder.addBelow(InstantiatedValue{Val, I},
- InstantiatedValue{Val, I + 1});
- }
- }
- // Add all assign edges to StratifiedSets
- for (const auto &Mapping : Graph.value_mappings()) {
- auto Val = Mapping.first;
- if (canSkipAddingToSets(Val))
- continue;
- auto &ValueInfo = Mapping.second;
- for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
- auto Src = InstantiatedValue{Val, I};
- for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges)
- SetBuilder.addWith(Src, Edge.Other);
- }
- }
- return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build());
- }
- void CFLSteensAAResult::scan(Function *Fn) {
- auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
- (void)InsertPair;
- assert(InsertPair.second &&
- "Trying to scan a function that has already been cached");
- // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call
- // may get evaluated after operator[], potentially triggering a DenseMap
- // resize and invalidating the reference returned by operator[]
- auto FunInfo = buildSetsFrom(Fn);
- Cache[Fn] = std::move(FunInfo);
- Handles.emplace_front(Fn, this);
- }
- void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); }
- /// Ensures that the given function is available in the cache, and returns the
- /// entry.
- const Optional<CFLSteensAAResult::FunctionInfo> &
- CFLSteensAAResult::ensureCached(Function *Fn) {
- auto Iter = Cache.find(Fn);
- if (Iter == Cache.end()) {
- scan(Fn);
- Iter = Cache.find(Fn);
- assert(Iter != Cache.end());
- assert(Iter->second.hasValue());
- }
- return Iter->second;
- }
- const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) {
- auto &FunInfo = ensureCached(&Fn);
- if (FunInfo.hasValue())
- return &FunInfo->getAliasSummary();
- else
- return nullptr;
- }
- AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
- auto *ValA = const_cast<Value *>(LocA.Ptr);
- auto *ValB = const_cast<Value *>(LocB.Ptr);
- if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy())
- return AliasResult::NoAlias;
- Function *Fn = nullptr;
- Function *MaybeFnA = const_cast<Function *>(parentFunctionOfValue(ValA));
- Function *MaybeFnB = const_cast<Function *>(parentFunctionOfValue(ValB));
- if (!MaybeFnA && !MaybeFnB) {
- // The only times this is known to happen are when globals + InlineAsm are
- // involved
- LLVM_DEBUG(
- dbgs()
- << "CFLSteensAA: could not extract parent function information.\n");
- return AliasResult::MayAlias;
- }
- if (MaybeFnA) {
- Fn = MaybeFnA;
- assert((!MaybeFnB || MaybeFnB == MaybeFnA) &&
- "Interprocedural queries not supported");
- } else {
- Fn = MaybeFnB;
- }
- assert(Fn != nullptr);
- auto &MaybeInfo = ensureCached(Fn);
- assert(MaybeInfo.hasValue());
- auto &Sets = MaybeInfo->getStratifiedSets();
- auto MaybeA = Sets.find(InstantiatedValue{ValA, 0});
- if (!MaybeA.hasValue())
- return AliasResult::MayAlias;
- auto MaybeB = Sets.find(InstantiatedValue{ValB, 0});
- if (!MaybeB.hasValue())
- return AliasResult::MayAlias;
- auto SetA = *MaybeA;
- auto SetB = *MaybeB;
- auto AttrsA = Sets.getLink(SetA.Index).Attrs;
- auto AttrsB = Sets.getLink(SetB.Index).Attrs;
- // If both values are local (meaning the corresponding set has attribute
- // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them:
- // they may-alias each other if and only if they are in the same set.
- // If at least one value is non-local (meaning it either is global/argument or
- // it comes from unknown sources like integer cast), the situation becomes a
- // bit more interesting. We follow three general rules described below:
- // - Non-local values may alias each other
- // - AttrNone values do not alias any non-local values
- // - AttrEscaped do not alias globals/arguments, but they may alias
- // AttrUnknown values
- if (SetA.Index == SetB.Index)
- return AliasResult::MayAlias;
- if (AttrsA.none() || AttrsB.none())
- return AliasResult::NoAlias;
- if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB))
- return AliasResult::MayAlias;
- if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB))
- return AliasResult::MayAlias;
- return AliasResult::NoAlias;
- }
- AnalysisKey CFLSteensAA::Key;
- CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) {
- auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & {
- return AM.getResult<TargetLibraryAnalysis>(F);
- };
- return CFLSteensAAResult(GetTLI);
- }
- char CFLSteensAAWrapperPass::ID = 0;
- INITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa",
- "Unification-Based CFL Alias Analysis", false, true)
- ImmutablePass *llvm::createCFLSteensAAWrapperPass() {
- return new CFLSteensAAWrapperPass();
- }
- CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) {
- initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry());
- }
- void CFLSteensAAWrapperPass::initializePass() {
- auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
- return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- };
- Result.reset(new CFLSteensAAResult(GetTLI));
- }
- void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
|