//===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a CFL-based, summary-based alias analysis algorithm. It
// does not depend on types. The algorithm is a mixture of the one described in
// "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast
// algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by
// Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a
// graph of the uses of a variable, where each node is a memory location, and
// each edge is an action that happened on that memory location. The "actions"
// can be one of Dereference, Reference, or Assign. The precision of this
// analysis is roughly the same as that of a one-level context-sensitive
// Steensgaard's algorithm.
//
// Two variables are considered as aliasing iff you can reach one value's node
// from the other value's node and the language formed by concatenating all of
// the edge labels (actions) conforms to a context-free grammar.
//
// Because this algorithm requires a graph search on each query, we execute the
// algorithm outlined in "Fast algorithms..." (mentioned above)
// in order to transform the graph into sets of variables that may alias in
// ~nlogn time (n = number of variables), which makes queries take constant
// time.
//===----------------------------------------------------------------------===//

// N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
// CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because
// FunctionPasses are only allowed to inspect the Function that they're being
// run on.
Realistically, this likely isn't a problem until we allow // FunctionPasses to run concurrently. #include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "AliasAnalysisSummary.h" #include "CFLGraph.h" #include "StratifiedSets.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include using namespace llvm; using namespace llvm::cflaa; #define DEBUG_TYPE "cfl-steens-aa" CFLSteensAAResult::CFLSteensAAResult( std::function GetTLI) : GetTLI(std::move(GetTLI)) {} CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg) : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {} CFLSteensAAResult::~CFLSteensAAResult() = default; /// Information we have about a function and would like to keep around. class CFLSteensAAResult::FunctionInfo { StratifiedSets Sets; AliasSummary Summary; public: FunctionInfo(Function &Fn, const SmallVectorImpl &RetVals, StratifiedSets S); const StratifiedSets &getStratifiedSets() const { return Sets; } const AliasSummary &getAliasSummary() const { return Summary; } }; const StratifiedIndex StratifiedLink::SetSentinel = std::numeric_limits::max(); //===----------------------------------------------------------------------===// // Function declarations that require types defined in the namespace above //===----------------------------------------------------------------------===// /// Determines whether it would be pointless to add the given Value to our sets. static bool canSkipAddingToSets(Value *Val) { // Constants can share instances, which may falsely unify multiple // sets, e.g. 
in // store i32* null, i32** %ptr1 // store i32* null, i32** %ptr2 // clearly ptr1 and ptr2 should not be unified into the same set, so // we should filter out the (potentially shared) instance to // i32* null. if (isa(Val)) { // TODO: Because all of these things are constant, we can determine whether // the data is *actually* mutable at graph building time. This will probably // come for free/cheap with offset awareness. bool CanStoreMutableData = isa(Val) || isa(Val) || isa(Val); return !CanStoreMutableData; } return false; } CFLSteensAAResult::FunctionInfo::FunctionInfo( Function &Fn, const SmallVectorImpl &RetVals, StratifiedSets S) : Sets(std::move(S)) { // Historically, an arbitrary upper-bound of 50 args was selected. We may want // to remove this if it doesn't really matter in practice. if (Fn.arg_size() > MaxSupportedArgsInSummary) return; DenseMap InterfaceMap; // Our intention here is to record all InterfaceValues that share the same // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we // have its StratifiedIndex scanned here and check if the index is presented // in InterfaceMap: if it is not, we add the correspondence to the map; // otherwise, an aliasing relation is found and we add it to // RetParamRelations. 
auto AddToRetParamRelations = [&](unsigned InterfaceIndex, StratifiedIndex SetIndex) { unsigned Level = 0; while (true) { InterfaceValue CurrValue{InterfaceIndex, Level}; auto Itr = InterfaceMap.find(SetIndex); if (Itr != InterfaceMap.end()) { if (CurrValue != Itr->second) Summary.RetParamRelations.push_back( ExternalRelation{CurrValue, Itr->second, UnknownOffset}); break; } auto &Link = Sets.getLink(SetIndex); InterfaceMap.insert(std::make_pair(SetIndex, CurrValue)); auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs); if (ExternalAttrs.any()) Summary.RetParamAttributes.push_back( ExternalAttribute{CurrValue, ExternalAttrs}); if (!Link.hasBelow()) break; ++Level; SetIndex = Link.Below; } }; // Populate RetParamRelations for return values for (auto *RetVal : RetVals) { assert(RetVal != nullptr); assert(RetVal->getType()->isPointerTy()); auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0}); if (RetInfo.hasValue()) AddToRetParamRelations(0, RetInfo->Index); } // Populate RetParamRelations for parameters unsigned I = 0; for (auto &Param : Fn.args()) { if (Param.getType()->isPointerTy()) { auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0}); if (ParamInfo.hasValue()) AddToRetParamRelations(I + 1, ParamInfo->Index); } ++I; } } // Builds the graph + StratifiedSets for a function. 
CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) { CFLGraphBuilder GraphBuilder(*this, GetTLI(*Fn), *Fn); StratifiedSetsBuilder SetBuilder; // Add all CFLGraph nodes and all Dereference edges to StratifiedSets auto &Graph = GraphBuilder.getCFLGraph(); for (const auto &Mapping : Graph.value_mappings()) { auto Val = Mapping.first; if (canSkipAddingToSets(Val)) continue; auto &ValueInfo = Mapping.second; assert(ValueInfo.getNumLevels() > 0); SetBuilder.add(InstantiatedValue{Val, 0}); SetBuilder.noteAttributes(InstantiatedValue{Val, 0}, ValueInfo.getNodeInfoAtLevel(0).Attr); for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) { SetBuilder.add(InstantiatedValue{Val, I + 1}); SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1}, ValueInfo.getNodeInfoAtLevel(I + 1).Attr); SetBuilder.addBelow(InstantiatedValue{Val, I}, InstantiatedValue{Val, I + 1}); } } // Add all assign edges to StratifiedSets for (const auto &Mapping : Graph.value_mappings()) { auto Val = Mapping.first; if (canSkipAddingToSets(Val)) continue; auto &ValueInfo = Mapping.second; for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { auto Src = InstantiatedValue{Val, I}; for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) SetBuilder.addWith(Src, Edge.Other); } } return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build()); } void CFLSteensAAResult::scan(Function *Fn) { auto InsertPair = Cache.insert(std::make_pair(Fn, Optional())); (void)InsertPair; assert(InsertPair.second && "Trying to scan a function that has already been cached"); // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call // may get evaluated after operator[], potentially triggering a DenseMap // resize and invalidating the reference returned by operator[] auto FunInfo = buildSetsFrom(Fn); Cache[Fn] = std::move(FunInfo); Handles.emplace_front(Fn, this); } void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); } /// Ensures that the 
given function is available in the cache, and returns the /// entry. const Optional & CFLSteensAAResult::ensureCached(Function *Fn) { auto Iter = Cache.find(Fn); if (Iter == Cache.end()) { scan(Fn); Iter = Cache.find(Fn); assert(Iter != Cache.end()); assert(Iter->second.hasValue()); } return Iter->second; } const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) { auto &FunInfo = ensureCached(&Fn); if (FunInfo.hasValue()) return &FunInfo->getAliasSummary(); else return nullptr; } AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA, const MemoryLocation &LocB) { auto *ValA = const_cast(LocA.Ptr); auto *ValB = const_cast(LocB.Ptr); if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy()) return AliasResult::NoAlias; Function *Fn = nullptr; Function *MaybeFnA = const_cast(parentFunctionOfValue(ValA)); Function *MaybeFnB = const_cast(parentFunctionOfValue(ValB)); if (!MaybeFnA && !MaybeFnB) { // The only times this is known to happen are when globals + InlineAsm are // involved LLVM_DEBUG( dbgs() << "CFLSteensAA: could not extract parent function information.\n"); return AliasResult::MayAlias; } if (MaybeFnA) { Fn = MaybeFnA; assert((!MaybeFnB || MaybeFnB == MaybeFnA) && "Interprocedural queries not supported"); } else { Fn = MaybeFnB; } assert(Fn != nullptr); auto &MaybeInfo = ensureCached(Fn); assert(MaybeInfo.hasValue()); auto &Sets = MaybeInfo->getStratifiedSets(); auto MaybeA = Sets.find(InstantiatedValue{ValA, 0}); if (!MaybeA.hasValue()) return AliasResult::MayAlias; auto MaybeB = Sets.find(InstantiatedValue{ValB, 0}); if (!MaybeB.hasValue()) return AliasResult::MayAlias; auto SetA = *MaybeA; auto SetB = *MaybeB; auto AttrsA = Sets.getLink(SetA.Index).Attrs; auto AttrsB = Sets.getLink(SetB.Index).Attrs; // If both values are local (meaning the corresponding set has attribute // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them: // they may-alias each other if and only if they are in the same set. 
// If at least one value is non-local (meaning it either is global/argument or // it comes from unknown sources like integer cast), the situation becomes a // bit more interesting. We follow three general rules described below: // - Non-local values may alias each other // - AttrNone values do not alias any non-local values // - AttrEscaped do not alias globals/arguments, but they may alias // AttrUnknown values if (SetA.Index == SetB.Index) return AliasResult::MayAlias; if (AttrsA.none() || AttrsB.none()) return AliasResult::NoAlias; if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB)) return AliasResult::MayAlias; if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB)) return AliasResult::MayAlias; return AliasResult::NoAlias; } AnalysisKey CFLSteensAA::Key; CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) { auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & { return AM.getResult(F); }; return CFLSteensAAResult(GetTLI); } char CFLSteensAAWrapperPass::ID = 0; INITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa", "Unification-Based CFL Alias Analysis", false, true) ImmutablePass *llvm::createCFLSteensAAWrapperPass() { return new CFLSteensAAWrapperPass(); } CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) { initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry()); } void CFLSteensAAWrapperPass::initializePass() { auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & { return this->getAnalysis().getTLI(F); }; Result.reset(new CFLSteensAAResult(GetTLI)); } void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); }