365 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			365 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
 | |
| //
 | |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | |
| // See https://llvm.org/LICENSE.txt for license information.
 | |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| //
 | |
| // This file implements a CFL-base, summary-based alias analysis algorithm. It
 | |
| // does not depend on types. The algorithm is a mixture of the one described in
 | |
| // "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast
 | |
| // algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by
 | |
| // Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a
 | |
| // graph of the uses of a variable, where each node is a memory location, and
 | |
| // each edge is an action that happened on that memory location.  The "actions"
 | |
| // can be one of Dereference, Reference, or Assign. The precision of this
 | |
| // analysis is roughly the same as that of an one level context-sensitive
 | |
| // Steensgaard's algorithm.
 | |
| //
 | |
| // Two variables are considered as aliasing iff you can reach one value's node
 | |
| // from the other value's node and the language formed by concatenating all of
 | |
| // the edge labels (actions) conforms to a context-free grammar.
 | |
| //
 | |
| // Because this algorithm requires a graph search on each query, we execute the
 | |
| // algorithm outlined in "Fast algorithms..." (mentioned above)
 | |
| // in order to transform the graph into sets of variables that may alias in
 | |
| // ~nlogn time (n = number of variables), which makes queries take constant
 | |
| // time.
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
 | |
| // CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because
 | |
| // FunctionPasses are only allowed to inspect the Function that they're being
 | |
| // run on. Realistically, this likely isn't a problem until we allow
 | |
| // FunctionPasses to run concurrently.
 | |
| 
 | |
| #include "llvm/Analysis/CFLSteensAliasAnalysis.h"
 | |
| #include "AliasAnalysisSummary.h"
 | |
| #include "CFLGraph.h"
 | |
| #include "StratifiedSets.h"
 | |
| #include "llvm/ADT/DenseMap.h"
 | |
| #include "llvm/ADT/Optional.h"
 | |
| #include "llvm/ADT/SmallVector.h"
 | |
| #include "llvm/Analysis/TargetLibraryInfo.h"
 | |
| #include "llvm/IR/Constants.h"
 | |
| #include "llvm/IR/Function.h"
 | |
| #include "llvm/IR/Type.h"
 | |
| #include "llvm/IR/Value.h"
 | |
| #include "llvm/InitializePasses.h"
 | |
| #include "llvm/Pass.h"
 | |
| #include "llvm/Support/Debug.h"
 | |
| #include "llvm/Support/raw_ostream.h"
 | |
| #include <algorithm>
 | |
| #include <cassert>
 | |
| #include <limits>
 | |
| #include <memory>
 | |
| #include <utility>
 | |
| 
 | |
| using namespace llvm;
 | |
| using namespace llvm::cflaa;
 | |
| 
 | |
| #define DEBUG_TYPE "cfl-steens-aa"
 | |
| 
 | |
| CFLSteensAAResult::CFLSteensAAResult(
 | |
|     std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
 | |
|     : AAResultBase(), GetTLI(std::move(GetTLI)) {}
 | |
| CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg)
 | |
|     : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {}
 | |
| CFLSteensAAResult::~CFLSteensAAResult() = default;
 | |
| 
 | |
| /// Information we have about a function and would like to keep around.
 | |
| class CFLSteensAAResult::FunctionInfo {
 | |
|   StratifiedSets<InstantiatedValue> Sets;
 | |
|   AliasSummary Summary;
 | |
| 
 | |
| public:
 | |
|   FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals,
 | |
|                StratifiedSets<InstantiatedValue> S);
 | |
| 
 | |
|   const StratifiedSets<InstantiatedValue> &getStratifiedSets() const {
 | |
|     return Sets;
 | |
|   }
 | |
| 
 | |
|   const AliasSummary &getAliasSummary() const { return Summary; }
 | |
| };
 | |
| 
 | |
| const StratifiedIndex StratifiedLink::SetSentinel =
 | |
|     std::numeric_limits<StratifiedIndex>::max();
 | |
| 
 | |
| //===----------------------------------------------------------------------===//
 | |
| // Function declarations that require types defined in the namespace above
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| /// Determines whether it would be pointless to add the given Value to our sets.
 | |
| static bool canSkipAddingToSets(Value *Val) {
 | |
|   // Constants can share instances, which may falsely unify multiple
 | |
|   // sets, e.g. in
 | |
|   // store i32* null, i32** %ptr1
 | |
|   // store i32* null, i32** %ptr2
 | |
|   // clearly ptr1 and ptr2 should not be unified into the same set, so
 | |
|   // we should filter out the (potentially shared) instance to
 | |
|   // i32* null.
 | |
|   if (isa<Constant>(Val)) {
 | |
|     // TODO: Because all of these things are constant, we can determine whether
 | |
|     // the data is *actually* mutable at graph building time. This will probably
 | |
|     // come for free/cheap with offset awareness.
 | |
|     bool CanStoreMutableData = isa<GlobalValue>(Val) ||
 | |
|                                isa<ConstantExpr>(Val) ||
 | |
|                                isa<ConstantAggregate>(Val);
 | |
|     return !CanStoreMutableData;
 | |
|   }
 | |
| 
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| CFLSteensAAResult::FunctionInfo::FunctionInfo(
 | |
|     Function &Fn, const SmallVectorImpl<Value *> &RetVals,
 | |
|     StratifiedSets<InstantiatedValue> S)
 | |
|     : Sets(std::move(S)) {
 | |
|   // Historically, an arbitrary upper-bound of 50 args was selected. We may want
 | |
|   // to remove this if it doesn't really matter in practice.
 | |
|   if (Fn.arg_size() > MaxSupportedArgsInSummary)
 | |
|     return;
 | |
| 
 | |
|   DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap;
 | |
| 
 | |
|   // Our intention here is to record all InterfaceValues that share the same
 | |
|   // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we
 | |
|   // have its StratifiedIndex scanned here and check if the index is presented
 | |
|   // in InterfaceMap: if it is not, we add the correspondence to the map;
 | |
|   // otherwise, an aliasing relation is found and we add it to
 | |
|   // RetParamRelations.
 | |
| 
 | |
|   auto AddToRetParamRelations = [&](unsigned InterfaceIndex,
 | |
|                                     StratifiedIndex SetIndex) {
 | |
|     unsigned Level = 0;
 | |
|     while (true) {
 | |
|       InterfaceValue CurrValue{InterfaceIndex, Level};
 | |
| 
 | |
|       auto Itr = InterfaceMap.find(SetIndex);
 | |
|       if (Itr != InterfaceMap.end()) {
 | |
|         if (CurrValue != Itr->second)
 | |
|           Summary.RetParamRelations.push_back(
 | |
|               ExternalRelation{CurrValue, Itr->second, UnknownOffset});
 | |
|         break;
 | |
|       }
 | |
| 
 | |
|       auto &Link = Sets.getLink(SetIndex);
 | |
|       InterfaceMap.insert(std::make_pair(SetIndex, CurrValue));
 | |
|       auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs);
 | |
|       if (ExternalAttrs.any())
 | |
|         Summary.RetParamAttributes.push_back(
 | |
|             ExternalAttribute{CurrValue, ExternalAttrs});
 | |
| 
 | |
|       if (!Link.hasBelow())
 | |
|         break;
 | |
| 
 | |
|       ++Level;
 | |
|       SetIndex = Link.Below;
 | |
|     }
 | |
|   };
 | |
| 
 | |
|   // Populate RetParamRelations for return values
 | |
|   for (auto *RetVal : RetVals) {
 | |
|     assert(RetVal != nullptr);
 | |
|     assert(RetVal->getType()->isPointerTy());
 | |
|     auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0});
 | |
|     if (RetInfo.hasValue())
 | |
|       AddToRetParamRelations(0, RetInfo->Index);
 | |
|   }
 | |
| 
 | |
|   // Populate RetParamRelations for parameters
 | |
|   unsigned I = 0;
 | |
|   for (auto &Param : Fn.args()) {
 | |
|     if (Param.getType()->isPointerTy()) {
 | |
|       auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0});
 | |
|       if (ParamInfo.hasValue())
 | |
|         AddToRetParamRelations(I + 1, ParamInfo->Index);
 | |
|     }
 | |
|     ++I;
 | |
|   }
 | |
| }
 | |
| 
 | |
| // Builds the graph + StratifiedSets for a function.
 | |
| CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) {
 | |
|   CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, GetTLI(*Fn), *Fn);
 | |
|   StratifiedSetsBuilder<InstantiatedValue> SetBuilder;
 | |
| 
 | |
|   // Add all CFLGraph nodes and all Dereference edges to StratifiedSets
 | |
|   auto &Graph = GraphBuilder.getCFLGraph();
 | |
|   for (const auto &Mapping : Graph.value_mappings()) {
 | |
|     auto Val = Mapping.first;
 | |
|     if (canSkipAddingToSets(Val))
 | |
|       continue;
 | |
|     auto &ValueInfo = Mapping.second;
 | |
| 
 | |
|     assert(ValueInfo.getNumLevels() > 0);
 | |
|     SetBuilder.add(InstantiatedValue{Val, 0});
 | |
|     SetBuilder.noteAttributes(InstantiatedValue{Val, 0},
 | |
|                               ValueInfo.getNodeInfoAtLevel(0).Attr);
 | |
|     for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) {
 | |
|       SetBuilder.add(InstantiatedValue{Val, I + 1});
 | |
|       SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1},
 | |
|                                 ValueInfo.getNodeInfoAtLevel(I + 1).Attr);
 | |
|       SetBuilder.addBelow(InstantiatedValue{Val, I},
 | |
|                           InstantiatedValue{Val, I + 1});
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // Add all assign edges to StratifiedSets
 | |
|   for (const auto &Mapping : Graph.value_mappings()) {
 | |
|     auto Val = Mapping.first;
 | |
|     if (canSkipAddingToSets(Val))
 | |
|       continue;
 | |
|     auto &ValueInfo = Mapping.second;
 | |
| 
 | |
|     for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
 | |
|       auto Src = InstantiatedValue{Val, I};
 | |
|       for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges)
 | |
|         SetBuilder.addWith(Src, Edge.Other);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build());
 | |
| }
 | |
| 
 | |
| void CFLSteensAAResult::scan(Function *Fn) {
 | |
|   auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
 | |
|   (void)InsertPair;
 | |
|   assert(InsertPair.second &&
 | |
|          "Trying to scan a function that has already been cached");
 | |
| 
 | |
|   // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call
 | |
|   // may get evaluated after operator[], potentially triggering a DenseMap
 | |
|   // resize and invalidating the reference returned by operator[]
 | |
|   auto FunInfo = buildSetsFrom(Fn);
 | |
|   Cache[Fn] = std::move(FunInfo);
 | |
| 
 | |
|   Handles.emplace_front(Fn, this);
 | |
| }
 | |
| 
 | |
| void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); }
 | |
| 
 | |
| /// Ensures that the given function is available in the cache, and returns the
 | |
| /// entry.
 | |
| const Optional<CFLSteensAAResult::FunctionInfo> &
 | |
| CFLSteensAAResult::ensureCached(Function *Fn) {
 | |
|   auto Iter = Cache.find(Fn);
 | |
|   if (Iter == Cache.end()) {
 | |
|     scan(Fn);
 | |
|     Iter = Cache.find(Fn);
 | |
|     assert(Iter != Cache.end());
 | |
|     assert(Iter->second.hasValue());
 | |
|   }
 | |
|   return Iter->second;
 | |
| }
 | |
| 
 | |
| const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) {
 | |
|   auto &FunInfo = ensureCached(&Fn);
 | |
|   if (FunInfo.hasValue())
 | |
|     return &FunInfo->getAliasSummary();
 | |
|   else
 | |
|     return nullptr;
 | |
| }
 | |
| 
 | |
| AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
 | |
|                                      const MemoryLocation &LocB) {
 | |
|   auto *ValA = const_cast<Value *>(LocA.Ptr);
 | |
|   auto *ValB = const_cast<Value *>(LocB.Ptr);
 | |
| 
 | |
|   if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy())
 | |
|     return NoAlias;
 | |
| 
 | |
|   Function *Fn = nullptr;
 | |
|   Function *MaybeFnA = const_cast<Function *>(parentFunctionOfValue(ValA));
 | |
|   Function *MaybeFnB = const_cast<Function *>(parentFunctionOfValue(ValB));
 | |
|   if (!MaybeFnA && !MaybeFnB) {
 | |
|     // The only times this is known to happen are when globals + InlineAsm are
 | |
|     // involved
 | |
|     LLVM_DEBUG(
 | |
|         dbgs()
 | |
|         << "CFLSteensAA: could not extract parent function information.\n");
 | |
|     return MayAlias;
 | |
|   }
 | |
| 
 | |
|   if (MaybeFnA) {
 | |
|     Fn = MaybeFnA;
 | |
|     assert((!MaybeFnB || MaybeFnB == MaybeFnA) &&
 | |
|            "Interprocedural queries not supported");
 | |
|   } else {
 | |
|     Fn = MaybeFnB;
 | |
|   }
 | |
| 
 | |
|   assert(Fn != nullptr);
 | |
|   auto &MaybeInfo = ensureCached(Fn);
 | |
|   assert(MaybeInfo.hasValue());
 | |
| 
 | |
|   auto &Sets = MaybeInfo->getStratifiedSets();
 | |
|   auto MaybeA = Sets.find(InstantiatedValue{ValA, 0});
 | |
|   if (!MaybeA.hasValue())
 | |
|     return MayAlias;
 | |
| 
 | |
|   auto MaybeB = Sets.find(InstantiatedValue{ValB, 0});
 | |
|   if (!MaybeB.hasValue())
 | |
|     return MayAlias;
 | |
| 
 | |
|   auto SetA = *MaybeA;
 | |
|   auto SetB = *MaybeB;
 | |
|   auto AttrsA = Sets.getLink(SetA.Index).Attrs;
 | |
|   auto AttrsB = Sets.getLink(SetB.Index).Attrs;
 | |
| 
 | |
|   // If both values are local (meaning the corresponding set has attribute
 | |
|   // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them:
 | |
|   // they may-alias each other if and only if they are in the same set.
 | |
|   // If at least one value is non-local (meaning it either is global/argument or
 | |
|   // it comes from unknown sources like integer cast), the situation becomes a
 | |
|   // bit more interesting. We follow three general rules described below:
 | |
|   // - Non-local values may alias each other
 | |
|   // - AttrNone values do not alias any non-local values
 | |
|   // - AttrEscaped do not alias globals/arguments, but they may alias
 | |
|   // AttrUnknown values
 | |
|   if (SetA.Index == SetB.Index)
 | |
|     return MayAlias;
 | |
|   if (AttrsA.none() || AttrsB.none())
 | |
|     return NoAlias;
 | |
|   if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB))
 | |
|     return MayAlias;
 | |
|   if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB))
 | |
|     return MayAlias;
 | |
|   return NoAlias;
 | |
| }
 | |
| 
 | |
| AnalysisKey CFLSteensAA::Key;
 | |
| 
 | |
| CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) {
 | |
|   auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & {
 | |
|     return AM.getResult<TargetLibraryAnalysis>(F);
 | |
|   };
 | |
|   return CFLSteensAAResult(GetTLI);
 | |
| }
 | |
| 
 | |
| char CFLSteensAAWrapperPass::ID = 0;
 | |
| INITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa",
 | |
|                 "Unification-Based CFL Alias Analysis", false, true)
 | |
| 
 | |
| ImmutablePass *llvm::createCFLSteensAAWrapperPass() {
 | |
|   return new CFLSteensAAWrapperPass();
 | |
| }
 | |
| 
 | |
| CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) {
 | |
|   initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry());
 | |
| }
 | |
| 
 | |
| void CFLSteensAAWrapperPass::initializePass() {
 | |
|   auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
 | |
|     return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
 | |
|   };
 | |
|   Result.reset(new CFLSteensAAResult(GetTLI));
 | |
| }
 | |
| 
 | |
| void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
 | |
|   AU.setPreservesAll();
 | |
|   AU.addRequired<TargetLibraryInfoWrapperPass>();
 | |
| }
 |