//======- CFLGraph.h - Abstract stratified sets implementation. --------======// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// \file /// This file defines CFLGraph, an auxiliary data structure used by CFL-based /// alias analysis. /// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_CFLGRAPH_H #define LLVM_ANALYSIS_CFLGRAPH_H #include "AliasAnalysisSummary.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" namespace llvm { namespace cflaa { /// \brief The Program Expression Graph (PEG) of CFL analysis /// CFLGraph is auxiliary data structure used by CFL-based alias analysis to /// describe flow-insensitive pointer-related behaviors. Given an LLVM function, /// the main purpose of this graph is to abstract away unrelated facts and /// translate the rest into a form that can be easily digested by CFL analyses. /// Each Node in the graph is an InstantiatedValue, and each edge represent a /// pointer assignment between InstantiatedValue. Pointer /// references/dereferences are not explicitly stored in the graph: we /// implicitly assume that for each node (X, I) it has a dereference edge to (X, /// I+1) and a reference edge to (X, I-1). class CFLGraph { public: typedef InstantiatedValue Node; struct Edge { Node Other; }; typedef std::vector<Edge> EdgeList; struct NodeInfo { EdgeList Edges; AliasAttrs Attr; }; class ValueInfo { std::vector<NodeInfo> Levels; public: bool addNodeToLevel(unsigned Level) { auto NumLevels = Levels.size(); if (NumLevels > Level) return false; Levels.resize(Level + 1); return true; } NodeInfo &getNodeInfoAtLevel(unsigned Level) { assert(Level < Levels.size()); return Levels[Level]; } const NodeInfo &getNodeInfoAtLevel(unsigned Level) const { assert(Level < Levels.size()); return Levels[Level]; } unsigned getNumLevels() const { return Levels.size(); } }; private: typedef DenseMap<Value *, ValueInfo> ValueMap; ValueMap ValueImpls; const NodeInfo *getNode(Node N) const { auto Itr = ValueImpls.find(N.Val); if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel) return nullptr; return &Itr->second.getNodeInfoAtLevel(N.DerefLevel); } NodeInfo *getNode(Node N) { auto Itr = ValueImpls.find(N.Val); if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel) return nullptr; return &Itr->second.getNodeInfoAtLevel(N.DerefLevel); } public: typedef ValueMap::const_iterator const_value_iterator; bool addNode(Node N, AliasAttrs Attr = AliasAttrs()) { assert(N.Val != nullptr); auto &ValInfo = ValueImpls[N.Val]; auto Changed = ValInfo.addNodeToLevel(N.DerefLevel); ValInfo.getNodeInfoAtLevel(N.DerefLevel).Attr |= Attr; return Changed; } void addAttr(Node N, AliasAttrs Attr) { auto *Info = getNode(N); assert(Info != nullptr); Info->Attr |= Attr; } void addEdge(Node From, Node To, int64_t Offset = 0) { assert(getNode(To) != nullptr); auto *FromInfo = getNode(From); assert(FromInfo != nullptr); FromInfo->Edges.push_back(Edge{To}); } AliasAttrs attrFor(Node N) const { auto *Info = getNode(N); assert(Info != nullptr); return Info->Attr; } iterator_range<const_value_iterator> value_mappings() const { return make_range<const_value_iterator>(ValueImpls.begin(), ValueImpls.end()); } }; ///\brief A builder class used to create CFLGraph instance from a given function /// The CFL-AA that uses this builder must provide its own type as a template /// argument. This is necessary for interprocedural processing: CFLGraphBuilder /// needs a way of obtaining the summary of other functions when callinsts are /// encountered. /// As a result, we expect the said CFL-AA to expose a getAliasSummary() public /// member function that takes a Function& and returns the corresponding summary /// as a const AliasSummary*. template <typename CFLAA> class CFLGraphBuilder { // Input of the builder CFLAA &Analysis; const TargetLibraryInfo &TLI; // Output of the builder CFLGraph Graph; SmallVector<Value *, 4> ReturnedValues; // Helper class /// Gets the edges our graph should have, based on an Instruction* class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> { CFLAA &AA; const TargetLibraryInfo &TLI; CFLGraph &Graph; SmallVectorImpl<Value *> &ReturnValues; static bool hasUsefulEdges(ConstantExpr *CE) { // ConstantExpr doesn't have terminators, invokes, or fences, so only // needs // to check for compares. return CE->getOpcode() != Instruction::ICmp && CE->getOpcode() != Instruction::FCmp; } // Returns possible functions called by CS into the given SmallVectorImpl. // Returns true if targets found, false otherwise. static bool getPossibleTargets(CallSite CS, SmallVectorImpl<Function *> &Output) { if (auto *Fn = CS.getCalledFunction()) { Output.push_back(Fn); return true; } // TODO: If the call is indirect, we might be able to enumerate all // potential // targets of the call and return them, rather than just failing. return false; } void addNode(Value *Val, AliasAttrs Attr = AliasAttrs()) { assert(Val != nullptr && Val->getType()->isPointerTy()); if (auto GVal = dyn_cast<GlobalValue>(Val)) { if (Graph.addNode(InstantiatedValue{GVal, 0}, getGlobalOrArgAttrFromValue(*GVal))) Graph.addNode(InstantiatedValue{GVal, 1}, getAttrUnknown()); } else if (auto CExpr = dyn_cast<ConstantExpr>(Val)) { if (hasUsefulEdges(CExpr)) { if (Graph.addNode(InstantiatedValue{CExpr, 0})) visitConstantExpr(CExpr); } } else Graph.addNode(InstantiatedValue{Val, 0}, Attr); } void addAssignEdge(Value *From, Value *To, int64_t Offset = 0) { assert(From != nullptr && To != nullptr); if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy()) return; addNode(From); if (To != From) { addNode(To); Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 0}, Offset); } } void addDerefEdge(Value *From, Value *To) { assert(From != nullptr && To != nullptr); if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy()) return; addNode(From); addNode(To); Graph.addNode(InstantiatedValue{From, 1}); Graph.addEdge(InstantiatedValue{From, 1}, InstantiatedValue{To, 0}); } public: GetEdgesVisitor(CFLGraphBuilder &Builder) : AA(Builder.Analysis), TLI(Builder.TLI), Graph(Builder.Graph), ReturnValues(Builder.ReturnedValues) {} void visitInstruction(Instruction &) { llvm_unreachable("Unsupported instruction encountered"); } void visitReturnInst(ReturnInst &Inst) { if (auto RetVal = Inst.getReturnValue()) { if (RetVal->getType()->isPointerTy()) { addNode(RetVal); ReturnValues.push_back(RetVal); } } } void visitPtrToIntInst(PtrToIntInst &Inst) { auto *Ptr = Inst.getOperand(0); addNode(Ptr, getAttrEscaped()); } void visitIntToPtrInst(IntToPtrInst &Inst) { auto *Ptr = &Inst; addNode(Ptr, getAttrUnknown()); } void visitCastInst(CastInst &Inst) { auto *Src = Inst.getOperand(0); addAssignEdge(Src, &Inst); } void visitBinaryOperator(BinaryOperator &Inst) { auto *Op1 = Inst.getOperand(0); auto *Op2 = Inst.getOperand(1); addAssignEdge(Op1, &Inst); addAssignEdge(Op2, &Inst); } void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) { auto *Ptr = Inst.getPointerOperand(); auto *Val = Inst.getNewValOperand(); addDerefEdge(Ptr, Val); } void visitAtomicRMWInst(AtomicRMWInst &Inst) { auto *Ptr = Inst.getPointerOperand(); auto *Val = Inst.getValOperand(); addDerefEdge(Ptr, Val); } void visitPHINode(PHINode &Inst) { for (Value *Val : Inst.incoming_values()) addAssignEdge(Val, &Inst); } void visitGetElementPtrInst(GetElementPtrInst &Inst) { auto *Op = Inst.getPointerOperand(); addAssignEdge(Op, &Inst); } void visitSelectInst(SelectInst &Inst) { // Condition is not processed here (The actual statement producing // the condition result is processed elsewhere). For select, the // condition is evaluated, but not loaded, stored, or assigned // simply as a result of being the condition of a select. auto *TrueVal = Inst.getTrueValue(); auto *FalseVal = Inst.getFalseValue(); addAssignEdge(TrueVal, &Inst); addAssignEdge(FalseVal, &Inst); } void visitAllocaInst(AllocaInst &Inst) { addNode(&Inst); } void visitLoadInst(LoadInst &Inst) { auto *Ptr = Inst.getPointerOperand(); auto *Val = &Inst; addDerefEdge(Ptr, Val); } void visitStoreInst(StoreInst &Inst) { auto *Ptr = Inst.getPointerOperand(); auto *Val = Inst.getValueOperand(); addDerefEdge(Ptr, Val); } void visitVAArgInst(VAArgInst &Inst) { // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it // does // two things: // 1. Loads a value from *((T*)*Ptr). // 2. Increments (stores to) *Ptr by some target-specific amount. // For now, we'll handle this like a landingpad instruction (by placing // the // result in its own group, and having that group alias externals). addNode(&Inst, getAttrUnknown()); } static bool isFunctionExternal(Function *Fn) { return !Fn->hasExactDefinition(); } bool tryInterproceduralAnalysis(CallSite CS, const SmallVectorImpl<Function *> &Fns) { assert(Fns.size() > 0); if (CS.arg_size() > MaxSupportedArgsInSummary) return false; // Exit early if we'll fail anyway for (auto *Fn : Fns) { if (isFunctionExternal(Fn) || Fn->isVarArg()) return false; // Fail if the caller does not provide enough arguments assert(Fn->arg_size() <= CS.arg_size()); if (!AA.getAliasSummary(*Fn)) return false; } for (auto *Fn : Fns) { auto Summary = AA.getAliasSummary(*Fn); assert(Summary != nullptr); auto &RetParamRelations = Summary->RetParamRelations; for (auto &Relation : RetParamRelations) { auto IRelation = instantiateExternalRelation(Relation, CS); if (IRelation.hasValue()) { Graph.addNode(IRelation->From); Graph.addNode(IRelation->To); Graph.addEdge(IRelation->From, IRelation->To); } } auto &RetParamAttributes = Summary->RetParamAttributes; for (auto &Attribute : RetParamAttributes) { auto IAttr = instantiateExternalAttribute(Attribute, CS); if (IAttr.hasValue()) Graph.addNode(IAttr->IValue, IAttr->Attr); } } return true; } void visitCallSite(CallSite CS) { auto Inst = CS.getInstruction(); // Make sure all arguments and return value are added to the graph first for (Value *V : CS.args()) if (V->getType()->isPointerTy()) addNode(V); if (Inst->getType()->isPointerTy()) addNode(Inst); // Check if Inst is a call to a library function that // allocates/deallocates // on the heap. Those kinds of functions do not introduce any aliases. // TODO: address other common library functions such as realloc(), // strdup(), // etc. if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) || isFreeCall(Inst, &TLI)) return; // TODO: Add support for noalias args/all the other fun function // attributes // that we can tack on. SmallVector<Function *, 4> Targets; if (getPossibleTargets(CS, Targets)) if (tryInterproceduralAnalysis(CS, Targets)) return; // Because the function is opaque, we need to note that anything // could have happened to the arguments (unless the function is marked // readonly or readnone), and that the result could alias just about // anything, too (unless the result is marked noalias). if (!CS.onlyReadsMemory()) for (Value *V : CS.args()) { if (V->getType()->isPointerTy()) { // The argument itself escapes. Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped()); // The fate of argument memory is unknown. Note that since // AliasAttrs is transitive with respect to dereference, we only // need to specify it for the first-level memory. Graph.addNode(InstantiatedValue{V, 1}, getAttrUnknown()); } } if (Inst->getType()->isPointerTy()) { auto *Fn = CS.getCalledFunction(); if (Fn == nullptr || !Fn->doesNotAlias(0)) // No need to call addNode() since we've added Inst at the // beginning of this function and we know it is not a global. Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown()); } } /// Because vectors/aggregates are immutable and unaddressable, there's /// nothing we can do to coax a value out of them, other than calling /// Extract{Element,Value}. We can effectively treat them as pointers to /// arbitrary memory locations we can store in and load from. void visitExtractElementInst(ExtractElementInst &Inst) { auto *Ptr = Inst.getVectorOperand(); auto *Val = &Inst; addDerefEdge(Ptr, Val); } void visitInsertElementInst(InsertElementInst &Inst) { auto *Vec = Inst.getOperand(0); auto *Val = Inst.getOperand(1); addAssignEdge(Vec, &Inst); addDerefEdge(&Inst, Val); } void visitLandingPadInst(LandingPadInst &Inst) { // Exceptions come from "nowhere", from our analysis' perspective. // So we place the instruction its own group, noting that said group may // alias externals addNode(&Inst, getAttrUnknown()); } void visitInsertValueInst(InsertValueInst &Inst) { auto *Agg = Inst.getOperand(0); auto *Val = Inst.getOperand(1); addAssignEdge(Agg, &Inst); addDerefEdge(&Inst, Val); } void visitExtractValueInst(ExtractValueInst &Inst) { auto *Ptr = Inst.getAggregateOperand(); addDerefEdge(Ptr, &Inst); } void visitShuffleVectorInst(ShuffleVectorInst &Inst) { auto *From1 = Inst.getOperand(0); auto *From2 = Inst.getOperand(1); addAssignEdge(From1, &Inst); addAssignEdge(From2, &Inst); } void visitConstantExpr(ConstantExpr *CE) { switch (CE->getOpcode()) { default: llvm_unreachable("Unknown instruction type encountered!"); // Build the switch statement using the Instruction.def file. #define HANDLE_INST(NUM, OPCODE, CLASS) \ case Instruction::OPCODE: \ this->visit##OPCODE(*(CLASS *)CE); \ break; #include "llvm/IR/Instruction.def" } } }; // Helper functions // Determines whether or not we an instruction is useless to us (e.g. // FenceInst) static bool hasUsefulEdges(Instruction *Inst) { bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) && !isa<InvokeInst>(Inst) && !isa<ReturnInst>(Inst); return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) && !IsNonInvokeRetTerminator; } void addArgumentToGraph(Argument &Arg) { if (Arg.getType()->isPointerTy()) { Graph.addNode(InstantiatedValue{&Arg, 0}, getGlobalOrArgAttrFromValue(Arg)); // Pointees of a formal parameter is known to the caller Graph.addNode(InstantiatedValue{&Arg, 1}, getAttrCaller()); } } // Given an Instruction, this will add it to the graph, along with any // Instructions that are potentially only available from said Instruction // For example, given the following line: // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2 // addInstructionToGraph would add both the `load` and `getelementptr` // instructions to the graph appropriately. void addInstructionToGraph(GetEdgesVisitor &Visitor, Instruction &Inst) { if (!hasUsefulEdges(&Inst)) return; Visitor.visit(Inst); } // Builds the graph needed for constructing the StratifiedSets for the given // function void buildGraphFrom(Function &Fn) { GetEdgesVisitor Visitor(*this); for (auto &Bb : Fn.getBasicBlockList()) for (auto &Inst : Bb.getInstList()) addInstructionToGraph(Visitor, Inst); for (auto &Arg : Fn.args()) addArgumentToGraph(Arg); } public: CFLGraphBuilder(CFLAA &Analysis, const TargetLibraryInfo &TLI, Function &Fn) : Analysis(Analysis), TLI(TLI) { buildGraphFrom(Fn); } const CFLGraph &getCFLGraph() const { return Graph; } const SmallVector<Value *, 4> &getReturnValues() const { return ReturnedValues; } }; } } #endif