1 //===- CallGraph.h - Build a Module's call graph ----------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// This file provides interfaces used to build and manipulate a call graph, 11 /// which is a very useful tool for interprocedural optimization. 12 /// 13 /// Every function in a module is represented as a node in the call graph. The 14 /// callgraph node keeps track of which functions are called by the function 15 /// corresponding to the node. 16 /// 17 /// A call graph may contain nodes where the function that they correspond to 18 /// is null. These 'external' nodes are used to represent control flow that is 19 /// not represented (or analyzable) in the module. In particular, this 20 /// analysis builds one external node such that: 21 /// 1. All functions in the module without internal linkage will have edges 22 /// from this external node, indicating that they could be called by 23 /// functions outside of the module. 24 /// 2. All functions whose address is used for something more than a direct 25 /// call, for example being stored into a memory location will also have 26 /// an edge from this external node. Since they may be called by an 27 /// unknown caller later, they must be tracked as such. 28 /// 29 /// There is a second external node added for calls that leave this module. 30 /// Functions have a call edge to the external node iff: 31 /// 1. The function is external, reflecting the fact that they could call 32 /// anything without internal linkage or that has its address taken. 33 /// 2. The function contains an indirect function call. 34 /// 35 /// As an extension in the future, there may be multiple nodes with a null 36 /// function. These will be used when we can prove (through pointer analysis) 37 /// that an indirect call site can call only a specific set of functions. 38 /// 39 /// Because of these properties, the CallGraph captures a conservative superset 40 /// of all of the caller-callee relationships, which is useful for 41 /// transformations. 42 /// 43 //===----------------------------------------------------------------------===// 44 45 #ifndef LLVM_ANALYSIS_CALLGRAPH_H 46 #define LLVM_ANALYSIS_CALLGRAPH_H 47 48 #include "llvm/ADT/GraphTraits.h" 49 #include "llvm/ADT/STLExtras.h" 50 #include "llvm/IR/Function.h" 51 #include "llvm/IR/InstrTypes.h" 52 #include "llvm/IR/Intrinsics.h" 53 #include "llvm/IR/PassManager.h" 54 #include "llvm/IR/ValueHandle.h" 55 #include "llvm/Pass.h" 56 #include <cassert> 57 #include <map> 58 #include <memory> 59 #include <utility> 60 #include <vector> 61 62 namespace llvm { 63 64 class CallGraphNode; 65 class Module; 66 class raw_ostream; 67 68 /// The basic data container for the call graph of a \c Module of IR. 69 /// 70 /// This class exposes both the interface to the call graph for a module of IR. 71 /// 72 /// The core call graph itself can also be updated to reflect changes to the IR. 73 class CallGraph { 74 Module &M; 75 76 using FunctionMapTy = 77 std::map<const Function *, std::unique_ptr<CallGraphNode>>; 78 79 /// A map from \c Function* to \c CallGraphNode*. 80 FunctionMapTy FunctionMap; 81 82 /// This node has edges to all external functions and those internal 83 /// functions that have their address taken. 84 CallGraphNode *ExternalCallingNode; 85 86 /// This node has edges to it from all functions making indirect calls 87 /// or calling an external function. 88 std::unique_ptr<CallGraphNode> CallsExternalNode; 89 90 public: 91 explicit CallGraph(Module &M); 92 CallGraph(CallGraph &&Arg); 93 ~CallGraph(); 94 95 void print(raw_ostream &OS) const; 96 void dump() const; 97 98 using iterator = FunctionMapTy::iterator; 99 using const_iterator = FunctionMapTy::const_iterator; 100 101 /// Returns the module the call graph corresponds to. getModule()102 Module &getModule() const { return M; } 103 104 bool invalidate(Module &, const PreservedAnalyses &PA, 105 ModuleAnalysisManager::Invalidator &); 106 begin()107 inline iterator begin() { return FunctionMap.begin(); } end()108 inline iterator end() { return FunctionMap.end(); } begin()109 inline const_iterator begin() const { return FunctionMap.begin(); } end()110 inline const_iterator end() const { return FunctionMap.end(); } 111 112 /// Returns the call graph node for the provided function. 113 inline const CallGraphNode *operator[](const Function *F) const { 114 const_iterator I = FunctionMap.find(F); 115 assert(I != FunctionMap.end() && "Function not in callgraph!"); 116 return I->second.get(); 117 } 118 119 /// Returns the call graph node for the provided function. 120 inline CallGraphNode *operator[](const Function *F) { 121 const_iterator I = FunctionMap.find(F); 122 assert(I != FunctionMap.end() && "Function not in callgraph!"); 123 return I->second.get(); 124 } 125 126 /// Returns the \c CallGraphNode which is used to represent 127 /// undetermined calls into the callgraph. getExternalCallingNode()128 CallGraphNode *getExternalCallingNode() const { return ExternalCallingNode; } 129 getCallsExternalNode()130 CallGraphNode *getCallsExternalNode() const { 131 return CallsExternalNode.get(); 132 } 133 134 /// Old node has been deleted, and New is to be used in its place, update the 135 /// ExternalCallingNode. 136 void ReplaceExternalCallEdge(CallGraphNode *Old, CallGraphNode *New); 137 138 //===--------------------------------------------------------------------- 139 // Functions to keep a call graph up to date with a function that has been 140 // modified. 141 // 142 143 /// Unlink the function from this module, returning it. 144 /// 145 /// Because this removes the function from the module, the call graph node is 146 /// destroyed. This is only valid if the function does not call any other 147 /// functions (ie, there are no edges in it's CGN). The easiest way to do 148 /// this is to dropAllReferences before calling this. 149 Function *removeFunctionFromModule(CallGraphNode *CGN); 150 151 /// Similar to operator[], but this will insert a new CallGraphNode for 152 /// \c F if one does not already exist. 153 CallGraphNode *getOrInsertFunction(const Function *F); 154 155 /// Populate \p CGN based on the calls inside the associated function. 156 void populateCallGraphNode(CallGraphNode *CGN); 157 158 /// Add a function to the call graph, and link the node to all of the 159 /// functions that it calls. 160 void addToCallGraph(Function *F); 161 }; 162 163 /// A node in the call graph for a module. 164 /// 165 /// Typically represents a function in the call graph. There are also special 166 /// "null" nodes used to represent theoretical entries in the call graph. 167 class CallGraphNode { 168 public: 169 /// A pair of the calling instruction (a call or invoke) 170 /// and the call graph node being called. 171 /// Call graph node may have two types of call records which represent an edge 172 /// in the call graph - reference or a call edge. Reference edges are not 173 /// associated with any call instruction and are created with the first field 174 /// set to `None`, while real call edges have instruction address in this 175 /// field. Therefore, all real call edges are expected to have a value in the 176 /// first field and it is not supposed to be `nullptr`. 177 /// Reference edges, for example, are used for connecting broker function 178 /// caller to the callback function for callback call sites. 179 using CallRecord = std::pair<Optional<WeakTrackingVH>, CallGraphNode *>; 180 181 public: 182 using CalledFunctionsVector = std::vector<CallRecord>; 183 184 /// Creates a node for the specified function. CallGraphNode(CallGraph * CG,Function * F)185 inline CallGraphNode(CallGraph *CG, Function *F) : CG(CG), F(F) {} 186 187 CallGraphNode(const CallGraphNode &) = delete; 188 CallGraphNode &operator=(const CallGraphNode &) = delete; 189 ~CallGraphNode()190 ~CallGraphNode() { 191 assert(NumReferences == 0 && "Node deleted while references remain"); 192 } 193 194 using iterator = std::vector<CallRecord>::iterator; 195 using const_iterator = std::vector<CallRecord>::const_iterator; 196 197 /// Returns the function that this call graph node represents. getFunction()198 Function *getFunction() const { return F; } 199 begin()200 inline iterator begin() { return CalledFunctions.begin(); } end()201 inline iterator end() { return CalledFunctions.end(); } begin()202 inline const_iterator begin() const { return CalledFunctions.begin(); } end()203 inline const_iterator end() const { return CalledFunctions.end(); } empty()204 inline bool empty() const { return CalledFunctions.empty(); } size()205 inline unsigned size() const { return (unsigned)CalledFunctions.size(); } 206 207 /// Returns the number of other CallGraphNodes in this CallGraph that 208 /// reference this node in their callee list. getNumReferences()209 unsigned getNumReferences() const { return NumReferences; } 210 211 /// Returns the i'th called function. 212 CallGraphNode *operator[](unsigned i) const { 213 assert(i < CalledFunctions.size() && "Invalid index"); 214 return CalledFunctions[i].second; 215 } 216 217 /// Print out this call graph node. 218 void dump() const; 219 void print(raw_ostream &OS) const; 220 221 //===--------------------------------------------------------------------- 222 // Methods to keep a call graph up to date with a function that has been 223 // modified 224 // 225 226 /// Removes all edges from this CallGraphNode to any functions it 227 /// calls. removeAllCalledFunctions()228 void removeAllCalledFunctions() { 229 while (!CalledFunctions.empty()) { 230 CalledFunctions.back().second->DropRef(); 231 CalledFunctions.pop_back(); 232 } 233 } 234 235 /// Moves all the callee information from N to this node. stealCalledFunctionsFrom(CallGraphNode * N)236 void stealCalledFunctionsFrom(CallGraphNode *N) { 237 assert(CalledFunctions.empty() && 238 "Cannot steal callsite information if I already have some"); 239 std::swap(CalledFunctions, N->CalledFunctions); 240 } 241 242 /// Adds a function to the list of functions called by this one. addCalledFunction(CallBase * Call,CallGraphNode * M)243 void addCalledFunction(CallBase *Call, CallGraphNode *M) { 244 assert(!Call || !Call->getCalledFunction() || 245 !Call->getCalledFunction()->isIntrinsic() || 246 !Intrinsic::isLeaf(Call->getCalledFunction()->getIntrinsicID())); 247 CalledFunctions.emplace_back( 248 Call ? Optional<WeakTrackingVH>(Call) : Optional<WeakTrackingVH>(), M); 249 M->AddRef(); 250 } 251 removeCallEdge(iterator I)252 void removeCallEdge(iterator I) { 253 I->second->DropRef(); 254 *I = CalledFunctions.back(); 255 CalledFunctions.pop_back(); 256 } 257 258 /// Removes the edge in the node for the specified call site. 259 /// 260 /// Note that this method takes linear time, so it should be used sparingly. 261 void removeCallEdgeFor(CallBase &Call); 262 263 /// Removes all call edges from this node to the specified callee 264 /// function. 265 /// 266 /// This takes more time to execute than removeCallEdgeTo, so it should not 267 /// be used unless necessary. 268 void removeAnyCallEdgeTo(CallGraphNode *Callee); 269 270 /// Removes one edge associated with a null callsite from this node to 271 /// the specified callee function. 272 void removeOneAbstractEdgeTo(CallGraphNode *Callee); 273 274 /// Replaces the edge in the node for the specified call site with a 275 /// new one. 276 /// 277 /// Note that this method takes linear time, so it should be used sparingly. 278 void replaceCallEdge(CallBase &Call, CallBase &NewCall, 279 CallGraphNode *NewNode); 280 281 private: 282 friend class CallGraph; 283 284 CallGraph *CG; 285 Function *F; 286 287 std::vector<CallRecord> CalledFunctions; 288 289 /// The number of times that this CallGraphNode occurs in the 290 /// CalledFunctions array of this or other CallGraphNodes. 291 unsigned NumReferences = 0; 292 DropRef()293 void DropRef() { --NumReferences; } AddRef()294 void AddRef() { ++NumReferences; } 295 296 /// A special function that should only be used by the CallGraph class. allReferencesDropped()297 void allReferencesDropped() { NumReferences = 0; } 298 }; 299 300 /// An analysis pass to compute the \c CallGraph for a \c Module. 301 /// 302 /// This class implements the concept of an analysis pass used by the \c 303 /// ModuleAnalysisManager to run an analysis over a module and cache the 304 /// resulting data. 305 class CallGraphAnalysis : public AnalysisInfoMixin<CallGraphAnalysis> { 306 friend AnalysisInfoMixin<CallGraphAnalysis>; 307 308 static AnalysisKey Key; 309 310 public: 311 /// A formulaic type to inform clients of the result type. 312 using Result = CallGraph; 313 314 /// Compute the \c CallGraph for the module \c M. 315 /// 316 /// The real work here is done in the \c CallGraph constructor. run(Module & M,ModuleAnalysisManager &)317 CallGraph run(Module &M, ModuleAnalysisManager &) { return CallGraph(M); } 318 }; 319 320 /// Printer pass for the \c CallGraphAnalysis results. 321 class CallGraphPrinterPass : public PassInfoMixin<CallGraphPrinterPass> { 322 raw_ostream &OS; 323 324 public: CallGraphPrinterPass(raw_ostream & OS)325 explicit CallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} 326 327 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 328 }; 329 330 /// The \c ModulePass which wraps up a \c CallGraph and the logic to 331 /// build it. 332 /// 333 /// This class exposes both the interface to the call graph container and the 334 /// module pass which runs over a module of IR and produces the call graph. The 335 /// call graph interface is entirelly a wrapper around a \c CallGraph object 336 /// which is stored internally for each module. 337 class CallGraphWrapperPass : public ModulePass { 338 std::unique_ptr<CallGraph> G; 339 340 public: 341 static char ID; // Class identification, replacement for typeinfo 342 343 CallGraphWrapperPass(); 344 ~CallGraphWrapperPass() override; 345 346 /// The internal \c CallGraph around which the rest of this interface 347 /// is wrapped. getCallGraph()348 const CallGraph &getCallGraph() const { return *G; } getCallGraph()349 CallGraph &getCallGraph() { return *G; } 350 351 using iterator = CallGraph::iterator; 352 using const_iterator = CallGraph::const_iterator; 353 354 /// Returns the module the call graph corresponds to. getModule()355 Module &getModule() const { return G->getModule(); } 356 begin()357 inline iterator begin() { return G->begin(); } end()358 inline iterator end() { return G->end(); } begin()359 inline const_iterator begin() const { return G->begin(); } end()360 inline const_iterator end() const { return G->end(); } 361 362 /// Returns the call graph node for the provided function. 363 inline const CallGraphNode *operator[](const Function *F) const { 364 return (*G)[F]; 365 } 366 367 /// Returns the call graph node for the provided function. 368 inline CallGraphNode *operator[](const Function *F) { return (*G)[F]; } 369 370 /// Returns the \c CallGraphNode which is used to represent 371 /// undetermined calls into the callgraph. getExternalCallingNode()372 CallGraphNode *getExternalCallingNode() const { 373 return G->getExternalCallingNode(); 374 } 375 getCallsExternalNode()376 CallGraphNode *getCallsExternalNode() const { 377 return G->getCallsExternalNode(); 378 } 379 380 //===--------------------------------------------------------------------- 381 // Functions to keep a call graph up to date with a function that has been 382 // modified. 383 // 384 385 /// Unlink the function from this module, returning it. 386 /// 387 /// Because this removes the function from the module, the call graph node is 388 /// destroyed. This is only valid if the function does not call any other 389 /// functions (ie, there are no edges in it's CGN). The easiest way to do 390 /// this is to dropAllReferences before calling this. removeFunctionFromModule(CallGraphNode * CGN)391 Function *removeFunctionFromModule(CallGraphNode *CGN) { 392 return G->removeFunctionFromModule(CGN); 393 } 394 395 /// Similar to operator[], but this will insert a new CallGraphNode for 396 /// \c F if one does not already exist. getOrInsertFunction(const Function * F)397 CallGraphNode *getOrInsertFunction(const Function *F) { 398 return G->getOrInsertFunction(F); 399 } 400 401 //===--------------------------------------------------------------------- 402 // Implementation of the ModulePass interface needed here. 403 // 404 405 void getAnalysisUsage(AnalysisUsage &AU) const override; 406 bool runOnModule(Module &M) override; 407 void releaseMemory() override; 408 409 void print(raw_ostream &o, const Module *) const override; 410 void dump() const; 411 }; 412 413 //===----------------------------------------------------------------------===// 414 // GraphTraits specializations for call graphs so that they can be treated as 415 // graphs by the generic graph algorithms. 416 // 417 418 // Provide graph traits for traversing call graphs using standard graph 419 // traversals. 420 template <> struct GraphTraits<CallGraphNode *> { 421 using NodeRef = CallGraphNode *; 422 using CGNPairTy = CallGraphNode::CallRecord; 423 424 static NodeRef getEntryNode(CallGraphNode *CGN) { return CGN; } 425 static CallGraphNode *CGNGetValue(CGNPairTy P) { return P.second; } 426 427 using ChildIteratorType = 428 mapped_iterator<CallGraphNode::iterator, decltype(&CGNGetValue)>; 429 430 static ChildIteratorType child_begin(NodeRef N) { 431 return ChildIteratorType(N->begin(), &CGNGetValue); 432 } 433 434 static ChildIteratorType child_end(NodeRef N) { 435 return ChildIteratorType(N->end(), &CGNGetValue); 436 } 437 }; 438 439 template <> struct GraphTraits<const CallGraphNode *> { 440 using NodeRef = const CallGraphNode *; 441 using CGNPairTy = CallGraphNode::CallRecord; 442 using EdgeRef = const CallGraphNode::CallRecord &; 443 444 static NodeRef getEntryNode(const CallGraphNode *CGN) { return CGN; } 445 static const CallGraphNode *CGNGetValue(CGNPairTy P) { return P.second; } 446 447 using ChildIteratorType = 448 mapped_iterator<CallGraphNode::const_iterator, decltype(&CGNGetValue)>; 449 using ChildEdgeIteratorType = CallGraphNode::const_iterator; 450 451 static ChildIteratorType child_begin(NodeRef N) { 452 return ChildIteratorType(N->begin(), &CGNGetValue); 453 } 454 455 static ChildIteratorType child_end(NodeRef N) { 456 return ChildIteratorType(N->end(), &CGNGetValue); 457 } 458 459 static ChildEdgeIteratorType child_edge_begin(NodeRef N) { 460 return N->begin(); 461 } 462 static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); } 463 464 static NodeRef edge_dest(EdgeRef E) { return E.second; } 465 }; 466 467 template <> 468 struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> { 469 using PairTy = 470 std::pair<const Function *const, std::unique_ptr<CallGraphNode>>; 471 472 static NodeRef getEntryNode(CallGraph *CGN) { 473 return CGN->getExternalCallingNode(); // Start at the external node! 474 } 475 476 static CallGraphNode *CGGetValuePtr(const PairTy &P) { 477 return P.second.get(); 478 } 479 480 // nodes_iterator/begin/end - Allow iteration over all nodes in the graph 481 using nodes_iterator = 482 mapped_iterator<CallGraph::iterator, decltype(&CGGetValuePtr)>; 483 484 static nodes_iterator nodes_begin(CallGraph *CG) { 485 return nodes_iterator(CG->begin(), &CGGetValuePtr); 486 } 487 488 static nodes_iterator nodes_end(CallGraph *CG) { 489 return nodes_iterator(CG->end(), &CGGetValuePtr); 490 } 491 }; 492 493 template <> 494 struct GraphTraits<const CallGraph *> : public GraphTraits< 495 const CallGraphNode *> { 496 using PairTy = 497 std::pair<const Function *const, std::unique_ptr<CallGraphNode>>; 498 499 static NodeRef getEntryNode(const CallGraph *CGN) { 500 return CGN->getExternalCallingNode(); // Start at the external node! 501 } 502 503 static const CallGraphNode *CGGetValuePtr(const PairTy &P) { 504 return P.second.get(); 505 } 506 507 // nodes_iterator/begin/end - Allow iteration over all nodes in the graph 508 using nodes_iterator = 509 mapped_iterator<CallGraph::const_iterator, decltype(&CGGetValuePtr)>; 510 511 static nodes_iterator nodes_begin(const CallGraph *CG) { 512 return nodes_iterator(CG->begin(), &CGGetValuePtr); 513 } 514 515 static nodes_iterator nodes_end(const CallGraph *CG) { 516 return nodes_iterator(CG->end(), &CGGetValuePtr); 517 } 518 }; 519 520 } // end namespace llvm 521 522 #endif // LLVM_ANALYSIS_CALLGRAPH_H 523