1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This package provides Rapid Type Analysis (RTA) for Go, a fast
6// algorithm for call graph construction and discovery of reachable code
7// (and hence dead code) and runtime types.  The algorithm was first
8// described in:
9//
10// David F. Bacon and Peter F. Sweeney. 1996.
11// Fast static analysis of C++ virtual function calls. (OOPSLA '96)
12// http://doi.acm.org/10.1145/236337.236371
13//
14// The algorithm uses dynamic programming to tabulate the cross-product
15// of the set of known "address taken" functions with the set of known
16// dynamic calls of the same type.  As each new address-taken function
17// is discovered, call graph edges are added from each known callsite,
18// and as each new call site is discovered, call graph edges are added
19// from it to each known address-taken function.
20//
21// A similar approach is used for dynamic calls via interfaces: it
22// tabulates the cross-product of the set of known "runtime types",
23// i.e. types that may appear in an interface value, or be derived from
24// one via reflection, with the set of known "invoke"-mode dynamic
25// calls.  As each new "runtime type" is discovered, call edges are
26// added from the known call sites, and as each new call site is
27// discovered, call graph edges are added to each compatible
28// method.
29//
30// In addition, we must consider all exported methods of any runtime type
31// as reachable, since they may be called via reflection.
32//
33// Each time a newly added call edge causes a new function to become
34// reachable, the code of that function is analyzed for more call sites,
35// address-taken functions, and runtime types.  The process continues
36// until a fixed point is achieved.
37//
38// The resulting call graph is less precise than one produced by pointer
39// analysis, but the algorithm is much faster.  For example, running the
40// cmd/callgraph tool on its own source takes ~2.1s for RTA and ~5.4s
41// for points-to analysis.
42//
43package rta // import "golang.org/x/tools/go/callgraph/rta"
44
45// TODO(adonovan): test it by connecting it to the interpreter and
46// replacing all "unreachable" functions by a special intrinsic, and
47// ensure that that intrinsic is never called.
48
49import (
50	"fmt"
51	"go/types"
52
53	"golang.org/x/tools/go/callgraph"
54	"golang.org/x/tools/go/ssa"
55	"golang.org/x/tools/go/types/typeutil"
56)
57
58// A Result holds the results of Rapid Type Analysis, which includes the
59// set of reachable functions/methods, runtime types, and the call graph.
60//
61type Result struct {
62	// CallGraph is the discovered callgraph.
63	// It does not include edges for calls made via reflection.
64	CallGraph *callgraph.Graph
65
66	// Reachable contains the set of reachable functions and methods.
67	// This includes exported methods of runtime types, since
68	// they may be accessed via reflection.
69	// The value indicates whether the function is address-taken.
70	//
71	// (We wrap the bool in a struct to avoid inadvertent use of
72	// "if Reachable[f] {" to test for set membership.)
73	Reachable map[*ssa.Function]struct{ AddrTaken bool }
74
75	// RuntimeTypes contains the set of types that are needed at
76	// runtime, for interfaces or reflection.
77	//
78	// The value indicates whether the type is inaccessible to reflection.
79	// Consider:
80	// 	type A struct{B}
81	// 	fmt.Println(new(A))
82	// Types *A, A and B are accessible to reflection, but the unnamed
83	// type struct{B} is not.
84	RuntimeTypes typeutil.Map
85}
86
87// Working state of the RTA algorithm.
88type rta struct {
89	result *Result
90
91	prog *ssa.Program
92
93	worklist []*ssa.Function // list of functions to visit
94
95	// addrTakenFuncsBySig contains all address-taken *Functions, grouped by signature.
96	// Keys are *types.Signature, values are map[*ssa.Function]bool sets.
97	addrTakenFuncsBySig typeutil.Map
98
99	// dynCallSites contains all dynamic "call"-mode call sites, grouped by signature.
100	// Keys are *types.Signature, values are unordered []ssa.CallInstruction.
101	dynCallSites typeutil.Map
102
103	// invokeSites contains all "invoke"-mode call sites, grouped by interface.
104	// Keys are *types.Interface (never *types.Named),
105	// Values are unordered []ssa.CallInstruction sets.
106	invokeSites typeutil.Map
107
108	// The following two maps together define the subset of the
109	// m:n "implements" relation needed by the algorithm.
110
111	// concreteTypes maps each concrete type to the set of interfaces that it implements.
112	// Keys are types.Type, values are unordered []*types.Interface.
113	// Only concrete types used as MakeInterface operands are included.
114	concreteTypes typeutil.Map
115
116	// interfaceTypes maps each interface type to
117	// the set of concrete types that implement it.
118	// Keys are *types.Interface, values are unordered []types.Type.
119	// Only interfaces used in "invoke"-mode CallInstructions are included.
120	interfaceTypes typeutil.Map
121}
122
123// addReachable marks a function as potentially callable at run-time,
124// and ensures that it gets processed.
125func (r *rta) addReachable(f *ssa.Function, addrTaken bool) {
126	reachable := r.result.Reachable
127	n := len(reachable)
128	v := reachable[f]
129	if addrTaken {
130		v.AddrTaken = true
131	}
132	reachable[f] = v
133	if len(reachable) > n {
134		// First time seeing f.  Add it to the worklist.
135		r.worklist = append(r.worklist, f)
136	}
137}
138
139// addEdge adds the specified call graph edge, and marks it reachable.
140// addrTaken indicates whether to mark the callee as "address-taken".
141func (r *rta) addEdge(site ssa.CallInstruction, callee *ssa.Function, addrTaken bool) {
142	r.addReachable(callee, addrTaken)
143
144	if g := r.result.CallGraph; g != nil {
145		if site.Parent() == nil {
146			panic(site)
147		}
148		from := g.CreateNode(site.Parent())
149		to := g.CreateNode(callee)
150		callgraph.AddEdge(from, site, to)
151	}
152}
153
154// ---------- addrTakenFuncs × dynCallSites ----------
155
156// visitAddrTakenFunc is called each time we encounter an address-taken function f.
157func (r *rta) visitAddrTakenFunc(f *ssa.Function) {
158	// Create two-level map (Signature -> Function -> bool).
159	S := f.Signature
160	funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool)
161	if funcs == nil {
162		funcs = make(map[*ssa.Function]bool)
163		r.addrTakenFuncsBySig.Set(S, funcs)
164	}
165	if !funcs[f] {
166		// First time seeing f.
167		funcs[f] = true
168
169		// If we've seen any dyncalls of this type, mark it reachable,
170		// and add call graph edges.
171		sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction)
172		for _, site := range sites {
173			r.addEdge(site, f, true)
174		}
175	}
176}
177
178// visitDynCall is called each time we encounter a dynamic "call"-mode call.
179func (r *rta) visitDynCall(site ssa.CallInstruction) {
180	S := site.Common().Signature()
181
182	// Record the call site.
183	sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction)
184	r.dynCallSites.Set(S, append(sites, site))
185
186	// For each function of signature S that we know is address-taken,
187	// add an edge and mark it reachable.
188	funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool)
189	for g := range funcs {
190		r.addEdge(site, g, true)
191	}
192}
193
194// ---------- concrete types × invoke sites ----------
195
196// addInvokeEdge is called for each new pair (site, C) in the matrix.
197func (r *rta) addInvokeEdge(site ssa.CallInstruction, C types.Type) {
198	// Ascertain the concrete method of C to be called.
199	imethod := site.Common().Method
200	cmethod := r.prog.MethodValue(r.prog.MethodSets.MethodSet(C).Lookup(imethod.Pkg(), imethod.Name()))
201	r.addEdge(site, cmethod, true)
202}
203
204// visitInvoke is called each time the algorithm encounters an "invoke"-mode call.
205func (r *rta) visitInvoke(site ssa.CallInstruction) {
206	I := site.Common().Value.Type().Underlying().(*types.Interface)
207
208	// Record the invoke site.
209	sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction)
210	r.invokeSites.Set(I, append(sites, site))
211
212	// Add callgraph edge for each existing
213	// address-taken concrete type implementing I.
214	for _, C := range r.implementations(I) {
215		r.addInvokeEdge(site, C)
216	}
217}
218
219// ---------- main algorithm ----------
220
221// visitFunc processes function f.
222func (r *rta) visitFunc(f *ssa.Function) {
223	var space [32]*ssa.Value // preallocate space for common case
224
225	for _, b := range f.Blocks {
226		for _, instr := range b.Instrs {
227			rands := instr.Operands(space[:0])
228
229			switch instr := instr.(type) {
230			case ssa.CallInstruction:
231				call := instr.Common()
232				if call.IsInvoke() {
233					r.visitInvoke(instr)
234				} else if g := call.StaticCallee(); g != nil {
235					r.addEdge(instr, g, false)
236				} else if _, ok := call.Value.(*ssa.Builtin); !ok {
237					r.visitDynCall(instr)
238				}
239
240				// Ignore the call-position operand when
241				// looking for address-taken Functions.
242				// Hack: assume this is rands[0].
243				rands = rands[1:]
244
245			case *ssa.MakeInterface:
246				r.addRuntimeType(instr.X.Type(), false)
247			}
248
249			// Process all address-taken functions.
250			for _, op := range rands {
251				if g, ok := (*op).(*ssa.Function); ok {
252					r.visitAddrTakenFunc(g)
253				}
254			}
255		}
256	}
257}
258
259// Analyze performs Rapid Type Analysis, starting at the specified root
260// functions.  It returns nil if no roots were specified.
261//
262// If buildCallGraph is true, Result.CallGraph will contain a call
263// graph; otherwise, only the other fields (reachable functions) are
264// populated.
265//
266func Analyze(roots []*ssa.Function, buildCallGraph bool) *Result {
267	if len(roots) == 0 {
268		return nil
269	}
270
271	r := &rta{
272		result: &Result{Reachable: make(map[*ssa.Function]struct{ AddrTaken bool })},
273		prog:   roots[0].Prog,
274	}
275
276	if buildCallGraph {
277		// TODO(adonovan): change callgraph API to eliminate the
278		// notion of a distinguished root node.  Some callgraphs
279		// have many roots, or none.
280		r.result.CallGraph = callgraph.New(roots[0])
281	}
282
283	hasher := typeutil.MakeHasher()
284	r.result.RuntimeTypes.SetHasher(hasher)
285	r.addrTakenFuncsBySig.SetHasher(hasher)
286	r.dynCallSites.SetHasher(hasher)
287	r.invokeSites.SetHasher(hasher)
288	r.concreteTypes.SetHasher(hasher)
289	r.interfaceTypes.SetHasher(hasher)
290
291	// Visit functions, processing their instructions, and adding
292	// new functions to the worklist, until a fixed point is
293	// reached.
294	var shadow []*ssa.Function // for efficiency, we double-buffer the worklist
295	r.worklist = append(r.worklist, roots...)
296	for len(r.worklist) > 0 {
297		shadow, r.worklist = r.worklist, shadow[:0]
298		for _, f := range shadow {
299			r.visitFunc(f)
300		}
301	}
302	return r.result
303}
304
305// interfaces(C) returns all currently known interfaces implemented by C.
306func (r *rta) interfaces(C types.Type) []*types.Interface {
307	// Ascertain set of interfaces C implements
308	// and update 'implements' relation.
309	var ifaces []*types.Interface
310	r.interfaceTypes.Iterate(func(I types.Type, concs interface{}) {
311		if I := I.(*types.Interface); types.Implements(C, I) {
312			concs, _ := concs.([]types.Type)
313			r.interfaceTypes.Set(I, append(concs, C))
314			ifaces = append(ifaces, I)
315		}
316	})
317	r.concreteTypes.Set(C, ifaces)
318	return ifaces
319}
320
321// implementations(I) returns all currently known concrete types that implement I.
322func (r *rta) implementations(I *types.Interface) []types.Type {
323	var concs []types.Type
324	if v := r.interfaceTypes.At(I); v != nil {
325		concs = v.([]types.Type)
326	} else {
327		// First time seeing this interface.
328		// Update the 'implements' relation.
329		r.concreteTypes.Iterate(func(C types.Type, ifaces interface{}) {
330			if types.Implements(C, I) {
331				ifaces, _ := ifaces.([]*types.Interface)
332				r.concreteTypes.Set(C, append(ifaces, I))
333				concs = append(concs, C)
334			}
335		})
336		r.interfaceTypes.Set(I, concs)
337	}
338	return concs
339}
340
341// addRuntimeType is called for each concrete type that can be the
342// dynamic type of some interface or reflect.Value.
343// Adapted from needMethods in go/ssa/builder.go
344//
345func (r *rta) addRuntimeType(T types.Type, skip bool) {
346	if prev, ok := r.result.RuntimeTypes.At(T).(bool); ok {
347		if skip && !prev {
348			r.result.RuntimeTypes.Set(T, skip)
349		}
350		return
351	}
352	r.result.RuntimeTypes.Set(T, skip)
353
354	mset := r.prog.MethodSets.MethodSet(T)
355
356	if _, ok := T.Underlying().(*types.Interface); !ok {
357		// T is a new concrete type.
358		for i, n := 0, mset.Len(); i < n; i++ {
359			sel := mset.At(i)
360			m := sel.Obj()
361
362			if m.Exported() {
363				// Exported methods are always potentially callable via reflection.
364				r.addReachable(r.prog.MethodValue(sel), true)
365			}
366		}
367
368		// Add callgraph edge for each existing dynamic
369		// "invoke"-mode call via that interface.
370		for _, I := range r.interfaces(T) {
371			sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction)
372			for _, site := range sites {
373				r.addInvokeEdge(site, T)
374			}
375		}
376	}
377
378	// Precondition: T is not a method signature (*Signature with Recv()!=nil).
379	// Recursive case: skip => don't call makeMethods(T).
380	// Each package maintains its own set of types it has visited.
381
382	var n *types.Named
383	switch T := T.(type) {
384	case *types.Named:
385		n = T
386	case *types.Pointer:
387		n, _ = T.Elem().(*types.Named)
388	}
389	if n != nil {
390		owner := n.Obj().Pkg()
391		if owner == nil {
392			return // built-in error type
393		}
394	}
395
396	// Recursion over signatures of each exported method.
397	for i := 0; i < mset.Len(); i++ {
398		if mset.At(i).Obj().Exported() {
399			sig := mset.At(i).Type().(*types.Signature)
400			r.addRuntimeType(sig.Params(), true)  // skip the Tuple itself
401			r.addRuntimeType(sig.Results(), true) // skip the Tuple itself
402		}
403	}
404
405	switch t := T.(type) {
406	case *types.Basic:
407		// nop
408
409	case *types.Interface:
410		// nop---handled by recursion over method set.
411
412	case *types.Pointer:
413		r.addRuntimeType(t.Elem(), false)
414
415	case *types.Slice:
416		r.addRuntimeType(t.Elem(), false)
417
418	case *types.Chan:
419		r.addRuntimeType(t.Elem(), false)
420
421	case *types.Map:
422		r.addRuntimeType(t.Key(), false)
423		r.addRuntimeType(t.Elem(), false)
424
425	case *types.Signature:
426		if t.Recv() != nil {
427			panic(fmt.Sprintf("Signature %s has Recv %s", t, t.Recv()))
428		}
429		r.addRuntimeType(t.Params(), true)  // skip the Tuple itself
430		r.addRuntimeType(t.Results(), true) // skip the Tuple itself
431
432	case *types.Named:
433		// A pointer-to-named type can be derived from a named
434		// type via reflection.  It may have methods too.
435		r.addRuntimeType(types.NewPointer(T), false)
436
437		// Consider 'type T struct{S}' where S has methods.
438		// Reflection provides no way to get from T to struct{S},
439		// only to S, so the method set of struct{S} is unwanted,
440		// so set 'skip' flag during recursion.
441		r.addRuntimeType(t.Underlying(), true)
442
443	case *types.Array:
444		r.addRuntimeType(t.Elem(), false)
445
446	case *types.Struct:
447		for i, n := 0, t.NumFields(); i < n; i++ {
448			r.addRuntimeType(t.Field(i).Type(), false)
449		}
450
451	case *types.Tuple:
452		for i, n := 0, t.Len(); i < n; i++ {
453			r.addRuntimeType(t.At(i).Type(), false)
454		}
455
456	default:
457		panic(T)
458	}
459}
460