// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

// This file defines the lifting pass which tries to "lift" Alloc
// cells (new/local variables) into SSA registers, replacing loads
// with the dominating stored value, eliminating loads and stores, and
// inserting φ-nodes as needed.
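//
// For example (an illustrative sketch, not exact ssa printer output),
// given
//
//	func f(b bool) int {
//		x := 0 // an Alloc cell
//		if b {
//			x = 1
//		}
//		return x
//	}
//
// the builder emits an Alloc for x plus stores and loads of that cell;
// after lifting, the Alloc, loads and stores are gone, and the return
// block instead uses a φ-node that merges the two reaching definitions
// (0 and 1) of x.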

// Cited papers and resources:
//
// Ron Cytron et al. 1991. Efficiently computing SSA form...
// http://doi.acm.org/10.1145/115372.115320
//
// Cooper, Harvey, Kennedy.  2001.  A Simple, Fast Dominance Algorithm.
// Software Practice and Experience 2001, 4:1-10.
// http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
//
// Daniel Berlin, llvmdev mailing list, 2012.
// http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
// (Be sure to expand the whole thread.)

// TODO(adonovan): opt: there are many optimizations worth evaluating, and
// the conventional wisdom for SSA construction is that a simple
// algorithm well engineered often beats those of better asymptotic
// complexity on all but the most egregious inputs.
//
// Danny Berlin suggests that the Cooper et al. algorithm for
// computing the dominance frontier is superior to Cytron et al.
// Furthermore he recommends that rather than computing the DF for the
// whole function then renaming all alloc cells, it may be cheaper to
// compute the DF for each alloc cell separately and throw it away.
//
// Consider exploiting liveness information to avoid creating dead
// φ-nodes which we then immediately remove.
//
// Also see many other "TODO: opt" suggestions in the code.

import (
	"fmt"
	"go/token"
	"go/types"
	"math/big"
	"os"
)

// If true, show diagnostic information at each step of lifting.
// Very verbose.
const debugLifting = false

// domFrontier maps each block to the set of blocks in its dominance
// frontier.  The outer slice is conceptually a map keyed by
// Block.Index.  The inner slice is conceptually a set, possibly
// containing duplicates.
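//
// (Recall that the dominance frontier DF(u) of a block u is the set
// of blocks v such that u dominates a predecessor of v but does not
// strictly dominate v itself; these are exactly the places where
// φ-nodes for definitions made in u may be required.)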
//
// TODO(adonovan): opt: measure impact of dups; consider a packed bit
// representation, e.g. big.Int, and bitwise parallel operations for
// the union step in the Children loop.
//
// domFrontier's methods mutate the slice's elements but not its
// length, so their receivers needn't be pointers.
//
type domFrontier [][]*BasicBlock

func (df domFrontier) add(u, v *BasicBlock) {
	p := &df[u.Index]
	*p = append(*p, v)
}

// build builds the dominance frontier df for the dominator (sub)tree
// rooted at u, using the Cytron et al. algorithm.
//
// TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
// by pruning the entire IDF computation, rather than merely pruning
// the DF -> IDF step.
func (df domFrontier) build(u *BasicBlock) {
	// Encounter each node u in postorder of dom tree.
	for _, child := range u.dom.children {
		df.build(child)
	}
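	// DF_local: CFG successors of u that u does not immediately
	// dominate belong to DF(u).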
	for _, vb := range u.Succs {
		if v := vb.dom; v.idom != u {
			df.add(u, vb)
		}
	}
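	// DF_up: members of a dominator-tree child's frontier that u
	// does not immediately dominate also belong to DF(u).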
	for _, w := range u.dom.children {
		for _, vb := range df[w.Index] {
			// TODO(adonovan): opt: use word-parallel bitwise union.
			if v := vb.dom; v.idom != u {
				df.add(u, vb)
			}
		}
	}
}

func buildDomFrontier(fn *Function) domFrontier {
	df := make(domFrontier, len(fn.Blocks))
	df.build(fn.Blocks[0])
	if fn.Recover != nil {
		df.build(fn.Recover)
	}
	return df
}

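// removeInstr removes instr from refs, compacting the slice in place,
// nilling out the vacated trailing slots to aid the garbage collector,
// and returning the shortened slice.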
func removeInstr(refs []Instruction, instr Instruction) []Instruction {
	i := 0
	for _, ref := range refs {
		if ref == instr {
			continue
		}
		refs[i] = ref
		i++
	}
	for j := i; j != len(refs); j++ {
		refs[j] = nil // aid GC
	}
	return refs[:i]
}

// lift replaces local and new Allocs accessed only with
// load/store by SSA registers, inserting φ-nodes where necessary.
// The result is a program in classical pruned SSA form.
//
// Preconditions:
// - fn has no dead blocks (blockopt has run).
// - Def/use info (Operands and Referrers) is up-to-date.
// - The dominator tree is up-to-date.
//
func lift(fn *Function) {
	// TODO(adonovan): opt: lots of little optimizations may be
	// worthwhile here, especially if they cause us to avoid
	// buildDomFrontier.  For example:
	//
	// - Alloc never loaded?  Eliminate.
	// - Alloc never stored?  Replace all loads with a zero constant.
	// - Alloc stored once?  Replace loads with dominating store;
	//   don't forget that an Alloc is itself an effective store
	//   of zero.
	// - Alloc used only within a single block?
	//   Use degenerate algorithm avoiding φ-nodes.
	// - Consider synergy with scalar replacement of aggregates (SRA).
	//   e.g. *(&x.f) where x is an Alloc.
	//   Perhaps we'd get better results if we generated this as x.f
	//   i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
	//   Unclear.
	//
	// But we will start with the simplest correct code.
	df := buildDomFrontier(fn)

	if debugLifting {
		title := false
		for i, blocks := range df {
			if blocks != nil {
				if !title {
					fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
					title = true
				}
				fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks)
			}
		}
	}

	newPhis := make(newPhiMap)

	// During this pass we will replace some BasicBlock.Instrs
	// (allocs, loads and stores) with nil, keeping a count in
	// BasicBlock.gaps.  At the end we will reset Instrs to the
	// concatenation of all non-dead newPhis and non-nil Instrs
	// for the block, reusing the original array if space permits.

	// While we're here, we also eliminate 'rundefers'
	// instructions in functions that contain no 'defer'
	// instructions.
	usesDefer := false

	// A counter used to generate ~unique ids for Phi nodes, as an
	// aid to debugging.  We use large numbers to make them highly
	// visible.  All nodes are renumbered later.
	fresh := 1000

	// Determine which allocs we can lift and number them densely.
	// The renaming phase uses this numbering for compact maps.
	numAllocs := 0
	for _, b := range fn.Blocks {
		b.gaps = 0
		b.rundefers = 0
		for _, instr := range b.Instrs {
			switch instr := instr.(type) {
			case *Alloc:
				index := -1
				if liftAlloc(df, instr, newPhis, &fresh) {
					index = numAllocs
					numAllocs++
				}
				instr.index = index
			case *Defer:
				usesDefer = true
			case *RunDefers:
				b.rundefers++
			}
		}
	}

	// renaming maps an alloc (keyed by index) to its replacement
	// value.  Initially the renaming contains nil, signifying the
	// zero constant of the appropriate type; we construct the
	// Const lazily at most once on each path through the domtree.
	// TODO(adonovan): opt: cache per-function not per subtree.
	renaming := make([]Value, numAllocs)

	// Renaming.
	rename(fn.Blocks[0], renaming, newPhis)

	// Eliminate dead φ-nodes.
	removeDeadPhis(fn.Blocks, newPhis)

	// Prepend remaining live φ-nodes to each block.
	for _, b := range fn.Blocks {
		nps := newPhis[b]
		j := len(nps)

		rundefersToKill := b.rundefers
		if usesDefer {
			rundefersToKill = 0
		}

		if j+b.gaps+rundefersToKill == 0 {
			continue // fast path: no new phis or gaps
		}

		// Compact nps + non-nil Instrs into a new slice.
		// TODO(adonovan): opt: compact in situ (rightwards)
		// if Instrs has sufficient space or slack.
		dst := make([]Instruction, len(b.Instrs)+j-b.gaps-rundefersToKill)
		for i, np := range nps {
			dst[i] = np.phi
		}
		for _, instr := range b.Instrs {
			if instr == nil {
				continue
			}
			if !usesDefer {
				if _, ok := instr.(*RunDefers); ok {
					continue
				}
			}
			dst[j] = instr
			j++
		}
		b.Instrs = dst
	}

	// Remove any fn.Locals that were lifted.
	j := 0
	for _, l := range fn.Locals {
		if l.index < 0 {
			fn.Locals[j] = l
			j++
		}
	}
	// Nil out fn.Locals[j:] to aid GC.
	for i := j; i < len(fn.Locals); i++ {
		fn.Locals[i] = nil
	}
	fn.Locals = fn.Locals[:j]
}

// removeDeadPhis removes φ-nodes not transitively needed by a
// non-Phi, non-DebugRef instruction.
func removeDeadPhis(blocks []*BasicBlock, newPhis newPhiMap) {
	// First pass: find the set of "live" φ-nodes: those reachable
	// from some non-Phi instruction.
	//
	// We compute reachability in reverse, starting from each φ,
	// rather than forwards, starting from each live non-Phi
	// instruction, because this way visits much less of the
	// Value graph.
	livePhis := make(map[*Phi]bool)
	for _, npList := range newPhis {
		for _, np := range npList {
			phi := np.phi
			if !livePhis[phi] && phiHasDirectReferrer(phi) {
				markLivePhi(livePhis, phi)
			}
		}
	}

	// Existing φ-nodes due to && and || operators
	// are all considered live (see Go issue 19622).
	for _, b := range blocks {
		for _, phi := range b.phis() {
			markLivePhi(livePhis, phi.(*Phi))
		}
	}

	// Second pass: eliminate unused phis from newPhis.
	for block, npList := range newPhis {
		j := 0
		for _, np := range npList {
			if livePhis[np.phi] {
				npList[j] = np
				j++
			} else {
				// discard it, first removing it from referrers
				for _, val := range np.phi.Edges {
					if refs := val.Referrers(); refs != nil {
						*refs = removeInstr(*refs, np.phi)
					}
				}
				np.phi.block = nil
			}
		}
		newPhis[block] = npList[:j]
	}
}

// markLivePhi marks phi, and all φ-nodes transitively reachable via
// its Operands, live.
func markLivePhi(livePhis map[*Phi]bool, phi *Phi) {
	livePhis[phi] = true
	for _, rand := range phi.Operands(nil) {
		if q, ok := (*rand).(*Phi); ok {
			if !livePhis[q] {
				markLivePhi(livePhis, q)
			}
		}
	}
}

// phiHasDirectReferrer reports whether phi is directly referred to by
// a non-Phi instruction.  Such instructions are the
// roots of the liveness traversal.
func phiHasDirectReferrer(phi *Phi) bool {
	for _, instr := range *phi.Referrers() {
		if _, ok := instr.(*Phi); !ok {
			return true
		}
	}
	return false
}

type blockSet struct{ big.Int } // (inherit methods from Int)

// add adds b to the set and returns true if the set changed.
func (s *blockSet) add(b *BasicBlock) bool {
	i := b.Index
	if s.Bit(i) != 0 {
		return false
	}
	s.SetBit(&s.Int, i, 1)
	return true
}

// take removes an arbitrary element from a set s and
// returns its index, or returns -1 if empty.
func (s *blockSet) take() int {
	l := s.BitLen()
	for i := 0; i < l; i++ {
		if s.Bit(i) == 1 {
			s.SetBit(&s.Int, i, 0)
			return i
		}
	}
	return -1
}

// newPhi is a pair of a newly introduced φ-node and the lifted Alloc
// it replaces.
type newPhi struct {
	phi   *Phi
	alloc *Alloc
}

// newPhiMap records for each basic block, the set of newPhis that
// must be prepended to the block.
type newPhiMap map[*BasicBlock][]newPhi

// liftAlloc determines whether alloc can be lifted into registers,
// and if so, it populates newPhis with all the φ-nodes it may require
// and returns true.
//
// fresh is a source of fresh ids for phi nodes.
//
func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap, fresh *int) bool {
	// Don't lift aggregates into registers, because we don't have
	// a way to express their zero-constants.
	switch deref(alloc.Type()).Underlying().(type) {
	case *types.Array, *types.Struct:
		return false
	}

	// Don't lift named return values in functions that defer
	// calls that may recover from panic.
	if fn := alloc.Parent(); fn.Recover != nil {
		for _, nr := range fn.namedResults {
			if nr == alloc {
				return false
			}
		}
	}

	// Compute defblocks, the set of blocks containing a
	// definition of the alloc cell.
	var defblocks blockSet
	for _, instr := range *alloc.Referrers() {
		// Bail out if we discover the alloc is not liftable;
		// the only operations permitted to use the alloc are
		// loads/stores into the cell, and DebugRef.
		switch instr := instr.(type) {
		case *Store:
			if instr.Val == alloc {
				return false // address used as value
			}
			if instr.Addr != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
			defblocks.add(instr.Block())
		case *UnOp:
			if instr.Op != token.MUL {
				return false // not a load
			}
			if instr.X != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
		case *DebugRef:
			// ok
		default:
			return false // some other instruction
		}
	}
	// The Alloc itself counts as a (zero) definition of the cell.
	defblocks.add(alloc.Block())

	if debugLifting {
		fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
	}

	fn := alloc.Parent()

	// Φ-insertion.
	//
	// What follows is the body of the main loop of the insert-φ
	// function described by Cytron et al, but instead of using
	// counter tricks, we just reset the 'hasAlready' and 'work'
	// sets each iteration.  These are bitmaps so it's pretty cheap.
	//
	// TODO(adonovan): opt: recycle slice storage for W,
	// hasAlready, defBlocks across liftAlloc calls.
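	//
	// The loop below in effect computes IDF(defblocks), the iterated
	// dominance frontier of the set of defining blocks, which is
	// precisely where φ-nodes for this cell may be required.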
	var hasAlready blockSet

	// Initialize W and work to defblocks.
	var work blockSet = defblocks // blocks seen
	var W blockSet                // blocks to do
	W.Set(&defblocks.Int)

	// Traverse iterated dominance frontier, inserting φ-nodes.
	for i := W.take(); i != -1; i = W.take() {
		u := fn.Blocks[i]
		for _, v := range df[u.Index] {
			if hasAlready.add(v) {
				// Create φ-node.
				// It will be prepended to v.Instrs later, if needed.
				phi := &Phi{
					Edges:   make([]Value, len(v.Preds)),
					Comment: alloc.Comment,
				}
				// This is merely a debugging aid:
				phi.setNum(*fresh)
				*fresh++

				phi.pos = alloc.Pos()
				phi.setType(deref(alloc.Type()))
				phi.block = v
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v)
				}
				newPhis[v] = append(newPhis[v], newPhi{phi, alloc})

				if work.add(v) {
					W.add(v)
				}
			}
		}
	}

	return true
}

// replaceAll replaces all intraprocedural uses of x with y,
// updating x.Referrers and y.Referrers.
// Precondition: x.Referrers() != nil, i.e. x must be local to some function.
//
func replaceAll(x, y Value) {
	var rands []*Value
	pxrefs := x.Referrers()
	pyrefs := y.Referrers()
	for _, instr := range *pxrefs {
		rands = instr.Operands(rands[:0]) // recycle storage
		for _, rand := range rands {
			if *rand != nil {
				if *rand == x {
					*rand = y
				}
			}
		}
		if pyrefs != nil {
			*pyrefs = append(*pyrefs, instr) // dups ok
		}
	}
	*pxrefs = nil // x is now unreferenced
}

// renamed returns the value to which alloc is being renamed,
// constructing it lazily if it's the implicit zero initialization.
//
func renamed(renaming []Value, alloc *Alloc) Value {
	v := renaming[alloc.index]
	if v == nil {
		v = zeroConst(deref(alloc.Type()))
		renaming[alloc.index] = v
	}
	return v
}

// rename implements the (Cytron et al) SSA renaming algorithm, a
// preorder traversal of the dominator tree replacing all loads of
// Alloc cells with the value stored to that cell by the dominating
// store instruction.  For lifting, we need only consider loads,
// stores and φ-nodes.
//
// renaming is a map from *Alloc (keyed by index number) to its
// dominating stored value; newPhis[x] is the set of new φ-nodes to be
// prepended to block x.
//
func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) {
	// Each φ-node becomes the new name for its associated Alloc.
	for _, np := range newPhis[u] {
		phi := np.phi
		alloc := np.alloc
		renaming[alloc.index] = phi
	}

	// Rename loads and stores of allocs.
	for i, instr := range u.Instrs {
		switch instr := instr.(type) {
		case *Alloc:
			if instr.index >= 0 { // store of zero to Alloc cell
				// Replace dominated loads by the zero value.
				renaming[instr.index] = nil
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
				}
				// Delete the Alloc.
				u.Instrs[i] = nil
				u.gaps++
			}

		case *Store:
			if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
				// Replace dominated loads by the stored value.
				renaming[alloc.index] = instr.Val
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
						instr, instr.Val.Name())
				}
				// Remove the store from the referrer list of the stored value.
				if refs := instr.Val.Referrers(); refs != nil {
					*refs = removeInstr(*refs, instr)
				}
				// Delete the Store.
				u.Instrs[i] = nil
				u.gaps++
			}

		case *UnOp:
			if instr.Op == token.MUL {
				if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
					newval := renamed(renaming, alloc)
					if debugLifting {
						fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
							instr.Name(), instr, newval.Name())
					}
					// Replace all references to
					// the loaded value by the
					// dominating stored value.
					replaceAll(instr, newval)
					// Delete the Load.
					u.Instrs[i] = nil
					u.gaps++
				}
			}

		case *DebugRef:
			if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell
				if instr.IsAddr {
					instr.X = renamed(renaming, alloc)
					instr.IsAddr = false

					// Add DebugRef to instr.X's referrers.
					if refs := instr.X.Referrers(); refs != nil {
						*refs = append(*refs, instr)
					}
				} else {
					// A source expression denotes the address
					// of an Alloc that was optimized away.
					instr.X = nil

					// Delete the DebugRef.
					u.Instrs[i] = nil
					u.gaps++
				}
			}
		}
	}

	// For each φ-node in a CFG successor, rename the edge.
	for _, v := range u.Succs {
		phis := newPhis[v]
		if len(phis) == 0 {
			continue
		}
		i := v.predIndex(u)
		for _, np := range phis {
			phi := np.phi
			alloc := np.alloc
			newval := renamed(renaming, alloc)
			if debugLifting {
				fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
					phi.Name(), u, v, i, alloc.Name(), newval.Name())
			}
			phi.Edges[i] = newval
			if prefs := newval.Referrers(); prefs != nil {
				*prefs = append(*prefs, phi)
			}
		}
	}

	// Continue depth-first recursion over domtree, pushing a
	// fresh copy of the renaming map for each subtree.
	for i, v := range u.dom.children {
		r := renaming
		if i < len(u.dom.children)-1 {
			// On all but the final iteration, we must make
			// a copy to avoid destructive update.
			r = make([]Value, len(renaming))
			copy(r, renaming)
		}
		rename(v, r, newPhis)
	}
}