1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package ssa
6
7import "cmd/internal/src"
8
9// branchelim tries to eliminate branches by
10// generating CondSelect instructions.
11//
12// Search for basic blocks that look like
13//
14// bb0            bb0
15//  | \          /   \
16//  | bb1  or  bb1   bb2    <- trivial if/else blocks
17//  | /          \   /
18// bb2            bb3
19//
20// where the intermediate blocks are mostly empty (with no side-effects);
21// rewrite Phis in the postdominator as CondSelects.
22func branchelim(f *Func) {
23	// FIXME: add support for lowering CondSelects on more architectures
24	switch f.Config.arch {
25	case "arm64", "amd64", "wasm":
26		// implemented
27	default:
28		return
29	}
30
31	// Find all the values used in computing the address of any load.
32	// Typically these values have operations like AddPtr, Lsh64x64, etc.
33	loadAddr := f.newSparseSet(f.NumValues())
34	defer f.retSparseSet(loadAddr)
35	for _, b := range f.Blocks {
36		for _, v := range b.Values {
37			switch v.Op {
38			case OpLoad, OpAtomicLoad8, OpAtomicLoad32, OpAtomicLoad64, OpAtomicLoadPtr, OpAtomicLoadAcq32:
39				loadAddr.add(v.Args[0].ID)
40			case OpMove:
41				loadAddr.add(v.Args[1].ID)
42			}
43		}
44	}
45	po := f.postorder()
46	for {
47		n := loadAddr.size()
48		for _, b := range po {
49			for i := len(b.Values) - 1; i >= 0; i-- {
50				v := b.Values[i]
51				if !loadAddr.contains(v.ID) {
52					continue
53				}
54				for _, a := range v.Args {
55					if a.Type.IsInteger() || a.Type.IsPtr() || a.Type.IsUnsafePtr() {
56						loadAddr.add(a.ID)
57					}
58				}
59			}
60		}
61		if loadAddr.size() == n {
62			break
63		}
64	}
65
66	change := true
67	for change {
68		change = false
69		for _, b := range f.Blocks {
70			change = elimIf(f, loadAddr, b) || elimIfElse(f, loadAddr, b) || change
71		}
72	}
73}
74
75func canCondSelect(v *Value, arch string, loadAddr *sparseSet) bool {
76	if loadAddr.contains(v.ID) {
77		// The result of the soon-to-be conditional move is used to compute a load address.
78		// We want to avoid generating a conditional move in this case
79		// because the load address would now be data-dependent on the condition.
80		// Previously it would only be control-dependent on the condition, which is faster
81		// if the branch predicts well (or possibly even if it doesn't, if the load will
82		// be an expensive cache miss).
83		// See issue #26306.
84		return false
85	}
86	// For now, stick to simple scalars that fit in registers
87	switch {
88	case v.Type.Size() > v.Block.Func.Config.RegSize:
89		return false
90	case v.Type.IsPtrShaped():
91		return true
92	case v.Type.IsInteger():
93		if arch == "amd64" && v.Type.Size() < 2 {
94			// amd64 doesn't support CMOV with byte registers
95			return false
96		}
97		return true
98	default:
99		return false
100	}
101}
102
103// elimIf converts the one-way branch starting at dom in f to a conditional move if possible.
104// loadAddr is a set of values which are used to compute the address of a load.
105// Those values are exempt from CMOV generation.
106func elimIf(f *Func, loadAddr *sparseSet, dom *Block) bool {
107	// See if dom is an If with one arm that
108	// is trivial and succeeded by the other
109	// successor of dom.
110	if dom.Kind != BlockIf || dom.Likely != BranchUnknown {
111		return false
112	}
113	var simple, post *Block
114	for i := range dom.Succs {
115		bb, other := dom.Succs[i].Block(), dom.Succs[i^1].Block()
116		if isLeafPlain(bb) && bb.Succs[0].Block() == other {
117			simple = bb
118			post = other
119			break
120		}
121	}
122	if simple == nil || len(post.Preds) != 2 || post == dom {
123		return false
124	}
125
126	// We've found our diamond CFG of blocks.
127	// Now decide if fusing 'simple' into dom+post
128	// looks profitable.
129
130	// Check that there are Phis, and that all of them
131	// can be safely rewritten to CondSelect.
132	hasphis := false
133	for _, v := range post.Values {
134		if v.Op == OpPhi {
135			hasphis = true
136			if !canCondSelect(v, f.Config.arch, loadAddr) {
137				return false
138			}
139		}
140	}
141	if !hasphis {
142		return false
143	}
144
145	// Pick some upper bound for the number of instructions
146	// we'd be willing to execute just to generate a dead
147	// argument to CondSelect. In the worst case, this is
148	// the number of useless instructions executed.
149	const maxfuseinsts = 2
150
151	if len(simple.Values) > maxfuseinsts || !allTrivial(simple) {
152		return false
153	}
154
155	// Replace Phi instructions in b with CondSelect instructions
156	swap := (post.Preds[0].Block() == dom) != (dom.Succs[0].Block() == post)
157	for _, v := range post.Values {
158		if v.Op != OpPhi {
159			continue
160		}
161		v.Op = OpCondSelect
162		if swap {
163			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
164		}
165		v.AddArg(dom.Controls[0])
166	}
167
168	// Put all of the instructions into 'dom'
169	// and update the CFG appropriately.
170	dom.Kind = post.Kind
171	dom.CopyControls(post)
172	dom.Aux = post.Aux
173	dom.Succs = append(dom.Succs[:0], post.Succs...)
174	for i := range dom.Succs {
175		e := dom.Succs[i]
176		e.b.Preds[e.i].b = dom
177	}
178
179	// Try really hard to preserve statement marks attached to blocks.
180	simplePos := simple.Pos
181	postPos := post.Pos
182	simpleStmt := simplePos.IsStmt() == src.PosIsStmt
183	postStmt := postPos.IsStmt() == src.PosIsStmt
184
185	for _, v := range simple.Values {
186		v.Block = dom
187	}
188	for _, v := range post.Values {
189		v.Block = dom
190	}
191
192	// findBlockPos determines if b contains a stmt-marked value
193	// that has the same line number as the Pos for b itself.
194	// (i.e. is the position on b actually redundant?)
195	findBlockPos := func(b *Block) bool {
196		pos := b.Pos
197		for _, v := range b.Values {
198			// See if there is a stmt-marked value already that matches simple.Pos (and perhaps post.Pos)
199			if pos.SameFileAndLine(v.Pos) && v.Pos.IsStmt() == src.PosIsStmt {
200				return true
201			}
202		}
203		return false
204	}
205	if simpleStmt {
206		simpleStmt = !findBlockPos(simple)
207		if !simpleStmt && simplePos.SameFileAndLine(postPos) {
208			postStmt = false
209		}
210
211	}
212	if postStmt {
213		postStmt = !findBlockPos(post)
214	}
215
216	// If simpleStmt and/or postStmt are still true, then try harder
217	// to find the corresponding statement marks new homes.
218
219	// setBlockPos determines if b contains a can-be-statement value
220	// that has the same line number as the Pos for b itself, and
221	// puts a statement mark on it, and returns whether it succeeded
222	// in this operation.
223	setBlockPos := func(b *Block) bool {
224		pos := b.Pos
225		for _, v := range b.Values {
226			if pos.SameFileAndLine(v.Pos) && !isPoorStatementOp(v.Op) {
227				v.Pos = v.Pos.WithIsStmt()
228				return true
229			}
230		}
231		return false
232	}
233	// If necessary and possible, add a mark to a value in simple
234	if simpleStmt {
235		if setBlockPos(simple) && simplePos.SameFileAndLine(postPos) {
236			postStmt = false
237		}
238	}
239	// If necessary and possible, add a mark to a value in post
240	if postStmt {
241		postStmt = !setBlockPos(post)
242	}
243
244	// Before giving up (this was added because it helps), try the end of "dom", and if that is not available,
245	// try the values in the successor block if it is uncomplicated.
246	if postStmt {
247		if dom.Pos.IsStmt() != src.PosIsStmt {
248			dom.Pos = postPos
249		} else {
250			// Try the successor block
251			if len(dom.Succs) == 1 && len(dom.Succs[0].Block().Preds) == 1 {
252				succ := dom.Succs[0].Block()
253				for _, v := range succ.Values {
254					if isPoorStatementOp(v.Op) {
255						continue
256					}
257					if postPos.SameFileAndLine(v.Pos) {
258						v.Pos = v.Pos.WithIsStmt()
259					}
260					postStmt = false
261					break
262				}
263				// If postStmt still true, tag the block itself if possible
264				if postStmt && succ.Pos.IsStmt() != src.PosIsStmt {
265					succ.Pos = postPos
266				}
267			}
268		}
269	}
270
271	dom.Values = append(dom.Values, simple.Values...)
272	dom.Values = append(dom.Values, post.Values...)
273
274	// Trash 'post' and 'simple'
275	clobberBlock(post)
276	clobberBlock(simple)
277
278	f.invalidateCFG()
279	return true
280}
281
282// is this a BlockPlain with one predecessor?
283func isLeafPlain(b *Block) bool {
284	return b.Kind == BlockPlain && len(b.Preds) == 1
285}
286
287func clobberBlock(b *Block) {
288	b.Values = nil
289	b.Preds = nil
290	b.Succs = nil
291	b.Aux = nil
292	b.ResetControls()
293	b.Likely = BranchUnknown
294	b.Kind = BlockInvalid
295}
296
297// elimIfElse converts the two-way branch starting at dom in f to a conditional move if possible.
298// loadAddr is a set of values which are used to compute the address of a load.
299// Those values are exempt from CMOV generation.
300func elimIfElse(f *Func, loadAddr *sparseSet, b *Block) bool {
301	// See if 'b' ends in an if/else: it should
302	// have two successors, both of which are BlockPlain
303	// and succeeded by the same block.
304	if b.Kind != BlockIf || b.Likely != BranchUnknown {
305		return false
306	}
307	yes, no := b.Succs[0].Block(), b.Succs[1].Block()
308	if !isLeafPlain(yes) || len(yes.Values) > 1 || !allTrivial(yes) {
309		return false
310	}
311	if !isLeafPlain(no) || len(no.Values) > 1 || !allTrivial(no) {
312		return false
313	}
314	if b.Succs[0].Block().Succs[0].Block() != b.Succs[1].Block().Succs[0].Block() {
315		return false
316	}
317	// block that postdominates the if/else
318	post := b.Succs[0].Block().Succs[0].Block()
319	if len(post.Preds) != 2 || post == b {
320		return false
321	}
322	hasphis := false
323	for _, v := range post.Values {
324		if v.Op == OpPhi {
325			hasphis = true
326			if !canCondSelect(v, f.Config.arch, loadAddr) {
327				return false
328			}
329		}
330	}
331	if !hasphis {
332		return false
333	}
334
335	// Don't generate CondSelects if branch is cheaper.
336	if !shouldElimIfElse(no, yes, post, f.Config.arch) {
337		return false
338	}
339
340	// now we're committed: rewrite each Phi as a CondSelect
341	swap := post.Preds[0].Block() != b.Succs[0].Block()
342	for _, v := range post.Values {
343		if v.Op != OpPhi {
344			continue
345		}
346		v.Op = OpCondSelect
347		if swap {
348			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
349		}
350		v.AddArg(b.Controls[0])
351	}
352
353	// Move the contents of all of these
354	// blocks into 'b' and update CFG edges accordingly
355	b.Kind = post.Kind
356	b.CopyControls(post)
357	b.Aux = post.Aux
358	b.Succs = append(b.Succs[:0], post.Succs...)
359	for i := range b.Succs {
360		e := b.Succs[i]
361		e.b.Preds[e.i].b = b
362	}
363	for i := range post.Values {
364		post.Values[i].Block = b
365	}
366	for i := range yes.Values {
367		yes.Values[i].Block = b
368	}
369	for i := range no.Values {
370		no.Values[i].Block = b
371	}
372	b.Values = append(b.Values, yes.Values...)
373	b.Values = append(b.Values, no.Values...)
374	b.Values = append(b.Values, post.Values...)
375
376	// trash post, yes, and no
377	clobberBlock(yes)
378	clobberBlock(no)
379	clobberBlock(post)
380
381	f.invalidateCFG()
382	return true
383}
384
385// shouldElimIfElse reports whether estimated cost of eliminating branch
386// is lower than threshold.
387func shouldElimIfElse(no, yes, post *Block, arch string) bool {
388	switch arch {
389	default:
390		return true
391	case "amd64":
392		const maxcost = 2
393		phi := 0
394		other := 0
395		for _, v := range post.Values {
396			if v.Op == OpPhi {
397				// Each phi results in CondSelect, which lowers into CMOV,
398				// CMOV has latency >1 on most CPUs.
399				phi++
400			}
401			for _, x := range v.Args {
402				if x.Block == no || x.Block == yes {
403					other++
404				}
405			}
406		}
407		cost := phi * 1
408		if phi > 1 {
409			// If we have more than 1 phi and some values in post have args
410			// in yes or no blocks, we may have to recalculate condition, because
411			// those args may clobber flags. For now assume that all operations clobber flags.
412			cost += other * 1
413		}
414		return cost < maxcost
415	}
416}
417
418func allTrivial(b *Block) bool {
419	// don't fuse memory ops, Phi ops, divides (can panic),
420	// or anything else with side-effects
421	for _, v := range b.Values {
422		if v.Op == OpPhi || isDivMod(v.Op) || v.Type.IsMemory() ||
423			v.MemoryArg() != nil || opcodeTable[v.Op].hasSideEffects {
424			return false
425		}
426	}
427	return true
428}
429
430func isDivMod(op Op) bool {
431	switch op {
432	case OpDiv8, OpDiv8u, OpDiv16, OpDiv16u,
433		OpDiv32, OpDiv32u, OpDiv64, OpDiv64u, OpDiv128u,
434		OpDiv32F, OpDiv64F,
435		OpMod8, OpMod8u, OpMod16, OpMod16u,
436		OpMod32, OpMod32u, OpMod64, OpMod64u:
437		return true
438	default:
439		return false
440	}
441}
442