1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package ssa
6
7import (
8	"cmd/compile/internal/types"
9	"cmd/internal/obj"
10	"cmd/internal/objabi"
11	"cmd/internal/src"
12	"encoding/binary"
13	"fmt"
14	"io"
15	"math"
16	"math/bits"
17	"os"
18	"path/filepath"
19)
20
21func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter) {
22	// repeat rewrites until we find no more rewrites
23	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
24	pendingLines.clear()
25	for {
26		change := false
27		for _, b := range f.Blocks {
28			for i, c := range b.ControlValues() {
29				for c.Op == OpCopy {
30					c = c.Args[0]
31					b.ReplaceControl(i, c)
32				}
33			}
34			if rb(b) {
35				change = true
36			}
37			for j, v := range b.Values {
38				change = phielimValue(v) || change
39
40				// Eliminate copy inputs.
41				// If any copy input becomes unused, mark it
42				// as invalid and discard its argument. Repeat
43				// recursively on the discarded argument.
44				// This phase helps remove phantom "dead copy" uses
				// of a value so that an x.Uses==1 rule condition
46				// fires reliably.
47				for i, a := range v.Args {
48					if a.Op != OpCopy {
49						continue
50					}
51					aa := copySource(a)
52					v.SetArg(i, aa)
53					// If a, a copy, has a line boundary indicator, attempt to find a new value
54					// to hold it.  The first candidate is the value that will replace a (aa),
55					// if it shares the same block and line and is eligible.
56					// The second option is v, which has a as an input.  Because aa is earlier in
57					// the data flow, it is the better choice.
58					if a.Pos.IsStmt() == src.PosIsStmt {
59						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
60							aa.Pos = aa.Pos.WithIsStmt()
61						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
62							v.Pos = v.Pos.WithIsStmt()
63						} else {
64							// Record the lost line and look for a new home after all rewrites are complete.
65							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
66							// line to appear in more than one block, but only one block is stored, so if both end
67							// up here, then one will be lost.
68							pendingLines.set(a.Pos, int32(a.Block.ID))
69						}
70						a.Pos = a.Pos.WithNotStmt()
71					}
72					change = true
73					for a.Uses == 0 {
74						b := a.Args[0]
75						a.reset(OpInvalid)
76						a = b
77					}
78				}
79
80				// apply rewrite function
81				if rv(v) {
82					change = true
83					// If value changed to a poor choice for a statement boundary, move the boundary
84					if v.Pos.IsStmt() == src.PosIsStmt {
85						if k := nextGoodStatementIndex(v, j, b); k != j {
86							v.Pos = v.Pos.WithNotStmt()
87							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
88						}
89					}
90				}
91			}
92		}
93		if !change {
94			break
95		}
96	}
97	// remove clobbered values
98	for _, b := range f.Blocks {
99		j := 0
100		for i, v := range b.Values {
101			vl := v.Pos
102			if v.Op == OpInvalid {
103				if v.Pos.IsStmt() == src.PosIsStmt {
104					pendingLines.set(vl, int32(b.ID))
105				}
106				f.freeValue(v)
107				continue
108			}
109			if v.Pos.IsStmt() != src.PosNotStmt && pendingLines.get(vl) == int32(b.ID) {
110				pendingLines.remove(vl)
111				v.Pos = v.Pos.WithIsStmt()
112			}
113			if i != j {
114				b.Values[j] = v
115			}
116			j++
117		}
118		if pendingLines.get(b.Pos) == int32(b.ID) {
119			b.Pos = b.Pos.WithIsStmt()
120			pendingLines.remove(b.Pos)
121		}
122		if j != len(b.Values) {
123			tail := b.Values[j:]
124			for j := range tail {
125				tail[j] = nil
126			}
127			b.Values = b.Values[:j]
128		}
129	}
130}
131
132// Common functions called from rewriting rules
133
134func is64BitFloat(t *types.Type) bool {
135	return t.Size() == 8 && t.IsFloat()
136}
137
138func is32BitFloat(t *types.Type) bool {
139	return t.Size() == 4 && t.IsFloat()
140}
141
142func is64BitInt(t *types.Type) bool {
143	return t.Size() == 8 && t.IsInteger()
144}
145
146func is32BitInt(t *types.Type) bool {
147	return t.Size() == 4 && t.IsInteger()
148}
149
150func is16BitInt(t *types.Type) bool {
151	return t.Size() == 2 && t.IsInteger()
152}
153
154func is8BitInt(t *types.Type) bool {
155	return t.Size() == 1 && t.IsInteger()
156}
157
158func isPtr(t *types.Type) bool {
159	return t.IsPtrShaped()
160}
161
162func isSigned(t *types.Type) bool {
163	return t.IsSigned()
164}
165
// mergeSym merges two symbolic offsets. There is no real merging of
// offsets; we just pick the non-nil one.
168func mergeSym(x, y interface{}) interface{} {
169	if x == nil {
170		return y
171	}
172	if y == nil {
173		return x
174	}
175	panic(fmt.Sprintf("mergeSym with two non-nil syms %s %s", x, y))
176}
177func canMergeSym(x, y interface{}) bool {
178	return x == nil || y == nil
179}
180
181// canMergeLoadClobber reports whether the load can be merged into target without
182// invalidating the schedule.
183// It also checks that the other non-load argument x is something we
184// are ok with clobbering.
185func canMergeLoadClobber(target, load, x *Value) bool {
186	// The register containing x is going to get clobbered.
187	// Don't merge if we still need the value of x.
188	// We don't have liveness information here, but we can
189	// approximate x dying with:
190	//  1) target is x's only use.
191	//  2) target is not in a deeper loop than x.
192	if x.Uses != 1 {
193		return false
194	}
195	loopnest := x.Block.Func.loopnest()
196	loopnest.calculateDepths()
197	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
198		return false
199	}
200	return canMergeLoad(target, load)
201}
202
203// canMergeLoad reports whether the load can be merged into target without
204// invalidating the schedule.
205func canMergeLoad(target, load *Value) bool {
206	if target.Block.ID != load.Block.ID {
207		// If the load is in a different block do not merge it.
208		return false
209	}
210
211	// We can't merge the load into the target if the load
212	// has more than one use.
213	if load.Uses != 1 {
214		return false
215	}
216
217	mem := load.MemoryArg()
218
219	// We need the load's memory arg to still be alive at target. That
220	// can't be the case if one of target's args depends on a memory
221	// state that is a successor of load's memory arg.
222	//
223	// For example, it would be invalid to merge load into target in
224	// the following situation because newmem has killed oldmem
225	// before target is reached:
226	//     load = read ... oldmem
227	//   newmem = write ... oldmem
228	//     arg0 = read ... newmem
229	//   target = add arg0 load
230	//
231	// If the argument comes from a different block then we can exclude
232	// it immediately because it must dominate load (which is in the
233	// same block as target).
234	var args []*Value
235	for _, a := range target.Args {
236		if a != load && a.Block.ID == target.Block.ID {
237			args = append(args, a)
238		}
239	}
240
241	// memPreds contains memory states known to be predecessors of load's
242	// memory state. It is lazily initialized.
243	var memPreds map[*Value]bool
244	for i := 0; len(args) > 0; i++ {
245		const limit = 100
246		if i >= limit {
247			// Give up if we have done a lot of iterations.
248			return false
249		}
250		v := args[len(args)-1]
251		args = args[:len(args)-1]
252		if target.Block.ID != v.Block.ID {
253			// Since target and load are in the same block
254			// we can stop searching when we leave the block.
255			continue
256		}
257		if v.Op == OpPhi {
258			// A Phi implies we have reached the top of the block.
259			// The memory phi, if it exists, is always
260			// the first logical store in the block.
261			continue
262		}
263		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
			// We could handle this situation, but it is likely
			// to be very rare.
266			return false
267		}
268		if v.Op.SymEffect()&SymAddr != 0 {
269			// This case prevents an operation that calculates the
270			// address of a local variable from being forced to schedule
271			// before its corresponding VarDef.
272			// See issue 28445.
273			//   v1 = LOAD ...
274			//   v2 = VARDEF
275			//   v3 = LEAQ
276			//   v4 = CMPQ v1 v3
277			// We don't want to combine the CMPQ with the load, because
278			// that would force the CMPQ to schedule before the VARDEF, which
279			// in turn requires the LEAQ to schedule before the VARDEF.
280			return false
281		}
282		if v.Type.IsMemory() {
283			if memPreds == nil {
284				// Initialise a map containing memory states
285				// known to be predecessors of load's memory
286				// state.
287				memPreds = make(map[*Value]bool)
288				m := mem
289				const limit = 50
290				for i := 0; i < limit; i++ {
291					if m.Op == OpPhi {
292						// The memory phi, if it exists, is always
293						// the first logical store in the block.
294						break
295					}
296					if m.Block.ID != target.Block.ID {
297						break
298					}
299					if !m.Type.IsMemory() {
300						break
301					}
302					memPreds[m] = true
303					if len(m.Args) == 0 {
304						break
305					}
306					m = m.MemoryArg()
307				}
308			}
309
310			// We can merge if v is a predecessor of mem.
311			//
312			// For example, we can merge load into target in the
313			// following scenario:
314			//      x = read ... v
315			//    mem = write ... v
316			//   load = read ... mem
317			// target = add x load
318			if memPreds[v] {
319				continue
320			}
321			return false
322		}
323		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
324			// If v takes mem as an input then we know mem
325			// is valid at this point.
326			continue
327		}
328		for _, a := range v.Args {
329			if target.Block.ID == a.Block.ID {
330				args = append(args, a)
331			}
332		}
333	}
334
335	return true
336}
337
// isSameSym reports whether sym is the same as the given named symbol.
339func isSameSym(sym interface{}, name string) bool {
340	s, ok := sym.(fmt.Stringer)
341	return ok && s.String() == name
342}
343
344// nlz returns the number of leading zeros.
345func nlz(x int64) int64 {
346	return int64(bits.LeadingZeros64(uint64(x)))
347}
348
349// ntz returns the number of trailing zeros.
350func ntz(x int64) int64 {
351	return int64(bits.TrailingZeros64(uint64(x)))
352}
353
354func oneBit(x int64) bool {
355	return bits.OnesCount64(uint64(x)) == 1
356}
357
358// nlo returns the number of leading ones.
359func nlo(x int64) int64 {
360	return nlz(^x)
361}
362
363// nto returns the number of trailing ones.
364func nto(x int64) int64 {
365	return ntz(^x)
366}
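
// For example, nlz(1) == 63, ntz(8) == 3, nlo(-1) == 64, and nto(0x0f) == 4.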
367
368// log2 returns logarithm in base 2 of uint64(n), with log2(0) = -1.
369// Rounds down.
370func log2(n int64) int64 {
371	return int64(bits.Len64(uint64(n))) - 1
372}
373
374// log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
375// Rounds down.
376func log2uint32(n int64) int64 {
377	return int64(bits.Len32(uint32(n))) - 1
378}
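
// For example, log2(1) == 0, log2(8) == 3, log2(9) == 3, and log2(0) == -1.
// log2uint32 looks only at the low 32 bits, so log2uint32(1<<40|8) == 3.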
379
380// isPowerOfTwo reports whether n is a power of 2.
381func isPowerOfTwo(n int64) bool {
382	return n > 0 && n&(n-1) == 0
383}
384
385// isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
386func isUint64PowerOfTwo(in int64) bool {
387	n := uint64(in)
388	return n > 0 && n&(n-1) == 0
389}
390
391// isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
392func isUint32PowerOfTwo(in int64) bool {
393	n := uint64(uint32(in))
394	return n > 0 && n&(n-1) == 0
395}
396
397// is32Bit reports whether n can be represented as a signed 32 bit integer.
398func is32Bit(n int64) bool {
399	return n == int64(int32(n))
400}
401
402// is16Bit reports whether n can be represented as a signed 16 bit integer.
403func is16Bit(n int64) bool {
404	return n == int64(int16(n))
405}
406
407// is8Bit reports whether n can be represented as a signed 8 bit integer.
408func is8Bit(n int64) bool {
409	return n == int64(int8(n))
410}
411
412// isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
413func isU8Bit(n int64) bool {
414	return n == int64(uint8(n))
415}
416
417// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
418func isU12Bit(n int64) bool {
419	return 0 <= n && n < (1<<12)
420}
421
422// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
423func isU16Bit(n int64) bool {
424	return n == int64(uint16(n))
425}
426
427// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
428func isU32Bit(n int64) bool {
429	return n == int64(uint32(n))
430}
431
432// is20Bit reports whether n can be represented as a signed 20 bit integer.
433func is20Bit(n int64) bool {
434	return -(1<<19) <= n && n < (1<<19)
435}
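
// For example, is32Bit(-1<<31) is true but is32Bit(1<<31) is false,
// isU12Bit(4095) is true but isU12Bit(4096) is false, and is20Bit
// accepts exactly the range [-1<<19, 1<<19-1].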
436
437// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
438func b2i(b bool) int64 {
439	if b {
440		return 1
441	}
442	return 0
443}
444
445// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
446// A shift is bounded if it is shifting by less than the width of the shifted value.
447func shiftIsBounded(v *Value) bool {
448	return v.AuxInt != 0
449}
450
451// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
452// of the mantissa. It will panic if the truncation results in lost information.
453func truncate64Fto32F(f float64) float32 {
454	if !isExactFloat32(f) {
455		panic("truncate64Fto32F: truncation is not exact")
456	}
457	if !math.IsNaN(f) {
458		return float32(f)
459	}
460	// NaN bit patterns aren't necessarily preserved across conversion
461	// instructions so we need to do the conversion manually.
462	b := math.Float64bits(f)
463	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
464	//          | sign                  | exponent   | mantissa       |
465	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
466	return math.Float32frombits(r)
467}
468
469// extend32Fto64F converts a float32 value to a float64 value preserving the bit
470// pattern of the mantissa.
471func extend32Fto64F(f float32) float64 {
472	if !math.IsNaN(float64(f)) {
473		return float64(f)
474	}
475	// NaN bit patterns aren't necessarily preserved across conversion
476	// instructions so we need to do the conversion manually.
477	b := uint64(math.Float32bits(f))
478	//   | sign                  | exponent      | mantissa                    |
479	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
480	return math.Float64frombits(r)
481}
482
483// NeedsFixUp reports whether the division needs fix-up code.
484func NeedsFixUp(v *Value) bool {
485	return v.AuxInt == 0
486}
487
488// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
489func auxFrom64F(f float64) int64 {
490	return int64(math.Float64bits(f))
491}
492
493// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
494func auxFrom32F(f float32) int64 {
495	return int64(math.Float64bits(extend32Fto64F(f)))
496}
497
498// auxTo32F decodes a float32 from the AuxInt value provided.
499func auxTo32F(i int64) float32 {
500	return truncate64Fto32F(math.Float64frombits(uint64(i)))
501}
502
503// auxTo64F decodes a float64 from the AuxInt value provided.
504func auxTo64F(i int64) float64 {
505	return math.Float64frombits(uint64(i))
506}
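
// For example, auxFrom64F and auxTo64F round-trip the exact bit pattern of a
// float64, and auxTo32F(auxFrom32F(1.5)) == 1.5; NaN payloads survive because
// extend32Fto64F and truncate64Fto32F move the mantissa bits manually.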
507
508// uaddOvf reports whether unsigned a+b would overflow.
509func uaddOvf(a, b int64) bool {
510	return uint64(a)+uint64(b) < uint64(a)
511}
512
// devirt de-virtualizes an InterCall.
// 'sym' is the symbol for the itab.
515func devirt(v *Value, sym interface{}, offset int64) *obj.LSym {
516	f := v.Block.Func
517	n, ok := sym.(*obj.LSym)
518	if !ok {
519		return nil
520	}
521	lsym := f.fe.DerefItab(n, offset)
522	if f.pass.debug > 0 {
523		if lsym != nil {
524			f.Warnl(v.Pos, "de-virtualizing call")
525		} else {
526			f.Warnl(v.Pos, "couldn't de-virtualize call")
527		}
528	}
529	return lsym
530}
531
532// isSamePtr reports whether p1 and p2 point to the same address.
533func isSamePtr(p1, p2 *Value) bool {
534	if p1 == p2 {
535		return true
536	}
537	if p1.Op != p2.Op {
538		return false
539	}
540	switch p1.Op {
541	case OpOffPtr:
542		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
543	case OpAddr, OpLocalAddr:
544		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
545		// Checking for value equality only works after [z]cse has run.
546		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
547	case OpAddPtr:
548		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
549	}
550	return false
551}
552
553func isStackPtr(v *Value) bool {
554	for v.Op == OpOffPtr || v.Op == OpAddPtr {
555		v = v.Args[0]
556	}
557	return v.Op == OpSP || v.Op == OpLocalAddr
558}
559
560// disjoint reports whether the memory region specified by [p1:p1+n1)
561// does not overlap with [p2:p2+n2).
562// A return value of false does not imply the regions overlap.
563func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
564	if n1 == 0 || n2 == 0 {
565		return true
566	}
567	if p1 == p2 {
568		return false
569	}
570	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
571		base, offset = ptr, 0
572		for base.Op == OpOffPtr {
573			offset += base.AuxInt
574			base = base.Args[0]
575		}
576		return base, offset
577	}
578	p1, off1 := baseAndOffset(p1)
579	p2, off2 := baseAndOffset(p2)
580	if isSamePtr(p1, p2) {
581		return !overlap(off1, n1, off2, n2)
582	}
583	// p1 and p2 are not the same, so if they are both OpAddrs then
584	// they point to different variables.
585	// If one pointer is on the stack and the other is an argument
586	// then they can't overlap.
587	switch p1.Op {
588	case OpAddr, OpLocalAddr:
589		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
590			return true
591		}
592		return p2.Op == OpArg && p1.Args[0].Op == OpSP
593	case OpArg:
594		if p2.Op == OpSP || p2.Op == OpLocalAddr {
595			return true
596		}
597	case OpSP:
598		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpSP
599	}
600	return false
601}
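
// For example, two OpOffPtr values off the same base with offsets 0 and 8 and
// n1 == n2 == 8 describe [0,8) and [8,16), so disjoint reports true.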
602
603// moveSize returns the number of bytes an aligned MOV instruction moves
604func moveSize(align int64, c *Config) int64 {
605	switch {
606	case align%8 == 0 && c.PtrSize == 8:
607		return 8
608	case align%4 == 0:
609		return 4
610	case align%2 == 0:
611		return 2
612	}
613	return 1
614}
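
// For example, with c.PtrSize == 8, moveSize(8, c) == 8, while moveSize(6, c) == 2
// because 6 is a multiple of 2 but not of 4 or 8.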
615
616// mergePoint finds a block among a's blocks which dominates b and is itself
617// dominated by all of a's blocks. Returns nil if it can't find one.
618// Might return nil even if one does exist.
619func mergePoint(b *Block, a ...*Value) *Block {
620	// Walk backward from b looking for one of the a's blocks.
621
622	// Max distance
623	d := 100
624
625	for d > 0 {
626		for _, x := range a {
627			if b == x.Block {
628				goto found
629			}
630		}
631		if len(b.Preds) > 1 {
632			// Don't know which way to go back. Abort.
633			return nil
634		}
635		b = b.Preds[0].b
636		d--
637	}
638	return nil // too far away
639found:
	// At this point, b is the block of the first value in a found by walking backward.
	// If we return anything, it will be this block, saved as r.
642	r := b
643
644	// Keep going, counting the other a's that we find. They must all dominate r.
645	na := 0
646	for d > 0 {
647		for _, x := range a {
648			if b == x.Block {
649				na++
650			}
651		}
652		if na == len(a) {
653			// Found all of a in a backwards walk. We can return r.
654			return r
655		}
656		if len(b.Preds) > 1 {
657			return nil
658		}
659		b = b.Preds[0].b
660		d--
661
662	}
663	return nil // too far away
664}
665
666// clobber invalidates v.  Returns true.
667// clobber is used by rewrite rules to:
668//   A) make sure v is really dead and never used again.
669//   B) decrement use counts of v's args.
670func clobber(v *Value) bool {
671	v.reset(OpInvalid)
672	// Note: leave v.Block intact.  The Block field is used after clobber.
673	return true
674}
675
676// clobberIfDead resets v when use count is 1. Returns true.
677// clobberIfDead is used by rewrite rules to decrement
678// use counts of v's args when v is dead and never used.
679func clobberIfDead(v *Value) bool {
680	if v.Uses == 1 {
681		v.reset(OpInvalid)
682	}
683	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
684	return true
685}
686
687// noteRule is an easy way to track if a rule is matched when writing
688// new ones.  Make the rule of interest also conditional on
689//     noteRule("note to self: rule of interest matched")
690// and that message will print when the rule matches.
691func noteRule(s string) bool {
692	fmt.Println(s)
693	return true
694}
695
696// countRule increments Func.ruleMatches[key].
697// If Func.ruleMatches is non-nil at the end
698// of compilation, it will be printed to stdout.
// This is intended to make it easier to find which functions
// contain lots of rule matches when developing new rules.
701func countRule(v *Value, key string) bool {
702	f := v.Block.Func
703	if f.ruleMatches == nil {
704		f.ruleMatches = make(map[string]int)
705	}
706	f.ruleMatches[key]++
707	return true
708}
709
710// warnRule generates compiler debug output with string s when
711// v is not in autogenerated code, cond is true and the rule has fired.
712func warnRule(cond bool, v *Value, s string) bool {
713	if pos := v.Pos; pos.Line() > 1 && cond {
714		v.Block.Func.Warnl(pos, s)
715	}
716	return true
717}
718
719// for a pseudo-op like (LessThan x), extract x
720func flagArg(v *Value) *Value {
721	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
722		return nil
723	}
724	return v.Args[0]
725}
726
727// arm64Negate finds the complement to an ARM64 condition code,
728// for example Equal -> NotEqual or LessThan -> GreaterEqual
//
// Floating-point conditions are handled as well.
731func arm64Negate(op Op) Op {
732	switch op {
733	case OpARM64LessThan:
734		return OpARM64GreaterEqual
735	case OpARM64LessThanU:
736		return OpARM64GreaterEqualU
737	case OpARM64GreaterThan:
738		return OpARM64LessEqual
739	case OpARM64GreaterThanU:
740		return OpARM64LessEqualU
741	case OpARM64LessEqual:
742		return OpARM64GreaterThan
743	case OpARM64LessEqualU:
744		return OpARM64GreaterThanU
745	case OpARM64GreaterEqual:
746		return OpARM64LessThan
747	case OpARM64GreaterEqualU:
748		return OpARM64LessThanU
749	case OpARM64Equal:
750		return OpARM64NotEqual
751	case OpARM64NotEqual:
752		return OpARM64Equal
753	case OpARM64LessThanF:
754		return OpARM64GreaterEqualF
755	case OpARM64GreaterThanF:
756		return OpARM64LessEqualF
757	case OpARM64LessEqualF:
758		return OpARM64GreaterThanF
759	case OpARM64GreaterEqualF:
760		return OpARM64LessThanF
761	default:
762		panic("unreachable")
763	}
764}
765
766// arm64Invert evaluates (InvertFlags op), which
767// is the same as altering the condition codes such
768// that the same result would be produced if the arguments
769// to the flag-generating instruction were reversed, e.g.
770// (InvertFlags (CMP x y)) -> (CMP y x)
//
// Floating-point conditions are handled as well.
773func arm64Invert(op Op) Op {
774	switch op {
775	case OpARM64LessThan:
776		return OpARM64GreaterThan
777	case OpARM64LessThanU:
778		return OpARM64GreaterThanU
779	case OpARM64GreaterThan:
780		return OpARM64LessThan
781	case OpARM64GreaterThanU:
782		return OpARM64LessThanU
783	case OpARM64LessEqual:
784		return OpARM64GreaterEqual
785	case OpARM64LessEqualU:
786		return OpARM64GreaterEqualU
787	case OpARM64GreaterEqual:
788		return OpARM64LessEqual
789	case OpARM64GreaterEqualU:
790		return OpARM64LessEqualU
791	case OpARM64Equal, OpARM64NotEqual:
792		return op
793	case OpARM64LessThanF:
794		return OpARM64GreaterThanF
795	case OpARM64GreaterThanF:
796		return OpARM64LessThanF
797	case OpARM64LessEqualF:
798		return OpARM64GreaterEqualF
799	case OpARM64GreaterEqualF:
800		return OpARM64LessEqualF
801	default:
802		panic("unreachable")
803	}
804}
805
// ccARM64Eval evaluates an ARM64 condition (cc) against a flags value
// that is potentially constant; it returns 1 for true,
// -1 for false, and 0 for not constant.
809func ccARM64Eval(cc interface{}, flags *Value) int {
810	op := cc.(Op)
811	fop := flags.Op
812	switch fop {
813	case OpARM64InvertFlags:
814		return -ccARM64Eval(op, flags.Args[0])
815	case OpARM64FlagEQ:
816		switch op {
817		case OpARM64Equal, OpARM64GreaterEqual, OpARM64LessEqual,
818			OpARM64GreaterEqualU, OpARM64LessEqualU:
819			return 1
820		default:
821			return -1
822		}
823	case OpARM64FlagLT_ULT:
824		switch op {
825		case OpARM64LessThan, OpARM64LessThanU,
826			OpARM64LessEqual, OpARM64LessEqualU:
827			return 1
828		default:
829			return -1
830		}
831	case OpARM64FlagLT_UGT:
832		switch op {
833		case OpARM64LessThan, OpARM64GreaterThanU,
834			OpARM64LessEqual, OpARM64GreaterEqualU:
835			return 1
836		default:
837			return -1
838		}
839	case OpARM64FlagGT_ULT:
840		switch op {
841		case OpARM64GreaterThan, OpARM64LessThanU,
842			OpARM64GreaterEqual, OpARM64LessEqualU:
843			return 1
844		default:
845			return -1
846		}
847	case OpARM64FlagGT_UGT:
848		switch op {
849		case OpARM64GreaterThan, OpARM64GreaterThanU,
850			OpARM64GreaterEqual, OpARM64GreaterEqualU:
851			return 1
852		default:
853			return -1
854		}
855	default:
856		return 0
857	}
858}
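
// For example, ccARM64Eval(OpARM64LessThan, flags) returns 1 when flags.Op is
// OpARM64FlagLT_ULT, and wrapping flags in InvertFlags flips the result to -1.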
859
860// logRule logs the use of the rule s. This will only be enabled if
861// rewrite rules were generated with the -log option, see gen/rulegen.go.
862func logRule(s string) {
863	if ruleFile == nil {
864		// Open a log file to write log to. We open in append
865		// mode because all.bash runs the compiler lots of times,
866		// and we want the concatenation of all of those logs.
867		// This means, of course, that users need to rm the old log
868		// to get fresh data.
869		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
870		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
871			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
872		if err != nil {
873			panic(err)
874		}
875		ruleFile = w
876	}
877	_, err := fmt.Fprintln(ruleFile, s)
878	if err != nil {
879		panic(err)
880	}
881}
882
883var ruleFile io.Writer
884
885func min(x, y int64) int64 {
886	if x < y {
887		return x
888	}
889	return y
890}
891
892func isConstZero(v *Value) bool {
893	switch v.Op {
894	case OpConstNil:
895		return true
896	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
897		return v.AuxInt == 0
898	}
899	return false
900}
901
902// reciprocalExact64 reports whether 1/c is exactly representable.
903func reciprocalExact64(c float64) bool {
904	b := math.Float64bits(c)
905	man := b & (1<<52 - 1)
906	if man != 0 {
907		return false // not a power of 2, denormal, or NaN
908	}
909	exp := b >> 52 & (1<<11 - 1)
910	// exponent bias is 0x3ff.  So taking the reciprocal of a number
911	// changes the exponent to 0x7fe-exp.
912	switch exp {
913	case 0:
914		return false // ±0
915	case 0x7ff:
916		return false // ±inf
917	case 0x7fe:
918		return false // exponent is not representable
919	default:
920		return true
921	}
922}
923
924// reciprocalExact32 reports whether 1/c is exactly representable.
925func reciprocalExact32(c float32) bool {
926	b := math.Float32bits(c)
927	man := b & (1<<23 - 1)
928	if man != 0 {
929		return false // not a power of 2, denormal, or NaN
930	}
931	exp := b >> 23 & (1<<8 - 1)
932	// exponent bias is 0x7f.  So taking the reciprocal of a number
933	// changes the exponent to 0xfe-exp.
934	switch exp {
935	case 0:
936		return false // ±0
937	case 0xff:
938		return false // ±inf
939	case 0xfe:
940		return false // exponent is not representable
941	default:
942		return true
943	}
944}
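
// For example, reciprocalExact64(4) is true (1/4 is exactly 0.25), while
// reciprocalExact64(3) is false (non-zero mantissa) and reciprocalExact64(0)
// is false (zero exponent).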
945
// isARMImmRot reports whether an immediate can be encoded directly in an ARM
// instruction, as an 8-bit value rotated right by an even amount.
947func isARMImmRot(v uint32) bool {
948	for i := 0; i < 16; i++ {
949		if v&^0xff == 0 {
950			return true
951		}
952		v = v<<2 | v>>30
953	}
954
955	return false
956}
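
// For example, isARMImmRot(0xff) and isARMImmRot(0xff000000) are true
// (0xff000000 is 0xff rotated right by 8), but isARMImmRot(0x101) is false
// because bits 0 and 8 never fit in one rotated 8-bit window.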
957
// overlap reports whether the memory ranges specified by the given offset and
// size pairs overlap.
960func overlap(offset1, size1, offset2, size2 int64) bool {
961	if offset1 >= offset2 && offset2+size2 > offset1 {
962		return true
963	}
964	if offset2 >= offset1 && offset1+size1 > offset2 {
965		return true
966	}
967	return false
968}
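
// For example, overlap(0, 8, 4, 8) is true ([0,8) and [4,12) share bytes 4-7),
// while overlap(0, 4, 4, 4) is false because [0,4) and [4,8) only touch.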
969
970func areAdjacentOffsets(off1, off2, size int64) bool {
971	return off1+size == off2 || off1 == off2+size
972}
973
// zeroUpper32Bits reports whether the value zeroes out the upper 32 bits of a
// 64-bit register. depth limits the recursion depth. AMD64.rules uses 3 as the
// limit because it catches the same number of cases as a limit of 4.
977func zeroUpper32Bits(x *Value, depth int) bool {
978	switch x.Op {
979	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
980		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
981		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
982		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
983		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
984		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
985		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL:
986		return true
987	case OpArg:
988		return x.Type.Width == 4
989	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit the recursion depth.
992		if depth <= 0 {
993			return false
994		}
995		for i := range x.Args {
996			if !zeroUpper32Bits(x.Args[i], depth-1) {
997				return false
998			}
999		}
1000		return true
1001
1002	}
1003	return false
1004}
1005
1006// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits
1007func zeroUpper48Bits(x *Value, depth int) bool {
1008	switch x.Op {
1009	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1010		return true
1011	case OpArg:
1012		return x.Type.Width == 2
1013	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit the recursion depth.
1016		if depth <= 0 {
1017			return false
1018		}
1019		for i := range x.Args {
1020			if !zeroUpper48Bits(x.Args[i], depth-1) {
1021				return false
1022			}
1023		}
1024		return true
1025
1026	}
1027	return false
1028}
1029
1030// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits
1031func zeroUpper56Bits(x *Value, depth int) bool {
1032	switch x.Op {
1033	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1034		return true
1035	case OpArg:
1036		return x.Type.Width == 1
1037	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit the recursion depth.
1040		if depth <= 0 {
1041			return false
1042		}
1043		for i := range x.Args {
1044			if !zeroUpper56Bits(x.Args[i], depth-1) {
1045				return false
1046			}
1047		}
1048		return true
1049
1050	}
1051	return false
1052}
1053
1054// isInlinableMemmove reports whether the given arch performs a Move of the given size
1055// faster than memmove. It will only return true if replacing the memmove with a Move is
1056// safe, either because Move is small or because the arguments are disjoint.
1057// This is used as a check for replacing memmove with Move ops.
1058func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1059	// It is always safe to convert memmove into Move when its arguments are disjoint.
1060	// Move ops may or may not be faster for large sizes depending on how the platform
1061	// lowers them, so we only perform this optimization on platforms that we know to
1062	// have fast Move ops.
1063	switch c.arch {
1064	case "amd64":
1065		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1066	case "386", "ppc64", "ppc64le", "arm64":
1067		return sz <= 8
1068	case "s390x":
1069		return sz <= 8 || disjoint(dst, sz, src, sz)
1070	case "arm", "mips", "mips64", "mipsle", "mips64le":
1071		return sz <= 4
1072	}
1073	return false
1074}
1075
1076// hasSmallRotate reports whether the architecture has rotate instructions
1077// for sizes < 32-bit.  This is used to decide whether to promote some rotations.
1078func hasSmallRotate(c *Config) bool {
1079	switch c.arch {
1080	case "amd64", "386":
1081		return true
1082	default:
1083		return false
1084	}
1085}
1086
1087// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1088func armBFAuxInt(lsb, width int64) int64 {
1089	if lsb < 0 || lsb > 63 {
1090		panic("ARM(64) bit field lsb constant out of range")
1091	}
1092	if width < 1 || width > 64 {
1093		panic("ARM(64) bit field width constant out of range")
1094	}
1095	return width | lsb<<8
1096}
1097
1098// returns the lsb part of the auxInt field of arm64 bitfield ops.
1099func getARM64BFlsb(bfc int64) int64 {
1100	return int64(uint64(bfc) >> 8)
1101}
1102
1103// returns the width part of the auxInt field of arm64 bitfield ops.
1104func getARM64BFwidth(bfc int64) int64 {
1105	return bfc & 0xff
1106}
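
// For example, armBFAuxInt(8, 16) == 16 | 8<<8 == 0x810, and
// getARM64BFlsb(0x810) == 8, getARM64BFwidth(0x810) == 16 recover the fields.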
1107
1108// checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
1109func isARM64BFMask(lsb, mask, rshift int64) bool {
1110	shiftedMask := int64(uint64(mask) >> uint64(rshift))
1111	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1112}
1113
1114// returns the bitfield width of mask >> rshift for arm64 bitfield ops
1115func arm64BFWidth(mask, rshift int64) int64 {
1116	shiftedMask := int64(uint64(mask) >> uint64(rshift))
1117	if shiftedMask == 0 {
1118		panic("ARM64 BF mask is zero")
1119	}
1120	return nto(shiftedMask)
1121}
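
// For example, with mask == 0xff0 and rshift == 4 the shifted mask is 0xff,
// so isARM64BFMask(0, 0xff0, 4) is true and arm64BFWidth(0xff0, 4) == 8.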
1122
1123// sizeof returns the size of t in bytes.
1124// It will panic if t is not a *types.Type.
1125func sizeof(t interface{}) int64 {
1126	return t.(*types.Type).Size()
1127}
1128
1129// alignof returns the alignment of t in bytes.
1130// It will panic if t is not a *types.Type.
1131func alignof(t interface{}) int64 {
1132	return t.(*types.Type).Alignment()
1133}
1134
1135// registerizable reports whether t is a primitive type that fits in
1136// a register. It assumes float64 values will always fit into registers
1137// even if that isn't strictly true.
1138// It will panic if t is not a *types.Type.
1139func registerizable(b *Block, t interface{}) bool {
1140	typ := t.(*types.Type)
1141	if typ.IsPtrShaped() || typ.IsFloat() {
1142		return true
1143	}
1144	if typ.IsInteger() {
1145		return typ.Size() <= b.Func.Config.RegSize
1146	}
1147	return false
1148}
1149
// needRaceCleanup reports whether this call to racefuncenter/racefuncexit can be removed.
1151func needRaceCleanup(sym interface{}, v *Value) bool {
1152	f := v.Block.Func
1153	if !f.Config.Race {
1154		return false
1155	}
1156	if !isSameSym(sym, "runtime.racefuncenter") && !isSameSym(sym, "runtime.racefuncexit") {
1157		return false
1158	}
1159	for _, b := range f.Blocks {
1160		for _, v := range b.Values {
1161			switch v.Op {
1162			case OpStaticCall:
				// The scan for racefuncenter will encounter racefuncexit and vice versa;
				// those are allowed, as are calls to panic*.
1165				s := v.Aux.(fmt.Stringer).String()
1166				switch s {
1167				case "runtime.racefuncenter", "runtime.racefuncexit",
1168					"runtime.panicdivide", "runtime.panicwrap",
1169					"runtime.panicshift":
1170					continue
1171				}
1172				// If we encountered any call, we need to keep racefunc*,
1173				// for accurate stacktraces.
1174				return false
1175			case OpPanicBounds, OpPanicExtend:
1176				// Note: these are panic generators that are ok (like the static calls above).
1177			case OpClosureCall, OpInterCall:
1178				// We must keep the race functions if there are any other call types.
1179				return false
1180			}
1181		}
1182	}
1183	return true
1184}
1185
1186// symIsRO reports whether sym is a read-only global.
1187func symIsRO(sym interface{}) bool {
1188	lsym := sym.(*obj.LSym)
1189	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1190}
1191
1192// read8 reads one byte from the read-only global sym at offset off.
1193func read8(sym interface{}, off int64) uint8 {
1194	lsym := sym.(*obj.LSym)
1195	if off >= int64(len(lsym.P)) || off < 0 {
1196		// Invalid index into the global sym.
1197		// This can happen in dead code, so we don't want to panic.
1198		// Just return any value, it will eventually get ignored.
1199		// See issue 29215.
1200		return 0
1201	}
1202	return lsym.P[off]
1203}
1204
1205// read16 reads two bytes from the read-only global sym at offset off.
1206func read16(sym interface{}, off int64, bigEndian bool) uint16 {
1207	lsym := sym.(*obj.LSym)
1208	if off >= int64(len(lsym.P))-1 || off < 0 {
1209		return 0
1210	}
1211	if bigEndian {
1212		return binary.BigEndian.Uint16(lsym.P[off:])
1213	} else {
1214		return binary.LittleEndian.Uint16(lsym.P[off:])
1215	}
1216}
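
// For example, if lsym.P starts with the bytes 0x01, 0x02, then
// read16(sym, 0, false) == 0x0201 (little-endian) and
// read16(sym, 0, true) == 0x0102 (big-endian).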
1217
1218// read32 reads four bytes from the read-only global sym at offset off.
1219func read32(sym interface{}, off int64, bigEndian bool) uint32 {
1220	lsym := sym.(*obj.LSym)
1221	if off >= int64(len(lsym.P))-3 || off < 0 {
1222		return 0
1223	}
1224	if bigEndian {
1225		return binary.BigEndian.Uint32(lsym.P[off:])
1226	} else {
1227		return binary.LittleEndian.Uint32(lsym.P[off:])
1228	}
1229}
1230
1231// read64 reads eight bytes from the read-only global sym at offset off.
1232func read64(sym interface{}, off int64, bigEndian bool) uint64 {
1233	lsym := sym.(*obj.LSym)
1234	if off >= int64(len(lsym.P))-7 || off < 0 {
1235		return 0
1236	}
1237	if bigEndian {
1238		return binary.BigEndian.Uint64(lsym.P[off:])
1239	} else {
1240		return binary.LittleEndian.Uint64(lsym.P[off:])
1241	}
1242}
1243