1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package ssa
6
7import (
8	"cmd/compile/internal/logopt"
9	"cmd/compile/internal/types"
10	"cmd/internal/obj"
11	"cmd/internal/obj/s390x"
12	"cmd/internal/objabi"
13	"cmd/internal/src"
14	"encoding/binary"
15	"fmt"
16	"io"
17	"math"
18	"math/bits"
19	"os"
20	"path/filepath"
21)
22
23type deadValueChoice bool
24
25const (
26	leaveDeadValues  deadValueChoice = false
27	removeDeadValues                 = true
28)
29
// applyRewrite repeatedly applies the block rewriter rb and the value rewriter
// rv to f until no further changes are made. deadcode indicates whether the
// rewrite should also try to remove any values that become dead.
31func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
32	// repeat rewrites until we find no more rewrites
33	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
34	pendingLines.clear()
35	debug := f.pass.debug
36	if debug > 1 {
37		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
38	}
39	var iters int
40	var states map[string]bool
41	for {
42		change := false
43		for _, b := range f.Blocks {
44			var b0 *Block
45			if debug > 1 {
46				b0 = new(Block)
47				*b0 = *b
48				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
49			}
50			for i, c := range b.ControlValues() {
51				for c.Op == OpCopy {
52					c = c.Args[0]
53					b.ReplaceControl(i, c)
54				}
55			}
56			if rb(b) {
57				change = true
58				if debug > 1 {
59					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
60				}
61			}
62			for j, v := range b.Values {
63				var v0 *Value
64				if debug > 1 {
65					v0 = new(Value)
66					*v0 = *v
67					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
68				}
69				if v.Uses == 0 && v.removeable() {
70					if v.Op != OpInvalid && deadcode == removeDeadValues {
						// Reset any value that is now unused, so that we decrement
						// the use counts of all of its arguments.
73						// Not quite a deadcode pass, because it does not handle cycles.
74						// But it should help Uses==1 rules to fire.
75						v.reset(OpInvalid)
76						change = true
77					}
78					// No point rewriting values which aren't used.
79					continue
80				}
81
82				vchange := phielimValue(v)
83				if vchange && debug > 1 {
84					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
85				}
86
87				// Eliminate copy inputs.
88				// If any copy input becomes unused, mark it
89				// as invalid and discard its argument. Repeat
90				// recursively on the discarded argument.
91				// This phase helps remove phantom "dead copy" uses
92				// of a value so that a x.Uses==1 rule condition
93				// fires reliably.
94				for i, a := range v.Args {
95					if a.Op != OpCopy {
96						continue
97					}
98					aa := copySource(a)
99					v.SetArg(i, aa)
100					// If a, a copy, has a line boundary indicator, attempt to find a new value
101					// to hold it.  The first candidate is the value that will replace a (aa),
102					// if it shares the same block and line and is eligible.
103					// The second option is v, which has a as an input.  Because aa is earlier in
104					// the data flow, it is the better choice.
105					if a.Pos.IsStmt() == src.PosIsStmt {
106						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
107							aa.Pos = aa.Pos.WithIsStmt()
108						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
109							v.Pos = v.Pos.WithIsStmt()
110						} else {
111							// Record the lost line and look for a new home after all rewrites are complete.
112							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
113							// line to appear in more than one block, but only one block is stored, so if both end
114							// up here, then one will be lost.
115							pendingLines.set(a.Pos, int32(a.Block.ID))
116						}
117						a.Pos = a.Pos.WithNotStmt()
118					}
119					vchange = true
120					for a.Uses == 0 {
121						b := a.Args[0]
122						a.reset(OpInvalid)
123						a = b
124					}
125				}
126				if vchange && debug > 1 {
127					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
128				}
129
130				// apply rewrite function
131				if rv(v) {
132					vchange = true
133					// If value changed to a poor choice for a statement boundary, move the boundary
134					if v.Pos.IsStmt() == src.PosIsStmt {
135						if k := nextGoodStatementIndex(v, j, b); k != j {
136							v.Pos = v.Pos.WithNotStmt()
137							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
138						}
139					}
140				}
141
142				change = change || vchange
143				if vchange && debug > 1 {
144					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
145				}
146			}
147		}
148		if !change {
149			break
150		}
151		iters++
152		if iters > 1000 || debug >= 2 {
153			// We've done a suspiciously large number of rewrites (or we're in debug mode).
154			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
155			// and the maximum value encountered during make.bash is 12.
156			// Start checking for cycles. (This is too expensive to do routinely.)
157			if states == nil {
158				states = make(map[string]bool)
159			}
160			h := f.rewriteHash()
161			if _, ok := states[h]; ok {
162				// We've found a cycle.
163				// To diagnose it, set debug to 2 and start again,
164				// so that we'll print all rules applied until we complete another cycle.
165				// If debug is already >= 2, we've already done that, so it's time to crash.
166				if debug < 2 {
167					debug = 2
168					states = make(map[string]bool)
169				} else {
170					f.Fatalf("rewrite cycle detected")
171				}
172			}
173			states[h] = true
174		}
175	}
176	// remove clobbered values
177	for _, b := range f.Blocks {
178		j := 0
179		for i, v := range b.Values {
180			vl := v.Pos
181			if v.Op == OpInvalid {
182				if v.Pos.IsStmt() == src.PosIsStmt {
183					pendingLines.set(vl, int32(b.ID))
184				}
185				f.freeValue(v)
186				continue
187			}
188			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
189				pendingLines.remove(vl)
190				v.Pos = v.Pos.WithIsStmt()
191			}
192			if i != j {
193				b.Values[j] = v
194			}
195			j++
196		}
197		if pendingLines.get(b.Pos) == int32(b.ID) {
198			b.Pos = b.Pos.WithIsStmt()
199			pendingLines.remove(b.Pos)
200		}
201		b.truncateValues(j)
202	}
203}
204
205// Common functions called from rewriting rules
206
207func is64BitFloat(t *types.Type) bool {
208	return t.Size() == 8 && t.IsFloat()
209}
210
211func is32BitFloat(t *types.Type) bool {
212	return t.Size() == 4 && t.IsFloat()
213}
214
215func is64BitInt(t *types.Type) bool {
216	return t.Size() == 8 && t.IsInteger()
217}
218
219func is32BitInt(t *types.Type) bool {
220	return t.Size() == 4 && t.IsInteger()
221}
222
223func is16BitInt(t *types.Type) bool {
224	return t.Size() == 2 && t.IsInteger()
225}
226
227func is8BitInt(t *types.Type) bool {
228	return t.Size() == 1 && t.IsInteger()
229}
230
231func isPtr(t *types.Type) bool {
232	return t.IsPtrShaped()
233}
234
235func isSigned(t *types.Type) bool {
236	return t.IsSigned()
237}
238
239// mergeSym merges two symbolic offsets. There is no real merging of
// offsets; we just pick the non-nil one.
241func mergeSym(x, y Sym) Sym {
242	if x == nil {
243		return y
244	}
245	if y == nil {
246		return x
247	}
248	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
249}
250
251func canMergeSym(x, y Sym) bool {
252	return x == nil || y == nil
253}
254
255// canMergeLoadClobber reports whether the load can be merged into target without
256// invalidating the schedule.
257// It also checks that the other non-load argument x is something we
258// are ok with clobbering.
259func canMergeLoadClobber(target, load, x *Value) bool {
260	// The register containing x is going to get clobbered.
261	// Don't merge if we still need the value of x.
262	// We don't have liveness information here, but we can
263	// approximate x dying with:
264	//  1) target is x's only use.
265	//  2) target is not in a deeper loop than x.
266	if x.Uses != 1 {
267		return false
268	}
269	loopnest := x.Block.Func.loopnest()
270	loopnest.calculateDepths()
271	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
272		return false
273	}
274	return canMergeLoad(target, load)
275}
276
277// canMergeLoad reports whether the load can be merged into target without
278// invalidating the schedule.
279func canMergeLoad(target, load *Value) bool {
280	if target.Block.ID != load.Block.ID {
281		// If the load is in a different block do not merge it.
282		return false
283	}
284
285	// We can't merge the load into the target if the load
286	// has more than one use.
287	if load.Uses != 1 {
288		return false
289	}
290
291	mem := load.MemoryArg()
292
293	// We need the load's memory arg to still be alive at target. That
294	// can't be the case if one of target's args depends on a memory
295	// state that is a successor of load's memory arg.
296	//
297	// For example, it would be invalid to merge load into target in
298	// the following situation because newmem has killed oldmem
299	// before target is reached:
300	//     load = read ... oldmem
301	//   newmem = write ... oldmem
302	//     arg0 = read ... newmem
303	//   target = add arg0 load
304	//
305	// If the argument comes from a different block then we can exclude
306	// it immediately because it must dominate load (which is in the
307	// same block as target).
308	var args []*Value
309	for _, a := range target.Args {
310		if a != load && a.Block.ID == target.Block.ID {
311			args = append(args, a)
312		}
313	}
314
315	// memPreds contains memory states known to be predecessors of load's
316	// memory state. It is lazily initialized.
317	var memPreds map[*Value]bool
318	for i := 0; len(args) > 0; i++ {
319		const limit = 100
320		if i >= limit {
321			// Give up if we have done a lot of iterations.
322			return false
323		}
324		v := args[len(args)-1]
325		args = args[:len(args)-1]
326		if target.Block.ID != v.Block.ID {
327			// Since target and load are in the same block
328			// we can stop searching when we leave the block.
329			continue
330		}
331		if v.Op == OpPhi {
332			// A Phi implies we have reached the top of the block.
333			// The memory phi, if it exists, is always
334			// the first logical store in the block.
335			continue
336		}
337		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
338			// We could handle this situation however it is likely
339			// to be very rare.
340			return false
341		}
342		if v.Op.SymEffect()&SymAddr != 0 {
343			// This case prevents an operation that calculates the
344			// address of a local variable from being forced to schedule
345			// before its corresponding VarDef.
346			// See issue 28445.
347			//   v1 = LOAD ...
348			//   v2 = VARDEF
349			//   v3 = LEAQ
350			//   v4 = CMPQ v1 v3
351			// We don't want to combine the CMPQ with the load, because
352			// that would force the CMPQ to schedule before the VARDEF, which
353			// in turn requires the LEAQ to schedule before the VARDEF.
354			return false
355		}
356		if v.Type.IsMemory() {
357			if memPreds == nil {
358				// Initialise a map containing memory states
359				// known to be predecessors of load's memory
360				// state.
361				memPreds = make(map[*Value]bool)
362				m := mem
363				const limit = 50
364				for i := 0; i < limit; i++ {
365					if m.Op == OpPhi {
366						// The memory phi, if it exists, is always
367						// the first logical store in the block.
368						break
369					}
370					if m.Block.ID != target.Block.ID {
371						break
372					}
373					if !m.Type.IsMemory() {
374						break
375					}
376					memPreds[m] = true
377					if len(m.Args) == 0 {
378						break
379					}
380					m = m.MemoryArg()
381				}
382			}
383
384			// We can merge if v is a predecessor of mem.
385			//
386			// For example, we can merge load into target in the
387			// following scenario:
388			//      x = read ... v
389			//    mem = write ... v
390			//   load = read ... mem
391			// target = add x load
392			if memPreds[v] {
393				continue
394			}
395			return false
396		}
397		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
398			// If v takes mem as an input then we know mem
399			// is valid at this point.
400			continue
401		}
402		for _, a := range v.Args {
403			if target.Block.ID == a.Block.ID {
404				args = append(args, a)
405			}
406		}
407	}
408
409	return true
410}
411
412// isSameCall reports whether sym is the same as the given named symbol
413func isSameCall(sym interface{}, name string) bool {
414	fn := sym.(*AuxCall).Fn
415	return fn != nil && fn.String() == name
416}
417
// canLoadUnaligned reports whether the architecture supports unaligned load operations.
419func canLoadUnaligned(c *Config) bool {
420	return c.ctxt.Arch.Alignment == 1
421}
422
// nlzX returns the number of leading zeros.
424func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
425func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
426func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
427func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
428
429// ntzX returns the number of trailing zeros.
430func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
431func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
432func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
433func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
434
435func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
436func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
437func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
438func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
439func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
440
441// nto returns the number of trailing ones.
442func nto(x int64) int64 {
443	return int64(ntz64(^x))
444}
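
// For illustration, nto complements the ntzX helpers: nto(x) counts the run of
// 1 bits at the bottom of x by counting the trailing zeros of ^x. For example:
//
//	nto(0b0111) == 3  // ^0b0111 ends in three 0 bits
//	nto(0) == 0       // no trailing ones
//	nto(-1) == 64     // all 64 bits are ones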
445
// logX returns the base-2 logarithm of n.
447// n must be a positive power of 2 (isPowerOfTwoX returns true).
448func log8(n int8) int64 {
449	return int64(bits.Len8(uint8(n))) - 1
450}
451func log16(n int16) int64 {
452	return int64(bits.Len16(uint16(n))) - 1
453}
454func log32(n int32) int64 {
455	return int64(bits.Len32(uint32(n))) - 1
456}
457func log64(n int64) int64 {
458	return int64(bits.Len64(uint64(n))) - 1
459}
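
// For illustration, given the precondition that n is a positive power of two:
//
//	log8(8) == 3         // bits.Len8(8) = 4, minus 1
//	log32(1) == 0
//	log64(1 << 40) == 40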
460
// log2uint32 returns the base-2 logarithm of uint32(n), rounded down,
// with log2(0) = -1.
463func log2uint32(n int64) int64 {
464	return int64(bits.Len32(uint32(n))) - 1
465}
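
// For illustration:
//
//	log2uint32(0) == -1
//	log2uint32(6) == 2   // rounded down from log2(6), which is about 2.58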
466
467// isPowerOfTwo functions report whether n is a power of 2.
468func isPowerOfTwo8(n int8) bool {
469	return n > 0 && n&(n-1) == 0
470}
471func isPowerOfTwo16(n int16) bool {
472	return n > 0 && n&(n-1) == 0
473}
474func isPowerOfTwo32(n int32) bool {
475	return n > 0 && n&(n-1) == 0
476}
477func isPowerOfTwo64(n int64) bool {
478	return n > 0 && n&(n-1) == 0
479}
480
481// isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
482func isUint64PowerOfTwo(in int64) bool {
483	n := uint64(in)
484	return n > 0 && n&(n-1) == 0
485}
486
487// isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
488func isUint32PowerOfTwo(in int64) bool {
489	n := uint64(uint32(in))
490	return n > 0 && n&(n-1) == 0
491}
492
493// is32Bit reports whether n can be represented as a signed 32 bit integer.
494func is32Bit(n int64) bool {
495	return n == int64(int32(n))
496}
497
498// is16Bit reports whether n can be represented as a signed 16 bit integer.
499func is16Bit(n int64) bool {
500	return n == int64(int16(n))
501}
502
503// is8Bit reports whether n can be represented as a signed 8 bit integer.
504func is8Bit(n int64) bool {
505	return n == int64(int8(n))
506}
507
508// isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
509func isU8Bit(n int64) bool {
510	return n == int64(uint8(n))
511}
512
513// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
514func isU12Bit(n int64) bool {
515	return 0 <= n && n < (1<<12)
516}
517
518// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
519func isU16Bit(n int64) bool {
520	return n == int64(uint16(n))
521}
522
523// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
524func isU32Bit(n int64) bool {
525	return n == int64(uint32(n))
526}
527
528// is20Bit reports whether n can be represented as a signed 20 bit integer.
529func is20Bit(n int64) bool {
530	return -(1<<19) <= n && n < (1<<19)
531}
532
533// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
534func b2i(b bool) int64 {
535	if b {
536		return 1
537	}
538	return 0
539}
540
541// b2i32 translates a boolean value to 0 or 1.
542func b2i32(b bool) int32 {
543	if b {
544		return 1
545	}
546	return 0
547}
548
549// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
550// A shift is bounded if it is shifting by less than the width of the shifted value.
551func shiftIsBounded(v *Value) bool {
552	return v.AuxInt != 0
553}
554
555// canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
556// generated code as much as possible.
557func canonLessThan(x, y *Value) bool {
558	if x.Op != y.Op {
559		return x.Op < y.Op
560	}
561	if !x.Pos.SameFileAndLine(y.Pos) {
562		return x.Pos.Before(y.Pos)
563	}
564	return x.ID < y.ID
565}
566
567// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
568// of the mantissa. It will panic if the truncation results in lost information.
569func truncate64Fto32F(f float64) float32 {
570	if !isExactFloat32(f) {
571		panic("truncate64Fto32F: truncation is not exact")
572	}
573	if !math.IsNaN(f) {
574		return float32(f)
575	}
576	// NaN bit patterns aren't necessarily preserved across conversion
577	// instructions so we need to do the conversion manually.
578	b := math.Float64bits(f)
579	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
580	//          | sign                  | exponent   | mantissa       |
581	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
582	return math.Float32frombits(r)
583}
584
585// extend32Fto64F converts a float32 value to a float64 value preserving the bit
586// pattern of the mantissa.
587func extend32Fto64F(f float32) float64 {
588	if !math.IsNaN(float64(f)) {
589		return float64(f)
590	}
591	// NaN bit patterns aren't necessarily preserved across conversion
592	// instructions so we need to do the conversion manually.
593	b := uint64(math.Float32bits(f))
594	//   | sign                  | exponent      | mantissa                    |
595	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
596	return math.Float64frombits(r)
597}
598
599// DivisionNeedsFixUp reports whether the division needs fix-up code.
600func DivisionNeedsFixUp(v *Value) bool {
601	return v.AuxInt == 0
602}
603
604// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
605func auxFrom64F(f float64) int64 {
606	if f != f {
607		panic("can't encode a NaN in AuxInt field")
608	}
609	return int64(math.Float64bits(f))
610}
611
612// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
613func auxFrom32F(f float32) int64 {
614	if f != f {
615		panic("can't encode a NaN in AuxInt field")
616	}
617	return int64(math.Float64bits(extend32Fto64F(f)))
618}
619
620// auxTo32F decodes a float32 from the AuxInt value provided.
621func auxTo32F(i int64) float32 {
622	return truncate64Fto32F(math.Float64frombits(uint64(i)))
623}
624
625// auxTo64F decodes a float64 from the AuxInt value provided.
626func auxTo64F(i int64) float64 {
627	return math.Float64frombits(uint64(i))
628}
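
// For illustration, float32 constants round-trip through AuxInt as the bits of
// the widened float64, so for any non-NaN float32 f:
//
//	auxTo32F(auxFrom32F(f)) == f
//
// The widening is exact because every float32 is representable as a float64.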
629
630func auxIntToBool(i int64) bool {
631	if i == 0 {
632		return false
633	}
634	return true
635}
636func auxIntToInt8(i int64) int8 {
637	return int8(i)
638}
639func auxIntToInt16(i int64) int16 {
640	return int16(i)
641}
642func auxIntToInt32(i int64) int32 {
643	return int32(i)
644}
645func auxIntToInt64(i int64) int64 {
646	return i
647}
648func auxIntToUint8(i int64) uint8 {
649	return uint8(i)
650}
651func auxIntToFloat32(i int64) float32 {
652	return float32(math.Float64frombits(uint64(i)))
653}
654func auxIntToFloat64(i int64) float64 {
655	return math.Float64frombits(uint64(i))
656}
657func auxIntToValAndOff(i int64) ValAndOff {
658	return ValAndOff(i)
659}
660func auxIntToArm64BitField(i int64) arm64BitField {
661	return arm64BitField(i)
662}
663func auxIntToInt128(x int64) int128 {
664	if x != 0 {
665		panic("nonzero int128 not allowed")
666	}
667	return 0
668}
669func auxIntToFlagConstant(x int64) flagConstant {
670	return flagConstant(x)
671}
672
673func auxIntToOp(cc int64) Op {
674	return Op(cc)
675}
676
677func boolToAuxInt(b bool) int64 {
678	if b {
679		return 1
680	}
681	return 0
682}
683func int8ToAuxInt(i int8) int64 {
684	return int64(i)
685}
686func int16ToAuxInt(i int16) int64 {
687	return int64(i)
688}
689func int32ToAuxInt(i int32) int64 {
690	return int64(i)
691}
692func int64ToAuxInt(i int64) int64 {
693	return int64(i)
694}
695func uint8ToAuxInt(i uint8) int64 {
696	return int64(int8(i))
697}
698func float32ToAuxInt(f float32) int64 {
699	return int64(math.Float64bits(float64(f)))
700}
701func float64ToAuxInt(f float64) int64 {
702	return int64(math.Float64bits(f))
703}
704func valAndOffToAuxInt(v ValAndOff) int64 {
705	return int64(v)
706}
707func arm64BitFieldToAuxInt(v arm64BitField) int64 {
708	return int64(v)
709}
710func int128ToAuxInt(x int128) int64 {
711	if x != 0 {
712		panic("nonzero int128 not allowed")
713	}
714	return 0
715}
716func flagConstantToAuxInt(x flagConstant) int64 {
717	return int64(x)
718}
719
720func opToAuxInt(o Op) int64 {
721	return int64(o)
722}
723
724// Aux is an interface to hold miscellaneous data in Blocks and Values.
725type Aux interface {
726	CanBeAnSSAAux()
727}
728
729// stringAux wraps string values for use in Aux.
730type stringAux string
731
732func (stringAux) CanBeAnSSAAux() {}
733
734func auxToString(i Aux) string {
735	return string(i.(stringAux))
736}
737func auxToSym(i Aux) Sym {
738	// TODO: kind of a hack - allows nil interface through
739	s, _ := i.(Sym)
740	return s
741}
742func auxToType(i Aux) *types.Type {
743	return i.(*types.Type)
744}
745func auxToCall(i Aux) *AuxCall {
746	return i.(*AuxCall)
747}
748func auxToS390xCCMask(i Aux) s390x.CCMask {
749	return i.(s390x.CCMask)
750}
751func auxToS390xRotateParams(i Aux) s390x.RotateParams {
752	return i.(s390x.RotateParams)
753}
754
755func StringToAux(s string) Aux {
756	return stringAux(s)
757}
758func symToAux(s Sym) Aux {
759	return s
760}
761func callToAux(s *AuxCall) Aux {
762	return s
763}
764func typeToAux(t *types.Type) Aux {
765	return t
766}
767func s390xCCMaskToAux(c s390x.CCMask) Aux {
768	return c
769}
770func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
771	return r
772}
773
774// uaddOvf reports whether unsigned a+b would overflow.
775func uaddOvf(a, b int64) bool {
776	return uint64(a)+uint64(b) < uint64(a)
777}
778
779// loadLSymOffset simulates reading a word at an offset into a
780// read-only symbol's runtime memory. If it would read a pointer to
781// another symbol, that symbol is returned. Otherwise, it returns nil.
782func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
783	if lsym.Type != objabi.SRODATA {
784		return nil
785	}
786
787	for _, r := range lsym.R {
788		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
789			return r.Sym
790		}
791	}
792
793	return nil
794}
795
// devirtLESym attempts to de-virtualize an InterLECall.
// 'sym' is the symbol for the itab.
798func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
799	n, ok := sym.(*obj.LSym)
800	if !ok {
801		return nil
802	}
803
804	lsym := loadLSymOffset(n, offset)
805	if f := v.Block.Func; f.pass.debug > 0 {
806		if lsym != nil {
807			f.Warnl(v.Pos, "de-virtualizing call")
808		} else {
809			f.Warnl(v.Pos, "couldn't de-virtualize call")
810		}
811	}
812	return lsym
813}
814
815func devirtLECall(v *Value, sym *obj.LSym) *Value {
816	v.Op = OpStaticLECall
817	auxcall := v.Aux.(*AuxCall)
818	auxcall.Fn = sym
819	// Remove first arg
820	v.Args[0].Uses--
821	copy(v.Args[0:], v.Args[1:])
822	v.Args[len(v.Args)-1] = nil // aid GC
823	v.Args = v.Args[:len(v.Args)-1]
824	return v
825}
826
827// isSamePtr reports whether p1 and p2 point to the same address.
828func isSamePtr(p1, p2 *Value) bool {
829	if p1 == p2 {
830		return true
831	}
832	if p1.Op != p2.Op {
833		return false
834	}
835	switch p1.Op {
836	case OpOffPtr:
837		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
838	case OpAddr, OpLocalAddr:
839		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
840		// Checking for value equality only works after [z]cse has run.
841		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
842	case OpAddPtr:
843		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
844	}
845	return false
846}
847
848func isStackPtr(v *Value) bool {
849	for v.Op == OpOffPtr || v.Op == OpAddPtr {
850		v = v.Args[0]
851	}
852	return v.Op == OpSP || v.Op == OpLocalAddr
853}
854
855// disjoint reports whether the memory region specified by [p1:p1+n1)
856// does not overlap with [p2:p2+n2).
857// A return value of false does not imply the regions overlap.
858func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
859	if n1 == 0 || n2 == 0 {
860		return true
861	}
862	if p1 == p2 {
863		return false
864	}
865	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
866		base, offset = ptr, 0
867		for base.Op == OpOffPtr {
868			offset += base.AuxInt
869			base = base.Args[0]
870		}
871		return base, offset
872	}
873	p1, off1 := baseAndOffset(p1)
874	p2, off2 := baseAndOffset(p2)
875	if isSamePtr(p1, p2) {
876		return !overlap(off1, n1, off2, n2)
877	}
878	// p1 and p2 are not the same, so if they are both OpAddrs then
879	// they point to different variables.
880	// If one pointer is on the stack and the other is an argument
881	// then they can't overlap.
882	switch p1.Op {
883	case OpAddr, OpLocalAddr:
884		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
885			return true
886		}
887		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
888	case OpArg, OpArgIntReg:
889		if p2.Op == OpSP || p2.Op == OpLocalAddr {
890			return true
891		}
892	case OpSP:
893		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
894	}
895	return false
896}
897
898// moveSize returns the number of bytes an aligned MOV instruction moves
899func moveSize(align int64, c *Config) int64 {
900	switch {
901	case align%8 == 0 && c.PtrSize == 8:
902		return 8
903	case align%4 == 0:
904		return 4
905	case align%2 == 0:
906		return 2
907	}
908	return 1
909}
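
// For illustration, on a configuration with 8-byte pointers:
//
//	moveSize(8, c) == 8  // 8-byte aligned, so 8-byte moves can be used
//	moveSize(2, c) == 2
//	moveSize(3, c) == 1  // odd alignment falls through to byte moves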
910
911// mergePoint finds a block among a's blocks which dominates b and is itself
912// dominated by all of a's blocks. Returns nil if it can't find one.
913// Might return nil even if one does exist.
914func mergePoint(b *Block, a ...*Value) *Block {
915	// Walk backward from b looking for one of the a's blocks.
916
917	// Max distance
918	d := 100
919
920	for d > 0 {
921		for _, x := range a {
922			if b == x.Block {
923				goto found
924			}
925		}
926		if len(b.Preds) > 1 {
927			// Don't know which way to go back. Abort.
928			return nil
929		}
930		b = b.Preds[0].b
931		d--
932	}
933	return nil // too far away
934found:
	// At this point, b is the block of the first value in a found by walking
	// backwards. If we return anything, r will be it.
937	r := b
938
939	// Keep going, counting the other a's that we find. They must all dominate r.
940	na := 0
941	for d > 0 {
942		for _, x := range a {
943			if b == x.Block {
944				na++
945			}
946		}
947		if na == len(a) {
948			// Found all of a in a backwards walk. We can return r.
949			return r
950		}
951		if len(b.Preds) > 1 {
952			return nil
953		}
954		b = b.Preds[0].b
955		d--
956
957	}
958	return nil // too far away
959}
960
961// clobber invalidates values. Returns true.
962// clobber is used by rewrite rules to:
963//   A) make sure the values are really dead and never used again.
964//   B) decrement use counts of the values' args.
965func clobber(vv ...*Value) bool {
966	for _, v := range vv {
967		v.reset(OpInvalid)
968		// Note: leave v.Block intact.  The Block field is used after clobber.
969	}
970	return true
971}
972
973// clobberIfDead resets v when use count is 1. Returns true.
974// clobberIfDead is used by rewrite rules to decrement
975// use counts of v's args when v is dead and never used.
976func clobberIfDead(v *Value) bool {
977	if v.Uses == 1 {
978		v.reset(OpInvalid)
979	}
980	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
981	return true
982}
983
984// noteRule is an easy way to track if a rule is matched when writing
985// new ones.  Make the rule of interest also conditional on
986//     noteRule("note to self: rule of interest matched")
987// and that message will print when the rule matches.
988func noteRule(s string) bool {
989	fmt.Println(s)
990	return true
991}
992
993// countRule increments Func.ruleMatches[key].
994// If Func.ruleMatches is non-nil at the end
995// of compilation, it will be printed to stdout.
// This is intended to make it easier to find which functions
// contain lots of rule matches when developing new rules.
998func countRule(v *Value, key string) bool {
999	f := v.Block.Func
1000	if f.ruleMatches == nil {
1001		f.ruleMatches = make(map[string]int)
1002	}
1003	f.ruleMatches[key]++
1004	return true
1005}
1006
1007// warnRule generates compiler debug output with string s when
1008// v is not in autogenerated code, cond is true and the rule has fired.
1009func warnRule(cond bool, v *Value, s string) bool {
1010	if pos := v.Pos; pos.Line() > 1 && cond {
1011		v.Block.Func.Warnl(pos, s)
1012	}
1013	return true
1014}
1015
// flagArg extracts x from a pseudo-op like (LessThan x).
1017func flagArg(v *Value) *Value {
1018	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
1019		return nil
1020	}
1021	return v.Args[0]
1022}
1023
1024// arm64Negate finds the complement to an ARM64 condition code,
1025// for example !Equal -> NotEqual or !LessThan -> GreaterEqual
1026//
1027// For floating point, it's more subtle because NaN is unordered. We do
1028// !LessThanF -> NotLessThanF, the latter takes care of NaNs.
1029func arm64Negate(op Op) Op {
1030	switch op {
1031	case OpARM64LessThan:
1032		return OpARM64GreaterEqual
1033	case OpARM64LessThanU:
1034		return OpARM64GreaterEqualU
1035	case OpARM64GreaterThan:
1036		return OpARM64LessEqual
1037	case OpARM64GreaterThanU:
1038		return OpARM64LessEqualU
1039	case OpARM64LessEqual:
1040		return OpARM64GreaterThan
1041	case OpARM64LessEqualU:
1042		return OpARM64GreaterThanU
1043	case OpARM64GreaterEqual:
1044		return OpARM64LessThan
1045	case OpARM64GreaterEqualU:
1046		return OpARM64LessThanU
1047	case OpARM64Equal:
1048		return OpARM64NotEqual
1049	case OpARM64NotEqual:
1050		return OpARM64Equal
1051	case OpARM64LessThanF:
1052		return OpARM64NotLessThanF
1053	case OpARM64NotLessThanF:
1054		return OpARM64LessThanF
1055	case OpARM64LessEqualF:
1056		return OpARM64NotLessEqualF
1057	case OpARM64NotLessEqualF:
1058		return OpARM64LessEqualF
1059	case OpARM64GreaterThanF:
1060		return OpARM64NotGreaterThanF
1061	case OpARM64NotGreaterThanF:
1062		return OpARM64GreaterThanF
1063	case OpARM64GreaterEqualF:
1064		return OpARM64NotGreaterEqualF
1065	case OpARM64NotGreaterEqualF:
1066		return OpARM64GreaterEqualF
1067	default:
1068		panic("unreachable")
1069	}
1070}
1071
1072// arm64Invert evaluates (InvertFlags op), which
1073// is the same as altering the condition codes such
1074// that the same result would be produced if the arguments
1075// to the flag-generating instruction were reversed, e.g.
1076// (InvertFlags (CMP x y)) -> (CMP y x)
1077func arm64Invert(op Op) Op {
1078	switch op {
1079	case OpARM64LessThan:
1080		return OpARM64GreaterThan
1081	case OpARM64LessThanU:
1082		return OpARM64GreaterThanU
1083	case OpARM64GreaterThan:
1084		return OpARM64LessThan
1085	case OpARM64GreaterThanU:
1086		return OpARM64LessThanU
1087	case OpARM64LessEqual:
1088		return OpARM64GreaterEqual
1089	case OpARM64LessEqualU:
1090		return OpARM64GreaterEqualU
1091	case OpARM64GreaterEqual:
1092		return OpARM64LessEqual
1093	case OpARM64GreaterEqualU:
1094		return OpARM64LessEqualU
1095	case OpARM64Equal, OpARM64NotEqual:
1096		return op
1097	case OpARM64LessThanF:
1098		return OpARM64GreaterThanF
1099	case OpARM64GreaterThanF:
1100		return OpARM64LessThanF
1101	case OpARM64LessEqualF:
1102		return OpARM64GreaterEqualF
1103	case OpARM64GreaterEqualF:
1104		return OpARM64LessEqualF
1105	case OpARM64NotLessThanF:
1106		return OpARM64NotGreaterThanF
1107	case OpARM64NotGreaterThanF:
1108		return OpARM64NotLessThanF
1109	case OpARM64NotLessEqualF:
1110		return OpARM64NotGreaterEqualF
1111	case OpARM64NotGreaterEqualF:
1112		return OpARM64NotLessEqualF
1113	default:
1114		panic("unreachable")
1115	}
1116}
1117
// ccARM64Eval evaluates an ARM64 op against a flags value
// that is potentially constant; it returns 1 for true,
// -1 for false, and 0 for not constant.
1121func ccARM64Eval(op Op, flags *Value) int {
1122	fop := flags.Op
1123	if fop == OpARM64InvertFlags {
1124		return -ccARM64Eval(op, flags.Args[0])
1125	}
1126	if fop != OpARM64FlagConstant {
1127		return 0
1128	}
1129	fc := flagConstant(flags.AuxInt)
1130	b2i := func(b bool) int {
1131		if b {
1132			return 1
1133		}
1134		return -1
1135	}
1136	switch op {
1137	case OpARM64Equal:
1138		return b2i(fc.eq())
1139	case OpARM64NotEqual:
1140		return b2i(fc.ne())
1141	case OpARM64LessThan:
1142		return b2i(fc.lt())
1143	case OpARM64LessThanU:
1144		return b2i(fc.ult())
1145	case OpARM64GreaterThan:
1146		return b2i(fc.gt())
1147	case OpARM64GreaterThanU:
1148		return b2i(fc.ugt())
1149	case OpARM64LessEqual:
1150		return b2i(fc.le())
1151	case OpARM64LessEqualU:
1152		return b2i(fc.ule())
1153	case OpARM64GreaterEqual:
1154		return b2i(fc.ge())
1155	case OpARM64GreaterEqualU:
1156		return b2i(fc.uge())
1157	}
1158	return 0
1159}
1160
1161// logRule logs the use of the rule s. This will only be enabled if
// rewrite rules were generated with the -log option; see gen/rulegen.go.
1163func logRule(s string) {
1164	if ruleFile == nil {
1165		// Open a log file to write log to. We open in append
1166		// mode because all.bash runs the compiler lots of times,
1167		// and we want the concatenation of all of those logs.
1168		// This means, of course, that users need to rm the old log
1169		// to get fresh data.
1170		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1171		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1172			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1173		if err != nil {
1174			panic(err)
1175		}
1176		ruleFile = w
1177	}
1178	_, err := fmt.Fprintln(ruleFile, s)
1179	if err != nil {
1180		panic(err)
1181	}
1182}
1183
1184var ruleFile io.Writer
1185
1186func min(x, y int64) int64 {
1187	if x < y {
1188		return x
1189	}
1190	return y
1191}
1192
1193func isConstZero(v *Value) bool {
1194	switch v.Op {
1195	case OpConstNil:
1196		return true
1197	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1198		return v.AuxInt == 0
1199	}
1200	return false
1201}
1202
1203// reciprocalExact64 reports whether 1/c is exactly representable.
1204func reciprocalExact64(c float64) bool {
1205	b := math.Float64bits(c)
1206	man := b & (1<<52 - 1)
1207	if man != 0 {
1208		return false // not a power of 2, denormal, or NaN
1209	}
1210	exp := b >> 52 & (1<<11 - 1)
1211	// exponent bias is 0x3ff.  So taking the reciprocal of a number
1212	// changes the exponent to 0x7fe-exp.
1213	switch exp {
1214	case 0:
1215		return false // ±0
1216	case 0x7ff:
1217		return false // ±inf
1218	case 0x7fe:
1219		return false // exponent is not representable
1220	default:
1221		return true
1222	}
1223}
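
// For illustration: reciprocalExact64(4) is true because 1/4 = 0.25 is exactly
// representable (it is a power of two), while reciprocalExact64(3) is false
// because the mantissa of 3 is nonzero (1/3 has no exact binary representation).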
1224
1225// reciprocalExact32 reports whether 1/c is exactly representable.
1226func reciprocalExact32(c float32) bool {
1227	b := math.Float32bits(c)
1228	man := b & (1<<23 - 1)
1229	if man != 0 {
1230		return false // not a power of 2, denormal, or NaN
1231	}
1232	exp := b >> 23 & (1<<8 - 1)
1233	// exponent bias is 0x7f.  So taking the reciprocal of a number
1234	// changes the exponent to 0xfe-exp.
1235	switch exp {
1236	case 0:
1237		return false // ±0
1238	case 0xff:
1239		return false // ±inf
1240	case 0xfe:
1241		return false // exponent is not representable
1242	default:
1243		return true
1244	}
1245}
1246
// isARMImmRot reports whether the immediate can be encoded directly into an ARM instruction.
1248func isARMImmRot(v uint32) bool {
1249	for i := 0; i < 16; i++ {
1250		if v&^0xff == 0 {
1251			return true
1252		}
1253		v = v<<2 | v>>30
1254	}
1255
1256	return false
1257}
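
// For illustration: isARMImmRot(0xff000000) is true, since rotating the value
// left by 2 four times brings it into the low 8 bits, while isARMImmRot(0x101)
// is false because bits 0 and 8 can never fit together in an 8-bit window
// under any even rotation.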
1258
// overlap reports whether the ranges specified by the given offset and
// size pairs overlap.
1261func overlap(offset1, size1, offset2, size2 int64) bool {
1262	if offset1 >= offset2 && offset2+size2 > offset1 {
1263		return true
1264	}
1265	if offset2 >= offset1 && offset1+size1 > offset2 {
1266		return true
1267	}
1268	return false
1269}
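
// For illustration: overlap(0, 8, 4, 4) is true because [0,8) and [4,8) share
// bytes 4 through 7, while overlap(0, 4, 4, 4) is false because [0,4) and
// [4,8) only meet at offset 4.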
1270
1271func areAdjacentOffsets(off1, off2, size int64) bool {
1272	return off1+size == off2 || off1 == off2+size
1273}
1274
// zeroUpper32Bits reports whether the value zeroes out the upper 32 bits of a 64-bit register.
// depth limits the recursion depth. In AMD64.rules, 3 is used as the limit
// because it catches the same number of cases as 4.
1278func zeroUpper32Bits(x *Value, depth int) bool {
1279	switch x.Op {
1280	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1281		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1282		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1283		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1284		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1285		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1286		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1287		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1288		OpAMD64SHLL, OpAMD64SHLLconst:
1289		return true
1290	case OpArg:
1291		return x.Type.Size() == 4
1292	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit the recursion depth.
1295		if depth <= 0 {
1296			return false
1297		}
1298		for i := range x.Args {
1299			if !zeroUpper32Bits(x.Args[i], depth-1) {
1300				return false
1301			}
1302		}
1303		return true
1304
1305	}
1306	return false
1307}
1308
1309// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits
1310func zeroUpper48Bits(x *Value, depth int) bool {
1311	switch x.Op {
1312	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1313		return true
1314	case OpArg:
1315		return x.Type.Size() == 2
1316	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit the recursion depth.
1319		if depth <= 0 {
1320			return false
1321		}
1322		for i := range x.Args {
1323			if !zeroUpper48Bits(x.Args[i], depth-1) {
1324				return false
1325			}
1326		}
1327		return true
1328
1329	}
1330	return false
1331}
1332
1333// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits
1334func zeroUpper56Bits(x *Value, depth int) bool {
1335	switch x.Op {
1336	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1337		return true
1338	case OpArg:
1339		return x.Type.Size() == 1
1340	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit the recursion depth.
1343		if depth <= 0 {
1344			return false
1345		}
1346		for i := range x.Args {
1347			if !zeroUpper56Bits(x.Args[i], depth-1) {
1348				return false
1349			}
1350		}
1351		return true
1352
1353	}
1354	return false
1355}
1356
1357// isInlinableMemmove reports whether the given arch performs a Move of the given size
1358// faster than memmove. It will only return true if replacing the memmove with a Move is
1359// safe, either because Move is small or because the arguments are disjoint.
1360// This is used as a check for replacing memmove with Move ops.
1361func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1362	// It is always safe to convert memmove into Move when its arguments are disjoint.
1363	// Move ops may or may not be faster for large sizes depending on how the platform
1364	// lowers them, so we only perform this optimization on platforms that we know to
1365	// have fast Move ops.
1366	switch c.arch {
1367	case "amd64":
1368		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1369	case "386", "arm64":
1370		return sz <= 8
1371	case "s390x", "ppc64", "ppc64le":
1372		return sz <= 8 || disjoint(dst, sz, src, sz)
1373	case "arm", "mips", "mips64", "mipsle", "mips64le":
1374		return sz <= 4
1375	}
1376	return false
1377}
1378
1379// logLargeCopy logs the occurrence of a large copy.
1380// The best place to do this is in the rewrite rules where the size of the move is easy to find.
1381// "Large" is arbitrarily chosen to be 128 bytes; this may change.
1382func logLargeCopy(v *Value, s int64) bool {
1383	if s < 128 {
1384		return true
1385	}
1386	if logopt.Enabled() {
1387		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1388	}
1389	return true
1390}
1391
1392// hasSmallRotate reports whether the architecture has rotate instructions
1393// for sizes < 32-bit.  This is used to decide whether to promote some rotations.
1394func hasSmallRotate(c *Config) bool {
1395	switch c.arch {
1396	case "amd64", "386":
1397		return true
1398	default:
1399		return false
1400	}
1401}
1402
1403func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1404	if sh < 0 || sh >= sz {
1405		panic("PPC64 shift arg sh out of range")
1406	}
1407	if mb < 0 || mb >= sz {
1408		panic("PPC64 shift arg mb out of range")
1409	}
1410	if me < 0 || me >= sz {
1411		panic("PPC64 shift arg me out of range")
1412	}
1413	return int32(sh<<16 | mb<<8 | me)
1414}
1415
1416func GetPPC64Shiftsh(auxint int64) int64 {
1417	return int64(int8(auxint >> 16))
1418}
1419
1420func GetPPC64Shiftmb(auxint int64) int64 {
1421	return int64(int8(auxint >> 8))
1422}
1423
1424func GetPPC64Shiftme(auxint int64) int64 {
1425	return int64(int8(auxint))
1426}
1427
// isPPC64WordRotateMask reports whether this value can be encoded as a mask
// for an rlwinm-like operation. Masks can also extend from the msb and wrap to
// the lsb. That is, the valid masks are 32-bit strings
// of the form 0..01..10..0, 1..10..01..1, or 1...1.
1432func isPPC64WordRotateMask(v64 int64) bool {
1433	// Isolate rightmost 1 (if none 0) and add.
1434	v := uint32(v64)
1435	vp := (v & -v) + v
1436	// Likewise, for the wrapping case.
1437	vn := ^v
1438	vpn := (vn & -vn) + vn
1439	return (v&vp == 0 || vn&vpn == 0) && v != 0
1440}
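
// A worked example: for v = 0x00fff000, v & -v isolates the lowest set bit
// (0x00001000); adding it to v gives 0x01000000, which shares no bits with v,
// so the mask is one contiguous run of ones and the function returns true.
// For v = 0x00f00f00 both v and ^v fail this test, so the function returns false.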
1441
// encodePPC64RotateMask compresses the mask and shift into a single value of
// the form me | mb<<8 | rotate<<16 | nbits<<24, where me and mb can
// be used to regenerate the input mask.
1445func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
1446	var mb, me, mbn, men int
1447
1448	// Determine boundaries and then decode them
1449	if mask == 0 || ^mask == 0 || rotate >= nbits {
1450		panic("Invalid PPC64 rotate mask")
1451	} else if nbits == 32 {
1452		mb = bits.LeadingZeros32(uint32(mask))
1453		me = 32 - bits.TrailingZeros32(uint32(mask))
1454		mbn = bits.LeadingZeros32(^uint32(mask))
1455		men = 32 - bits.TrailingZeros32(^uint32(mask))
1456	} else {
1457		mb = bits.LeadingZeros64(uint64(mask))
1458		me = 64 - bits.TrailingZeros64(uint64(mask))
1459		mbn = bits.LeadingZeros64(^uint64(mask))
1460		men = 64 - bits.TrailingZeros64(^uint64(mask))
1461	}
	// Check for a wrapping mask (e.g. bits set at 0 and 63).
1463	if mb == 0 && me == int(nbits) {
1464		// swap the inverted values
1465		mb, me = men, mbn
1466	}
1467
1468	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
1469}
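
// A worked example: encodePPC64RotateMask(3, 0x00ffff00, 32) computes
// mb = 8 (the mask's leading zeros) and me = 24 (32 minus its trailing zeros),
// and packs the result as 24 | 8<<8 | 3<<16 | 32<<24.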
1470
// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
1473func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
1474	auxint := uint64(sauxint)
1475	rotate = int64((auxint >> 16) & 0xFF)
1476	mb = int64((auxint >> 8) & 0xFF)
1477	me = int64((auxint >> 0) & 0xFF)
1478	nbits := int64((auxint >> 24) & 0xFF)
1479	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
1480	if mb > me {
1481		mask = ^mask
1482	}
1483	if nbits == 32 {
1484		mask = uint64(uint32(mask))
1485	}
1486
1487	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
1488	// is inclusive.
1489	me = (me - 1) & (nbits - 1)
1490	return
1491}
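
// Continuing the example above, DecodePPC64RotateMask of that encoding returns
// rotate = 3, mb = 8, me = 23 (the fixup makes me inclusive), and
// mask = 0x00ffff00, recovering the original mask.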
1492
// isPPC64ValidShiftMask reports whether the mask is a set of
// consecutive bits including the least
// significant bit.
1496func isPPC64ValidShiftMask(v int64) bool {
	return v != 0 && ((v+1)&v) == 0
1501}
1502
1503func getPPC64ShiftMaskLength(v int64) int64 {
1504	return int64(bits.Len64(uint64(v)))
1505}
1506
// mergePPC64RShiftMask decomposes a shift right into an equivalent rotate/mask,
// and returns mask & m.
1509func mergePPC64RShiftMask(m, s, nbits int64) int64 {
1510	smask := uint64((1<<uint(nbits))-1) >> uint(s)
1511	return m & int64(smask)
1512}
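
// For illustration: mergePPC64RShiftMask(0xFF000000, 4, 32) returns 0x0F000000,
// since after a 32-bit logical right shift by 4 the top 4 bits are always
// zero, so they can be dropped from the AND mask.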
1513
// mergePPC64AndSrwi combines (ANDconst [m] (SRWconst [s])) into (RLWINM [y]),
// or returns 0 if they cannot be combined.
1515func mergePPC64AndSrwi(m, s int64) int64 {
1516	mask := mergePPC64RShiftMask(m, s, 32)
1517	if !isPPC64WordRotateMask(mask) {
1518		return 0
1519	}
1520	return encodePPC64RotateMask((32-s)&31, mask, 32)
1521}
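
// For illustration: mergePPC64AndSrwi(0xFF, 8) is non-zero because the mask
// 0xFF survives the shift intact and is a contiguous run of ones, so the pair
// becomes a single RLWINM with rotate (32-8)&31 = 24 and mask 0xFF. By
// contrast, mergePPC64AndSrwi(0xF0F, 4) returns 0 because 0xF0F is not a
// valid word rotate mask.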
1522
// mergePPC64ClrlsldiSrw tests whether a shift right feeding into a CLRLSLDI can be
// merged into RLWINM. It returns the encoded RLWINM constant, or 0 if they cannot be merged.
1525func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
1526	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
	// For CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
1528	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1529
1530	// Rewrite mask to apply after the final left shift.
1531	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1532
1533	r_1 := 32 - srw
1534	r_2 := GetPPC64Shiftsh(sld)
1535	r_3 := (r_1 + r_2) & 31 // This can wrap.
1536
1537	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1538		return 0
1539	}
1540	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1541}
1542
1543// Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
1544// the encoded RLWINM constant, or 0 if they cannot be merged.
1545func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
1546	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
	// For CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
1548	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1549
1550	// combine the masks, and adjust for the final left shift.
1551	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1552	r_2 := GetPPC64Shiftsh(int64(sld))
1553	r_3 := (r_1 + r_2) & 31 // This can wrap.
1554
1555	// Verify the result is still a valid bitmask of <= 32 bits.
1556	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1557		return 0
1558	}
1559	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1560}
1561
1562// Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1563// or return 0 if they cannot be combined.
1564func mergePPC64SldiSrw(sld, srw int64) int64 {
1565	if sld > srw || srw >= 32 {
1566		return 0
1567	}
1568	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1569	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1570	mask := (mask_r & mask_l) << uint(sld)
1571	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
1572}
1573
// rotateLeft32 rotates a 32-bit constant value left by another constant.
1575func rotateLeft32(v, rotate int64) int64 {
1576	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
1577}
1578
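// rotateRight64 rotates a 64-bit constant value right by another constant.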
1579func rotateRight64(v, rotate int64) int64 {
1580	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
1581}
1582
// armBFAuxInt encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
1584func armBFAuxInt(lsb, width int64) arm64BitField {
1585	if lsb < 0 || lsb > 63 {
1586		panic("ARM(64) bit field lsb constant out of range")
1587	}
1588	if width < 1 || lsb+width > 64 {
1589		panic("ARM(64) bit field width constant out of range")
1590	}
1591	return arm64BitField(width | lsb<<8)
1592}
1593
// getARM64BFlsb returns the lsb part of the auxInt field of arm64 bitfield ops.
1595func (bfc arm64BitField) getARM64BFlsb() int64 {
1596	return int64(uint64(bfc) >> 8)
1597}
1598
// getARM64BFwidth returns the width part of the auxInt field of arm64 bitfield ops.
1600func (bfc arm64BitField) getARM64BFwidth() int64 {
1601	return int64(bfc) & 0xff
1602}
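
// For illustration: armBFAuxInt(8, 16) encodes a bitfield starting at bit 8
// and 16 bits wide as 16 | 8<<8 = 0x0810; getARM64BFlsb and getARM64BFwidth
// recover 8 and 16 from that encoding.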
1603
// isARM64BFMask reports whether mask >> rshift, applied at lsb, is a valid arm64 bitfield op mask.
1605func isARM64BFMask(lsb, mask, rshift int64) bool {
1606	shiftedMask := int64(uint64(mask) >> uint64(rshift))
1607	return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1608}
1609
// arm64BFWidth returns the bitfield width of mask >> rshift for arm64 bitfield ops.
1611func arm64BFWidth(mask, rshift int64) int64 {
1612	shiftedMask := int64(uint64(mask) >> uint64(rshift))
1613	if shiftedMask == 0 {
1614		panic("ARM64 BF mask is zero")
1615	}
1616	return nto(shiftedMask)
1617}
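
// For illustration, with mask = 0xFF0 and rshift = 4 the shifted mask is 0xFF,
// which is one less than a power of two, so isARM64BFMask(lsb, 0xFF0, 4) holds
// for any lsb < 56, and arm64BFWidth(0xFF0, 4) == 8.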
1618
1619// sizeof returns the size of t in bytes.
1620// It will panic if t is not a *types.Type.
1621func sizeof(t interface{}) int64 {
1622	return t.(*types.Type).Size()
1623}
1624
1625// registerizable reports whether t is a primitive type that fits in
1626// a register. It assumes float64 values will always fit into registers
1627// even if that isn't strictly true.
1628func registerizable(b *Block, typ *types.Type) bool {
1629	if typ.IsPtrShaped() || typ.IsFloat() {
1630		return true
1631	}
1632	if typ.IsInteger() {
1633		return typ.Size() <= b.Func.Config.RegSize
1634	}
1635	return false
1636}
1637
1638// needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
1639func needRaceCleanup(sym *AuxCall, v *Value) bool {
1640	f := v.Block.Func
1641	if !f.Config.Race {
1642		return false
1643	}
1644	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
1645		return false
1646	}
1647	for _, b := range f.Blocks {
1648		for _, v := range b.Values {
1649			switch v.Op {
1650			case OpStaticCall, OpStaticLECall:
				// A check for racefuncenter will encounter racefuncexit and vice versa.
				// Allow calls to panic*.
1653				s := v.Aux.(*AuxCall).Fn.String()
1654				switch s {
1655				case "runtime.racefuncenter", "runtime.racefuncexit",
1656					"runtime.panicdivide", "runtime.panicwrap",
1657					"runtime.panicshift":
1658					continue
1659				}
1660				// If we encountered any call, we need to keep racefunc*,
1661				// for accurate stacktraces.
1662				return false
1663			case OpPanicBounds, OpPanicExtend:
1664				// Note: these are panic generators that are ok (like the static calls above).
1665			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
1666				// We must keep the race functions if there are any other call types.
1667				return false
1668			}
1669		}
1670	}
1671	if isSameCall(sym, "runtime.racefuncenter") {
1672		// TODO REGISTER ABI this needs to be cleaned up.
1673		// If we're removing racefuncenter, remove its argument as well.
1674		if v.Args[0].Op != OpStore {
1675			if v.Op == OpStaticLECall {
1676				// there is no store, yet.
1677				return true
1678			}
1679			return false
1680		}
1681		mem := v.Args[0].Args[2]
1682		v.Args[0].reset(OpCopy)
1683		v.Args[0].AddArg(mem)
1684	}
1685	return true
1686}
1687
1688// symIsRO reports whether sym is a read-only global.
1689func symIsRO(sym interface{}) bool {
1690	lsym := sym.(*obj.LSym)
1691	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1692}
1693
1694// symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1695func symIsROZero(sym Sym) bool {
1696	lsym := sym.(*obj.LSym)
1697	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1698		return false
1699	}
1700	for _, b := range lsym.P {
1701		if b != 0 {
1702			return false
1703		}
1704	}
1705	return true
1706}
1707
1708// read8 reads one byte from the read-only global sym at offset off.
1709func read8(sym interface{}, off int64) uint8 {
1710	lsym := sym.(*obj.LSym)
1711	if off >= int64(len(lsym.P)) || off < 0 {
1712		// Invalid index into the global sym.
1713		// This can happen in dead code, so we don't want to panic.
1714		// Just return any value, it will eventually get ignored.
1715		// See issue 29215.
1716		return 0
1717	}
1718	return lsym.P[off]
1719}
1720
1721// read16 reads two bytes from the read-only global sym at offset off.
1722func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
1723	lsym := sym.(*obj.LSym)
1724	// lsym.P is written lazily.
1725	// Bytes requested after the end of lsym.P are 0.
1726	var src []byte
1727	if 0 <= off && off < int64(len(lsym.P)) {
1728		src = lsym.P[off:]
1729	}
1730	buf := make([]byte, 2)
1731	copy(buf, src)
1732	return byteorder.Uint16(buf)
1733}
1734
1735// read32 reads four bytes from the read-only global sym at offset off.
1736func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
1737	lsym := sym.(*obj.LSym)
1738	var src []byte
1739	if 0 <= off && off < int64(len(lsym.P)) {
1740		src = lsym.P[off:]
1741	}
1742	buf := make([]byte, 4)
1743	copy(buf, src)
1744	return byteorder.Uint32(buf)
1745}
1746
1747// read64 reads eight bytes from the read-only global sym at offset off.
1748func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
1749	lsym := sym.(*obj.LSym)
1750	var src []byte
1751	if 0 <= off && off < int64(len(lsym.P)) {
1752		src = lsym.P[off:]
1753	}
1754	buf := make([]byte, 8)
1755	copy(buf, src)
1756	return byteorder.Uint64(buf)
1757}
1758
// sequentialAddresses reports whether it can prove that x + n == y.
1760func sequentialAddresses(x, y *Value, n int64) bool {
1761	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
1762		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1763			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1764		return true
1765	}
1766	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1767		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1768			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1769		return true
1770	}
1771	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
1772		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1773			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1774		return true
1775	}
1776	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
1777		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
1778			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
1779		return true
1780	}
1781	return false
1782}
1783
1784// flagConstant represents the result of a compile-time comparison.
1785// The sense of these flags does not necessarily represent the hardware's notion
1786// of a flags register - these are just a compile-time construct.
1787// We happen to match the semantics to those of arm/arm64.
1788// Note that these semantics differ from x86: the carry flag has the opposite
1789// sense on a subtraction!
1790//   On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
1791//   On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
1792//    (because it does x + ^y + C).
1793// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
1794type flagConstant uint8
1795
1796// N reports whether the result of an operation is negative (high bit set).
1797func (fc flagConstant) N() bool {
1798	return fc&1 != 0
1799}
1800
1801// Z reports whether the result of an operation is 0.
1802func (fc flagConstant) Z() bool {
1803	return fc&2 != 0
1804}
1805
1806// C reports whether an unsigned add overflowed (carry), or an
1807// unsigned subtract did not underflow (borrow).
1808func (fc flagConstant) C() bool {
1809	return fc&4 != 0
1810}
1811
1812// V reports whether a signed operation overflowed or underflowed.
1813func (fc flagConstant) V() bool {
1814	return fc&8 != 0
1815}
1816
1817func (fc flagConstant) eq() bool {
1818	return fc.Z()
1819}
1820func (fc flagConstant) ne() bool {
1821	return !fc.Z()
1822}
1823func (fc flagConstant) lt() bool {
1824	return fc.N() != fc.V()
1825}
1826func (fc flagConstant) le() bool {
1827	return fc.Z() || fc.lt()
1828}
1829func (fc flagConstant) gt() bool {
1830	return !fc.Z() && fc.ge()
1831}
1832func (fc flagConstant) ge() bool {
1833	return fc.N() == fc.V()
1834}
1835func (fc flagConstant) ult() bool {
1836	return !fc.C()
1837}
1838func (fc flagConstant) ule() bool {
1839	return fc.Z() || fc.ult()
1840}
1841func (fc flagConstant) ugt() bool {
1842	return !fc.Z() && fc.uge()
1843}
1844func (fc flagConstant) uge() bool {
1845	return fc.C()
1846}
1847
1848func (fc flagConstant) ltNoov() bool {
1849	return fc.lt() && !fc.V()
1850}
1851func (fc flagConstant) leNoov() bool {
1852	return fc.le() && !fc.V()
1853}
1854func (fc flagConstant) gtNoov() bool {
1855	return fc.gt() && !fc.V()
1856}
1857func (fc flagConstant) geNoov() bool {
1858	return fc.ge() && !fc.V()
1859}
1860
1861func (fc flagConstant) String() string {
1862	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
1863}
1864
1865type flagConstantBuilder struct {
1866	N bool
1867	Z bool
1868	C bool
1869	V bool
1870}
1871
1872func (fcs flagConstantBuilder) encode() flagConstant {
1873	var fc flagConstant
1874	if fcs.N {
1875		fc |= 1
1876	}
1877	if fcs.Z {
1878		fc |= 2
1879	}
1880	if fcs.C {
1881		fc |= 4
1882	}
1883	if fcs.V {
1884		fc |= 8
1885	}
1886	return fc
1887}
1888
1889// Note: addFlags(x,y) != subFlags(x,-y) in some situations:
1890//  - the results of the C flag are different
1891//  - the results of the V flag when y==minint are different
1892
1893// addFlags64 returns the flags that would be set from computing x+y.
1894func addFlags64(x, y int64) flagConstant {
1895	var fcb flagConstantBuilder
1896	fcb.Z = x+y == 0
1897	fcb.N = x+y < 0
1898	fcb.C = uint64(x+y) < uint64(x)
1899	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1900	return fcb.encode()
1901}
1902
1903// subFlags64 returns the flags that would be set from computing x-y.
1904func subFlags64(x, y int64) flagConstant {
1905	var fcb flagConstantBuilder
1906	fcb.Z = x-y == 0
1907	fcb.N = x-y < 0
1908	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
1909	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1910	return fcb.encode()
1911}
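
// For illustration: subFlags64(1, 1) sets Z (zero result) and C (no borrow
// under the arm model) but not N or V, so eq(), ge(), uge(), and ule() all
// hold for that constant; subFlags64(0, 1) sets N and clears C, so lt() and
// ult() hold instead.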
1912
1913// addFlags32 returns the flags that would be set from computing x+y.
1914func addFlags32(x, y int32) flagConstant {
1915	var fcb flagConstantBuilder
1916	fcb.Z = x+y == 0
1917	fcb.N = x+y < 0
1918	fcb.C = uint32(x+y) < uint32(x)
1919	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
1920	return fcb.encode()
1921}
1922
1923// subFlags32 returns the flags that would be set from computing x-y.
1924func subFlags32(x, y int32) flagConstant {
1925	var fcb flagConstantBuilder
1926	fcb.Z = x-y == 0
1927	fcb.N = x-y < 0
1928	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
1929	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
1930	return fcb.encode()
1931}
1932
1933// logicFlags64 returns flags set to the sign/zeroness of x.
1934// C and V are set to false.
1935func logicFlags64(x int64) flagConstant {
1936	var fcb flagConstantBuilder
1937	fcb.Z = x == 0
1938	fcb.N = x < 0
1939	return fcb.encode()
1940}
1941
1942// logicFlags32 returns flags set to the sign/zeroness of x.
1943// C and V are set to false.
1944func logicFlags32(x int32) flagConstant {
1945	var fcb flagConstantBuilder
1946	fcb.Z = x == 0
1947	fcb.N = x < 0
1948	return fcb.encode()
1949}
1950