// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ppc64

import (
	"cmd/compile/internal/gc"
	"cmd/compile/internal/logopt"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/ppc64"
	"cmd/internal/objabi"
	"math"
	"strings"
)

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	//	flive := b.FlagsLiveAtEnd
	//	if b.Control != nil && b.Control.Type.IsFlags() {
	//		flive = true
	//	}
	//	for i := len(b.Values) - 1; i >= 0; i-- {
	//		v := b.Values[i]
	//		if flive && (v.Op == ssa.OpPPC64MOVDconst) {
	//			// The "mark" is any non-nil Aux value.
	//			v.Aux = v
	//		}
	//		if v.Type.IsFlags() {
	//			flive = false
	//		}
	//		for _, a := range v.Args {
	//			if a.Type.IsFlags() {
	//				flive = true
	//			}
	//		}
	//	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return ppc64.AFMOVS
		case 8:
			return ppc64.AFMOVD
		}
	} else {
		switch t.Size() {
		case 1:
			if t.IsSigned() {
				return ppc64.AMOVB
			} else {
				return ppc64.AMOVBZ
			}
		case 2:
			if t.IsSigned() {
				return ppc64.AMOVH
			} else {
				return ppc64.AMOVHZ
			}
		case 4:
			if t.IsSigned() {
				return ppc64.AMOVW
			} else {
				return ppc64.AMOVWZ
			}
		case 8:
			return ppc64.AMOVD
		}
	}
	panic("bad load type")
}

// storeByType returns the store instruction of the given type.
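// Unlike loads, stores need no sign or zero extension, so only the width matters.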
func storeByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return ppc64.AFMOVS
		case 8:
			return ppc64.AFMOVD
		}
	} else {
		switch t.Size() {
		case 1:
			return ppc64.AMOVB
		case 2:
			return ppc64.AMOVH
		case 4:
			return ppc64.AMOVW
		case 8:
			return ppc64.AMOVD
		}
	}
	panic("bad store type")
}

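// ssaGenValue emits the PPC64 machine instructions for a single SSA value.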
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.OpCopy:
		t := v.Type
		if t.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			rt := obj.TYPE_REG
			op := ppc64.AMOVD

			if t.IsFloat() {
				op = ppc64.AFMOVD
			}
			p := s.Prog(op)
			p.From.Type = rt
			p.From.Reg = x
			p.To.Type = rt
			p.To.Reg = y
		}

	case ssa.OpPPC64LoweredMuluhilo:
		// MULHDU	Rarg1, Rarg0, Reg0
		// MULLD	Rarg1, Rarg0, Reg1
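		// Together these produce the full 128-bit product of arg0*arg1:
		// MULHDU yields the high 64 bits and MULLD the low 64 bits.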
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		p := s.Prog(ppc64.AMULHDU)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
		p1 := s.Prog(ppc64.AMULLD)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.Reg = r0
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = v.Reg1()

	case ssa.OpPPC64LoweredAdd64Carry:
		// ADDC		Rarg2, -1, Rtmp
		// ADDE		Rarg1, Rarg0, Reg0
		// ADDZE	Rzero, Reg1
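		// ADDC with -1 sets the carry bit iff the incoming carry arg2 is nonzero;
		// ADDE then computes arg0+arg1+CA, and ADDZE captures the carry-out in Reg1.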
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		r2 := v.Args[2].Reg()
		p := s.Prog(ppc64.AADDC)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = -1
		p.Reg = r2
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		p1 := s.Prog(ppc64.AADDE)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.Reg = r0
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = v.Reg0()
		p2 := s.Prog(ppc64.AADDZE)
		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = ppc64.REGZERO
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = v.Reg1()

	case ssa.OpPPC64LoweredAtomicAnd8,
		ssa.OpPPC64LoweredAtomicOr8:
		// LWSYNC
		// LBAR		(Rarg0), Rtmp
		// AND/OR	Rarg1, Rtmp
		// STBCCC	Rtmp, (Rarg0)
		// BNE		-3(PC)
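		// The load-reserve/store-conditional pair retries until the byte is
		// updated atomically; LWSYNC provides the required memory barrier.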
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		p := s.Prog(ppc64.ALBAR)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		p1 := s.Prog(v.Op.Asm())
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = ppc64.REGTMP
		p2 := s.Prog(ppc64.ASTBCCC)
		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = ppc64.REGTMP
		p2.To.Type = obj.TYPE_MEM
		p2.To.Reg = r0
		p2.RegTo2 = ppc64.REGTMP
		p3 := s.Prog(ppc64.ABNE)
		p3.To.Type = obj.TYPE_BRANCH
		gc.Patch(p3, p)

	case ssa.OpPPC64LoweredAtomicAdd32,
		ssa.OpPPC64LoweredAtomicAdd64:
		// LWSYNC
		// LDAR/LWAR    (Rarg0), Rout
		// ADD		Rarg1, Rout
		// STDCCC/STWCCC Rout, (Rarg0)
		// BNE         -3(PC)
		// MOVW		Rout,Rout (if Add32)
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// ADD reg1,out
		p1 := s.Prog(ppc64.AADD)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Reg = out
		p1.To.Type = obj.TYPE_REG
		// STDCCC or STWCCC
		p3 := s.Prog(st)
		p3.From.Type = obj.TYPE_REG
		p3.From.Reg = out
		p3.To.Type = obj.TYPE_MEM
		p3.To.Reg = r0
		// BNE retry
		p4 := s.Prog(ppc64.ABNE)
		p4.To.Type = obj.TYPE_BRANCH
		gc.Patch(p4, p)

		// Ensure a 32 bit result
		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
			p5 := s.Prog(ppc64.AMOVWZ)
			p5.To.Type = obj.TYPE_REG
			p5.To.Reg = out
			p5.From.Type = obj.TYPE_REG
			p5.From.Reg = out
		}

	case ssa.OpPPC64LoweredAtomicExchange32,
		ssa.OpPPC64LoweredAtomicExchange64:
		// LWSYNC
		// LDAR/LWAR    (Rarg0), Rout
		// STDCCC/STWCCC Rout, (Rarg0)
		// BNE         -2(PC)
		// ISYNC
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// STDCCC or STWCCC
		p1 := s.Prog(st)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = r0
		// BNE retry
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		gc.Patch(p2, p)
		// ISYNC
		pisync := s.Prog(ppc64.AISYNC)
		pisync.To.Type = obj.TYPE_NONE

	case ssa.OpPPC64LoweredAtomicLoad8,
		ssa.OpPPC64LoweredAtomicLoad32,
		ssa.OpPPC64LoweredAtomicLoad64,
		ssa.OpPPC64LoweredAtomicLoadPtr:
		// SYNC
		// MOVB/MOVD/MOVW (Rarg0), Rout
		// CMP Rout,Rout
		// BNE 1(PC)
		// ISYNC
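		// The CMP/BNE/ISYNC idiom makes later loads depend on Rout, which
		// together with isync keeps them from being satisfied before the
		// load above (acquire ordering).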
		ld := ppc64.AMOVD
		cmp := ppc64.ACMP
		switch v.Op {
		case ssa.OpPPC64LoweredAtomicLoad8:
			ld = ppc64.AMOVBZ
		case ssa.OpPPC64LoweredAtomicLoad32:
			ld = ppc64.AMOVWZ
			cmp = ppc64.ACMPW
		}
		arg0 := v.Args[0].Reg()
		out := v.Reg0()
		// SYNC when AuxInt == 1; otherwise, load-acquire
		if v.AuxInt == 1 {
			psync := s.Prog(ppc64.ASYNC)
			psync.To.Type = obj.TYPE_NONE
		}
		// Load
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = arg0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// CMP
		p1 := s.Prog(cmp)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = out
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = out
		// BNE
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		// ISYNC
		pisync := s.Prog(ppc64.AISYNC)
		pisync.To.Type = obj.TYPE_NONE
		gc.Patch(p2, pisync)

	case ssa.OpPPC64LoweredAtomicStore8,
		ssa.OpPPC64LoweredAtomicStore32,
		ssa.OpPPC64LoweredAtomicStore64:
		// SYNC or LWSYNC
		// MOVB/MOVW/MOVD arg1,(arg0)
		st := ppc64.AMOVD
		switch v.Op {
		case ssa.OpPPC64LoweredAtomicStore8:
			st = ppc64.AMOVB
		case ssa.OpPPC64LoweredAtomicStore32:
			st = ppc64.AMOVW
		}
		arg0 := v.Args[0].Reg()
		arg1 := v.Args[1].Reg()
		// If AuxInt == 0, LWSYNC (Store-Release), else SYNC
		syncOp := ppc64.ASYNC
		if v.AuxInt == 0 {
			syncOp = ppc64.ALWSYNC
		}
		psync := s.Prog(syncOp)
		psync.To.Type = obj.TYPE_NONE
		// Store
		p := s.Prog(st)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = arg0
		p.From.Type = obj.TYPE_REG
		p.From.Reg = arg1

	case ssa.OpPPC64LoweredAtomicCas64,
		ssa.OpPPC64LoweredAtomicCas32:
		// LWSYNC
		// loop:
		// LDAR        (Rarg0), MutexHint, Rtmp
		// CMP         Rarg1, Rtmp
		// BNE         fail
		// STDCCC      Rarg2, (Rarg0)
		// BNE         loop
		// LWSYNC      // Only for sequential consistency; not required in CasRel.
		// MOVD        $1, Rout
		// BR          end
		// fail:
		// MOVD        $0, Rout
		// end:
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		cmp := ppc64.ACMP
		if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
			cmp = ppc64.ACMPW
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		r2 := v.Args[2].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync1 := s.Prog(ppc64.ALWSYNC)
		plwsync1.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		// If it is a Compare-and-Swap-Release operation, set the EH field with
		// the release hint.
		if v.AuxInt == 0 {
			p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
		}
		// CMP reg1,reg2
		p1 := s.Prog(cmp)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Reg = ppc64.REGTMP
		p1.To.Type = obj.TYPE_REG
		// BNE cas_fail
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		// STDCCC or STWCCC
		p3 := s.Prog(st)
		p3.From.Type = obj.TYPE_REG
		p3.From.Reg = r2
		p3.To.Type = obj.TYPE_MEM
		p3.To.Reg = r0
		// BNE retry
		p4 := s.Prog(ppc64.ABNE)
		p4.To.Type = obj.TYPE_BRANCH
		gc.Patch(p4, p)
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
		// If the operation is a CAS-Release, then synchronization is not necessary.
		if v.AuxInt != 0 {
			plwsync2 := s.Prog(ppc64.ALWSYNC)
			plwsync2.To.Type = obj.TYPE_NONE
		}
		// return true
		p5 := s.Prog(ppc64.AMOVD)
		p5.From.Type = obj.TYPE_CONST
		p5.From.Offset = 1
		p5.To.Type = obj.TYPE_REG
		p5.To.Reg = out
		// BR done
		p6 := s.Prog(obj.AJMP)
		p6.To.Type = obj.TYPE_BRANCH
		// return false
		p7 := s.Prog(ppc64.AMOVD)
		p7.From.Type = obj.TYPE_CONST
		p7.From.Offset = 0
		p7.To.Type = obj.TYPE_REG
		p7.To.Reg = out
		gc.Patch(p2, p7)
		// done (label)
		p8 := s.Prog(obj.ANOP)
		gc.Patch(p6, p8)

	case ssa.OpPPC64LoweredGetClosurePtr:
		// Closure pointer is R11 (already)
		gc.CheckLoweredGetClosurePtr(v)

	case ssa.OpPPC64LoweredGetCallerSP:
		// caller's SP is FixedFrameSize below the address of the first arg
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize()
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64LoweredGetCallerPC:
		p := s.Prog(obj.AGETCALLERPC)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
		// input is already rounded

	case ssa.OpLoadReg:
		loadOp := loadByType(v.Type)
		p := s.Prog(loadOp)
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		storeOp := storeByType(v.Type)
		p := s.Prog(storeOp)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)

	case ssa.OpPPC64DIVD:
		// For now,
		//
		// cmp arg1, -1
		// be  ahead
		// v = arg0 / arg1
		// b over
		// ahead: v = - arg0
		// over: nop
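		// The divisor == -1 case is handled as a negation so that the minimum
		// int64 divided by -1 wraps to itself rather than depending on the
		// hardware's overflow behavior.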
		r := v.Reg()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()

		p := s.Prog(ppc64.ACMP)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = -1

		pbahead := s.Prog(ppc64.ABEQ)
		pbahead.To.Type = obj.TYPE_BRANCH

		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

		pbover := s.Prog(obj.AJMP)
		pbover.To.Type = obj.TYPE_BRANCH

		p = s.Prog(ppc64.ANEG)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r0
		gc.Patch(pbahead, p)

		p = s.Prog(obj.ANOP)
		gc.Patch(pbover, p)

	case ssa.OpPPC64DIVW:
		// word-width version of above
		r := v.Reg()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()

		p := s.Prog(ppc64.ACMPW)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = -1

		pbahead := s.Prog(ppc64.ABEQ)
		pbahead.To.Type = obj.TYPE_BRANCH

		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

		pbover := s.Prog(obj.AJMP)
		pbover.To.Type = obj.TYPE_BRANCH

		p = s.Prog(ppc64.ANEG)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r0
		gc.Patch(pbahead, p)

		p = s.Prog(obj.ANOP)
		gc.Patch(pbover, p)

	case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
		ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
		ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
		ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
		ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
		ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
		ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r2
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r2
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP // result is not needed

	case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		r3 := v.Args[2].Reg()
		// r = r1*r2 ± r3
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r3
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64MaskIfNotCarry:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGZERO
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64ADDconstForCarry:
		r1 := v.Args[0].Reg()
		p := s.Prog(v.Op.Asm())
		p.Reg = r1
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.

	case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
		ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
		ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
		ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()

	case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
		ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64ANDCCconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()

		if v.Aux != nil {
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = gc.AuxOffset(v)
		} else {
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
		}

		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP // discard result

	case ssa.OpPPC64MOVDaddr:
		switch v.Aux.(type) {
		default:
			v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
		case nil:
			// If aux offset and aux int are both 0, and the same
			// input and output regs are used, no instruction
			// needs to be generated, since it would just be
			// addi rx, rx, 0.
			if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_ADDR
				p.From.Reg = v.Args[0].Reg()
				p.From.Offset = v.AuxInt
				p.To.Type = obj.TYPE_REG
				p.To.Reg = v.Reg()
			}

		case *obj.LSym, *gc.Node:
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			gc.AddAux(&p.From, v)

		}

	case ssa.OpPPC64MOVDconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()

	case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt

	case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
		// Shift in register to required size
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Reg = v.Reg()
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64MOVDload:

		// MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
		// For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
		// the offset is not known until link time. If the load of a go.string uses relocation for the
		// offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
		// To avoid this problem, the full address of the go.string is computed and loaded into the base register,
		// and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
		// go.string types because other types will have proper alignment.

		gostring := false
		switch n := v.Aux.(type) {
		case *obj.LSym:
			gostring = strings.HasPrefix(n.Name, "go.string.")
		}
		if gostring {
			// Generate full addr of the go.string const
			// including AuxInt
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			gc.AddAux(&p.From, v)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			// Load go.string using 0 offset
			p = s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = v.Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			break
		}
		// Not a go.string, generate a normal load
		fallthrough

	case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
		ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
		ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.From.Index = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGZERO
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
		ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
		ssa.OpPPC64MOVHBRstoreidx:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
		// ISEL, ISELB
		// AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
		// ISEL only accepts 0, 1, 2 condition values but the others can be
		// achieved by swapping operand order.
		// arg0 ? arg1 : arg2 with conditions LT, GT, EQ
		// arg0 ? arg2 : arg1 for conditions GE, LE, NE
		// ISELB is used when a boolean result is needed, returning 0 or 1
		p := s.Prog(ppc64.AISEL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		// For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
		r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
		if v.Op == ssa.OpPPC64ISEL {
			r.Reg = v.Args[1].Reg()
		}
		// AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
		if v.AuxInt > 3 {
			p.Reg = r.Reg
			p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
		} else {
			p.Reg = v.Args[0].Reg()
			p.SetFrom3(r)
		}
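		// The immediate is the CR bit to test: 0=LT, 1=GT, 2=EQ. The mask maps
		// 4,5,6 down to these, with the operands already swapped above.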
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt & 3

	case ssa.OpPPC64LoweredZero:

		// unaligned data doesn't hurt performance
		// for these instructions on power8 or later

		// for sizes >= 64 generate a loop as follows:

		// set up loop counter in CTR, used by BC
		//       XXLXOR VS32,VS32,VS32
		//	 MOVD len/32,REG_TMP
		//	 MOVD REG_TMP,CTR
		//       MOVD $16,REG_TMP
		//	 loop:
		//	 STXVD2X VS32,(R0)(R3)
		//	 STXVD2X VS32,(R31)(R3)
		//	 ADD  $32,R3
		//	 BC   16, 0, loop
		//
		// any remainder is done as described below

		// for sizes < 64 bytes, first clear as many doublewords as possible,
		// then handle the remainder
		//	MOVD R0,(R3)
		//	MOVD R0,8(R3)
		// .... etc.
		//
		// the remainder bytes are cleared using one or more
		// of the following instructions with the appropriate
		// offsets depending which instructions are needed
		//
		//	MOVW R0,n1(R3)	4 bytes
		//	MOVH R0,n2(R3)	2 bytes
		//	MOVB R0,n3(R3)	1 byte
		//
		// 7 bytes: MOVW, MOVH, MOVB
		// 6 bytes: MOVW, MOVH
		// 5 bytes: MOVW, MOVB
		// 3 bytes: MOVH, MOVB

		// each loop iteration does 32 bytes
		ctr := v.AuxInt / 32

		// remainder bytes
		rem := v.AuxInt % 32

		// only generate a loop if there is more
		// than 1 iteration.
		if ctr > 1 {
			// Set up VS32 (V0) to hold 0s
			p := s.Prog(ppc64.AXXLXOR)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32
			p.Reg = ppc64.REG_VS32

			// Set up CTR loop counter
			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// Set up R31 to hold index value 16
			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 16
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			// generate 2 STXVD2Xs to store 16 bytes
			// when this is a loop then the top must be saved
			var top *obj.Prog
			// This is the top of loop
			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Index = ppc64.REGZERO
			// Save the top of loop
			if top == nil {
				top = p
			}

			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Index = ppc64.REGTMP

			// Increment address for the
			// 4 doublewords just zeroed.
			p = s.Prog(ppc64.AADD)
			p.Reg = v.Args[0].Reg()
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Args[0].Reg()

			// Branch back to top of loop
			// based on CTR
			// BC with BO_BCTR generates bdnz
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_BRANCH
			gc.Patch(p, top)
		}

		// when ctr == 1 the loop was not generated but
		// there are at least 32 bytes to clear, so add
		// that to the remainder to generate the code
		// to clear those doublewords
		if ctr == 1 {
			rem += 32
		}

		// clear the remainder starting at offset zero
		offset := int64(0)

		// first clear as many doublewords as possible
		// then clear remaining sizes as available
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVW, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			p := s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64LoweredMove:

		// This will be used when moving more
		// than 8 bytes.  Moves start with
		// as many 8 byte moves as possible, then
		// 4, 2, or 1 byte(s) as remaining.  This will
		// work and be efficient for power8 or later.
		// If there are 64 or more bytes, then a
		// loop is generated to move 32 bytes and
		// update the src and dst addresses on each
		// iteration. When < 64 bytes, the appropriate
		// number of moves are generated based on the
		// size.
		// When moving >= 64 bytes a loop is used
		//	MOVD len/32,REG_TMP
		//	MOVD REG_TMP,CTR
		//	MOVD $16,REG_TMP
		// top:
		//	LXVD2X (R0)(R4),VS32
		//	LXVD2X (R31)(R4),VS33
		//	ADD $32,R4
		//	STXVD2X VS32,(R0)(R3)
		//	STXVD2X VS33,(R31)(R3)
		//	ADD $32,R3
		//	BC 16,0,top
		// Bytes not moved by this loop are moved
		// with a combination of the following instructions,
		// starting with the largest sizes and generating as
		// many as needed, using the appropriate offset value.
		//	MOVD  n(R4),R14
		//	MOVD  R14,n(R3)
		//	MOVW  n1(R4),R14
		//	MOVW  R14,n1(R3)
		//	MOVH  n2(R4),R14
		//	MOVH  R14,n2(R3)
		//	MOVB  n3(R4),R14
		//	MOVB  R14,n3(R3)

		// Each loop iteration moves 32 bytes
		ctr := v.AuxInt / 32

		// Remainder after the loop
		rem := v.AuxInt % 32

		dst_reg := v.Args[0].Reg()
		src_reg := v.Args[1].Reg()

		// The set of registers used here must match the clobbered reg list
		// in PPC64Ops.go.
		offset := int64(0)

		// top of the loop
		var top *obj.Prog
		// Only generate looping code when loop counter is > 1 for >= 64 bytes
		if ctr > 1 {
			// Set up the CTR
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// Use REGTMP as index reg
			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 16
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			// Generate 16 byte loads and stores.
			// Use temp register for index (16)
			// on the second one.
			p = s.Prog(ppc64.ALXVD2X)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = src_reg
			p.From.Index = ppc64.REGZERO
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32

			if top == nil {
				top = p
			}

			p = s.Prog(ppc64.ALXVD2X)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = src_reg
			p.From.Index = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS33

			// increment the src reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = src_reg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = src_reg

			// generate 16 byte stores
			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dst_reg
			p.To.Index = ppc64.REGZERO

			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS33
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dst_reg
			p.To.Index = ppc64.REGTMP

			// increment the dst reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = dst_reg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = dst_reg

			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
			// to loop top.
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_BRANCH
			gc.Patch(p, top)

			// src_reg and dst_reg were incremented in the loop, so
			// later instructions start with offset 0.
			offset = int64(0)
		}

		// No loop was generated for one iteration, so
		// add 32 bytes to the remainder to move those bytes.
		if ctr == 1 {
			rem += 32
		}

		if rem >= 16 {
			// Generate 16 byte loads and stores.
			// Use temp register for index (value 16)
			// on the second one.
			p := s.Prog(ppc64.ALXVD2X)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = src_reg
			p.From.Index = ppc64.REGZERO
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32

			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dst_reg
			p.To.Index = ppc64.REGZERO

			offset = 16
			rem -= 16

			if rem >= 16 {
				// Use REGTMP as index reg
				p = s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = 16
				p.To.Type = obj.TYPE_REG
				p.To.Reg = ppc64.REGTMP

				// Generate 16 byte loads and stores.
				// Use temp register for index (16)
				// on the second one.
				p = s.Prog(ppc64.ALXVD2X)
				p.From.Type = obj.TYPE_MEM
				p.From.Reg = src_reg
				p.From.Index = ppc64.REGTMP
				p.To.Type = obj.TYPE_REG
				p.To.Reg = ppc64.REG_VS32

				p = s.Prog(ppc64.ASTXVD2X)
				p.From.Type = obj.TYPE_REG
				p.From.Reg = ppc64.REG_VS32
				p.To.Type = obj.TYPE_MEM
				p.To.Reg = dst_reg
				p.To.Index = ppc64.REGTMP

				offset = 32
				rem -= 16
			}
		}

		// Generate all the remaining load and store pairs, starting with
		// as many 8 byte moves as possible, then 4, 2, 1.
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVW, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			// Load
			p := s.Prog(op)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_R14
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = src_reg
			p.From.Offset = offset

			// Store
			p = s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R14
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dst_reg
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64CALLstatic:
		s.Call(v)

	case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_LR

		if v.Args[0].Reg() != ppc64.REG_R12 {
			v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
		}

		pp := s.Call(v)
		pp.To.Reg = ppc64.REG_LR

		if gc.Ctxt.Flag_shared {
			// When compiling Go into PIC, the function we just
			// called via pointer might have been implemented in
			// a separate module and so overwritten the TOC
			// pointer in R2; reload it.
			q := s.Prog(ppc64.AMOVD)
			q.From.Type = obj.TYPE_MEM
			q.From.Offset = 24
			q.From.Reg = ppc64.REGSP
			q.To.Type = obj.TYPE_REG
			q.To.Reg = ppc64.REG_R2
		}

	case ssa.OpPPC64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = v.Aux.(*obj.LSym)

	case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(16) // space used in callee args area by assembly stubs

	case ssa.OpPPC64LoweredNilCheck:
		if objabi.GOOS == "aix" {
			// CMP Rarg0, R0
			// BNE 2(PC)
			// STW R0, 0(R0)
			// NOP (so the BNE has somewhere to land)

			// CMP Rarg0, R0
			p := s.Prog(ppc64.ACMP)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = v.Args[0].Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_R0

			// BNE 2(PC)
			p2 := s.Prog(ppc64.ABNE)
			p2.To.Type = obj.TYPE_BRANCH

			// STW R0, 0(R0)
			// Write at 0 is forbidden and will trigger a SIGSEGV
			p = s.Prog(ppc64.AMOVW)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = ppc64.REG_R0

			// NOP (so the BNE has somewhere to land)
			nop := s.Prog(obj.ANOP)
			gc.Patch(p2, nop)

		} else {
			// Issue a load which will fault if arg is nil.
			p := s.Prog(ppc64.AMOVBZ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = v.Args[0].Reg()
			gc.AddAux(&p.From, v)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
		}
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		}
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}

	// These should be resolved by rules and not make it here.
	case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
		ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
		ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
		v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
	case ssa.OpPPC64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpClobber:
		// TODO: implement for clobberdead experiment. Nop is ok for now.
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

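// blockJump maps each conditional block kind to its branch instruction (asm)
// and the inverted branch (invasm) used when the successor order is swapped.
// asmeq and invasmun mark floating-point comparisons that need an extra BEQ
// or BVS branch because EQ and unordered are distinct condition bits.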
var blockJump = [...]struct {
	asm, invasm     obj.As
	asmeq, invasmun bool
}{
	ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
	ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},

	ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
	ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
	ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},

	// TODO: need to work FP comparisons into block jumps
	ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
	ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
	ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
}

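// ssaGenBlock emits the control-flow instructions that end block b.
// next is the block laid out immediately after b, so a fallthrough to
// it needs no jump.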
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockDefer:
		// defer returns in R3:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(ppc64.ACMP)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_R3
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_R0

		p = s.Prog(ppc64.ABNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}

	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
		ssa.BlockPPC64LT, ssa.BlockPPC64GE,
		ssa.BlockPPC64LE, ssa.BlockPPC64GT,
		ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
		ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
			if jmp.invasmun {
				// TODO: The second branch is probably predict-not-taken since it is for FP unordered
				s.Br(ppc64.ABVS, b.Succs[1].Block())
			}
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
			if jmp.asmeq {
				s.Br(ppc64.ABEQ, b.Succs[0].Block())
			}
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				if jmp.asmeq {
					s.Br(ppc64.ABEQ, b.Succs[0].Block())
				}
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				if jmp.invasmun {
					// TODO: The second branch is probably predict-not-taken since it is for FP unordered
					s.Br(ppc64.ABVS, b.Succs[1].Block())
				}
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}