1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build ignore
6
7// mkpreempt generates the asyncPreempt functions for each
8// architecture.
9package main
10
11import (
12	"flag"
13	"fmt"
14	"io"
15	"log"
16	"os"
17	"strings"
18)
19
20// Copied from cmd/compile/internal/ssa/gen/*Ops.go
21
// regNames386 lists the 386 register names: the eight integer
// registers (including SP and BP) followed by X0 through X7.
var regNames386 = []string{
	// Integer registers.
	"AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI",
	// X registers.
	"X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7",
}
40
// regNamesAMD64 lists the amd64 register names: the sixteen integer
// registers (including SP and BP) followed by X0 through X15.
var regNamesAMD64 = []string{
	// Integer registers.
	"AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI",
	"R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
	// X registers.
	"X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7",
	"X8", "X9", "X10", "X11", "X12", "X13", "X14", "X15",
}
75
76var out io.Writer
77
78var arches = map[string]func(){
79	"386":     gen386,
80	"amd64":   genAMD64,
81	"arm":     genARM,
82	"arm64":   genARM64,
83	"mips64x": func() { genMIPS(true) },
84	"mipsx":   func() { genMIPS(false) },
85	"ppc64x":  genPPC64,
86	"riscv64": genRISCV64,
87	"s390x":   genS390X,
88	"wasm":    genWasm,
89}
90var beLe = map[string]bool{"mips64x": true, "mipsx": true, "ppc64x": true}
91
92func main() {
93	flag.Parse()
94	if flag.NArg() > 0 {
95		out = os.Stdout
96		for _, arch := range flag.Args() {
97			gen, ok := arches[arch]
98			if !ok {
99				log.Fatalf("unknown arch %s", arch)
100			}
101			header(arch)
102			gen()
103		}
104		return
105	}
106
107	for arch, gen := range arches {
108		f, err := os.Create(fmt.Sprintf("preempt_%s.s", arch))
109		if err != nil {
110			log.Fatal(err)
111		}
112		out = f
113		header(arch)
114		gen()
115		if err := f.Close(); err != nil {
116			log.Fatal(err)
117		}
118	}
119}
120
121func header(arch string) {
122	fmt.Fprintf(out, "// Code generated by mkpreempt.go; DO NOT EDIT.\n\n")
123	if beLe[arch] {
124		base := arch[:len(arch)-1]
125		fmt.Fprintf(out, "// +build %s %sle\n\n", base, base)
126	}
127	fmt.Fprintf(out, "#include \"go_asm.h\"\n")
128	fmt.Fprintf(out, "#include \"textflag.h\"\n\n")
129	fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n")
130}
131
132func p(f string, args ...interface{}) {
133	fmted := fmt.Sprintf(f, args...)
134	fmt.Fprintf(out, "\t%s\n", strings.Replace(fmted, "\n", "\n\t", -1))
135}
136
137func label(l string) {
138	fmt.Fprintf(out, "%s\n", l)
139}
140
// A layout assigns consecutive stack slots to a list of registers.
type layout struct {
	// sp is the stack pointer register used to address the slots.
	sp string
	// stack is the offset that the next added register will get.
	stack int
	// regs holds the registers in the order they were added.
	regs []regPos
}

// A regPos describes one register and the stack offset assigned to it.
type regPos struct {
	// pos is the register's offset from the stack pointer.
	pos int

	// op is the move instruction and reg the register name used
	// when no special save/restore sequence is given.
	op, reg string

	// If this register requires special save and restore, these
	// give those operations with a %d placeholder for the stack
	// offset.
	save    string
	restore string
}

// add assigns the next slot to reg, to be moved with op, and advances
// the layout by size bytes.
func (l *layout) add(op, reg string, size int) {
	slot := regPos{pos: l.stack, op: op, reg: reg}
	l.regs = append(l.regs, slot)
	l.stack += size
}

// addSpecial assigns the next slot to a register that is saved and
// restored with the given instruction templates, and advances the
// layout by size bytes.
func (l *layout) addSpecial(save, restore string, size int) {
	slot := regPos{pos: l.stack, save: save, restore: restore}
	l.regs = append(l.regs, slot)
	l.stack += size
}
168
169func (l *layout) save() {
170	for _, reg := range l.regs {
171		if reg.save != "" {
172			p(reg.save, reg.pos)
173		} else {
174			p("%s %s, %d(%s)", reg.op, reg.reg, reg.pos, l.sp)
175		}
176	}
177}
178
179func (l *layout) restore() {
180	for i := len(l.regs) - 1; i >= 0; i-- {
181		reg := l.regs[i]
182		if reg.restore != "" {
183			p(reg.restore, reg.pos)
184		} else {
185			p("%s %d(%s), %s", reg.op, reg.pos, l.sp, reg.reg)
186		}
187	}
188}
189
190func gen386() {
191	p("PUSHFL")
192
193	// Save general purpose registers.
194	var l = layout{sp: "SP"}
195	for _, reg := range regNames386 {
196		if reg == "SP" || strings.HasPrefix(reg, "X") {
197			continue
198		}
199		l.add("MOVL", reg, 4)
200	}
201
202	// Save the 387 state.
203	l.addSpecial(
204		"FSAVE %d(SP)\nFLDCW runtime·controlWord64(SB)",
205		"FRSTOR %d(SP)",
206		108)
207
208	// Save SSE state only if supported.
209	lSSE := layout{stack: l.stack, sp: "SP"}
210	for i := 0; i < 8; i++ {
211		lSSE.add("MOVUPS", fmt.Sprintf("X%d", i), 16)
212	}
213
214	p("ADJSP $%d", lSSE.stack)
215	p("NOP SP")
216	l.save()
217	p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse")
218	lSSE.save()
219	label("nosse:")
220	p("CALL ·asyncPreempt2(SB)")
221	p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse2")
222	lSSE.restore()
223	label("nosse2:")
224	l.restore()
225	p("ADJSP $%d", -lSSE.stack)
226
227	p("POPFL")
228	p("RET")
229}
230
231func genAMD64() {
232	// Assign stack offsets.
233	var l = layout{sp: "SP"}
234	for _, reg := range regNamesAMD64 {
235		if reg == "SP" || reg == "BP" {
236			continue
237		}
238		if strings.HasPrefix(reg, "X") {
239			l.add("MOVUPS", reg, 16)
240		} else {
241			l.add("MOVQ", reg, 8)
242		}
243	}
244
245	// TODO: MXCSR register?
246
247	p("PUSHQ BP")
248	p("MOVQ SP, BP")
249	p("// Save flags before clobbering them")
250	p("PUSHFQ")
251	p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
252	p("ADJSP $%d", l.stack)
253	p("// But vet doesn't know ADJSP, so suppress vet stack checking")
254	p("NOP SP")
255
256	// Apparently, the signal handling code path in darwin kernel leaves
257	// the upper bits of Y registers in a dirty state, which causes
258	// many SSE operations (128-bit and narrower) become much slower.
259	// Clear the upper bits to get to a clean state. See issue #37174.
260	// It is safe here as Go code don't use the upper bits of Y registers.
261	p("#ifdef GOOS_darwin")
262	p("CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0")
263	p("JE 2(PC)")
264	p("VZEROUPPER")
265	p("#endif")
266
267	l.save()
268	p("CALL ·asyncPreempt2(SB)")
269	l.restore()
270	p("ADJSP $%d", -l.stack)
271	p("POPFQ")
272	p("POPQ BP")
273	p("RET")
274}
275
276func genARM() {
277	// Add integer registers R0-R12.
278	// R13 (SP), R14 (LR), R15 (PC) are special and not saved here.
279	var l = layout{sp: "R13", stack: 4} // add LR slot
280	for i := 0; i <= 12; i++ {
281		reg := fmt.Sprintf("R%d", i)
282		if i == 10 {
283			continue // R10 is g register, no need to save/restore
284		}
285		l.add("MOVW", reg, 4)
286	}
287	// Add flag register.
288	l.addSpecial(
289		"MOVW CPSR, R0\nMOVW R0, %d(R13)",
290		"MOVW %d(R13), R0\nMOVW R0, CPSR",
291		4)
292
293	// Add floating point registers F0-F15 and flag register.
294	var lfp = layout{stack: l.stack, sp: "R13"}
295	lfp.addSpecial(
296		"MOVW FPCR, R0\nMOVW R0, %d(R13)",
297		"MOVW %d(R13), R0\nMOVW R0, FPCR",
298		4)
299	for i := 0; i <= 15; i++ {
300		reg := fmt.Sprintf("F%d", i)
301		lfp.add("MOVD", reg, 8)
302	}
303
304	p("MOVW.W R14, -%d(R13)", lfp.stack) // allocate frame, save LR
305	l.save()
306	p("MOVB ·goarm(SB), R0\nCMP $6, R0\nBLT nofp") // test goarm, and skip FP registers if goarm=5.
307	lfp.save()
308	label("nofp:")
309	p("CALL ·asyncPreempt2(SB)")
310	p("MOVB ·goarm(SB), R0\nCMP $6, R0\nBLT nofp2") // test goarm, and skip FP registers if goarm=5.
311	lfp.restore()
312	label("nofp2:")
313	l.restore()
314
315	p("MOVW %d(R13), R14", lfp.stack)     // sigctxt.pushCall pushes LR on stack, restore it
316	p("MOVW.P %d(R13), R15", lfp.stack+4) // load PC, pop frame (including the space pushed by sigctxt.pushCall)
317	p("UNDEF")                            // shouldn't get here
318}
319
320func genARM64() {
321	// Add integer registers R0-R26
322	// R27 (REGTMP), R28 (g), R29 (FP), R30 (LR), R31 (SP) are special
323	// and not saved here.
324	var l = layout{sp: "RSP", stack: 8} // add slot to save PC of interrupted instruction
325	for i := 0; i <= 26; i++ {
326		if i == 18 {
327			continue // R18 is not used, skip
328		}
329		reg := fmt.Sprintf("R%d", i)
330		l.add("MOVD", reg, 8)
331	}
332	// Add flag registers.
333	l.addSpecial(
334		"MOVD NZCV, R0\nMOVD R0, %d(RSP)",
335		"MOVD %d(RSP), R0\nMOVD R0, NZCV",
336		8)
337	l.addSpecial(
338		"MOVD FPSR, R0\nMOVD R0, %d(RSP)",
339		"MOVD %d(RSP), R0\nMOVD R0, FPSR",
340		8)
341	// TODO: FPCR? I don't think we'll change it, so no need to save.
342	// Add floating point registers F0-F31.
343	for i := 0; i <= 31; i++ {
344		reg := fmt.Sprintf("F%d", i)
345		l.add("FMOVD", reg, 8)
346	}
347	if l.stack%16 != 0 {
348		l.stack += 8 // SP needs 16-byte alignment
349	}
350
351	// allocate frame, save PC of interrupted instruction (in LR)
352	p("MOVD R30, %d(RSP)", -l.stack)
353	p("SUB $%d, RSP", l.stack)
354	p("#ifdef GOOS_linux")
355	p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux)
356	p("SUB $8, RSP, R29")  // set up new frame pointer
357	p("#endif")
358	// On darwin, save the LR again after decrementing SP. We run the
359	// signal handler on the G stack (as it doesn't support SA_ONSTACK),
360	// so any writes below SP may be clobbered.
361	p("#ifdef GOOS_darwin")
362	p("MOVD R30, (RSP)")
363	p("#endif")
364
365	l.save()
366	p("CALL ·asyncPreempt2(SB)")
367	l.restore()
368
369	p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
370	p("#ifdef GOOS_linux")
371	p("MOVD -8(RSP), R29") // restore frame pointer
372	p("#endif")
373	p("MOVD (RSP), R27")          // load PC to REGTMP
374	p("ADD $%d, RSP", l.stack+16) // pop frame (including the space pushed by sigctxt.pushCall)
375	p("JMP (R27)")
376}
377
378func genMIPS(_64bit bool) {
379	mov := "MOVW"
380	movf := "MOVF"
381	add := "ADD"
382	sub := "SUB"
383	r28 := "R28"
384	regsize := 4
385	softfloat := "GOMIPS_softfloat"
386	if _64bit {
387		mov = "MOVV"
388		movf = "MOVD"
389		add = "ADDV"
390		sub = "SUBV"
391		r28 = "RSB"
392		regsize = 8
393		softfloat = "GOMIPS64_softfloat"
394	}
395
396	// Add integer registers R1-R22, R24-R25, R28
397	// R0 (zero), R23 (REGTMP), R29 (SP), R30 (g), R31 (LR) are special,
398	// and not saved here. R26 and R27 are reserved by kernel and not used.
399	var l = layout{sp: "R29", stack: regsize} // add slot to save PC of interrupted instruction (in LR)
400	for i := 1; i <= 25; i++ {
401		if i == 23 {
402			continue // R23 is REGTMP
403		}
404		reg := fmt.Sprintf("R%d", i)
405		l.add(mov, reg, regsize)
406	}
407	l.add(mov, r28, regsize)
408	l.addSpecial(
409		mov+" HI, R1\n"+mov+" R1, %d(R29)",
410		mov+" %d(R29), R1\n"+mov+" R1, HI",
411		regsize)
412	l.addSpecial(
413		mov+" LO, R1\n"+mov+" R1, %d(R29)",
414		mov+" %d(R29), R1\n"+mov+" R1, LO",
415		regsize)
416
417	// Add floating point control/status register FCR31 (FCR0-FCR30 are irrelevant)
418	var lfp = layout{sp: "R29", stack: l.stack}
419	lfp.addSpecial(
420		mov+" FCR31, R1\n"+mov+" R1, %d(R29)",
421		mov+" %d(R29), R1\n"+mov+" R1, FCR31",
422		regsize)
423	// Add floating point registers F0-F31.
424	for i := 0; i <= 31; i++ {
425		reg := fmt.Sprintf("F%d", i)
426		lfp.add(movf, reg, regsize)
427	}
428
429	// allocate frame, save PC of interrupted instruction (in LR)
430	p(mov+" R31, -%d(R29)", lfp.stack)
431	p(sub+" $%d, R29", lfp.stack)
432
433	l.save()
434	p("#ifndef %s", softfloat)
435	lfp.save()
436	p("#endif")
437	p("CALL ·asyncPreempt2(SB)")
438	p("#ifndef %s", softfloat)
439	lfp.restore()
440	p("#endif")
441	l.restore()
442
443	p(mov+" %d(R29), R31", lfp.stack)     // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
444	p(mov + " (R29), R23")                // load PC to REGTMP
445	p(add+" $%d, R29", lfp.stack+regsize) // pop frame (including the space pushed by sigctxt.pushCall)
446	p("JMP (R23)")
447}
448
449func genPPC64() {
450	// Add integer registers R3-R29
451	// R0 (zero), R1 (SP), R30 (g) are special and not saved here.
452	// R2 (TOC pointer in PIC mode), R12 (function entry address in PIC mode) have been saved in sigctxt.pushCall.
453	// R31 (REGTMP) will be saved manually.
454	var l = layout{sp: "R1", stack: 32 + 8} // MinFrameSize on PPC64, plus one word for saving R31
455	for i := 3; i <= 29; i++ {
456		if i == 12 || i == 13 {
457			// R12 has been saved in sigctxt.pushCall.
458			// R13 is TLS pointer, not used by Go code. we must NOT
459			// restore it, otherwise if we parked and resumed on a
460			// different thread we'll mess up TLS addresses.
461			continue
462		}
463		reg := fmt.Sprintf("R%d", i)
464		l.add("MOVD", reg, 8)
465	}
466	l.addSpecial(
467		"MOVW CR, R31\nMOVW R31, %d(R1)",
468		"MOVW %d(R1), R31\nMOVFL R31, $0xff", // this is MOVW R31, CR
469		8)                                    // CR is 4-byte wide, but just keep the alignment
470	l.addSpecial(
471		"MOVD XER, R31\nMOVD R31, %d(R1)",
472		"MOVD %d(R1), R31\nMOVD R31, XER",
473		8)
474	// Add floating point registers F0-F31.
475	for i := 0; i <= 31; i++ {
476		reg := fmt.Sprintf("F%d", i)
477		l.add("FMOVD", reg, 8)
478	}
479	// Add floating point control/status register FPSCR.
480	l.addSpecial(
481		"MOVFL FPSCR, F0\nFMOVD F0, %d(R1)",
482		"FMOVD %d(R1), F0\nMOVFL F0, FPSCR",
483		8)
484
485	p("MOVD R31, -%d(R1)", l.stack-32) // save R31 first, we'll use R31 for saving LR
486	p("MOVD LR, R31")
487	p("MOVDU R31, -%d(R1)", l.stack) // allocate frame, save PC of interrupted instruction (in LR)
488
489	l.save()
490	p("CALL ·asyncPreempt2(SB)")
491	l.restore()
492
493	p("MOVD %d(R1), R31", l.stack) // sigctxt.pushCall has pushed LR, R2, R12 (at interrupt) on stack, restore them
494	p("MOVD R31, LR")
495	p("MOVD %d(R1), R2", l.stack+8)
496	p("MOVD %d(R1), R12", l.stack+16)
497	p("MOVD (R1), R31") // load PC to CTR
498	p("MOVD R31, CTR")
499	p("MOVD 32(R1), R31")        // restore R31
500	p("ADD $%d, R1", l.stack+32) // pop frame (including the space pushed by sigctxt.pushCall)
501	p("JMP (CTR)")
502}
503
504func genRISCV64() {
505	p("// No async preemption on riscv64 - see issue 36711")
506	p("UNDEF")
507}
508
509func genS390X() {
510	// Add integer registers R0-R12
511	// R13 (g), R14 (LR), R15 (SP) are special, and not saved here.
512	// Saving R10 (REGTMP) is not necessary, but it is saved anyway.
513	var l = layout{sp: "R15", stack: 16} // add slot to save PC of interrupted instruction and flags
514	l.addSpecial(
515		"STMG R0, R12, %d(R15)",
516		"LMG %d(R15), R0, R12",
517		13*8)
518	// Add floating point registers F0-F31.
519	for i := 0; i <= 15; i++ {
520		reg := fmt.Sprintf("F%d", i)
521		l.add("FMOVD", reg, 8)
522	}
523
524	// allocate frame, save PC of interrupted instruction (in LR) and flags (condition code)
525	p("IPM R10") // save flags upfront, as ADD will clobber flags
526	p("MOVD R14, -%d(R15)", l.stack)
527	p("ADD $-%d, R15", l.stack)
528	p("MOVW R10, 8(R15)") // save flags
529
530	l.save()
531	p("CALL ·asyncPreempt2(SB)")
532	l.restore()
533
534	p("MOVD %d(R15), R14", l.stack)    // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
535	p("ADD $%d, R15", l.stack+8)       // pop frame (including the space pushed by sigctxt.pushCall)
536	p("MOVWZ -%d(R15), R10", l.stack)  // load flags to REGTMP
537	p("TMLH R10, $(3<<12)")            // restore flags
538	p("MOVD -%d(R15), R10", l.stack+8) // load PC to REGTMP
539	p("JMP (R10)")
540}
541
542func genWasm() {
543	p("// No async preemption on wasm")
544	p("UNDEF")
545}
546
547func notImplemented() {
548	p("// Not implemented yet")
549	p("JMP ·abort(SB)")
550}
551