1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build ignore
6
7// mkpreempt generates the asyncPreempt functions for each
8// architecture.
9package main
10
11import (
12	"flag"
13	"fmt"
14	"io"
15	"log"
16	"os"
17	"strings"
18)
19
20// Copied from cmd/compile/internal/ssa/gen/*Ops.go
21
// regNames386 is the 386 register name table (copied from
// cmd/compile/internal/ssa/gen/*Ops.go). gen386 walks it to pick the
// registers it saves with MOVL, skipping SP and every X-prefixed name.
var regNames386 = []string{
	// General purpose registers (SP included).
	"AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI",
	// X registers.
	"X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7",
}
40
// regNamesAMD64 is the amd64 register name table (copied from
// cmd/compile/internal/ssa/gen/*Ops.go). genAMD64 walks it to assign a
// stack slot to every entry except SP and BP.
var regNamesAMD64 = []string{
	// General purpose registers (SP included).
	"AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI",
	"R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
	// X registers.
	"X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7",
	"X8", "X9", "X10", "X11", "X12", "X13", "X14", "X15",
}
75
// out is the destination of all generated assembly. main points it at
// os.Stdout or at the preempt_<arch>.s file currently being written.
var out io.Writer

// arches maps each supported architecture name to the function that emits
// the body of its asyncPreempt routine.
var arches = map[string]func(){
	"386":     gen386,
	"amd64":   genAMD64,
	"arm":     genARM,
	"arm64":   genARM64,
	"mips64x": func() { genMIPS(true) },
	"mipsx":   func() { genMIPS(false) },
	"ppc64x":  genPPC64,
	"riscv64": genRISCV64,
	"s390x":   genS390X,
	"wasm":    genWasm,
}
// beLe marks the architecture names for which header emits a
// "+build <base> <base>le" constraint, where <base> is the name with its
// final character removed.
var beLe = map[string]bool{"mips64x": true, "mipsx": true, "ppc64x": true}
91
92func main() {
93	flag.Parse()
94	if flag.NArg() > 0 {
95		out = os.Stdout
96		for _, arch := range flag.Args() {
97			gen, ok := arches[arch]
98			if !ok {
99				log.Fatalf("unknown arch %s", arch)
100			}
101			header(arch)
102			gen()
103		}
104		return
105	}
106
107	for arch, gen := range arches {
108		f, err := os.Create(fmt.Sprintf("preempt_%s.s", arch))
109		if err != nil {
110			log.Fatal(err)
111		}
112		out = f
113		header(arch)
114		gen()
115		if err := f.Close(); err != nil {
116			log.Fatal(err)
117		}
118	}
119}
120
121func header(arch string) {
122	fmt.Fprintf(out, "// Code generated by mkpreempt.go; DO NOT EDIT.\n\n")
123	if beLe[arch] {
124		base := arch[:len(arch)-1]
125		fmt.Fprintf(out, "// +build %s %sle\n\n", base, base)
126	}
127	fmt.Fprintf(out, "#include \"go_asm.h\"\n")
128	fmt.Fprintf(out, "#include \"textflag.h\"\n\n")
129	fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n")
130}
131
132func p(f string, args ...interface{}) {
133	fmted := fmt.Sprintf(f, args...)
134	fmt.Fprintf(out, "\t%s\n", strings.Replace(fmted, "\n", "\n\t", -1))
135}
136
137func label(l string) {
138	fmt.Fprintf(out, "%s\n", l)
139}
140
// layout describes a register save area: which registers are spilled and
// the stack offset assigned to each of them.
type layout struct {
	// stack is the offset handed to the next entry added; add and
	// addSpecial advance it by the entry's size. Callers may start it above
	// zero to reserve leading slots.
	stack int
	// regs holds the entries in the order they were added. save walks it
	// forwards and restore walks it backwards.
	regs  []regPos
	sp    string // stack pointer register
}
146
// regPos is one entry of a layout: a stack offset together with the way the
// value stored there is saved and restored.
type regPos struct {
	// pos is the stack offset assigned to this entry.
	pos int

	// op and reg are the move instruction and register name used for the
	// default save and restore sequences when save/restore are empty.
	op  string
	reg string

	// If this register requires special save and restore, these
	// give those operations with a %d placeholder for the stack
	// offset.
	save, restore string
}
158
159func (l *layout) add(op, reg string, size int) {
160	l.regs = append(l.regs, regPos{op: op, reg: reg, pos: l.stack})
161	l.stack += size
162}
163
164func (l *layout) addSpecial(save, restore string, size int) {
165	l.regs = append(l.regs, regPos{save: save, restore: restore, pos: l.stack})
166	l.stack += size
167}
168
169func (l *layout) save() {
170	for _, reg := range l.regs {
171		if reg.save != "" {
172			p(reg.save, reg.pos)
173		} else {
174			p("%s %s, %d(%s)", reg.op, reg.reg, reg.pos, l.sp)
175		}
176	}
177}
178
179func (l *layout) restore() {
180	for i := len(l.regs) - 1; i >= 0; i-- {
181		reg := l.regs[i]
182		if reg.restore != "" {
183			p(reg.restore, reg.pos)
184		} else {
185			p("%s %d(%s), %s", reg.op, reg.pos, l.sp, reg.reg)
186		}
187	}
188}
189
// gen386 emits the body of asyncPreempt for 386.
//
// The generated code pushes the flags, reserves a single frame that holds
// the general purpose registers, the 387 state and an SSE area, saves them,
// calls asyncPreempt2, and restores everything in reverse order. The SSE
// registers are stored and reloaded only when the generated code finds the
// X86 HasSSE2 flag set at run time.
func gen386() {
	p("PUSHFL")

	// Save general purpose registers.
	var l = layout{sp: "SP"}
	for _, reg := range regNames386 {
		if reg == "SP" || strings.HasPrefix(reg, "X") {
			continue
		}
		l.add("MOVL", reg, 4)
	}

	// Save the 387 state.
	l.addSpecial(
		"FSAVE %d(SP)\nFLDCW runtime·controlWord64(SB)",
		"FRSTOR %d(SP)",
		108)

	// Save SSE state only if supported.
	// lSSE is a separate layout whose offsets start where l ends, so its
	// save and restore can be emitted under their own run-time check.
	lSSE := layout{stack: l.stack, sp: "SP"}
	for i := 0; i < 8; i++ {
		lSSE.add("MOVUPS", fmt.Sprintf("X%d", i), 16)
	}

	// The frame is always sized for both layouts (lSSE.stack includes
	// l.stack), whether or not the SSE registers end up being stored.
	p("ADJSP $%d", lSSE.stack)
	p("NOP SP")
	l.save()
	p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse")
	lSSE.save()
	label("nosse:")
	p("CALL ·asyncPreempt2(SB)")
	p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse2")
	lSSE.restore()
	label("nosse2:")
	l.restore()
	p("ADJSP $%d", -lSSE.stack)

	p("POPFL")
	p("RET")
}
230
// genAMD64 emits the body of asyncPreempt for amd64.
//
// Every register in regNamesAMD64 except SP and BP gets a stack slot: 16
// bytes moved with MOVUPS for the X registers and 8 bytes moved with MOVQ
// for the rest. BP and the flags are pushed and popped separately around
// that save area.
func genAMD64() {
	// Assign stack offsets.
	var l = layout{sp: "SP"}
	for _, reg := range regNamesAMD64 {
		if reg == "SP" || reg == "BP" {
			continue
		}
		if strings.HasPrefix(reg, "X") {
			l.add("MOVUPS", reg, 16)
		} else {
			l.add("MOVQ", reg, 8)
		}
	}

	// TODO: MXCSR register?

	// Apparently, the signal handling code path in the darwin kernel leaves
	// the upper bits of the Y registers in a dirty state, which causes
	// many SSE operations (128-bit and narrower) to become much slower.
	// Clear the upper bits to get to a clean state. See issue #37174.
	// It is safe here as Go code doesn't use the upper bits of Y registers.
	//
	// NOTE(review): VZEROUPPER is emitted without any CPU feature check.
	// It is an AVX instruction, so confirm that every darwin/amd64 machine
	// this code runs on supports AVX, or guard it with a run-time check the
	// way gen386 guards its SSE saves.
	p("#ifdef GOOS_darwin")
	p("VZEROUPPER")
	p("#endif")

	p("PUSHQ BP")
	p("MOVQ SP, BP")
	p("// Save flags before clobbering them")
	p("PUSHFQ")
	p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
	p("ADJSP $%d", l.stack)
	p("// But vet doesn't know ADJSP, so suppress vet stack checking")
	p("NOP SP")
	l.save()
	p("CALL ·asyncPreempt2(SB)")
	l.restore()
	p("ADJSP $%d", -l.stack)
	p("POPFQ")
	p("POPQ BP")
	p("RET")
}
272
// genARM emits the body of asyncPreempt for arm.
//
// Two layouts share one frame: l covers the LR slot, the integer registers
// and CPSR, and lfp continues from l's end with FPCR and F0-F15. The
// generated code skips the lfp save and restore at run time when goarm is
// below 6.
func genARM() {
	// Add integer registers R0-R12.
	// R13 (SP), R14 (LR), R15 (PC) are special and not saved here.
	var l = layout{sp: "R13", stack: 4} // add LR slot
	for i := 0; i <= 12; i++ {
		reg := fmt.Sprintf("R%d", i)
		if i == 10 {
			continue // R10 is g register, no need to save/restore
		}
		l.add("MOVW", reg, 4)
	}
	// Add flag register.
	// R0 serves as the scratch register here; it is the first entry of l,
	// so it is stored before this entry's save sequence runs and reloaded
	// after its restore sequence runs.
	l.addSpecial(
		"MOVW CPSR, R0\nMOVW R0, %d(R13)",
		"MOVW %d(R13), R0\nMOVW R0, CPSR",
		4)

	// Add floating point registers F0-F15 and flag register.
	var lfp = layout{stack: l.stack, sp: "R13"}
	lfp.addSpecial(
		"MOVW FPCR, R0\nMOVW R0, %d(R13)",
		"MOVW %d(R13), R0\nMOVW R0, FPCR",
		4)
	for i := 0; i <= 15; i++ {
		reg := fmt.Sprintf("F%d", i)
		lfp.add("MOVD", reg, 8)
	}

	p("MOVW.W R14, -%d(R13)", lfp.stack) // allocate frame, save LR
	l.save()
	p("MOVB ·goarm(SB), R0\nCMP $6, R0\nBLT nofp") // test goarm, and skip FP registers if goarm=5.
	lfp.save()
	label("nofp:")
	p("CALL ·asyncPreempt2(SB)")
	p("MOVB ·goarm(SB), R0\nCMP $6, R0\nBLT nofp2") // test goarm, and skip FP registers if goarm=5.
	lfp.restore()
	label("nofp2:")
	l.restore()

	p("MOVW %d(R13), R14", lfp.stack)     // sigctxt.pushCall pushes LR on stack, restore it
	p("MOVW.P %d(R13), R15", lfp.stack+4) // load PC, pop frame (including the space pushed by sigctxt.pushCall)
	p("UNDEF")                            // shouldn't get here
}
316
// genARM64 emits the body of asyncPreempt for arm64.
//
// The frame starts with an 8-byte slot for the interrupted PC, followed by
// the integer registers, NZCV, FPSR and F0-F31, and is padded to a multiple
// of 16 bytes. The generated code returns by jumping through R27 to the
// saved PC after popping the frame.
func genARM64() {
	// Add integer registers R0-R26
	// R27 (REGTMP), R28 (g), R29 (FP), R30 (LR), R31 (SP) are special
	// and not saved here.
	var l = layout{sp: "RSP", stack: 8} // add slot to save PC of interrupted instruction
	for i := 0; i <= 26; i++ {
		if i == 18 {
			continue // R18 is not used, skip
		}
		reg := fmt.Sprintf("R%d", i)
		l.add("MOVD", reg, 8)
	}
	// Add flag registers.
	// R0 serves as the scratch register; it is the first entry of l, so it
	// is stored before these save sequences run and reloaded after the
	// matching restore sequences run.
	l.addSpecial(
		"MOVD NZCV, R0\nMOVD R0, %d(RSP)",
		"MOVD %d(RSP), R0\nMOVD R0, NZCV",
		8)
	l.addSpecial(
		"MOVD FPSR, R0\nMOVD R0, %d(RSP)",
		"MOVD %d(RSP), R0\nMOVD R0, FPSR",
		8)
	// TODO: FPCR? I don't think we'll change it, so no need to save.
	// Add floating point registers F0-F31.
	for i := 0; i <= 31; i++ {
		reg := fmt.Sprintf("F%d", i)
		l.add("FMOVD", reg, 8)
	}
	if l.stack%16 != 0 {
		l.stack += 8 // SP needs 16-byte alignment
	}

	// allocate frame, save PC of interrupted instruction (in LR)
	p("MOVD R30, %d(RSP)", -l.stack)
	p("SUB $%d, RSP", l.stack)
	p("#ifdef GOOS_linux")
	p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux)
	p("SUB $8, RSP, R29")  // set up new frame pointer
	p("#endif")
	// On darwin, save the LR again after decrementing SP. We run the
	// signal handler on the G stack (as it doesn't support SA_ONSTACK),
	// so any writes below SP may be clobbered.
	p("#ifdef GOOS_darwin")
	p("MOVD R30, (RSP)")
	p("#endif")

	l.save()
	p("CALL ·asyncPreempt2(SB)")
	l.restore()

	p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
	p("#ifdef GOOS_linux")
	p("MOVD -8(RSP), R29") // restore frame pointer
	p("#endif")
	p("MOVD (RSP), R27")          // load PC to REGTMP
	p("ADD $%d, RSP", l.stack+16) // pop frame (including the space pushed by sigctxt.pushCall)
	p("JMP (R27)")
}
374
// genMIPS emits the body of asyncPreempt for the MIPS ports. When _64bit is
// true it uses the 64-bit mnemonics (MOVV, MOVD, ADDV, SUBV), the name RSB
// for R28 and 8-byte slots; otherwise it uses MOVW, MOVF, ADD, SUB, the
// name R28 and 4-byte slots.
//
// The frame starts with one slot for the interrupted PC, followed by the
// integer registers, HI, LO, FCR31 and F0-F31. The generated code returns
// by jumping through R23 to the saved PC after popping the frame.
func genMIPS(_64bit bool) {
	mov := "MOVW"
	movf := "MOVF"
	add := "ADD"
	sub := "SUB"
	r28 := "R28"
	regsize := 4
	if _64bit {
		mov = "MOVV"
		movf = "MOVD"
		add = "ADDV"
		sub = "SUBV"
		r28 = "RSB"
		regsize = 8
	}

	// Add integer registers R1-R22, R24-R25, R28
	// R0 (zero), R23 (REGTMP), R29 (SP), R30 (g), R31 (LR) are special,
	// and not saved here. R26 and R27 are reserved by kernel and not used.
	var l = layout{sp: "R29", stack: regsize} // add slot to save PC of interrupted instruction (in LR)
	for i := 1; i <= 25; i++ {
		if i == 23 {
			continue // R23 is REGTMP
		}
		reg := fmt.Sprintf("R%d", i)
		l.add(mov, reg, regsize)
	}
	l.add(mov, r28, regsize)
	// HI, LO and FCR31 are moved through R1. R1 is the first entry of l, so
	// it is stored before these save sequences run and reloaded after the
	// matching restore sequences run.
	l.addSpecial(
		mov+" HI, R1\n"+mov+" R1, %d(R29)",
		mov+" %d(R29), R1\n"+mov+" R1, HI",
		regsize)
	l.addSpecial(
		mov+" LO, R1\n"+mov+" R1, %d(R29)",
		mov+" %d(R29), R1\n"+mov+" R1, LO",
		regsize)
	// Add floating point control/status register FCR31 (FCR0-FCR30 are irrelevant)
	l.addSpecial(
		mov+" FCR31, R1\n"+mov+" R1, %d(R29)",
		mov+" %d(R29), R1\n"+mov+" R1, FCR31",
		regsize)
	// Add floating point registers F0-F31.
	for i := 0; i <= 31; i++ {
		reg := fmt.Sprintf("F%d", i)
		l.add(movf, reg, regsize)
	}

	// allocate frame, save PC of interrupted instruction (in LR)
	p(mov+" R31, -%d(R29)", l.stack)
	p(sub+" $%d, R29", l.stack)

	l.save()
	p("CALL ·asyncPreempt2(SB)")
	l.restore()

	p(mov+" %d(R29), R31", l.stack)     // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
	p(mov + " (R29), R23")              // load PC to REGTMP
	p(add+" $%d, R29", l.stack+regsize) // pop frame (including the space pushed by sigctxt.pushCall)
	p("JMP (R23)")
}
435
// genPPC64 emits the body of asyncPreempt for ppc64x.
//
// The frame reserves 32 bytes (MinFrameSize) plus one word for R31, then
// holds R3-R29 (without R12 and R13), CR, XER, F0-F31 and FPSCR. R31 is
// stored by hand before anything else because the generated code uses it as
// the scratch register for LR, CR, XER and CTR. The generated code returns
// by jumping through CTR to the saved PC.
func genPPC64() {
	// Add integer registers R3-R29
	// R0 (zero), R1 (SP), R30 (g) are special and not saved here.
	// R2 (TOC pointer in PIC mode), R12 (function entry address in PIC mode) have been saved in sigctxt.pushCall.
	// R31 (REGTMP) will be saved manually.
	var l = layout{sp: "R1", stack: 32 + 8} // MinFrameSize on PPC64, plus one word for saving R31
	for i := 3; i <= 29; i++ {
		if i == 12 || i == 13 {
			// R12 has been saved in sigctxt.pushCall.
			// R13 is TLS pointer, not used by Go code. we must NOT
			// restore it, otherwise if we parked and resumed on a
			// different thread we'll mess up TLS addresses.
			continue
		}
		reg := fmt.Sprintf("R%d", i)
		l.add("MOVD", reg, 8)
	}
	l.addSpecial(
		"MOVW CR, R31\nMOVW R31, %d(R1)",
		"MOVW %d(R1), R31\nMOVFL R31, $0xff", // this is MOVW R31, CR
		8)                                    // CR is 4-byte wide, but just keep the alignment
	l.addSpecial(
		"MOVD XER, R31\nMOVD R31, %d(R1)",
		"MOVD %d(R1), R31\nMOVD R31, XER",
		8)
	// Add floating point registers F0-F31.
	for i := 0; i <= 31; i++ {
		reg := fmt.Sprintf("F%d", i)
		l.add("FMOVD", reg, 8)
	}
	// Add floating point control/status register FPSCR.
	// F0 serves as the scratch register. This entry comes after F0-F31, so
	// F0 is already stored when the save sequence runs, and the restore
	// sequence runs before F0 is reloaded.
	l.addSpecial(
		"MOVFL FPSCR, F0\nFMOVD F0, %d(R1)",
		"FMOVD %d(R1), F0\nMOVFL F0, FPSCR",
		8)

	p("MOVD R31, -%d(R1)", l.stack-32) // save R31 first, we'll use R31 for saving LR
	p("MOVD LR, R31")
	p("MOVDU R31, -%d(R1)", l.stack) // allocate frame, save PC of interrupted instruction (in LR)

	l.save()
	p("CALL ·asyncPreempt2(SB)")
	l.restore()

	p("MOVD %d(R1), R31", l.stack) // sigctxt.pushCall has pushed LR, R2, R12 (at interrupt) on stack, restore them
	p("MOVD R31, LR")
	p("MOVD %d(R1), R2", l.stack+8)
	p("MOVD %d(R1), R12", l.stack+16)
	p("MOVD (R1), R31") // load PC to CTR
	p("MOVD R31, CTR")
	p("MOVD 32(R1), R31")        // restore R31
	p("ADD $%d, R1", l.stack+32) // pop frame (including the space pushed by sigctxt.pushCall)
	p("JMP (CTR)")
}
490
491func genRISCV64() {
492	p("// No async preemption on riscv64 - see issue 36711")
493	p("UNDEF")
494}
495
// genS390X emits the body of asyncPreempt for s390x.
//
// The frame starts with 16 bytes holding the interrupted PC and the flags,
// followed by R0-R12 (stored and loaded as one block with STMG/LMG) and
// F0-F15. The generated code returns by jumping through R10 to the saved
// PC after popping the frame.
func genS390X() {
	// Add integer registers R0-R12
	// R13 (g), R14 (LR), R15 (SP) are special, and not saved here.
	// Saving R10 (REGTMP) is not necessary, but it is saved anyway.
	var l = layout{sp: "R15", stack: 16} // add slot to save PC of interrupted instruction and flags
	l.addSpecial(
		"STMG R0, R12, %d(R15)",
		"LMG %d(R15), R0, R12",
		13*8)
	// Add floating point registers F0-F15.
	for i := 0; i <= 15; i++ {
		reg := fmt.Sprintf("F%d", i)
		l.add("FMOVD", reg, 8)
	}

	// allocate frame, save PC of interrupted instruction (in LR) and flags (condition code)
	p("IPM R10") // save flags upfront, as ADD will clobber flags
	p("MOVD R14, -%d(R15)", l.stack)
	p("ADD $-%d, R15", l.stack)
	p("MOVW R10, 8(R15)") // save flags

	l.save()
	p("CALL ·asyncPreempt2(SB)")
	l.restore()

	p("MOVD %d(R15), R14", l.stack)    // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
	p("ADD $%d, R15", l.stack+8)       // pop frame (including the space pushed by sigctxt.pushCall)
	p("MOVWZ -%d(R15), R10", l.stack)  // load flags to REGTMP
	p("TMLH R10, $(3<<12)")            // restore flags
	p("MOVD -%d(R15), R10", l.stack+8) // load PC to REGTMP
	p("JMP (R10)")
}
528
529func genWasm() {
530	p("// No async preemption on wasm")
531	p("UNDEF")
532}
533
534func notImplemented() {
535	p("// Not implemented yet")
536	p("JMP ·abort(SB)")
537}
538