1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build ignore
6
7// The gen command generates Go code (in the parent directory) for all
8// the architecture-specific opcodes, blocks, and rewrites.
9package main
10
import (
	"bytes"
	"flag"
	"fmt"
	"go/format"
	"io/ioutil"
	"log"
	"math/bits"
	"os"
	"path"
	"regexp"
	"runtime"
	"runtime/pprof"
	"runtime/trace"
	"sort"
	"strings"
	"sync"
)
28
// arch describes one target architecture's opcodes, block kinds, and
// register set, as registered by the corresponding *Ops.go file.
type arch struct {
	name            string      // arch name, e.g. "AMD64"; "generic" for the arch-independent ops
	pkg             string      // obj package to import for this arch.
	genfile         string      // source file containing opcode code generation.
	ops             []opData    // opcodes defined for this arch
	blocks          []blockData // block kinds defined for this arch
	regnames        []string    // register names, indexed by register number (bit position in a regMask)
	gpregmask       regMask     // general-purpose registers; these get GC register map indices (see genOp)
	fpregmask       regMask     // floating-point registers
	fp32regmask     regMask     // 32-bit FP registers when distinct from fpregmask; 0 if not applicable
	fp64regmask     regMask     // 64-bit FP registers when distinct from fpregmask; 0 if not applicable
	specialregmask  regMask     // remaining special registers -- exact contents per arch; see *Ops.go files
	framepointerreg int8        // register number of the frame pointer -- presumably -1 if none; confirm in *Ops.go
	linkreg         int8        // register number of the link register -- presumably -1 if none; confirm in *Ops.go
	generic         bool        // true for the arch-independent "generic" pseudo-arch (no registers or asm emitted)
	imports         []string    // extra imports -- NOTE(review): not referenced in this file; verify against genRules
}
46
// opData describes a single opcode: its name, register constraints,
// and the various flags that control code generation and optimization.
type opData struct {
	name              string  // opcode name without the arch prefix (emitted as Op<Arch><name>)
	reg               regInfo // register constraints for inputs, outputs, and clobbers
	asm               string  // assembler mnemonic suffix; emitted as <objpkg>.A<asm> when non-empty
	typ               string // default result type
	aux               string // aux field kind, e.g. "SymOff"; emitted as auxType: aux<aux>
	rematerializeable bool   // op can be recomputed instead of spilled; must not clobber registers or flags
	argLength         int32  // number of arguments, if -1, then this operation has a variable number of arguments
	commutative       bool   // this operation is commutative on its first 2 arguments (e.g. addition)
	resultInArg0      bool   // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register
	resultNotInArgs   bool   // outputs must not be allocated to the same registers as inputs
	clobberFlags      bool   // this op clobbers flags register
	call              bool   // is a function call
	nilCheck          bool   // this op is a nil check on arg0
	faultOnNilArg0    bool   // this op will fault if arg0 is nil (and aux encodes a small offset)
	faultOnNilArg1    bool   // this op will fault if arg1 is nil (and aux encodes a small offset)
	usesScratch       bool   // this op requires scratch memory space
	hasSideEffects    bool   // for "reasons", not to be eliminated.  E.g., atomic store, #19182.
	zeroWidth         bool   // op never translates into any machine code. example: copy, which may sometimes translate to machine code, is not zero-width.
	unsafePoint       bool   // this op is an unsafe point, i.e. not safe for async preemption
	symEffect         string // effect this op has on symbol in aux
	scale             uint8  // amd64/386 indexed load scale
}
70
// blockData describes one kind of basic block ending (conditional
// branch, return, etc.) for an architecture.
type blockData struct {
	name     string // the suffix for this block ("EQ", "LT", etc.)
	controls int    // the number of control values this type of block requires
	auxint   string // the type of the AuxInt value, if any
}
76
// regInfo encodes the register constraints of an opcode: which
// registers each input/output may use, and which are clobbered.
type regInfo struct {
	// inputs[i] encodes the set of registers allowed for the i'th input.
	// Inputs that don't use registers (flags, memory, etc.) should be 0.
	inputs []regMask
	// clobbers encodes the set of registers that are overwritten by
	// the instruction (other than the output registers).
	clobbers regMask
	// outputs[i] encodes the set of registers allowed for the i'th output.
	outputs []regMask
}
87
// regMask is a bit set of machine registers: bit i corresponds to the
// register named arch.regnames[i].
type regMask uint64
89
90func (a arch) regMaskComment(r regMask) string {
91	var buf bytes.Buffer
92	for i := uint64(0); r != 0; i++ {
93		if r&1 != 0 {
94			if buf.Len() == 0 {
95				buf.WriteString(" //")
96			}
97			buf.WriteString(" ")
98			buf.WriteString(a.regnames[i])
99		}
100		r >>= 1
101	}
102	return buf.String()
103}
104
105var archs []arch
106
107var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
108var memprofile = flag.String("memprofile", "", "write memory profile to `file`")
109var tracefile = flag.String("trace", "", "write trace to `file`")
110
// main parses flags, optionally starts CPU profiling and execution
// tracing, runs all generation tasks concurrently, and optionally
// writes a heap profile when generation is done.
func main() {
	flag.Parse()
	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			log.Fatal("could not create CPU profile: ", err)
		}
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal("could not start CPU profile: ", err)
		}
		defer pprof.StopCPUProfile()
	}
	if *tracefile != "" {
		f, err := os.Create(*tracefile)
		if err != nil {
			log.Fatalf("failed to create trace output file: %v", err)
		}
		// Deferred calls run LIFO, so trace.Stop (deferred below) runs
		// before this close — the trace is fully flushed first.
		defer func() {
			if err := f.Close(); err != nil {
				log.Fatalf("failed to close trace file: %v", err)
			}
		}()

		if err := trace.Start(f); err != nil {
			log.Fatalf("failed to start trace: %v", err)
		}
		defer trace.Stop()
	}

	// Sort by name so generated output is deterministic across runs.
	sort.Sort(ArchsByName(archs))

	// The generate tasks are run concurrently, since they are CPU-intensive
	// and can easily make use of many cores on a machine.
	//
	// Note that there is no limit on the concurrency at the moment. On a
	// four-core laptop at the time of writing, peak RSS usually reaches
	// ~200MiB, which seems doable by practically any machine nowadays. If
	// that stops being the case, we can cap this func to a fixed number of
	// architectures being generated at once.

	tasks := []func(){
		genOp,
	}
	for _, a := range archs {
		a := a // the funcs are run concurrently at a later time
		tasks = append(tasks, func() {
			genRules(a)
			genSplitLoadRules(a)
		})
	}
	var wg sync.WaitGroup
	for _, task := range tasks {
		task := task // capture a fresh copy for the goroutine below
		wg.Add(1)
		go func() {
			task()
			wg.Done()
		}()
	}
	wg.Wait()

	if *memprofile != "" {
		f, err := os.Create(*memprofile)
		if err != nil {
			log.Fatal("could not create memory profile: ", err)
		}
		defer f.Close()
		runtime.GC() // get up-to-date statistics
		if err := pprof.WriteHeapProfile(f); err != nil {
			log.Fatal("could not write memory profile: ", err)
		}
	}
}
185
// genOp writes ../opGen.go: the BlockKind and Op enumerations, the
// opcodeTable with per-op flags and register constraints, and the
// per-architecture register tables. The generated source is built up
// unformatted in a buffer and then run through go/format. Afterwards,
// it cross-checks that each arch's genfile mentions every opcode.
func genOp() {
	w := new(bytes.Buffer)
	fmt.Fprintf(w, "// Code generated from gen/*Ops.go; DO NOT EDIT.\n")
	fmt.Fprintln(w)
	fmt.Fprintln(w, "package ssa")

	// Emit imports: "obj" plus each arch's obj package (%q quotes the path).
	fmt.Fprintln(w, "import (")
	fmt.Fprintln(w, "\"cmd/internal/obj\"")
	for _, a := range archs {
		if a.pkg != "" {
			fmt.Fprintf(w, "%q\n", a.pkg)
		}
	}
	fmt.Fprintln(w, ")")

	// generate Block* declarations
	fmt.Fprintln(w, "const (")
	fmt.Fprintln(w, "BlockInvalid BlockKind = iota")
	for _, a := range archs {
		fmt.Fprintln(w)
		for _, d := range a.blocks {
			fmt.Fprintf(w, "Block%s%s\n", a.Name(), d.name)
		}
	}
	fmt.Fprintln(w, ")")

	// generate block kind string method
	fmt.Fprintln(w, "var blockString = [...]string{")
	fmt.Fprintln(w, "BlockInvalid:\"BlockInvalid\",")
	for _, a := range archs {
		fmt.Fprintln(w)
		for _, b := range a.blocks {
			fmt.Fprintf(w, "Block%s%s:\"%s\",\n", a.Name(), b.name, b.name)
		}
	}
	fmt.Fprintln(w, "}")
	fmt.Fprintln(w, "func (k BlockKind) String() string {return blockString[k]}")

	// generate block kind auxint method
	fmt.Fprintln(w, "func (k BlockKind) AuxIntType() string {")
	fmt.Fprintln(w, "switch k {")
	for _, a := range archs {
		for _, b := range a.blocks {
			if b.auxint == "" {
				continue
			}
			fmt.Fprintf(w, "case Block%s%s: return \"%s\"\n", a.Name(), b.name, b.auxint)
		}
	}
	fmt.Fprintln(w, "}")
	fmt.Fprintln(w, "return \"\"")
	fmt.Fprintln(w, "}")

	// generate Op* declarations
	fmt.Fprintln(w, "const (")
	fmt.Fprintln(w, "OpInvalid Op = iota") // make sure OpInvalid is 0.
	for _, a := range archs {
		fmt.Fprintln(w)
		for _, v := range a.ops {
			if v.name == "Invalid" {
				continue
			}
			fmt.Fprintf(w, "Op%s%s\n", a.Name(), v.name)
		}
	}
	fmt.Fprintln(w, ")")

	// generate OpInfo table
	fmt.Fprintln(w, "var opcodeTable = [...]opInfo{")
	fmt.Fprintln(w, " { name: \"OpInvalid\" },")
	for _, a := range archs {
		fmt.Fprintln(w)

		pkg := path.Base(a.pkg)
		for _, v := range a.ops {
			if v.name == "Invalid" {
				continue
			}
			fmt.Fprintln(w, "{")
			fmt.Fprintf(w, "name:\"%s\",\n", v.name)

			// flags
			if v.aux != "" {
				fmt.Fprintf(w, "auxType: aux%s,\n", v.aux)
			}
			fmt.Fprintf(w, "argLen: %d,\n", v.argLength)

			if v.rematerializeable {
				// Rematerializeable ops are recomputed instead of
				// spilled, so they must not clobber anything.
				if v.reg.clobbers != 0 {
					log.Fatalf("%s is rematerializeable and clobbers registers", v.name)
				}
				if v.clobberFlags {
					log.Fatalf("%s is rematerializeable and clobbers flags", v.name)
				}
				fmt.Fprintln(w, "rematerializeable: true,")
			}
			if v.commutative {
				fmt.Fprintln(w, "commutative: true,")
			}
			if v.resultInArg0 {
				fmt.Fprintln(w, "resultInArg0: true,")
				// OpConvert's register mask is selected dynamically,
				// so don't try to check it in the static table.
				if v.name != "Convert" && v.reg.inputs[0] != v.reg.outputs[0] {
					log.Fatalf("%s: input[0] and output[0] must use the same registers for %s", a.name, v.name)
				}
				if v.name != "Convert" && v.commutative && v.reg.inputs[1] != v.reg.outputs[0] {
					log.Fatalf("%s: input[1] and output[0] must use the same registers for %s", a.name, v.name)
				}
			}
			if v.resultNotInArgs {
				fmt.Fprintln(w, "resultNotInArgs: true,")
			}
			if v.clobberFlags {
				fmt.Fprintln(w, "clobberFlags: true,")
			}
			if v.call {
				fmt.Fprintln(w, "call: true,")
			}
			if v.nilCheck {
				fmt.Fprintln(w, "nilCheck: true,")
			}
			if v.faultOnNilArg0 {
				fmt.Fprintln(w, "faultOnNilArg0: true,")
				if v.aux != "SymOff" && v.aux != "SymValAndOff" && v.aux != "Int64" && v.aux != "Int32" && v.aux != "" {
					log.Fatalf("faultOnNilArg0 with aux %s not allowed", v.aux)
				}
			}
			if v.faultOnNilArg1 {
				fmt.Fprintln(w, "faultOnNilArg1: true,")
				if v.aux != "SymOff" && v.aux != "SymValAndOff" && v.aux != "Int64" && v.aux != "Int32" && v.aux != "" {
					log.Fatalf("faultOnNilArg1 with aux %s not allowed", v.aux)
				}
			}
			if v.usesScratch {
				fmt.Fprintln(w, "usesScratch: true,")
			}
			if v.hasSideEffects {
				fmt.Fprintln(w, "hasSideEffects: true,")
			}
			if v.zeroWidth {
				fmt.Fprintln(w, "zeroWidth: true,")
			}
			if v.unsafePoint {
				fmt.Fprintln(w, "unsafePoint: true,")
			}
			// symEffect is required exactly when the aux carries a
			// symbol ("Sym..." aux kinds), and disallowed otherwise.
			needEffect := strings.HasPrefix(v.aux, "Sym")
			if v.symEffect != "" {
				if !needEffect {
					log.Fatalf("symEffect with aux %s not allowed", v.aux)
				}
				// "Read,Write" becomes "SymRead|SymWrite".
				fmt.Fprintf(w, "symEffect: Sym%s,\n", strings.Replace(v.symEffect, ",", "|Sym", -1))
			} else if needEffect {
				log.Fatalf("symEffect needed for aux %s", v.aux)
			}
			if a.name == "generic" {
				fmt.Fprintln(w, "generic:true,")
				fmt.Fprintln(w, "},") // close op
				// generic ops have no reg info or asm
				continue
			}
			if v.asm != "" {
				fmt.Fprintf(w, "asm: %s.A%s,\n", pkg, v.asm)
			}
			if v.scale != 0 {
				fmt.Fprintf(w, "scale: %d,\n", v.scale)
			}
			fmt.Fprintln(w, "reg:regInfo{")

			// Compute input allocation order. We allocate from the
			// most to the least constrained input. This order guarantees
			// that we will always be able to find a register.
			var s []intPair
			for i, r := range v.reg.inputs {
				if r != 0 {
					s = append(s, intPair{countRegs(r), i})
				}
			}
			if len(s) > 0 {
				sort.Sort(byKey(s))
				fmt.Fprintln(w, "inputs: []inputInfo{")
				for _, p := range s {
					r := v.reg.inputs[p.val]
					fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r))
				}
				fmt.Fprintln(w, "},")
			}

			if v.reg.clobbers > 0 {
				fmt.Fprintf(w, "clobbers: %d,%s\n", v.reg.clobbers, a.regMaskComment(v.reg.clobbers))
			}

			// reg outputs
			s = s[:0]
			for i, r := range v.reg.outputs {
				s = append(s, intPair{countRegs(r), i})
			}
			if len(s) > 0 {
				sort.Sort(byKey(s))
				fmt.Fprintln(w, "outputs: []outputInfo{")
				for _, p := range s {
					r := v.reg.outputs[p.val]
					fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r))
				}
				fmt.Fprintln(w, "},")
			}
			fmt.Fprintln(w, "},") // close reg info
			fmt.Fprintln(w, "},") // close op
		}
	}
	fmt.Fprintln(w, "}")

	fmt.Fprintln(w, "func (o Op) Asm() obj.As {return opcodeTable[o].asm}")
	fmt.Fprintln(w, "func (o Op) Scale() int16 {return int16(opcodeTable[o].scale)}")

	// generate op string method
	fmt.Fprintln(w, "func (o Op) String() string {return opcodeTable[o].name }")

	fmt.Fprintln(w, "func (o Op) UsesScratch() bool { return opcodeTable[o].usesScratch }")

	fmt.Fprintln(w, "func (o Op) SymEffect() SymEffect { return opcodeTable[o].symEffect }")
	fmt.Fprintln(w, "func (o Op) IsCall() bool { return opcodeTable[o].call }")
	fmt.Fprintln(w, "func (o Op) HasSideEffects() bool { return opcodeTable[o].hasSideEffects }")
	fmt.Fprintln(w, "func (o Op) UnsafePoint() bool { return opcodeTable[o].unsafePoint }")

	// generate registers (skipped for the generic pseudo-arch)
	for _, a := range archs {
		if a.generic {
			continue
		}
		fmt.Fprintf(w, "var registers%s = [...]Register {\n", a.name)
		var gcRegN int
		for i, r := range a.regnames {
			// Derive the register's identifier in its obj package.
			pkg := a.pkg[len("cmd/internal/obj/"):]
			var objname string // name in cmd/internal/obj/$ARCH
			switch r {
			case "SB":
				// SB isn't a real register.  cmd/internal/obj expects 0 in this case.
				objname = "0"
			case "SP":
				objname = pkg + ".REGSP"
			case "g":
				objname = pkg + ".REGG"
			default:
				objname = pkg + ".REG_" + r
			}
			// Assign a GC register map index to registers
			// that may contain pointers.
			gcRegIdx := -1
			if a.gpregmask&(1<<uint(i)) != 0 {
				gcRegIdx = gcRegN
				gcRegN++
			}
			fmt.Fprintf(w, "  {%d, %s, %d, \"%s\"},\n", i, objname, gcRegIdx, r)
		}
		if gcRegN > 32 {
			// Won't fit in a uint32 mask.
			log.Fatalf("too many GC registers (%d > 32) on %s", gcRegN, a.name)
		}
		fmt.Fprintln(w, "}")
		fmt.Fprintf(w, "var gpRegMask%s = regMask(%d)\n", a.name, a.gpregmask)
		fmt.Fprintf(w, "var fpRegMask%s = regMask(%d)\n", a.name, a.fpregmask)
		if a.fp32regmask != 0 {
			fmt.Fprintf(w, "var fp32RegMask%s = regMask(%d)\n", a.name, a.fp32regmask)
		}
		if a.fp64regmask != 0 {
			fmt.Fprintf(w, "var fp64RegMask%s = regMask(%d)\n", a.name, a.fp64regmask)
		}
		fmt.Fprintf(w, "var specialRegMask%s = regMask(%d)\n", a.name, a.specialregmask)
		fmt.Fprintf(w, "var framepointerReg%s = int8(%d)\n", a.name, a.framepointerreg)
		fmt.Fprintf(w, "var linkReg%s = int8(%d)\n", a.name, a.linkreg)
	}

	// gofmt result
	b := w.Bytes()
	var err error
	b, err = format.Source(b)
	if err != nil {
		// Dump the unformatted source so the syntax error can be located.
		fmt.Printf("%s\n", w.Bytes())
		panic(err)
	}

	if err := ioutil.WriteFile("../opGen.go", b, 0666); err != nil {
		log.Fatalf("can't write output: %v\n", err)
	}

	// Check that the arch genfile handles all the arch-specific opcodes.
	// This is very much a hack, but it is better than nothing.
	//
	// Do a single regexp pass to record all ops being handled in a map, and
	// then compare that with the ops list. This is much faster than one
	// regexp pass per opcode.
	for _, a := range archs {
		if a.genfile == "" {
			continue
		}

		// Match `ssa.Op<Arch><opname>` bounded by non-word characters.
		pattern := fmt.Sprintf(`\Wssa\.Op%s([a-zA-Z0-9_]+)\W`, a.name)
		rxOp, err := regexp.Compile(pattern)
		if err != nil {
			log.Fatalf("bad opcode regexp %s: %v", pattern, err)
		}

		src, err := ioutil.ReadFile(a.genfile)
		if err != nil {
			log.Fatalf("can't read %s: %v", a.genfile, err)
		}
		seen := make(map[string]bool, len(a.ops))
		for _, m := range rxOp.FindAllSubmatch(src, -1) {
			seen[string(m[1])] = true
		}
		for _, op := range a.ops {
			if !seen[op.name] {
				log.Fatalf("Op%s%s has no code generation in %s", a.name, op.name, a.genfile)
			}
		}
	}
}
504
505// Name returns the name of the architecture for use in Op* and Block* enumerations.
506func (a arch) Name() string {
507	s := a.name
508	if s == "generic" {
509		s = ""
510	}
511	return s
512}
513
514// countRegs returns the number of set bits in the register mask.
515func countRegs(r regMask) int {
516	n := 0
517	for r != 0 {
518		n += int(r & 1)
519		r >>= 1
520	}
521	return n
522}
523
// intPair is a (key, val) pair of ints; slices of intPair are sorted
// by key via byKey. genOp uses it to order register constraints from
// most to least constrained (key = register count, val = arg index).
type intPair struct {
	key, val int
}

// byKey implements sort.Interface for []intPair, ordering by ascending key.
type byKey []intPair

func (a byKey) Len() int           { return len(a) }
func (a byKey) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a byKey) Less(i, j int) bool { return a[i].key < a[j].key }
533
// ArchsByName implements sort.Interface for []arch, ordering by arch
// name; main sorts archs with it so generated output is deterministic.
type ArchsByName []arch

func (x ArchsByName) Len() int           { return len(x) }
func (x ArchsByName) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
func (x ArchsByName) Less(i, j int) bool { return x[i].name < x[j].name }
539