1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build ignore
6
7// runtime·duffzero is a Duff's device for zeroing memory.
8// The compiler jumps to computed addresses within
9// the routine to zero chunks of memory.
10// Do not change duffzero without also
11// changing the uses in cmd/compile/internal/*/*.go.
12
13// runtime·duffcopy is a Duff's device for copying memory.
14// The compiler jumps to computed addresses within
15// the routine to copy chunks of memory.
16// Source and destination must not overlap.
17// Do not change duffcopy without also
18// changing the uses in cmd/compile/internal/*/*.go.
19
20// See the zero* and copy* generators below
21// for architecture-specific comments.
22
23// mkduff generates duff_*.s.
24package main
25
26import (
27	"bytes"
28	"fmt"
29	"io"
30	"io/ioutil"
31	"log"
32)
33
34func main() {
35	gen("amd64", notags, zeroAMD64, copyAMD64)
36	gen("386", notags, zero386, copy386)
37	gen("arm", notags, zeroARM, copyARM)
38	gen("arm64", notags, zeroARM64, copyARM64)
39	gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x)
40	gen("mips64x", tagsMIPS64x, zeroMIPS64x, copyMIPS64x)
41}
42
// gen builds the contents of duff_<arch>.s in memory and then writes the
// file, using a path relative to the current directory.
//
// The output consists of, in order: a fixed three-line header comment,
// whatever tags emits, the textflag.h include line, a blank line, the
// output of zero, another blank line, and the output of copy.
//
// If the file cannot be written the error is logged and the program exits.
func gen(arch string, tags, zero, copy func(io.Writer)) {
	out := new(bytes.Buffer)

	header := []string{
		"// Code generated by mkduff.go; DO NOT EDIT.",
		"// Run go generate from src/runtime to update.",
		"// See mkduff.go for comments.",
	}
	for _, line := range header {
		fmt.Fprintln(out, line)
	}

	tags(out)
	fmt.Fprintln(out, `#include "textflag.h"`)
	fmt.Fprintln(out)
	zero(out)
	fmt.Fprintln(out)
	copy(out)

	name := "duff_" + arch + ".s"
	err := ioutil.WriteFile(name, out.Bytes(), 0644)
	if err != nil {
		log.Fatalln(err)
	}
}
60
// notags is the tags emitter for outputs that need no build constraint:
// it writes a single empty line to w and nothing else.
func notags(w io.Writer) {
	fmt.Fprint(w, "\n")
}
62
// zeroAMD64 writes the amd64 version of runtime·duffzero to w.
//
// Register use in the generated routine:
//
//	X0: zero
//	DI: ptr to memory to be zeroed; updated as a side effect
//
// The body is 16 identical groups. Each group stores X0 at offsets
// 0, 16, 32 and 48 from DI and then advances DI by 64.
func zeroAMD64(w io.Writer) {
	const groups = 16
	stores := [...]string{"(DI)", "16(DI)", "32(DI)", "48(DI)"}

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for g := 0; g < groups; g++ {
		for _, dst := range stores {
			fmt.Fprintf(w, "\tMOVUPS\tX0,%s\n", dst)
		}
		// LEA rather than ADD, so that the flags are not clobbered.
		fmt.Fprintln(w, "\tLEAQ\t64(DI),DI")
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}
78
// copyAMD64 writes the amd64 version of runtime·duffcopy to w.
//
// Register use in the generated routine:
//
//	SI: ptr to source memory
//	DI: ptr to destination memory
//	X0: holds each 16-byte chunk between the load and the store
//
// SI and DI are updated as a side effect.
//
// The original author's note records that an equivalent run of MOVSQ
// instructions was about 3.5x slower than this load/store sequence. (That
// note also referred to STOSQ in duffzero; zeroAMD64 in this file emits
// MOVUPS stores instead.)
func copyAMD64(w io.Writer) {
	// One step: load a chunk, bump the source, store it, bump the
	// destination, then a blank separator line.
	step := []string{
		"\tMOVUPS\t(SI), X0",
		"\tADDQ\t$16, SI",
		"\tMOVUPS\tX0, (DI)",
		"\tADDQ\t$16, DI",
		"",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for left := 64; left > 0; left-- {
		for _, line := range step {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
97
// zero386 writes the 386 version of runtime·duffzero to w.
//
// Register use in the generated routine:
//
//	AX: zero
//	DI: ptr to memory to be zeroed; updated as a side effect
//
// The body is simply 128 consecutive STOSL instructions.
func zero386(w io.Writer) {
	const (
		header = "TEXT runtime·duffzero(SB), NOSPLIT, $0-0"
		store  = "\tSTOSL"
	)

	fmt.Fprintln(w, header)
	for left := 128; left > 0; left-- {
		fmt.Fprintln(w, store)
	}
	fmt.Fprintln(w, "\tRET")
}
108
// copy386 writes the 386 version of runtime·duffcopy to w.
//
// Register use in the generated routine:
//
//	SI: ptr to source memory
//	DI: ptr to destination memory
//	CX: holds each word between the load and the store
//
// SI and DI are updated as a side effect.
//
// The sequence is equivalent to a run of MOVSL instructions; according to
// the original author's note, MOVSL itself was found to be really slow.
func copy386(w io.Writer) {
	emit := func(line string) { fmt.Fprintln(w, line) }

	emit("TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for n := 0; n != 128; n++ {
		emit("\tMOVL\t(SI), CX")
		emit("\tADDL\t$4, SI")
		emit("\tMOVL\tCX, (DI)")
		emit("\tADDL\t$4, DI")
		fmt.Fprintln(w)
	}
	emit("\tRET")
}
126
// zeroARM writes the arm version of runtime·duffzero to w.
//
// Register use in the generated routine:
//
//	R0: zero
//	R1: ptr to memory to be zeroed; updated as a side effect
//
// The body is 128 copies of the same MOVW.P store.
func zeroARM(w io.Writer) {
	const store = "\tMOVW.P\tR0, 4(R1)"

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for n := 0; n != 128; n++ {
		fmt.Fprintln(w, store)
	}
	fmt.Fprintln(w, "\tRET")
}
137
// copyARM writes the arm version of runtime·duffcopy to w.
//
// Register use in the generated routine:
//
//	R0: scratch space
//	R1: ptr to source memory
//	R2: ptr to destination memory
//
// R1 and R2 are updated as a side effect.
func copyARM(w io.Writer) {
	// One step: load through R1 into R0, store R0 through R2, then a
	// blank separator line.
	step := [...]string{
		"\tMOVW.P\t4(R1), R0",
		"\tMOVW.P\tR0, 4(R2)",
		"",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for left := 128; left > 0; left-- {
		for _, line := range step {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
151
// zeroARM64 writes the arm64 version of runtime·duffzero to w.
//
// Register use in the generated routine:
//
//	ZR:  always zero
//	R20: ptr to memory to be zeroed
//
// The body is 64 pair stores of (ZR, ZR). The first 63 use STP.P with a
// step of 16; the last is a plain STP at (R20). On return R20 therefore
// points at the last location stored to rather than past it.
func zeroARM64(w io.Writer) {
	const pairs = 64

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	// Every pair except the final one is written with the .P form.
	for p := 1; p < pairs; p++ {
		fmt.Fprintln(w, "\tSTP.P\t(ZR, ZR), 16(R20)")
	}
	fmt.Fprintln(w, "\tSTP\t(ZR, ZR), (R20)")
	fmt.Fprintln(w, "\tRET")
}
163
// copyARM64 writes the arm64 version of runtime·duffcopy to w.
//
// Register use in the generated routine:
//
//	R20:      ptr to source memory
//	R21:      ptr to destination memory
//	R26, R27: scratch space (R27 aka REGTMP)
//
// R20 and R21 are updated as a side effect.
func copyARM64(w io.Writer) {
	const (
		load  = "\tLDP.P\t16(R20), (R26, R27)"
		store = "\tSTP.P\t(R26, R27), 16(R21)"
	)

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	for left := 64; left > 0; left-- {
		fmt.Fprintln(w, load)
		fmt.Fprintln(w, store)
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}
178
// tagsPPC64x writes the build constraint for the shared ppc64/ppc64le
// output: the "+build" comment line with an empty line before and after.
func tagsPPC64x(w io.Writer) {
	lines := [...]string{
		"",
		"// +build ppc64 ppc64le",
		"",
	}
	for _, line := range lines {
		fmt.Fprintln(w, line)
	}
}
184
// zeroPPC64x writes the ppc64/ppc64le version of runtime·duffzero to w.
//
// Register use in the generated routine:
//
//	R0: always zero
//	R3 (aka REGRT1): ptr to memory to be zeroed - 8
//
// On return, R3 points to the last zeroed dword.
// The body is 128 copies of the same MOVDU store.
func zeroPPC64x(w io.Writer) {
	const store = "\tMOVDU\tR0, 8(R3)"

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for left := 128; left > 0; left-- {
		fmt.Fprintln(w, store)
	}
	fmt.Fprintln(w, "\tRET")
}
195
// copyPPC64x is a placeholder: no duffcopy routine is generated for
// ppc64/ppc64le here. It writes only a TODO comment line to w, so the
// output contains no TEXT block for runtime·duffcopy.
func copyPPC64x(w io.Writer) {
	const placeholder = "// TODO: Implement runtime·duffcopy."
	fmt.Fprintln(w, placeholder)
}
199
// tagsMIPS64x writes the build constraint for the shared mips64/mips64le
// output: the "+build" comment line with an empty line before and after.
func tagsMIPS64x(w io.Writer) {
	lines := [...]string{
		"",
		"// +build mips64 mips64le",
		"",
	}
	for _, line := range lines {
		fmt.Fprintln(w, line)
	}
}
205
// zeroMIPS64x writes the mips64/mips64le version of runtime·duffzero to w.
//
// Register use in the generated routine:
//
//	R0: always zero
//	R1 (aka REGRT1): ptr to memory to be zeroed - 8
//
// On return, R1 points to the last zeroed dword.
// The body is 128 store/advance pairs: each stores R0 at 8(R1) and then
// adds 8 to R1.
func zeroMIPS64x(w io.Writer) {
	pair := [...]string{
		"\tMOVV\tR0, 8(R1)",
		"\tADDV\t$8, R1",
	}

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for left := 128; left > 0; left-- {
		for _, line := range pair {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
217
// copyMIPS64x writes the mips64/mips64le version of runtime·duffcopy to w.
//
// Register use, as read from the emitted instructions:
//
//	R1:  ptr to source memory
//	R2:  ptr to destination memory
//	R23: holds each value between the load and the store
//
// R1 and R2 are each advanced by 8 per step, so both are updated as a
// side effect. The body is 128 such steps.
func copyMIPS64x(w io.Writer) {
	emit := func(line string) { fmt.Fprintln(w, line) }

	emit("TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	for n := 0; n != 128; n++ {
		emit("\tMOVV\t(R1), R23")
		emit("\tADDV\t$8, R1")
		emit("\tMOVV\tR23, (R2)")
		emit("\tADDV\t$8, R2")
		fmt.Fprintln(w)
	}
	emit("\tRET")
}
229