// Inferno's libkern/memmove-arm.s
// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/memmove-arm.s
//
//         Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
//         Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "textflag.h"

// Register aliases used by runtime·memmove.
//
// TS and TE are the start and end pointers of the destination range.
// TE or TS are spilled to the stack during bulk register moves: the register
// lists used there include R0 (backward copy) or R8 (forward copy).
#define TS	R0
#define TE	R8

// Warning: the linker will use R11 to synthesize certain instructions. Please
// take care and double check with objdump.
#define FROM	R11				/* source pointer */
#define N	R12				/* byte count; only read before TMP is first written */
#define TMP	R12				/* N and TMP don't overlap (same register, disjoint live ranges) */
#define TMP1	R5				/* data word in the aligned word-at-a-time loops */

// Unaligned-source paths only. RSHIFT and LSHIFT are the shift counts used to
// splice two neighbouring source words into one destination word; OFFSET is
// the byte correction applied to FROM when the word loop is left.
// RSHIFT shares R5 with TMP1; the two are used on different paths.
#define RSHIFT	R5
#define LSHIFT	R6
#define OFFSET	R7

// Backward unaligned copy: BRn hold words read from the source, BWn hold the
// words to be written. BW0..BW2 reuse the registers of BR1..BR3, so each
// source word must be consumed before the output word that overwrites it is
// built.
#define BR0	R0					/* shared with TS */
#define BW0	R1
#define BR1	R1
#define BW1	R2
#define BR2	R2
#define BW2	R3
#define BR3	R3
#define BW3	R4

// Forward unaligned copy: FRn hold words read from the source, FWn hold the
// words to be written. FW1..FW3 reuse the registers of FR0..FR2.
#define FW0	R1
#define FR0	R2
#define FW1	R2
#define FR1	R3
#define FW2	R3
#define FR2	R4
#define FW3	R4
#define FR3	R8					/* shared with TE */

// See memmove Go doc for important implementation constraints.

// func memmove(to, from unsafe.Pointer, n uintptr)
//
// Copies n bytes from 'from' to 'to'. The copy runs forward (ascending
// addresses) when to <= from and backward (descending addresses, starting at
// the end of both ranges) otherwise, so the direction is chosen from the
// relative position of the two pointers.
//
// Both directions follow the same plan:
//   1. fewer than 4 bytes: copy byte by byte;
//   2. copy single bytes until the destination pointer is 4-byte aligned;
//   3. source also aligned: 32-byte multi-register chunks, then single
//      words, then trailing bytes;
//   4. source not aligned: 16-byte chunks built by shifting and merging
//      aligned source words, then trailing bytes.
//
// The 4-byte frame holds one spill slot. It is addressed as savedts on the
// backward paths and savedte on the forward paths, and keeps the destination
// bound whose register is overwritten by the multi-register moves.
TEXT runtime·memmove(SB), NOSPLIT, $4-12
_memmove:
	MOVW	to+0(FP), TS
	MOVW	from+4(FP), FROM
	MOVW	n+8(FP), N

	ADD	N, TS, TE	/* to end pointer */

	CMP	FROM, TS
	BLS	_forward	/* to <= from: copy ascending */

	/* ---- backward copy: FROM and TE walk down towards TS ---- */
_back:
	ADD	N, FROM		/* from end pointer */
	CMP	$4, N		/* need at least 4 bytes to copy */
	BLT	_b1tail		/* signed test: a count with the top bit set also takes the byte loop */

_b4align:				/* align destination on 4 */
	AND.S	$3, TE, TMP
	BEQ	_b4aligned

	MOVBU.W	-1(FROM), TMP	/* pre-indexed */
	MOVBU.W	TMP, -1(TE)	/* pre-indexed */
	B	_b4align

_b4aligned:				/* is source now aligned? */
	AND.S	$3, FROM, TMP	/* TMP = FROM & 3, reused by _bunaligned */
	BNE	_bunaligned

	ADD	$31, TS, TMP	/* do 32-byte chunks if possible */
	MOVW	TS, savedts-4(SP)	/* R0 (TS) is part of the register list below */
_b32loop:
	CMP	TMP, TE
	BLS	_b4tail		/* fewer than 32 bytes left */

	MOVM.DB.W (FROM), [R0-R7]
	MOVM.DB.W [R0-R7], (TE)
	B	_b32loop

_b4tail:				/* do remaining words if possible */
	MOVW	savedts-4(SP), TS	/* restore TS clobbered by the chunk loop */
	ADD	$3, TS, TMP
_b4loop:
	CMP	TMP, TE
	BLS	_b1tail		/* fewer than 4 bytes left */

	MOVW.W	-4(FROM), TMP1	/* pre-indexed */
	MOVW.W	TMP1, -4(TE)	/* pre-indexed */
	B	_b4loop

_b1tail:				/* remaining bytes */
	CMP	TE, TS
	BEQ	_return

	MOVBU.W	-1(FROM), TMP	/* pre-indexed */
	MOVBU.W	TMP, -1(TE)	/* pre-indexed */
	B	_b1tail

	/* ---- forward copy: FROM and TS walk up towards TE ---- */
_forward:
	CMP	$4, N		/* need at least 4 bytes to copy */
	BLT	_f1tail		/* signed test: a count with the top bit set also takes the byte loop */

_f4align:				/* align destination on 4 */
	AND.S	$3, TS, TMP
	BEQ	_f4aligned

	MOVBU.P	1(FROM), TMP	/* implicit write back */
	MOVBU.P	TMP, 1(TS)	/* implicit write back */
	B	_f4align

_f4aligned:				/* is source now aligned? */
	AND.S	$3, FROM, TMP	/* TMP = FROM & 3, reused by _funaligned */
	BNE	_funaligned

	SUB	$31, TE, TMP	/* do 32-byte chunks if possible */
	MOVW	TE, savedte-4(SP)	/* R8 (TE) is part of the register list below */
_f32loop:
	CMP	TMP, TS
	BHS	_f4tail		/* fewer than 32 bytes left */

	MOVM.IA.W (FROM), [R1-R8]
	MOVM.IA.W [R1-R8], (TS)
	B	_f32loop

_f4tail:
	MOVW	savedte-4(SP), TE	/* restore TE clobbered by the chunk loop */
	SUB	$3, TE, TMP	/* do remaining words if possible */
_f4loop:
	CMP	TMP, TS
	BHS	_f1tail		/* fewer than 4 bytes left */

	MOVW.P	4(FROM), TMP1	/* implicit write back */
	MOVW.P	TMP1, 4(TS)	/* implicit write back */
	B	_f4loop

_f1tail:				/* remaining bytes */
	CMP	TS, TE
	BEQ	_return

	MOVBU.P	1(FROM), TMP	/* implicit write back */
	MOVBU.P	TMP, 1(TS)	/* implicit write back */
	B	_f1tail

_return:
	MOVW	to+0(FP), R0	/* reload the 'to' argument into R0 before returning */
	RET

	/*
	 * Backward copy, destination aligned, source not.
	 * On entry TMP = FROM & 3, which is 1, 2 or 3. The conditional moves
	 * pick the shift pair for that misalignment; OFFSET is the number of
	 * bytes FROM is moved down by the BIC below, added back in _bu1tail.
	 */
_bunaligned:
	CMP	$2, TMP		/* is TMP < 2 ? */

	MOVW.LT	$8, RSHIFT		/* (R(n)<<24)|(R(n-1)>>8) */
	MOVW.LT	$24, LSHIFT
	MOVW.LT	$1, OFFSET

	MOVW.EQ	$16, RSHIFT		/* (R(n)<<16)|(R(n-1)>>16) */
	MOVW.EQ	$16, LSHIFT
	MOVW.EQ	$2, OFFSET

	MOVW.GT	$24, RSHIFT		/* (R(n)<<8)|(R(n-1)>>24) */
	MOVW.GT	$8, LSHIFT
	MOVW.GT	$3, OFFSET

	ADD	$16, TS, TMP	/* do 16-byte chunks if possible */
	CMP	TMP, TE
	BLS	_b1tail		/* 16 bytes or fewer left: FROM and TS are still intact */

	BIC	$3, FROM		/* align source */
	MOVW	TS, savedts-4(SP)	/* BR0 is R0, so TS must be spilled */
	MOVW	(FROM), BR0	/* prime first block register */

_bu16loop:
	CMP	TMP, TE
	BLS	_bu1tail

	/* BR0 still holds the lowest word of the previous block (or the primed word) */
	MOVW	BR0<<LSHIFT, BW3
	MOVM.DB.W (FROM), [BR0-BR3]
	ORR	BR3>>RSHIFT, BW3

	/* each BWn overwrites BR(n+1), so build outputs from the top down */
	MOVW	BR3<<LSHIFT, BW2
	ORR	BR2>>RSHIFT, BW2

	MOVW	BR2<<LSHIFT, BW1
	ORR	BR1>>RSHIFT, BW1

	MOVW	BR1<<LSHIFT, BW0
	ORR	BR0>>RSHIFT, BW0

	MOVM.DB.W [BW0-BW3], (TE)
	B	_bu16loop

_bu1tail:
	MOVW	savedts-4(SP), TS	/* restore TS clobbered through BR0 */
	ADD	OFFSET, FROM	/* undo the BIC: back to the true byte position */
	B	_b1tail

	/*
	 * Forward copy, destination aligned, source not.
	 * On entry TMP = FROM & 3, which is 1, 2 or 3. FROM is aligned down
	 * and then advanced by 4 when the first word is primed, so OFFSET is
	 * 4 - (FROM & 3), subtracted again in _fu1tail.
	 */
_funaligned:
	CMP	$2, TMP

	MOVW.LT	$8, RSHIFT		/* (R(n+1)<<24)|(R(n)>>8) */
	MOVW.LT	$24, LSHIFT
	MOVW.LT	$3, OFFSET

	MOVW.EQ	$16, RSHIFT		/* (R(n+1)<<16)|(R(n)>>16) */
	MOVW.EQ	$16, LSHIFT
	MOVW.EQ	$2, OFFSET

	MOVW.GT	$24, RSHIFT		/* (R(n+1)<<8)|(R(n)>>24) */
	MOVW.GT	$8, LSHIFT
	MOVW.GT	$1, OFFSET

	SUB	$16, TE, TMP	/* do 16-byte chunks if possible */
	CMP	TMP, TS
	BHS	_f1tail		/* 16 bytes or fewer left: FROM and TE are still intact */

	BIC	$3, FROM		/* align source */
	MOVW	TE, savedte-4(SP)	/* FR3 is R8, so TE must be spilled */
	MOVW.P	4(FROM), FR3	/* prime last block register, implicit write back */

_fu16loop:
	CMP	TMP, TS
	BHS	_fu1tail

	/* FR3 still holds the highest word of the previous block (or the primed word) */
	MOVW	FR3>>RSHIFT, FW0
	MOVM.IA.W (FROM), [FR0,FR1,FR2,FR3]
	ORR	FR0<<LSHIFT, FW0

	/* each FW(n+1) overwrites FRn, so build outputs from the bottom up */
	MOVW	FR0>>RSHIFT, FW1
	ORR	FR1<<LSHIFT, FW1

	MOVW	FR1>>RSHIFT, FW2
	ORR	FR2<<LSHIFT, FW2

	MOVW	FR2>>RSHIFT, FW3
	ORR	FR3<<LSHIFT, FW3

	MOVM.IA.W [FW0,FW1,FW2,FW3], (TS)
	B	_fu16loop

_fu1tail:
	MOVW	savedte-4(SP), TE	/* restore TE clobbered through FR3 */
	SUB	OFFSET, FROM	/* undo the align-and-prime: back to the true byte position */
	B	_f1tail
265