xref: /minix/minix/kernel/arch/earm/phys_copy.S (revision 433d6423)
/*	$NetBSD: memcpy_arm.S,v 1.4 2013/08/11 04:56:32 matt Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if !defined(__minix)
#if defined(__ARM_EABI__)
STRONG_ALIAS(__aeabi_memcpy, memcpy)
#endif
#endif /* !defined(__minix) */

/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code, e.g. Iron Maiden.
 *
 * For anyone attempting to understand it:
 *
 * The core code is implemented here with simple stubs for memcpy().
 *
 * All local labels are prefixed with Lmemcpy_.
 * Following the prefix, a label starting with f is used in the forward copy
 * code, while a label starting with b is used in the backwards copy code.
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy:
 * unaligned source address
 * unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3, although r0-r2 have defined uses, i.e. src, dest and len, throughout.
 * Additional registers (r4, r5 and lr) are preserved prior to use.
 *
 * Apologies for the state of the comments ;-)
 */

/* For MINIX, we always spill r0, r4, r5, and lr, so we can easily
 * clean up the stack after a phys_copy fault. NetBSD, in contrast,
 * spills the minimum number of registers for each path.
 */
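/* A rough sketch of the contract this file implements for MINIX, inferred
 * from the code below rather than from any formal interface description:
 * phys_copy(src, dst, len) copies len bytes and returns 0 on success.  If
 * the copy faults, the kernel may resume execution at phys_copy_fault or
 * phys_copy_fault_in_kernel (see the end of this file), which unwind the
 * registers spilled on entry and return with r0 holding something other
 * than 0 (in practice a non-zero value).
 */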
#if defined(__minix)
/* LINTSTUB: Func: void *phys_copy(void *src, void *dst, size_t len) */
ENTRY(phys_copy)
	/* switch the source and destination registers */
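	/* The three exclusive-ORs below exchange r0 and r1 in place, without
	 * needing a scratch register, so that r0 = dst and r1 = src as the
	 * memcpy body below expects. */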
	eor     r0, r1, r0
	eor     r1, r0, r1
	eor     r0, r1, r0
#else
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
#endif
	/* save leaf functions having to store this away */
#if defined(__minix)
	push	{r0, r4, r5, lr}	/* memcpy() returns dest addr */
#else
	push	{r0, lr}		/* memcpy() returns dest addr */
#endif

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
#if !defined(__minix)
	push	{r4}		/* borrow r4 */
#endif

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

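	/* Here r2 holds (bytes remaining - 32).  cmn adds 16 when setting the
	 * flags, so "ge" below means at least 16 bytes are still left and one
	 * more 16-byte block can be copied. */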
	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
#if !defined(__minix)
	pop	{r4}		/* return r4 */
#endif

.Lmemcpy_l32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8
	blt	.Lmemcpy_l4

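	/* Between 4 and 11 bytes remain: copy one word (lt) or two words (ge),
	 * leaving 0-3 bytes for the byte-at-a-time tail below. */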
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
#if defined(__minix)
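	/* if nothing is left to copy (eq), restore the registers spilled on
	 * entry and return 0 to report success */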
	popeq	{r0, r4, r5}
	moveq	r0, #0
	popeq	{pc}
#else
#ifdef __APCS_26__
	ldmiaeq sp!, {r0, pc}^		/* done */
#else
	popeq	{r0, pc}		/* done */
#endif
#endif
	/* copy the crud byte at a time */
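	/* 1-3 bytes are left here.  cmp #2 sets the flags so that the first
	 * ldrb/strb pair always runs, the "ge" pair runs only when at least
	 * 2 bytes remain, and the "gt" pair only when 3 bytes remain. */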
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
#if defined(__minix)
	pop	{r0, r4, r5}
	mov	r0, #0
	pop	{pc}
#else
	pop	{r0, pc}
#endif

	/* erg - unaligned destination */
.Lmemcpy_destul:
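	/* r12 holds dst & 3; rsb turns it into 4 - (dst & 3), the number of
	 * leading bytes to copy before the destination is word aligned.  The
	 * cmp #2 gates the conditional byte copies, as in the tail above. */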
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_srcul:
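	/* The source is misaligned by r12 (1, 2 or 3) bytes.  Round r1 down
	 * to a word boundary and preload the first word into lr; the loops
	 * below then merge adjacent words with shift/or pairs (by 8, 16 or
	 * 24 bits depending on the misalignment, with the directions swapped
	 * for big-endian) so that every load and store stays word aligned. */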
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
#if !defined(__minix)
	push	{r4, r5}
#endif

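	/* source is 1 byte past a word boundary: each iteration keeps the
	 * three unconsumed bytes of lr, loads four more words and shifts the
	 * data into place, storing 16 aligned bytes per pass */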
.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
#if !defined(__minix)
	pop	{r4, r5}
#endif
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
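	/* r1 has been advanced one full word past the bytes actually merged;
	 * step it back 3 bytes so the byte-wise tail resumes at the next
	 * unconsumed source byte (srcul2l4/srcul3l4 step back 2 and 1). */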
	sub	r1, r1, #3
	b	.Lmemcpy_l4

.Lmemcpy_srcul2:
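	/* source is 2 bytes past a word boundary: same scheme as above, with
	 * halfword (16/16 bit) shifts */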
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
#if !defined(__minix)
	push	{r4, r5}
#endif

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
#if !defined(__minix)
	pop	{r4, r5}
#endif
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2
	b	.Lmemcpy_l4

.Lmemcpy_srcul3:
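	/* source is 3 bytes past a word boundary: same scheme, shifting by
	 * 24 and 8 bits */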
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
#if !defined(__minix)
	push	{r4, r5}
#endif

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
#if !defined(__minix)
	pop	{r4, r5}
#endif
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1
	b	.Lmemcpy_l4

#if defined(__minix)
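/* The two labels below are entry points for the kernel's fault handling: as
 * their comments note, the kernel can redirect a faulting phys_copy here.
 * Both pop the r0/r4/r5/lr words pushed on entry so the stack is balanced.
 * phys_copy_fault returns the saved pointer in r0 (unlike the 0 returned on
 * success), while phys_copy_fault_in_kernel instead returns the faulting
 * address read from the CP15 Data Fault Address Register (DFAR). */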
LABEL(phys_copy_fault)		/* kernel can send us here */
	pop	{r0, r4, r5}
	pop	{pc}

LABEL(phys_copy_fault_in_kernel)	/* kernel can send us here */
	pop	{r0, r4, r5}
	mrc	p15, 0, r0, c6, c0, 0	/* Read DFAR */
	pop	{pc}
#else
END(memcpy)
#endif