xref: /freebsd/sys/arm/arm/bcopyinout_xscale.S (revision 325151a3)
1/*	$NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $	*/
2
3/*-
4 * Copyright 2003 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *      This product includes software developed for the NetBSD Project by
20 *      Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include <machine/asm.h>
39__FBSDID("$FreeBSD$");
40
41#include <machine/acle-compat.h>
42
43	.syntax	unified
44	.text
45	.align	2
46
/*
 * GET_PCB(tmp)
 *
 * Leaves in `tmp` the ADDRESS of a word that holds the current PCB
 * pointer; it does not load the pointer itself.  The users of this macro
 * in this file follow it with "ldr tmp, [tmp]" to obtain the PCB.
 *
 * __ARM_ARCH >= 6: read CP15 c13, c0, 4 and add the TD_PCB offset to it.
 *	NOTE(review): the register presumably holds the current thread
 *	pointer, making the result &td->td_pcb - confirm against the
 *	code that writes this register.
 * otherwise: load the literal word .Lcurpcb, i.e. the address
 *	__pcpu + PC_CURPCB.
 *
 * Clobbers only `tmp`; flags are untouched.
 */
#if __ARM_ARCH >= 6
#define GET_PCB(tmp) \
	mrc p15, 0, tmp, c13, c0, 4; \
	add	tmp, tmp, #(TD_PCB)
#else
.Lcurpcb:
	.word	_C_LABEL(__pcpu) + PC_CURPCB
#define GET_PCB(tmp) \
	ldr	tmp, .Lcurpcb
#endif
57
/*
 * copyin - copy bytes from user space to kernel space
 *
 * In:
 *	r0 = user space address   (source)
 *	r1 = kernel space address (destination)
 *	r2 = length in bytes (compared as a signed value)
 *
 * Out:
 *	r0 = 0 on success or when length <= 0,
 *	     EFAULT when r0 + r2 wraps, when r0 + r2 is above
 *	     VM_MAXUSER_ADDRESS, or when the handler installed in
 *	     PCB_ONFAULT (.Lcopyin_fault) is entered during the copy.
 *
 * Every user-space read uses ldrt/ldrbt (the unprivileged "translate"
 * forms); kernel-space writes use plain str/strb/strd.
 *
 * Register roles while .Lcopyin_guts runs:
 *	r0  = running source pointer      r1  = running destination pointer
 *	r2  = bytes remaining             ip  = scratch
 *	r3  = tells .Lcopyin_fault what .Lcopyin_guts has pushed:
 *	      0 nothing, > 0 {r4-r7}, < 0 {r4-r9}
 *	r10 = PCB pointer                 r11 = previous PCB_ONFAULT value
 *
 * The code contains several computed jumps ("add pc, pc, ...").  They
 * rely on pc reading as the address of the add plus 8 and on the exact
 * number of instructions that follow, so do not insert or delete
 * instructions inside those sequences.
 */
ENTRY(copyin)
	cmp	r2, #0x00
	movle	r0, #0x00
	RETc(le)			/* Bail early if length is <= 0 */

	adds	r3, r0, r2		/* r3 = end of user range */
	movcs	r0, #EFAULT		/* Carry set: range wraps around */
	RETc(cs)

	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT		/* End is above VM_MAXUSER_ADDRESS */
	RETc(cs)

	/*
	 * If an _arm_memcpy hook is installed and the request is at least
	 * _min_memcpy_size bytes, try the hook first.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal		/* No hook installed */
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal		/* Too short for the hook */
	/*
	 * Push an even number of registers so that sp stays 8-byte aligned
	 * at the call, as the procedure call standard requires.  r3 is
	 * saved only as padding; it is dead after the restore below.
	 */
	stmfd	sp!, {r0-r4, lr}
	mov	r3, r0			/* Swap r0/r1: hook takes (dst, src) */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #2 /* SRC_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* Return to the cmp below */
	ldr	pc, [r4]
	cmp	r0, #0			/* Hook result; ldmfd keeps the flags */
	ldmfd	sp!, {r0-r4, lr}
	moveq	r0, #0
	RETeq
	/* Hook returned non-zero: fall back to the copy below. */

.Lnormal:
	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]		/* r10 = PCB pointer */

	mov	r3, #0x00		/* Nothing extra on the stack yet */
	adr	ip, .Lcopyin_fault
	ldr	r11, [r10, #PCB_ONFAULT]	/* Remember previous handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyin_guts
	str	r11, [r10, #PCB_ONFAULT]	/* Put previous handler back */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

	/*
	 * Fault path.  Restores the previous PCB_ONFAULT value, pops
	 * whatever .Lcopyin_guts had pushed (as signalled by r3), then
	 * returns EFAULT to copyin's caller.
	 */
.Lcopyin_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

.Lcopyin_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyin_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1-3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyin_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * ip = 3 - bytes needed.  Skip that many 8-byte ldrbt/strb pairs;
	 * with ip == 0 fall through the nop and run all three pairs.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyin_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyin_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyin_w_less_than8

	/* Quad-align the destination buffer */
	tst	r1, #0x07		/* Already quad aligned? */
	ldrtne	ip, [r0], #0x04
	strne	ip, [r1], #0x04
	subne	r2, r2, #0x04
	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer quad aligned, source is word aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyin_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyin_w_loop128:
	ldrt	r4, [r0], #0x04		/* LD:00-03 */
	ldrt	r5, [r0], #0x04		/* LD:04-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrt	r6, [r0], #0x04		/* LD:08-0b */
	ldrt	r7, [r0], #0x04		/* LD:0c-0f */
	ldrt	r8, [r0], #0x04		/* LD:10-13 */
	ldrt	r9, [r0], #0x04		/* LD:14-17 */
	strd	r4, [r1], #0x08		/* ST:00-07 */
	ldrt	r4, [r0], #0x04		/* LD:18-1b */
	ldrt	r5, [r0], #0x04		/* LD:1c-1f */
	strd	r6, [r1], #0x08		/* ST:08-0f */
	ldrt	r6, [r0], #0x04		/* LD:20-23 */
	ldrt	r7, [r0], #0x04		/* LD:24-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r8, [r1], #0x08		/* ST:10-17 */
	ldrt	r8, [r0], #0x04		/* LD:28-2b */
	ldrt	r9, [r0], #0x04		/* LD:2c-2f */
	strd	r4, [r1], #0x08		/* ST:18-1f */
	ldrt	r4, [r0], #0x04		/* LD:30-33 */
	ldrt	r5, [r0], #0x04		/* LD:34-37 */
	strd	r6, [r1], #0x08		/* ST:20-27 */
	ldrt	r6, [r0], #0x04		/* LD:38-3b */
	ldrt	r7, [r0], #0x04		/* LD:3c-3f */
	strd	r8, [r1], #0x08		/* ST:28-2f */
	ldrt	r8, [r0], #0x04		/* LD:40-43 */
	ldrt	r9, [r0], #0x04		/* LD:44-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08		/* ST:30-37 */
	ldrt	r4, [r0], #0x04		/* LD:48-4b */
	ldrt	r5, [r0], #0x04		/* LD:4c-4f */
	strd	r6, [r1], #0x08		/* ST:38-3f */
	ldrt	r6, [r0], #0x04		/* LD:50-53 */
	ldrt	r7, [r0], #0x04		/* LD:54-57 */
	strd	r8, [r1], #0x08		/* ST:40-47 */
	ldrt	r8, [r0], #0x04		/* LD:58-5b */
	ldrt	r9, [r0], #0x04		/* LD:5c-5f */
	strd	r4, [r1], #0x08		/* ST:48-4f */
	ldrt	r4, [r0], #0x04		/* LD:60-63 */
	ldrt	r5, [r0], #0x04		/* LD:64-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r6, [r1], #0x08		/* ST:50-57 */
	ldrt	r6, [r0], #0x04		/* LD:68-6b */
	ldrt	r7, [r0], #0x04		/* LD:6c-6f */
	strd	r8, [r1], #0x08		/* ST:58-5f */
	ldrt	r8, [r0], #0x04		/* LD:70-73 */
	ldrt	r9, [r0], #0x04		/* LD:74-77 */
	strd	r4, [r1], #0x08		/* ST:60-67 */
	ldrt	r4, [r0], #0x04		/* LD:78-7b */
	ldrt	r5, [r0], #0x04		/* LD:7c-7f */
	strd	r6, [r1], #0x08		/* ST:68-6f */
	strd	r8, [r1], #0x08		/* ST:70-77 */
	subs	r2, r2, #0x80
	strd	r4, [r1], #0x08		/* ST:78-7f */
	bge	.Lcopyin_w_loop128

.Lcopyin_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyin_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyin_w_loop32:
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x18]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	r8, [r0], #0x04
	ldrt	r9, [r0], #0x04
	strd	r4, [r1], #0x08
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	strd	r6, [r1], #0x08
	strd	r8, [r1], #0x08
	subs	r2, r2, #0x20
	strd	r4, [r1], #0x08
	bge	.Lcopyin_w_loop32

.Lcopyin_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * r4 = whole 8-byte groups still to copy, in bytes (0, 8, 16, 24).
	 * Each group below is exactly four instructions (16 bytes, padded
	 * with a nop), so jumping forward by (0x18 - r4) * 2 bytes skips
	 * the groups that are not needed.  The subs leaves Z set when
	 * nothing remains after the groups; the RETeq further down still
	 * sees those flags because nothing in between alters them.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 16 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 8 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* r4-r9 are no longer on the stack */

.Lcopyin_w_less_than8:
	subs	r2, r2, #0x04
	ldrtge	ip, [r0], #0x04		/* >= 4 left: copy one more word */
	strge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* < 4 left: undo the subtraction */
	/* 1 to 3 bytes remain; r2 doubles as a data register from here. */
	ldrbt	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrbtge	r2, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbtgt	ip, [r0]
	strbge	r2, [r1], #0x01
	strbgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * The source is read as aligned words; each destination word is built
 * by shifting and merging two consecutive source words.  ip always holds
 * the most recently loaded source word.  ip on entry is the source
 * misalignment (1, 2 or 3) and selects the bad1/bad2/bad3 variant.
 */
.Lcopyin_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03		/* Round the source down to a word */
	cmp	ip, #2
	ldrt	ip, [r0], #0x04		/* Flags from the cmp are preserved */
	bgt	.Lcopyin_bad3
	beq	.Lcopyin_bad2
	b	.Lcopyin_bad1

.Lcopyin_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back to the next unread byte */
	blt	.Lcopyin_l4

.Lcopyin_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad1_loop4
	sub	r0, r0, #0x03		/* Back to the next unread byte */
	b	.Lcopyin_l4

.Lcopyin_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back to the next unread byte */
	blt	.Lcopyin_l4

.Lcopyin_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad2_loop4
	sub	r0, r0, #0x02		/* Back to the next unread byte */
	b	.Lcopyin_l4

.Lcopyin_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back to the next unread byte */
	blt	.Lcopyin_l4

.Lcopyin_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad3_loop4
	sub	r0, r0, #0x01		/* Back to the next unread byte */

	/* Misaligned-source tail: r2 is (bytes remaining - 4) here. */
.Lcopyin_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* r4-r7 are no longer on the stack */
	adds	r2, r2, #0x04
	RETeq
	/*
	 * Copy the last 1 to 3 bytes.  r2 becomes 3 - remaining; skip that
	 * many 8-byte ldrbt/strb pairs, or fall through the nop and run
	 * all three when exactly 3 bytes remain.
	 */
.Lcopyin_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0]
	strb	ip, [r1]
	RET
END(copyin)
508
/*
 * copyout - copy bytes from kernel space to user space
 *
 * In:
 *	r0 = kernel space address (source)
 *	r1 = user space address   (destination)
 *	r2 = length in bytes (compared as a signed value)
 *
 * Out:
 *	r0 = 0 on success or when length <= 0,
 *	     EFAULT when r1 + r2 wraps, when r1 + r2 is above
 *	     VM_MAXUSER_ADDRESS, or when the handler installed in
 *	     PCB_ONFAULT (.Lcopyout_fault) is entered during the copy.
 *
 * Every user-space write uses strt/strbt (the unprivileged "translate"
 * forms); kernel-space reads use plain ldr/ldrb/ldrd.
 *
 * Register roles while .Lcopyout_guts runs:
 *	r0  = running source pointer      r1  = running destination pointer
 *	r2  = bytes remaining             ip  = scratch
 *	r3  = tells .Lcopyout_fault what .Lcopyout_guts has pushed:
 *	      0 nothing, > 0 {r4-r7}, < 0 {r4-r9}
 *	r10 = PCB pointer                 r11 = previous PCB_ONFAULT value
 *
 * The code contains several computed jumps ("add pc, pc, ...").  They
 * rely on pc reading as the address of the add plus 8 and on the exact
 * number of instructions that follow, so do not insert or delete
 * instructions inside those sequences.
 */
ENTRY(copyout)
	cmp	r2, #0x00
	movle	r0, #0x00
	RETc(le)			/* Bail early if length is <= 0 */

	adds	r3, r1, r2		/* r3 = end of user range */
	movcs	r0, #EFAULT		/* Carry set: range wraps around */
	RETc(cs)

	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT		/* End is above VM_MAXUSER_ADDRESS */
	RETc(cs)

	/*
	 * If an _arm_memcpy hook is installed and the request is at least
	 * _min_memcpy_size bytes, try the hook first.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormale		/* No hook installed */
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormale		/* Too short for the hook */
	/*
	 * Push an even number of registers so that sp stays 8-byte aligned
	 * at the call, as the procedure call standard requires.  r3 is
	 * saved only as padding; it is dead after the restore below.
	 */
	stmfd	sp!, {r0-r4, lr}
	mov	r3, r0			/* Swap r0/r1: hook takes (dst, src) */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #1 /* DST_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* Return to the cmp below */
	ldr	pc, [r4]
	cmp	r0, #0			/* Hook result; ldmfd keeps the flags */
	ldmfd	sp!, {r0-r4, lr}
	moveq	r0, #0
	RETeq
	/* Hook returned non-zero: fall back to the copy below. */

.Lnormale:
	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]		/* r10 = PCB pointer */

	mov	r3, #0x00		/* Nothing extra on the stack yet */
	adr	ip, .Lcopyout_fault
	ldr	r11, [r10, #PCB_ONFAULT]	/* Remember previous handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyout_guts
	str	r11, [r10, #PCB_ONFAULT]	/* Put previous handler back */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

	/*
	 * Fault path.  Restores the previous PCB_ONFAULT value, pops
	 * whatever .Lcopyout_guts had pushed (as signalled by r3), then
	 * returns EFAULT to copyout's caller.
	 */
.Lcopyout_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

.Lcopyout_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyout_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1-3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyout_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * ip = 3 - bytes needed.  Skip that many 8-byte ldrb/strbt pairs;
	 * with ip == 0 fall through the nop and run all three pairs.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyout_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyout_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyout_w_less_than8

	/* Quad-align the source buffer (it is read with ldrd below) */
	tst	r0, #0x07		/* Already quad aligned? */
	ldrne	ip, [r0], #0x04
	subne	r2, r2, #0x04
	strtne	ip, [r1], #0x04

	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer word aligned, source is quad aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyout_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyout_w_loop128:
	ldrd	r4, [r0], #0x08		/* LD:00-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrd	r6, [r0], #0x08		/* LD:08-0f */
	ldrd	r8, [r0], #0x08		/* LD:10-17 */
	strt	r4, [r1], #0x04		/* ST:00-03 */
	strt	r5, [r1], #0x04		/* ST:04-07 */
	ldrd	r4, [r0], #0x08		/* LD:18-1f */
	strt	r6, [r1], #0x04		/* ST:08-0b */
	strt	r7, [r1], #0x04		/* ST:0c-0f */
	ldrd	r6, [r0], #0x08		/* LD:20-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strt	r8, [r1], #0x04		/* ST:10-13 */
	strt	r9, [r1], #0x04		/* ST:14-17 */
	ldrd	r8, [r0], #0x08		/* LD:28-2f */
	strt	r4, [r1], #0x04		/* ST:18-1b */
	strt	r5, [r1], #0x04		/* ST:1c-1f */
	ldrd	r4, [r0], #0x08		/* LD:30-37 */
	strt	r6, [r1], #0x04		/* ST:20-23 */
	strt	r7, [r1], #0x04		/* ST:24-27 */
	ldrd	r6, [r0], #0x08		/* LD:38-3f */
	strt	r8, [r1], #0x04		/* ST:28-2b */
	strt	r9, [r1], #0x04		/* ST:2c-2f */
	ldrd	r8, [r0], #0x08		/* LD:40-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strt	r4, [r1], #0x04		/* ST:30-33 */
	strt	r5, [r1], #0x04		/* ST:34-37 */
	ldrd	r4, [r0], #0x08		/* LD:48-4f */
	strt	r6, [r1], #0x04		/* ST:38-3b */
	strt	r7, [r1], #0x04		/* ST:3c-3f */
	ldrd	r6, [r0], #0x08		/* LD:50-57 */
	strt	r8, [r1], #0x04		/* ST:40-43 */
	strt	r9, [r1], #0x04		/* ST:44-47 */
	ldrd	r8, [r0], #0x08		/* LD:58-5f */
	strt	r4, [r1], #0x04		/* ST:48-4b */
	strt	r5, [r1], #0x04		/* ST:4c-4f */
	ldrd	r4, [r0], #0x08		/* LD:60-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strt	r6, [r1], #0x04		/* ST:50-53 */
	strt	r7, [r1], #0x04		/* ST:54-57 */
	ldrd	r6, [r0], #0x08		/* LD:68-6f */
	strt	r8, [r1], #0x04		/* ST:58-5b */
	strt	r9, [r1], #0x04		/* ST:5c-5f */
	ldrd	r8, [r0], #0x08		/* LD:70-77 */
	strt	r4, [r1], #0x04		/* ST:60-63 */
	strt	r5, [r1], #0x04		/* ST:64-67 */
	ldrd	r4, [r0], #0x08		/* LD:78-7f */
	strt	r6, [r1], #0x04		/* ST:68-6b */
	strt	r7, [r1], #0x04		/* ST:6c-6f */
	strt	r8, [r1], #0x04		/* ST:70-73 */
	strt	r9, [r1], #0x04		/* ST:74-77 */
	subs	r2, r2, #0x80
	strt	r4, [r1], #0x04		/* ST:78-7b */
	strt	r5, [r1], #0x04		/* ST:7c-7f */
	bge	.Lcopyout_w_loop128

.Lcopyout_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyout_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyout_w_loop32:
	ldrd	r4, [r0], #0x08
	pld	[r0, #0x18]
	ldrd	r6, [r0], #0x08
	ldrd	r8, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	ldrd	r4, [r0], #0x08
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
	strt	r8, [r1], #0x04
	strt	r9, [r1], #0x04
	subs	r2, r2, #0x20
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	bge	.Lcopyout_w_loop32

.Lcopyout_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * r4 = whole 8-byte groups still to copy, in bytes (0, 8, 16, 24).
	 * Each group below is exactly four instructions (16 bytes, padded
	 * with a nop), so jumping forward by (0x18 - r4) * 2 bytes skips
	 * the groups that are not needed.  The subs leaves Z set when
	 * nothing remains after the groups; the RETeq further down still
	 * sees those flags because nothing in between alters them.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 16 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 8 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* r4-r9 are no longer on the stack */

.Lcopyout_w_less_than8:
	subs	r2, r2, #0x04
	ldrge	ip, [r0], #0x04		/* >= 4 left: copy one more word */
	strtge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* < 4 left: undo the subtraction */
	/* 1 to 3 bytes remain; r2 doubles as a data register from here. */
	ldrb	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrbge	r2, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrbgt	ip, [r0]
	strbtge	r2, [r1], #0x01
	strbtgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * The source is read as aligned words; each destination word is built
 * by shifting and merging two consecutive source words.  ip always holds
 * the most recently loaded source word.  ip on entry is the source
 * misalignment (1, 2 or 3) and selects the bad1/bad2/bad3 variant.
 */
.Lcopyout_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03		/* Round the source down to a word */
	cmp	ip, #2
	ldr	ip, [r0], #0x04		/* Flags from the cmp are preserved */
	bgt	.Lcopyout_bad3
	beq	.Lcopyout_bad2
	b	.Lcopyout_bad1

.Lcopyout_bad1_loop16:
#ifdef	__ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef	__ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back to the next unread byte */
	blt	.Lcopyout_l4

.Lcopyout_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad1_loop4
	sub	r0, r0, #0x03		/* Back to the next unread byte */
	b	.Lcopyout_l4

.Lcopyout_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back to the next unread byte */
	blt	.Lcopyout_l4

.Lcopyout_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad2_loop4
	sub	r0, r0, #0x02		/* Back to the next unread byte */
	b	.Lcopyout_l4

.Lcopyout_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back to the next unread byte */
	blt	.Lcopyout_l4

.Lcopyout_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad3_loop4
	sub	r0, r0, #0x01		/* Back to the next unread byte */

	/* Misaligned-source tail: r2 is (bytes remaining - 4) here. */
.Lcopyout_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* r4-r7 are no longer on the stack */
	adds	r2, r2, #0x04
	RETeq
	/*
	 * Copy the last 1 to 3 bytes.  r2 becomes 3 - remaining; skip that
	 * many 8-byte ldrb/strbt pairs, or fall through the nop and run
	 * all three when exactly 3 bytes remain.
	 */
.Lcopyout_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0]
	strbt	ip, [r1]
	RET
END(copyout)
960
961