xref: /freebsd/sys/arm/arm/bcopyinout_xscale.S (revision 685dc743)
1/*	$NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $	*/
2
3/*-
4 * Copyright 2003 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *      This product includes software developed for the NetBSD Project by
20 *      Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include <machine/asm.h>
39	.syntax	unified
40	.text
41	.align	2
42
/*
 * GET_PCB(tmp): read CP15 register c13, c0, opcode2 4 into tmp, then add
 * TD_PCB.  The result is an address; copyin and copyout load through it
 * once more (ldr tmp, [tmp]) to get the pointer they index with
 * PCB_ONFAULT.
 * NOTE(review): c13/c0/4 presumably holds the current thread pointer and
 * TD_PCB the offset of its PCB pointer -- both are defined outside this
 * file, confirm there.
 * Only tmp is written; the condition flags are left untouched.
 */
43#define GET_PCB(tmp) \
44	mrc p15, 0, tmp, c13, c0, 4; \
45	add	tmp, tmp, #(TD_PCB)
46
47/*
48 * r0 = user space address
49 * r1 = kernel space address
50 * r2 = length
51 *
52 * Copies bytes from user space to kernel space
 *
 * Result in r0:
 *   0       length <= 0 (nothing copied), or the copy completed
 *   EFAULT  r0 + r2 wraps around, or r0 + r2 >= VM_MAXUSER_ADDRESS + 1,
 *           or .Lcopyin_fault was entered while copying
 *
 * The length is tested as a signed value, so a length with bit 31 set
 * is handled like zero.  NOTE(review): confirm no caller relies on
 * lengths of 2 GiB or more being rejected.
 *
 * While .Lcopyin_guts runs, PCB_ONFAULT holds .Lcopyin_fault, r10 the
 * PCB pointer, r11 the previous PCB_ONFAULT value and r3 the marker
 * that tells the fault path which registers the guts have pushed.
53 */
54ENTRY(copyin)
55	cmp	r2, #0x00
56	movle	r0, #0x00
57	RETc(le)			/* Bail early if length is <= 0 */
58
59	adds	r3, r0, r2		/* r3 = end of user range, C set on wrap */
60	movcs	r0, #EFAULT
61	RETc(cs)
62
63	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
64	cmp	r3, r12			/* Unsigned: end must be below the limit */
65	movcs	r0, #EFAULT
66	RETc(cs)
67
68	stmfd	sp!, {r10-r11, lr}
69
70	GET_PCB(r10)
71	ldr	r10, [r10]		/* r10 = PCB pointer */
72
73	mov	r3, #0x00		/* Nothing pushed by the guts yet */
74	adr	ip, .Lcopyin_fault
75	ldr	r11, [r10, #PCB_ONFAULT]	/* r11 = previous handler */
76	str	ip, [r10, #PCB_ONFAULT]
77	bl	.Lcopyin_guts
78	str	r11, [r10, #PCB_ONFAULT]	/* Put the previous handler back */
79	mov	r0, #0x00
80	ldmfd	sp!, {r10-r11, pc}
81
/*
 * Landing pad whose address copyin stores in PCB_ONFAULT.
 * Expects: r10 = PCB pointer, r11 = previous PCB_ONFAULT value,
 * r3 = marker for what the copy code had pushed at the time:
 *   r3 > 0  r4-r7 are on the stack
 *   r3 < 0  r4-r9 are on the stack
 *   r3 == 0 nothing extra is on the stack
 * Restores the previous handler, pops whatever was pushed, then pops
 * copyin's own frame so that EFAULT is returned to copyin's caller.
 */
82.Lcopyin_fault:
83	ldr	r0, =EFAULT
84	str	r11, [r10, #PCB_ONFAULT]
85	cmp	r3, #0x00
86	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
87	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
88	ldmfd	sp!, {r10-r11, pc}
89
/*
 * Copy engine, entered with bl from copyin.
 * In: r0 = user source, r1 = kernel destination, r2 = byte count (> 0),
 *     r3 = 0.  Every load from r0 uses the unprivileged forms
 *     (ldrt/ldrbt); stores to r1 are ordinary stores.
 * Returns through lr; r0 is not a status on return (copyin sets it).
 */
90.Lcopyin_guts:
91	pld	[r0]
92	/* Word-align the destination buffer */
93	ands	ip, r1, #0x03		/* Already word aligned? */
94	beq	.Lcopyin_wordaligned	/* Yup */
95	rsb	ip, ip, #0x04		/* ip = bytes needed to align r1 (1..3) */
96	cmp	r2, ip			/* Enough bytes left to align it? */
97	blt	.Lcopyin_l4_2		/* Nope. Just copy bytewise */
98	sub	r2, r2, ip
99	rsbs	ip, ip, #0x03		/* ip = byte copies to skip (0..2) */
	/*
	 * Computed jump.  pc reads as this instruction + 8, which is the
	 * first ldrbt below; every ldrbt/strb pair is 8 bytes, hence the
	 * lsl #3.  When ip == 0 no jump is taken and the nop is executed.
	 * Do not add or remove instructions between here and the cmp.
	 */
100	addne	pc, pc, ip, lsl #3
101	nop
102	ldrbt	ip, [r0], #0x01
103	strb	ip, [r1], #0x01
104	ldrbt	ip, [r0], #0x01
105	strb	ip, [r1], #0x01
106	ldrbt	ip, [r0], #0x01
107	strb	ip, [r1], #0x01
108	cmp	r2, #0x00		/* All done? */
109	RETeq
110
111	/* Destination buffer is now word aligned */
112.Lcopyin_wordaligned:
	/* ip = source misalignment; .Lcopyin_bad_align compares it with 2 */
113	ands	ip, r0, #0x03		/* Is src also word-aligned? */
114	bne	.Lcopyin_bad_align	/* Nope. Things just got bad */
115	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
116	blt	.Lcopyin_w_less_than8
117
118	/* Quad-align the destination buffer */
	/* (8-byte alignment of r1, which the strd stores further on use) */
119	tst	r1, #0x07		/* Already quad aligned? */
120	ldrtne	ip, [r0], #0x04
121	strne	ip, [r1], #0x04
122	subne	r2, r2, #0x04
123	stmfd	sp!, {r4-r9}		/* Free up some registers */
124	mov	r3, #-1			/* Signal restore r4-r9 */
125
126	/* Destination buffer quad aligned, source is word aligned */
	/* Bias r2 by -128: it stays >= 0 while a full 128-byte chunk remains */
127	subs	r2, r2, #0x80
128	blt	.Lcopyin_w_lessthan128
129
130	/* Copy 128 bytes at a time */
/*
 * One iteration moves 128 bytes: 32 unprivileged word loads from r0
 * interleaved with 16 strd stores to r1, rotating through the pairs
 * r4/r5, r6/r7 and r8/r9.  The LD:/ST: notes give the byte offsets
 * within the chunk.  r2 carries a -128 bias; the subs near the end sets
 * the flags for the closing bge (the strd after it leaves them alone).
 * r4-r9 are on the stack and r3 is -1 for the whole loop.
 */
131.Lcopyin_w_loop128:
132	ldrt	r4, [r0], #0x04		/* LD:00-03 */
133	ldrt	r5, [r0], #0x04		/* LD:04-07 */
134	pld	[r0, #0x18]		/* Prefetch 0x20 */
135	ldrt	r6, [r0], #0x04		/* LD:08-0b */
136	ldrt	r7, [r0], #0x04		/* LD:0c-0f */
137	ldrt	r8, [r0], #0x04		/* LD:10-13 */
138	ldrt	r9, [r0], #0x04		/* LD:14-17 */
139	strd	r4, [r1], #0x08		/* ST:00-07 */
140	ldrt	r4, [r0], #0x04		/* LD:18-1b */
141	ldrt	r5, [r0], #0x04		/* LD:1c-1f */
142	strd	r6, [r1], #0x08		/* ST:08-0f */
143	ldrt	r6, [r0], #0x04		/* LD:20-23 */
144	ldrt	r7, [r0], #0x04		/* LD:24-27 */
145	pld	[r0, #0x18]		/* Prefetch 0x40 */
146	strd	r8, [r1], #0x08		/* ST:10-17 */
147	ldrt	r8, [r0], #0x04		/* LD:28-2b */
148	ldrt	r9, [r0], #0x04		/* LD:2c-2f */
149	strd	r4, [r1], #0x08		/* ST:18-1f */
150	ldrt	r4, [r0], #0x04		/* LD:30-33 */
151	ldrt	r5, [r0], #0x04		/* LD:34-37 */
152	strd	r6, [r1], #0x08		/* ST:20-27 */
153	ldrt	r6, [r0], #0x04		/* LD:38-3b */
154	ldrt	r7, [r0], #0x04		/* LD:3c-3f */
155	strd	r8, [r1], #0x08		/* ST:28-2f */
156	ldrt	r8, [r0], #0x04		/* LD:40-43 */
157	ldrt	r9, [r0], #0x04		/* LD:44-47 */
158	pld	[r0, #0x18]		/* Prefetch 0x60 */
159	strd	r4, [r1], #0x08		/* ST:30-37 */
160	ldrt	r4, [r0], #0x04		/* LD:48-4b */
161	ldrt	r5, [r0], #0x04		/* LD:4c-4f */
162	strd	r6, [r1], #0x08		/* ST:38-3f */
163	ldrt	r6, [r0], #0x04		/* LD:50-53 */
164	ldrt	r7, [r0], #0x04		/* LD:54-57 */
165	strd	r8, [r1], #0x08		/* ST:40-47 */
166	ldrt	r8, [r0], #0x04		/* LD:58-5b */
167	ldrt	r9, [r0], #0x04		/* LD:5c-5f */
168	strd	r4, [r1], #0x08		/* ST:48-4f */
169	ldrt	r4, [r0], #0x04		/* LD:60-63 */
170	ldrt	r5, [r0], #0x04		/* LD:64-67 */
171	pld	[r0, #0x18]		/* Prefetch 0x80 */
172	strd	r6, [r1], #0x08		/* ST:50-57 */
173	ldrt	r6, [r0], #0x04		/* LD:68-6b */
174	ldrt	r7, [r0], #0x04		/* LD:6c-6f */
175	strd	r8, [r1], #0x08		/* ST:58-5f */
176	ldrt	r8, [r0], #0x04		/* LD:70-73 */
177	ldrt	r9, [r0], #0x04		/* LD:74-77 */
178	strd	r4, [r1], #0x08		/* ST:60-67 */
179	ldrt	r4, [r0], #0x04		/* LD:78-7b */
180	ldrt	r5, [r0], #0x04		/* LD:7c-7f */
181	strd	r6, [r1], #0x08		/* ST:68-6f */
182	strd	r8, [r1], #0x08		/* ST:70-77 */
183	subs	r2, r2, #0x80
184	strd	r4, [r1], #0x08		/* ST:78-7f */
185	bge	.Lcopyin_w_loop128
186
/*
 * Fewer than 128 bytes are left.  Remove the -128 bias from r2, return
 * if that leaves zero, otherwise bias by -32 and move 32 bytes per
 * iteration for as long as a full 32-byte chunk remains.
 * r4-r9 are still on the stack (r3 == -1) throughout.
 */
187.Lcopyin_w_lessthan128:
188	adds	r2, r2, #0x80		/* Adjust for extra sub */
189	ldmfdeq	sp!, {r4-r9}
190	RETeq
191	subs	r2, r2, #0x20
192	blt	.Lcopyin_w_lessthan32
193
194	/* Copy 32 bytes at a time */
195.Lcopyin_w_loop32:
196	ldrt	r4, [r0], #0x04
197	ldrt	r5, [r0], #0x04
198	pld	[r0, #0x18]
199	ldrt	r6, [r0], #0x04
200	ldrt	r7, [r0], #0x04
201	ldrt	r8, [r0], #0x04
202	ldrt	r9, [r0], #0x04
203	strd	r4, [r1], #0x08
204	ldrt	r4, [r0], #0x04
205	ldrt	r5, [r0], #0x04
206	strd	r6, [r1], #0x08
207	strd	r8, [r1], #0x08
208	subs	r2, r2, #0x20		/* Flags for the bge; strd keeps them */
209	strd	r4, [r1], #0x08
210	bge	.Lcopyin_w_loop32
211
/*
 * Fewer than 32 bytes are left.  After removing the -32 bias, the whole
 * 8-byte units (0..3 of them) are copied by jumping into the middle of
 * three identical 16-byte blocks; what is left (< 8 bytes) falls
 * through to .Lcopyin_w_less_than8.
 */
212.Lcopyin_w_lessthan32:
213	adds	r2, r2, #0x20		/* Adjust for extra sub */
214	ldmfdeq	sp!, {r4-r9}
215	RETeq				/* Return now if done */
216
217	and	r4, r2, #0x18		/* r4 = bytes in whole 8-byte units */
218	rsb	r5, r4, #0x18		/* r5 = 8 * (number of blocks to skip) */
219	subs	r2, r2, r4		/* r2 = tail bytes; Z is used by RETeq below */
	/*
	 * Computed jump.  pc reads as this instruction + 8, which is the
	 * first block below.  Each block is four instructions (16 bytes)
	 * per 8 data bytes, hence r5 lsl #1; the nop inside each block is
	 * padding that keeps the block at 16 bytes.  Nothing between here
	 * and the RETeq below changes the flags.
	 */
220	add	pc, pc, r5, lsl #1
221	nop
222
223	/* At least 24 bytes remaining */
224	ldrt	r4, [r0], #0x04
225	ldrt	r5, [r0], #0x04
226	nop
227	strd	r4, [r1], #0x08
228
229	/* At least 16 bytes remaining */
230	ldrt	r4, [r0], #0x04
231	ldrt	r5, [r0], #0x04
232	nop
233	strd	r4, [r1], #0x08
234
235	/* At least 8 bytes remaining */
236	ldrt	r4, [r0], #0x04
237	ldrt	r5, [r0], #0x04
238	nop
239	strd	r4, [r1], #0x08
240
241	/* Less than 8 bytes remaining */
242	ldmfd	sp!, {r4-r9}
243	RETeq				/* Return now if done */
244	mov	r3, #0x00		/* Stack is clean again: tell the fault path */
245
/*
 * Tail of the aligned path: fewer than 8 bytes left, both pointers word
 * aligned, nothing extra on the stack (r3 == 0).  Copies one word when
 * at least 4 bytes remain, then up to three single bytes.
 */
246.Lcopyin_w_less_than8:
247	subs	r2, r2, #0x04
248	ldrtge	ip, [r0], #0x04
249	strge	ip, [r1], #0x04
250	RETeq				/* Return now if done */
251	addlt	r2, r2, #0x04		/* No word copied: undo the subtract */
	/* Here r2 is the number of single bytes left; at least one is copied */
252	ldrbt	ip, [r0], #0x01
253	cmp	r2, #0x02		/* ge: a 2nd byte exists, gt: a 3rd too */
254	ldrbtge	r2, [r0], #0x01		/* Count no longer needed: r2 = 2nd byte */
255	strb	ip, [r1], #0x01
256	ldrbtgt	ip, [r0]
257	strbge	r2, [r1], #0x01
258	strbgt	ip, [r1]
259	RET
260
261/*
262 * At this point, it has not been possible to word align both buffers.
263 * The destination buffer (r1) is word aligned, but the source buffer
264 * (r0) is not.
 *
 * On entry ip = r0 & 3 (1..3).  r4-r7 are pushed and r3 is set to 1 so
 * the fault path pops them.  r0 is rounded down to a word boundary, the
 * first aligned word is loaded into ip, and control goes to the loop
 * entry that matches the misalignment.
265 */
266.Lcopyin_bad_align:
267	stmfd	sp!, {r4-r7}
268	mov	r3, #0x01
269	bic	r0, r0, #0x03
270	cmp	ip, #2			/* Flags survive the ldrt that reuses ip */
271	ldrt	ip, [r0], #0x04
272	bgt	.Lcopyin_bad3
273	beq	.Lcopyin_bad2
274	b	.Lcopyin_bad1
275
/*
 * Source is 1 byte past a word boundary.  ip always holds the most
 * recently loaded aligned source word; each output word is that word
 * shifted right by 8 merged with the next word shifted left by 24.
 * NOTE(review): these shift directions match little-endian byte order;
 * no big-endian variant is present in this file.
 * Entry is at .Lcopyin_bad1, not at the top of the loop.
 */
276.Lcopyin_bad1_loop16:
277	mov	r4, ip, lsr #8
278	ldrt	r5, [r0], #0x04
279	pld	[r0, #0x018]
280	ldrt	r6, [r0], #0x04
281	ldrt	r7, [r0], #0x04
282	ldrt	ip, [r0], #0x04
283	orr	r4, r4, r5, lsl #24
284	mov	r5, r5, lsr #8
285	orr	r5, r5, r6, lsl #24
286	mov	r6, r6, lsr #8
287	orr	r6, r6, r7, lsl #24
288	mov	r7, r7, lsr #8
289	orr	r7, r7, ip, lsl #24
290	str	r4, [r1], #0x04
291	str	r5, [r1], #0x04
292	str	r6, [r1], #0x04
293	str	r7, [r1], #0x04
294.Lcopyin_bad1:
295	subs	r2, r2, #0x10		/* Another 16 bytes available? */
296	bge	.Lcopyin_bad1_loop16
297
298	adds	r2, r2, #0x10		/* Undo the bias */
299	ldmfdeq	sp!, {r4-r7}
300	RETeq				/* Return now if done */
301	subs	r2, r2, #0x04
302	sublt	r0, r0, #0x03		/* Back r0 up to the first uncopied byte */
303	blt	.Lcopyin_l4
304
305.Lcopyin_bad1_loop4:
306	mov	r4, ip, lsr #8
307	ldrt	ip, [r0], #0x04
308	subs	r2, r2, #0x04
309	orr	r4, r4, ip, lsl #24
310	str	r4, [r1], #0x04
311	bge	.Lcopyin_bad1_loop4
312	sub	r0, r0, #0x03		/* Back r0 up to the first uncopied byte */
313	b	.Lcopyin_l4
314
/*
 * Source is 2 bytes past a word boundary.  Same scheme as the 1-byte
 * case, with each output word built from the previous word shifted
 * right by 16 and the next word shifted left by 16.
 * NOTE(review): shift directions match little-endian byte order.
 * Entry is at .Lcopyin_bad2, not at the top of the loop.
 */
315.Lcopyin_bad2_loop16:
316	mov	r4, ip, lsr #16
317	ldrt	r5, [r0], #0x04
318	pld	[r0, #0x018]
319	ldrt	r6, [r0], #0x04
320	ldrt	r7, [r0], #0x04
321	ldrt	ip, [r0], #0x04
322	orr	r4, r4, r5, lsl #16
323	mov	r5, r5, lsr #16
324	orr	r5, r5, r6, lsl #16
325	mov	r6, r6, lsr #16
326	orr	r6, r6, r7, lsl #16
327	mov	r7, r7, lsr #16
328	orr	r7, r7, ip, lsl #16
329	str	r4, [r1], #0x04
330	str	r5, [r1], #0x04
331	str	r6, [r1], #0x04
332	str	r7, [r1], #0x04
333.Lcopyin_bad2:
334	subs	r2, r2, #0x10		/* Another 16 bytes available? */
335	bge	.Lcopyin_bad2_loop16
336
337	adds	r2, r2, #0x10		/* Undo the bias */
338	ldmfdeq	sp!, {r4-r7}
339	RETeq				/* Return now if done */
340	subs	r2, r2, #0x04
341	sublt	r0, r0, #0x02		/* Back r0 up to the first uncopied byte */
342	blt	.Lcopyin_l4
343
344.Lcopyin_bad2_loop4:
345	mov	r4, ip, lsr #16
346	ldrt	ip, [r0], #0x04
347	subs	r2, r2, #0x04
348	orr	r4, r4, ip, lsl #16
349	str	r4, [r1], #0x04
350	bge	.Lcopyin_bad2_loop4
351	sub	r0, r0, #0x02		/* Back r0 up to the first uncopied byte */
352	b	.Lcopyin_l4
353
/*
 * Source is 3 bytes past a word boundary.  Same scheme as the other two
 * cases, with each output word built from the previous word shifted
 * right by 24 and the next word shifted left by 8.
 * NOTE(review): shift directions match little-endian byte order.
 * Entry is at .Lcopyin_bad3; the end of this section falls through into
 * .Lcopyin_l4.
 */
354.Lcopyin_bad3_loop16:
355	mov	r4, ip, lsr #24
356	ldrt	r5, [r0], #0x04
357	pld	[r0, #0x018]
358	ldrt	r6, [r0], #0x04
359	ldrt	r7, [r0], #0x04
360	ldrt	ip, [r0], #0x04
361	orr	r4, r4, r5, lsl #8
362	mov	r5, r5, lsr #24
363	orr	r5, r5, r6, lsl #8
364	mov	r6, r6, lsr #24
365	orr	r6, r6, r7, lsl #8
366	mov	r7, r7, lsr #24
367	orr	r7, r7, ip, lsl #8
368	str	r4, [r1], #0x04
369	str	r5, [r1], #0x04
370	str	r6, [r1], #0x04
371	str	r7, [r1], #0x04
372.Lcopyin_bad3:
373	subs	r2, r2, #0x10		/* Another 16 bytes available? */
374	bge	.Lcopyin_bad3_loop16
375
376	adds	r2, r2, #0x10		/* Undo the bias */
377	ldmfdeq	sp!, {r4-r7}
378	RETeq				/* Return now if done */
379	subs	r2, r2, #0x04
380	sublt	r0, r0, #0x01		/* Back r0 up to the first uncopied byte */
381	blt	.Lcopyin_l4
382
383.Lcopyin_bad3_loop4:
384	mov	r4, ip, lsr #24
385	ldrt	ip, [r0], #0x04
386	subs	r2, r2, #0x04
387	orr	r4, r4, ip, lsl #8
388	str	r4, [r1], #0x04
389	bge	.Lcopyin_bad3_loop4
390	sub	r0, r0, #0x01		/* Back r0 up to the first uncopied byte */
391
/*
 * Common tail of the misaligned-source paths.  Entered with r4-r7 still
 * pushed and r2 biased by -4; pops them, clears the fault marker and
 * copies the last 1..3 bytes, if any.
 */
392.Lcopyin_l4:
393	ldmfd	sp!, {r4-r7}
394	mov	r3, #0x00		/* Nothing left on the stack for the fault path */
395	adds	r2, r2, #0x04		/* Undo the bias: r2 = bytes left (0..3) */
396	RETeq
/*
 * Bytewise copy of r2 = 1..3 bytes (also reached directly from
 * .Lcopyin_guts when fewer bytes exist than the alignment needs).
 */
397.Lcopyin_l4_2:
398	rsbs	r2, r2, #0x03		/* r2 = byte copies to skip (0..2) */
	/*
	 * Computed jump: pc reads as this instruction + 8 (the first ldrbt)
	 * and each ldrbt/strb pair is 8 bytes, hence lsl #3.  With r2 == 0
	 * no jump is taken and the nop is executed instead.
	 */
399	addne	pc, pc, r2, lsl #3
400	nop
401	ldrbt	ip, [r0], #0x01
402	strb	ip, [r1], #0x01
403	ldrbt	ip, [r0], #0x01
404	strb	ip, [r1], #0x01
405	ldrbt	ip, [r0]
406	strb	ip, [r1]
407	RET
408END(copyin)
409
410/*
411 * r0 = kernel space address
412 * r1 = user space address
413 * r2 = length
414 *
415 * Copies bytes from kernel space to user space
 *
 * Result in r0:
 *   0       length <= 0 (nothing copied), or the copy completed
 *   EFAULT  r1 + r2 wraps around, or r1 + r2 >= VM_MAXUSER_ADDRESS + 1,
 *           or .Lcopyout_fault was entered while copying
 *
 * The length is tested as a signed value, so a length with bit 31 set
 * is handled like zero.  NOTE(review): confirm no caller relies on
 * lengths of 2 GiB or more being rejected.
 *
 * While .Lcopyout_guts runs, PCB_ONFAULT holds .Lcopyout_fault, r10 the
 * PCB pointer, r11 the previous PCB_ONFAULT value and r3 the marker
 * that tells the fault path which registers the guts have pushed.
416 */
417ENTRY(copyout)
418	cmp	r2, #0x00
419	movle	r0, #0x00
420	RETc(le)			/* Bail early if length is <= 0 */
421
422	adds	r3, r1, r2		/* r3 = end of user range, C set on wrap */
423	movcs	r0, #EFAULT
424	RETc(cs)
425
426	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
427	cmp	r3, r12			/* Unsigned: end must be below the limit */
428	movcs	r0, #EFAULT
429	RETc(cs)
430
431	stmfd	sp!, {r10-r11, lr}
432
433	GET_PCB(r10)
434	ldr	r10, [r10]		/* r10 = PCB pointer */
435
436	mov	r3, #0x00		/* Nothing pushed by the guts yet */
437	adr	ip, .Lcopyout_fault
438	ldr	r11, [r10, #PCB_ONFAULT]	/* r11 = previous handler */
439	str	ip, [r10, #PCB_ONFAULT]
440	bl	.Lcopyout_guts
441	str	r11, [r10, #PCB_ONFAULT]	/* Put the previous handler back */
442	mov	r0, #0x00
443	ldmfd	sp!, {r10-r11, pc}
444
/*
 * Landing pad whose address copyout stores in PCB_ONFAULT.
 * Expects: r10 = PCB pointer, r11 = previous PCB_ONFAULT value,
 * r3 = marker for what the copy code had pushed at the time:
 *   r3 > 0  r4-r7 are on the stack
 *   r3 < 0  r4-r9 are on the stack
 *   r3 == 0 nothing extra is on the stack
 * Restores the previous handler, pops whatever was pushed, then pops
 * copyout's own frame so that EFAULT is returned to copyout's caller.
 */
445.Lcopyout_fault:
446	ldr	r0, =EFAULT
447	str	r11, [r10, #PCB_ONFAULT]
448	cmp	r3, #0x00
449	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
450	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
451	ldmfd	sp!, {r10-r11, pc}
452
/*
 * Copy engine, entered with bl from copyout.
 * In: r0 = kernel source, r1 = user destination, r2 = byte count (> 0),
 *     r3 = 0.  Every store to r1 uses the unprivileged forms
 *     (strt/strbt); loads from r0 are ordinary loads.
 * Returns through lr; r0 is not a status on return (copyout sets it).
 */
453.Lcopyout_guts:
454	pld	[r0]
455	/* Word-align the destination buffer */
456	ands	ip, r1, #0x03		/* Already word aligned? */
457	beq	.Lcopyout_wordaligned	/* Yup */
458	rsb	ip, ip, #0x04		/* ip = bytes needed to align r1 (1..3) */
459	cmp	r2, ip			/* Enough bytes left to align it? */
460	blt	.Lcopyout_l4_2		/* Nope. Just copy bytewise */
461	sub	r2, r2, ip
462	rsbs	ip, ip, #0x03		/* ip = byte copies to skip (0..2) */
	/*
	 * Computed jump.  pc reads as this instruction + 8, which is the
	 * first ldrb below; every ldrb/strbt pair is 8 bytes, hence the
	 * lsl #3.  When ip == 0 no jump is taken and the nop is executed.
	 * Do not add or remove instructions between here and the cmp.
	 */
463	addne	pc, pc, ip, lsl #3
464	nop
465	ldrb	ip, [r0], #0x01
466	strbt	ip, [r1], #0x01
467	ldrb	ip, [r0], #0x01
468	strbt	ip, [r1], #0x01
469	ldrb	ip, [r0], #0x01
470	strbt	ip, [r1], #0x01
471	cmp	r2, #0x00		/* All done? */
472	RETeq
473
474	/* Destination buffer is now word aligned */
475.Lcopyout_wordaligned:
	/* ip = source misalignment; .Lcopyout_bad_align compares it with 2 */
476	ands	ip, r0, #0x03		/* Is src also word-aligned? */
477	bne	.Lcopyout_bad_align	/* Nope. Things just got bad */
478	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
479	blt	.Lcopyout_w_less_than8
480
481	/* Quad-align the source buffer (r0 is what the ldrd loads read) */
482	tst	r0, #0x07		/* Already quad aligned? */
483	ldrne	ip, [r0], #0x04
484	subne	r2, r2, #0x04
485	strtne	ip, [r1], #0x04
486
487	stmfd	sp!, {r4-r9}		/* Free up some registers */
488	mov	r3, #-1			/* Signal restore r4-r9 */
489
490	/* Destination buffer word aligned, source is quad aligned */
	/* Bias r2 by -128: it stays >= 0 while a full 128-byte chunk remains */
491	subs	r2, r2, #0x80
492	blt	.Lcopyout_w_lessthan128
493
494	/* Copy 128 bytes at a time */
/*
 * One iteration moves 128 bytes: 16 ldrd loads from r0 interleaved with
 * 32 unprivileged word stores to r1, rotating through the pairs r4/r5,
 * r6/r7 and r8/r9.  The LD:/ST: notes give the byte offsets within the
 * chunk.  r2 carries a -128 bias; the subs near the end sets the flags
 * for the closing bge (the two strt after it leave them alone).
 * r4-r9 are on the stack and r3 is -1 for the whole loop.
 */
495.Lcopyout_w_loop128:
496	ldrd	r4, [r0], #0x08		/* LD:00-07 */
497	pld	[r0, #0x18]		/* Prefetch 0x20 */
498	ldrd	r6, [r0], #0x08		/* LD:08-0f */
499	ldrd	r8, [r0], #0x08		/* LD:10-17 */
500	strt	r4, [r1], #0x04		/* ST:00-03 */
501	strt	r5, [r1], #0x04		/* ST:04-07 */
502	ldrd	r4, [r0], #0x08		/* LD:18-1f */
503	strt	r6, [r1], #0x04		/* ST:08-0b */
504	strt	r7, [r1], #0x04		/* ST:0c-0f */
505	ldrd	r6, [r0], #0x08		/* LD:20-27 */
506	pld	[r0, #0x18]		/* Prefetch 0x40 */
507	strt	r8, [r1], #0x04		/* ST:10-13 */
508	strt	r9, [r1], #0x04		/* ST:14-17 */
509	ldrd	r8, [r0], #0x08		/* LD:28-2f */
510	strt	r4, [r1], #0x04		/* ST:18-1b */
511	strt	r5, [r1], #0x04		/* ST:1c-1f */
512	ldrd	r4, [r0], #0x08		/* LD:30-37 */
513	strt	r6, [r1], #0x04		/* ST:20-23 */
514	strt	r7, [r1], #0x04		/* ST:24-27 */
515	ldrd	r6, [r0], #0x08		/* LD:38-3f */
516	strt	r8, [r1], #0x04		/* ST:28-2b */
517	strt	r9, [r1], #0x04		/* ST:2c-2f */
518	ldrd	r8, [r0], #0x08		/* LD:40-47 */
519	pld	[r0, #0x18]		/* Prefetch 0x60 */
520	strt	r4, [r1], #0x04		/* ST:30-33 */
521	strt	r5, [r1], #0x04		/* ST:34-37 */
522	ldrd	r4, [r0], #0x08		/* LD:48-4f */
523	strt	r6, [r1], #0x04		/* ST:38-3b */
524	strt	r7, [r1], #0x04		/* ST:3c-3f */
525	ldrd	r6, [r0], #0x08		/* LD:50-57 */
526	strt	r8, [r1], #0x04		/* ST:40-43 */
527	strt	r9, [r1], #0x04		/* ST:44-47 */
528	ldrd	r8, [r0], #0x08		/* LD:58-5f */
529	strt	r4, [r1], #0x04		/* ST:48-4b */
530	strt	r5, [r1], #0x04		/* ST:4c-4f */
531	ldrd	r4, [r0], #0x08		/* LD:60-67 */
532	pld	[r0, #0x18]		/* Prefetch 0x80 */
533	strt	r6, [r1], #0x04		/* ST:50-53 */
534	strt	r7, [r1], #0x04		/* ST:54-57 */
535	ldrd	r6, [r0], #0x08		/* LD:68-6f */
536	strt	r8, [r1], #0x04		/* ST:58-5b */
537	strt	r9, [r1], #0x04		/* ST:5c-5f */
538	ldrd	r8, [r0], #0x08		/* LD:70-77 */
539	strt	r4, [r1], #0x04		/* ST:60-63 */
540	strt	r5, [r1], #0x04		/* ST:64-67 */
541	ldrd	r4, [r0], #0x08		/* LD:78-7f */
542	strt	r6, [r1], #0x04		/* ST:68-6b */
543	strt	r7, [r1], #0x04		/* ST:6c-6f */
544	strt	r8, [r1], #0x04		/* ST:70-73 */
545	strt	r9, [r1], #0x04		/* ST:74-77 */
546	subs	r2, r2, #0x80
547	strt	r4, [r1], #0x04		/* ST:78-7b */
548	strt	r5, [r1], #0x04		/* ST:7c-7f */
549	bge	.Lcopyout_w_loop128
550
/*
 * Fewer than 128 bytes are left.  Remove the -128 bias from r2, return
 * if that leaves zero, otherwise bias by -32 and move 32 bytes per
 * iteration for as long as a full 32-byte chunk remains.
 * r4-r9 are still on the stack (r3 == -1) throughout.
 */
551.Lcopyout_w_lessthan128:
552	adds	r2, r2, #0x80		/* Adjust for extra sub */
553	ldmfdeq	sp!, {r4-r9}
554	RETeq				/* Return now if done */
555	subs	r2, r2, #0x20
556	blt	.Lcopyout_w_lessthan32
557
558	/* Copy 32 bytes at a time */
559.Lcopyout_w_loop32:
560	ldrd	r4, [r0], #0x08
561	pld	[r0, #0x18]
562	ldrd	r6, [r0], #0x08
563	ldrd	r8, [r0], #0x08
564	strt	r4, [r1], #0x04
565	strt	r5, [r1], #0x04
566	ldrd	r4, [r0], #0x08
567	strt	r6, [r1], #0x04
568	strt	r7, [r1], #0x04
569	strt	r8, [r1], #0x04
570	strt	r9, [r1], #0x04
571	subs	r2, r2, #0x20		/* Flags for the bge; strt keeps them */
572	strt	r4, [r1], #0x04
573	strt	r5, [r1], #0x04
574	bge	.Lcopyout_w_loop32
575
/*
 * Fewer than 32 bytes are left.  After removing the -32 bias, the whole
 * 8-byte units (0..3 of them) are copied by jumping into the middle of
 * three identical 16-byte blocks; what is left (< 8 bytes) falls
 * through to .Lcopyout_w_less_than8.
 */
576.Lcopyout_w_lessthan32:
577	adds	r2, r2, #0x20		/* Adjust for extra sub */
578	ldmfdeq	sp!, {r4-r9}
579	RETeq				/* Return now if done */
580
581	and	r4, r2, #0x18		/* r4 = bytes in whole 8-byte units */
582	rsb	r5, r4, #0x18		/* r5 = 8 * (number of blocks to skip) */
583	subs	r2, r2, r4		/* r2 = tail bytes; Z is used by RETeq below */
	/*
	 * Computed jump.  pc reads as this instruction + 8, which is the
	 * first block below.  Each block is four instructions (16 bytes)
	 * per 8 data bytes, hence r5 lsl #1; the nop closing each block is
	 * padding that keeps the block at 16 bytes.  Nothing between here
	 * and the RETeq below changes the flags.
	 */
584	add	pc, pc, r5, lsl #1
585	nop
586
587	/* At least 24 bytes remaining */
588	ldrd	r4, [r0], #0x08
589	strt	r4, [r1], #0x04
590	strt	r5, [r1], #0x04
591	nop
592
593	/* At least 16 bytes remaining */
594	ldrd	r4, [r0], #0x08
595	strt	r4, [r1], #0x04
596	strt	r5, [r1], #0x04
597	nop
598
599	/* At least 8 bytes remaining */
600	ldrd	r4, [r0], #0x08
601	strt	r4, [r1], #0x04
602	strt	r5, [r1], #0x04
603	nop
604
605	/* Less than 8 bytes remaining */
606	ldmfd	sp!, {r4-r9}
607	RETeq				/* Return now if done */
608	mov	r3, #0x00		/* Stack is clean again: tell the fault path */
609
/*
 * Tail of the aligned path: fewer than 8 bytes left, both pointers word
 * aligned, nothing extra on the stack (r3 == 0).  Copies one word when
 * at least 4 bytes remain, then up to three single bytes.
 */
610.Lcopyout_w_less_than8:
611	subs	r2, r2, #0x04
612	ldrge	ip, [r0], #0x04
613	strtge	ip, [r1], #0x04
614	RETeq				/* Return now if done */
615	addlt	r2, r2, #0x04		/* No word copied: undo the subtract */
	/* Here r2 is the number of single bytes left; at least one is copied */
616	ldrb	ip, [r0], #0x01
617	cmp	r2, #0x02		/* ge: a 2nd byte exists, gt: a 3rd too */
618	ldrbge	r2, [r0], #0x01		/* Count no longer needed: r2 = 2nd byte */
619	strbt	ip, [r1], #0x01
620	ldrbgt	ip, [r0]
621	strbtge	r2, [r1], #0x01
622	strbtgt	ip, [r1]
623	RET
624
625/*
626 * At this point, it has not been possible to word align both buffers.
627 * The destination buffer (r1) is word aligned, but the source buffer
628 * (r0) is not.
 *
 * On entry ip = r0 & 3 (1..3).  r4-r7 are pushed and r3 is set to 1 so
 * the fault path pops them.  r0 is rounded down to a word boundary, the
 * first aligned word is loaded into ip, and control goes to the loop
 * entry that matches the misalignment.
629 */
630.Lcopyout_bad_align:
631	stmfd	sp!, {r4-r7}
632	mov	r3, #0x01
633	bic	r0, r0, #0x03
634	cmp	ip, #2			/* Flags survive the ldr that reuses ip */
635	ldr	ip, [r0], #0x04
636	bgt	.Lcopyout_bad3
637	beq	.Lcopyout_bad2
638	b	.Lcopyout_bad1
639
/*
 * Source is 1 byte past a word boundary.  ip always holds the most
 * recently loaded aligned source word; each output word is that word
 * shifted right by 8 merged with the next word shifted left by 24.
 * NOTE(review): these shift directions match little-endian byte order;
 * no big-endian variant is present in this file.
 * Entry is at .Lcopyout_bad1, not at the top of the loop.
 */
640.Lcopyout_bad1_loop16:
641	mov	r4, ip, lsr #8
642	ldr	r5, [r0], #0x04
643	pld	[r0, #0x018]
644	ldr	r6, [r0], #0x04
645	ldr	r7, [r0], #0x04
646	ldr	ip, [r0], #0x04
647	orr	r4, r4, r5, lsl #24
648	mov	r5, r5, lsr #8
649	orr	r5, r5, r6, lsl #24
650	mov	r6, r6, lsr #8
651	orr	r6, r6, r7, lsl #24
652	mov	r7, r7, lsr #8
653	orr	r7, r7, ip, lsl #24
654	strt	r4, [r1], #0x04
655	strt	r5, [r1], #0x04
656	strt	r6, [r1], #0x04
657	strt	r7, [r1], #0x04
658.Lcopyout_bad1:
659	subs	r2, r2, #0x10		/* Another 16 bytes available? */
660	bge	.Lcopyout_bad1_loop16
661
662	adds	r2, r2, #0x10		/* Undo the bias */
663	ldmfdeq	sp!, {r4-r7}
664	RETeq				/* Return now if done */
665	subs	r2, r2, #0x04
666	sublt	r0, r0, #0x03		/* Back r0 up to the first uncopied byte */
667	blt	.Lcopyout_l4
668
669.Lcopyout_bad1_loop4:
670	mov	r4, ip, lsr #8
671	ldr	ip, [r0], #0x04
672	subs	r2, r2, #0x04
673	orr	r4, r4, ip, lsl #24
674	strt	r4, [r1], #0x04
675	bge	.Lcopyout_bad1_loop4
676	sub	r0, r0, #0x03		/* Back r0 up to the first uncopied byte */
677	b	.Lcopyout_l4
678
/*
 * Source is 2 bytes past a word boundary.  Same scheme as the 1-byte
 * case, with each output word built from the previous word shifted
 * right by 16 and the next word shifted left by 16.
 * NOTE(review): shift directions match little-endian byte order.
 * Entry is at .Lcopyout_bad2, not at the top of the loop.
 */
679.Lcopyout_bad2_loop16:
680	mov	r4, ip, lsr #16
681	ldr	r5, [r0], #0x04
682	pld	[r0, #0x018]
683	ldr	r6, [r0], #0x04
684	ldr	r7, [r0], #0x04
685	ldr	ip, [r0], #0x04
686	orr	r4, r4, r5, lsl #16
687	mov	r5, r5, lsr #16
688	orr	r5, r5, r6, lsl #16
689	mov	r6, r6, lsr #16
690	orr	r6, r6, r7, lsl #16
691	mov	r7, r7, lsr #16
692	orr	r7, r7, ip, lsl #16
693	strt	r4, [r1], #0x04
694	strt	r5, [r1], #0x04
695	strt	r6, [r1], #0x04
696	strt	r7, [r1], #0x04
697.Lcopyout_bad2:
698	subs	r2, r2, #0x10		/* Another 16 bytes available? */
699	bge	.Lcopyout_bad2_loop16
700
701	adds	r2, r2, #0x10		/* Undo the bias */
702	ldmfdeq	sp!, {r4-r7}
703	RETeq				/* Return now if done */
704	subs	r2, r2, #0x04
705	sublt	r0, r0, #0x02		/* Back r0 up to the first uncopied byte */
706	blt	.Lcopyout_l4
707
708.Lcopyout_bad2_loop4:
709	mov	r4, ip, lsr #16
710	ldr	ip, [r0], #0x04
711	subs	r2, r2, #0x04
712	orr	r4, r4, ip, lsl #16
713	strt	r4, [r1], #0x04
714	bge	.Lcopyout_bad2_loop4
715	sub	r0, r0, #0x02		/* Back r0 up to the first uncopied byte */
716	b	.Lcopyout_l4
717
/*
 * Source is 3 bytes past a word boundary.  Same scheme as the other two
 * cases, with each output word built from the previous word shifted
 * right by 24 and the next word shifted left by 8.
 * NOTE(review): shift directions match little-endian byte order.
 * Entry is at .Lcopyout_bad3; the end of this section falls through
 * into .Lcopyout_l4.
 */
718.Lcopyout_bad3_loop16:
719	mov	r4, ip, lsr #24
720	ldr	r5, [r0], #0x04
721	pld	[r0, #0x018]
722	ldr	r6, [r0], #0x04
723	ldr	r7, [r0], #0x04
724	ldr	ip, [r0], #0x04
725	orr	r4, r4, r5, lsl #8
726	mov	r5, r5, lsr #24
727	orr	r5, r5, r6, lsl #8
728	mov	r6, r6, lsr #24
729	orr	r6, r6, r7, lsl #8
730	mov	r7, r7, lsr #24
731	orr	r7, r7, ip, lsl #8
732	strt	r4, [r1], #0x04
733	strt	r5, [r1], #0x04
734	strt	r6, [r1], #0x04
735	strt	r7, [r1], #0x04
736.Lcopyout_bad3:
737	subs	r2, r2, #0x10		/* Another 16 bytes available? */
738	bge	.Lcopyout_bad3_loop16
739
740	adds	r2, r2, #0x10		/* Undo the bias */
741	ldmfdeq	sp!, {r4-r7}
742	RETeq				/* Return now if done */
743	subs	r2, r2, #0x04
744	sublt	r0, r0, #0x01		/* Back r0 up to the first uncopied byte */
745	blt	.Lcopyout_l4
746
747.Lcopyout_bad3_loop4:
748	mov	r4, ip, lsr #24
749	ldr	ip, [r0], #0x04
750	subs	r2, r2, #0x04
751	orr	r4, r4, ip, lsl #8
752	strt	r4, [r1], #0x04
753	bge	.Lcopyout_bad3_loop4
754	sub	r0, r0, #0x01		/* Back r0 up to the first uncopied byte */
755
/*
 * Common tail of the misaligned-source paths.  Entered with r4-r7 still
 * pushed and r2 biased by -4; pops them, clears the fault marker and
 * copies the last 1..3 bytes, if any.
 */
756.Lcopyout_l4:
757	ldmfd	sp!, {r4-r7}
758	mov	r3, #0x00		/* Nothing left on the stack for the fault path */
759	adds	r2, r2, #0x04		/* Undo the bias: r2 = bytes left (0..3) */
760	RETeq
/*
 * Bytewise copy of r2 = 1..3 bytes (also reached directly from
 * .Lcopyout_guts when fewer bytes exist than the alignment needs).
 */
761.Lcopyout_l4_2:
762	rsbs	r2, r2, #0x03		/* r2 = byte copies to skip (0..2) */
	/*
	 * Computed jump: pc reads as this instruction + 8 (the first ldrb)
	 * and each ldrb/strbt pair is 8 bytes, hence lsl #3.  With r2 == 0
	 * no jump is taken and the nop is executed instead.
	 */
763	addne	pc, pc, r2, lsl #3
764	nop
765	ldrb	ip, [r0], #0x01
766	strbt	ip, [r1], #0x01
767	ldrb	ip, [r0], #0x01
768	strbt	ip, [r1], #0x01
769	ldrb	ip, [r0]
770	strbt	ip, [r1]
771	RET
772END(copyout)
773
774