/*	$NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $	*/

/*-
 * Copyright (c) 2002 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Allen Briggs for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */


#include "assym.s"

#include <machine/asm.h>
#include <sys/errno.h>

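/*
 * Optional platform copy hook: a port may register an optimized
 * _arm_memcpy (and the minimum length _min_memcpy_size at which it
 * pays off).  copyin/copyout test these literal-pool pointers at run
 * time and call through them instead of using the inline loops below.
 */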
.L_arm_memcpy:
	.word	_C_LABEL(_arm_memcpy)
.L_min_memcpy_size:
	.word	_C_LABEL(_min_memcpy_size)

__FBSDID("$FreeBSD$");
#ifdef _ARM_ARCH_5E
#include <arm/arm/bcopyinout_xscale.S>
#else

	.text
	.align	2

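/*
 * GET_PCB(tmp) computes the address of the current pcb pointer.  On
 * ARMv6+ curthread lives in the TPIDRPRW register (CP15 c13, c0, 4),
 * so we read it and add the TD_PCB offset; older CPUs go through the
 * per-CPU curpcb variable via a literal-pool entry.  Callers load
 * once more to get the pcb itself.
 */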
#if __ARM_ARCH >= 6
#define GET_PCB(tmp) \
	mrc p15, 0, tmp, c13, c0, 4; \
	add	tmp, tmp, #(TD_PCB)
#else
.Lcurpcb:
	.word	_C_LABEL(__pcpu) + PC_CURPCB

#define GET_PCB(tmp) \
	ldr	tmp, .Lcurpcb
#endif

#define SAVE_REGS	stmfd	sp!, {r4-r11}; _SAVE({r4-r11})
#define RESTORE_REGS	ldmfd	sp!, {r4-r11}

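/*
 * PREFETCH expands to a pld cache hint where the architecture has it
 * and to nothing elsewhere.  HELLOCPP is the usual indirection for
 * emitting a literal '#' from a macro body, where cpp would otherwise
 * mistake it for an operator.
 */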
#if defined(_ARM_ARCH_5E)
#define HELLOCPP #
#define PREFETCH(rx,o)	pld	[ rx , HELLOCPP (o) ]
#else
#define PREFETCH(rx,o)
#endif

/*
 * r0 = user space address
 * r1 = kernel space address
 * r2 = length
 *
 * Copies bytes from user space to kernel space.
 *
 * We save/restore r4-r11; the ABI treats them as callee-saved, but
 * this routine uses them all as scratch.
 */
ENTRY(copyin)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	RETeq

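	/*
	 * Reject a range that wraps around the top of the address
	 * space (the adds sets the carry flag) or that extends past
	 * VM_MAXUSER_ADDRESS into the kernel.
	 */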
	adds	r3, r0, r2
	movcs	r0, #EFAULT
	RETc(cs)

	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT
	RETc(cs)

	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
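	/*
	 * Call the hook as _arm_memcpy(dst, src, len, flags), hence the
	 * r0/r1 swap.  "mov lr, pc; ldr pc, [r4]" is an indirect call
	 * that also works on pre-ARMv5 CPUs (no blx).  If the hook
	 * returns nonzero, fall through to the inline copy.
	 */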
	stmfd	sp!, {r0-r2, r4, lr}
	_SAVE({r0-r2, r4, lr})
	mov	r3, r0
	mov	r0, r1
	mov	r1, r3
	mov	r3, #2 /* SRC_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}
	moveq	r0, #0
	RETeq

.Lnormal:
	SAVE_REGS
	GET_PCB(r4)
	ldr	r4, [r4]

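	/*
	 * Point any fault taken during the copy at .Lcopyfault: stash
	 * the previous onfault handler in r5 and install our own in
	 * the pcb.
	 */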
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * Align destination to word boundary.
	 */
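	/*
	 * Computed jump: pc reads as '.' + 8, so the ldr indexes the
	 * .word table that follows (entry 0 is the already-aligned
	 * case).  The 'b .Lialend' is never executed; it merely pads
	 * the table out to pc + 8.
	 */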
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lialend
	.word	.Lialend
	.word	.Lial3
	.word	.Lial2
	.word	.Lial1
.Lial3:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lial2:	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lial1:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lialend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Licleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Licleanup8

	/*
	 * Align destination to cacheline boundary.
	 * If source and destination are nicely aligned, this can be a big
	 * win.  If not, it's still cheaper to copy in groups of 32 even if
	 * we don't get the nice cacheline alignment.
	 */
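	/*
	 * r1 is word aligned by now, so (r1 & 0x1f) is a multiple of 4
	 * and can index the .word table without a shift: offset 4 means
	 * 28 bytes remain to the cacheline boundary, offset 8 means 24,
	 * and so on.
	 */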
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Licaligned
	.word	.Licaligned
	.word	.Lical28
	.word	.Lical24
	.word	.Lical20
	.word	.Lical16
	.word	.Lical12
	.word	.Lical8
	.word	.Lical4
.Lical28:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical24:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical20:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical16:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical12:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical8:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical4:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
.Licaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/* Copy a cacheline */
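	/*
	 * The first two words travel via r10-r11 so the remaining six
	 * can be stored as a single six-register stmia; interleaving
	 * the user-mode loads with the stores helps hide load latency
	 * on in-order cores.
	 */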
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x40
	bge	.Licaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x08
	blt	.Liprecleanup

.Licleanup8:
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	sub	r2, r2, #8
	stmia	r1!, {r8, r9}
	cmp	r2, #8
	bge	.Licleanup8

.Liprecleanup:
	/*
	 * If we're done, bail.
	 */
	cmp	r2, #0
	beq	.Liout

.Licleanup:
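	/*
	 * Copy the final r2 & 3 bytes through the table (index 0 means
	 * a multiple of four remains, so .Lic4 moves a whole word a
	 * byte at a time), looping while the subs at .Lic1 leaves
	 * bytes outstanding.
	 */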
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Licend
	.word	.Lic4
	.word	.Lic1
	.word	.Lic2
	.word	.Lic3
.Lic4:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic3:	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lic2:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic1:	ldrbt	r7, [r0], #1
	subs	r2, r2, #1
	strb	r7, [r1], #1
.Licend:
	bne	.Licleanup

.Liout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET

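/*
 * Fault landing pad, shared with copyout below: restore the previous
 * onfault handler and fail with EFAULT.
 */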
.Lcopyfault:
	ldr	r0, =EFAULT
	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
END(copyin)

/*
 * r0 = kernel space address
 * r1 = user space address
 * r2 = length
 *
 * Copies bytes from kernel space to user space.
 *
 * We save/restore r4-r11; the ABI treats them as callee-saved, but
 * this routine uses them all as scratch.
 */

ENTRY(copyout)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	RETeq

	adds	r3, r1, r2
	movcs	r0, #EFAULT
	RETc(cs)

	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT
	RETc(cs)

	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormale
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormale
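	/*
	 * Same hook-call sequence as in copyin, except the flag now
	 * marks the destination as the user buffer.
	 */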
	stmfd	sp!, {r0-r2, r4, lr}
	_SAVE({r0-r2, r4, lr})
	mov	r3, r0
	mov	r0, r1
	mov	r1, r3
	mov	r3, #1 /* DST_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}
	moveq	r0, #0
	RETeq

.Lnormale:
	SAVE_REGS
	GET_PCB(r4)
	ldr	r4, [r4]

	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * Align destination to word boundary.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lalend
	.word	.Lalend
	.word	.Lal3
	.word	.Lal2
	.word	.Lal1
.Lal3:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lal2:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lal1:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lalend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Lcleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Lcleanup8

	/*
	 * Align destination to cacheline boundary.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Lcaligned
	.word	.Lcaligned
	.word	.Lcal28
	.word	.Lcal24
	.word	.Lcal20
	.word	.Lcal16
	.word	.Lcal12
	.word	.Lcal8
	.word	.Lcal4
.Lcal28:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal24:ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal20:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal16:ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal12:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal8:	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal4:	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
.Lcaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/* Copy a cacheline */
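	/*
	 * Mirror image of copyin's inner loop: the kernel side may use
	 * ldmia, but every store to user space must be an strt.
	 */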
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x40
	bge	.Lcaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x08
	blt	.Lprecleanup

.Lcleanup8:
	ldmia	r0!, {r8-r9}
	sub	r2, r2, #8
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	cmp	r2, #8
	bge	.Lcleanup8

.Lprecleanup:
	/*
	 * If we're done, bail.
	 */
	cmp	r2, #0
	beq	.Lout

.Lcleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lcend
	.word	.Lc4
	.word	.Lc1
	.word	.Lc2
	.word	.Lc3
.Lc4:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc3:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lc2:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc1:	ldrb	r7, [r0], #1
	subs	r2, r2, #1
	strbt	r7, [r1], #1
.Lcend:
	bne	.Lcleanup

.Lout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
END(copyout)
#endif

/*
 * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
 *
 * Copies a single 8-bit value from src to dest, returning 0 on success,
 * else EFAULT if a page fault occurred.
 */
ENTRY(badaddr_read_1)
	GET_PCB(r2)
	ldr	r2, [r2]

	ldr	ip, [r2, #PCB_ONFAULT]
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
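	/*
	 * The nops bracket the possibly-faulting access, apparently to
	 * keep it well clear of the onfault bookkeeping on cores where
	 * aborts are not precise.  On a fault the abort handler is
	 * expected to resume at the 1: label with r0 set to EFAULT.
	 */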
	nop
	nop
	nop
	ldrb	r3, [r0]
	nop
	nop
	nop
	strb	r3, [r1]
	mov	r0, #0		/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]
	RET
END(badaddr_read_1)

/*
 * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
 *
 * Copies a single 16-bit value from src to dest, returning 0 on success,
 * else EFAULT if a page fault occurred.
 */
ENTRY(badaddr_read_2)
	GET_PCB(r2)
	ldr	r2, [r2]

	ldr	ip, [r2, #PCB_ONFAULT]
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	nop
	nop
	nop
	ldrh	r3, [r0]
	nop
	nop
	nop
	strh	r3, [r1]
	mov	r0, #0		/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]
	RET
END(badaddr_read_2)

/*
 * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
 *
 * Copies a single 32-bit value from src to dest, returning 0 on success,
 * else EFAULT if a page fault occurred.
 */
ENTRY(badaddr_read_4)
	GET_PCB(r2)
	ldr	r2, [r2]

	ldr	ip, [r2, #PCB_ONFAULT]
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	nop
	nop
	nop
	ldr	r3, [r0]
	nop
	nop
	nop
	str	r3, [r1]
	mov	r0, #0		/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]
	RET
END(badaddr_read_4)