/* xref: /freebsd/sys/arm/arm/bcopyinout.S (revision aa0a1e58) */
1/*	$NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $	*/
2
3/*-
4 * Copyright (c) 2002 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Allen Briggs for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *      This product includes software developed for the NetBSD Project by
20 *      Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38
39#include "assym.s"
40
41#include <machine/asm.h>
42
/*
 * Literal pool: addresses of the optional platform-optimized memcpy
 * hook (_arm_memcpy, NULL when no optimized routine is registered) and
 * of the minimum copy length at which calling it is worthwhile
 * (_min_memcpy_size).  Both are C symbols; copyin/copyout test
 * _arm_memcpy for NULL before taking the fast path.
 */
43.L_arm_memcpy:
44	.word	_C_LABEL(_arm_memcpy)
45.L_min_memcpy_size:
46	.word	_C_LABEL(_min_memcpy_size)
47
48__FBSDID("$FreeBSD$");
49#ifdef _ARM_ARCH_5E
50#include <arm/arm/bcopyinout_xscale.S>
51#else
52
53	.text
54	.align	0
55
/*
 * Address used to locate the current PCB: on MULTIPROCESSOR kernels
 * cpu_info[] is indexed by cpu_number() and CI_CURPCB is fetched from
 * that entry; on UP kernels __pcpu.pc_curpcb is read directly.
 */
56#ifdef MULTIPROCESSOR
57.Lcpu_info:
58	.word	_C_LABEL(cpu_info)
59#else
60.Lcurpcb:
61	.word _C_LABEL(__pcpu) + PC_CURPCB
62#endif
63
/* Push/pop the callee-saved registers the copy loops use as scratch. */
64#define SAVE_REGS	stmfd	sp!, {r4-r11}
65#define RESTORE_REGS	ldmfd	sp!, {r4-r11}
66
/*
 * PREFETCH(reg, offset) expands to a pld cache-preload hint on ARMv5E
 * and later, and to nothing on older cores.  HELLOCPP is a trick to get
 * a literal '#' (immediate marker) through the C preprocessor.
 */
67#if defined(_ARM_ARCH_5E)
68#define HELLOCPP #
69#define PREFETCH(rx,o)	pld	[ rx , HELLOCPP (o) ]
70#else
71#define PREFETCH(rx,o)
72#endif
73
74/*
75 * r0 = user space address
76 * r1 = kernel space address
77 * r2 = length
78 *
79 * Copies bytes from user space to kernel space
80 *
81 * We save/restore r4-r11:
82 * r4-r11 are scratch
83 */
/*
 * int copyin(const void *uaddr, void *kaddr, size_t len)
 *
 * In:   r0 = user-space source, r1 = kernel-space destination, r2 = length
 * Out:  r0 = 0 on success; EFAULT (via .Lcopyfault) if a user access faults.
 *
 * User memory is only ever read with ldrbt/ldrt ("load translated", i.e.
 * performed with user-mode permissions), so an inaccessible user page
 * raises a data abort which the trap code redirects to the handler
 * installed in pcb_onfault (.Lcopyfault).  On the .Lnormal path:
 * r4 = curpcb, r5 = previously installed pcb_onfault (restored on exit).
 */
84ENTRY(copyin)
85	/* Quick exit if length is zero */
86	teq	r2, #0
87	moveq	r0, #0
88	RETeq
89
	/*
	 * Fast path: if a platform-optimized _arm_memcpy is registered and
	 * the copy is at least _min_memcpy_size bytes, hand the whole job
	 * to _arm_memcpy(dst, src, len, SRC_IS_USER).
	 */
90	ldr	r3, .L_arm_memcpy
91	ldr	r3, [r3]
92	cmp	r3, #0
93	beq	.Lnormal
94	ldr	r3, .L_min_memcpy_size
95	ldr	r3, [r3]
96	cmp	r2, r3
97	blt	.Lnormal
98	stmfd	sp!, {r0-r2, r4, lr}
99	mov     r3, r0			/* swap r0/r1: callee wants (dst, src) */
100	mov     r0, r1
101	mov     r1, r3
102	mov     r3, #2 /* SRC_IS_USER */
103	ldr	r4, .L_arm_memcpy
104	mov	lr, pc			/* pre-ARMv5 indirect-call idiom */
105	ldr	pc, [r4]
106	cmp     r0, #0			/* test callee's result; flags survive the ldmfd */
107	ldmfd   sp!, {r0-r2, r4, lr}
108	moveq	r0, #0			/* _arm_memcpy succeeded: return 0 */
109	RETeq				/* otherwise fall back to the slow path */
110
111.Lnormal:
112	SAVE_REGS
113#ifdef MULTIPROCESSOR
114	/* XXX Probably not appropriate for non-Hydra SMPs */
115	stmfd	sp!, {r0-r2, r14}
116	bl	_C_LABEL(cpu_number)
117	ldr	r4, .Lcpu_info
118	ldr	r4, [r4, r0, lsl #2]
119	ldr	r4, [r4, #CI_CURPCB]
120	ldmfd	sp!, {r0-r2, r14}
121#else
122	ldr	r4, .Lcurpcb
123	ldr	r4, [r4]
124#endif
125
	/* Save the previous onfault handler and install .Lcopyfault. */
126	ldr	r5, [r4, #PCB_ONFAULT]
127	adr	r3, .Lcopyfault
128	str	r3, [r4, #PCB_ONFAULT]
129
130	PREFETCH(r0, 0)
131	PREFETCH(r1, 0)
132
133	/*
134	 * If not too many bytes, take the slow path.
135	 */
136	cmp	r2, #0x08
137	blt	.Licleanup
138
139	/*
140	 * Align destination to word boundary.
141	 */
142	and	r6, r1, #0x3
	/*
	 * Computed goto: in ARM state pc reads as this instruction + 8,
	 * i.e. the address of the first .word below.  Index 0 (already
	 * aligned) -> .Lialend; 1 -> copy 3 bytes; 2 -> copy 2; 3 -> copy 1.
	 * The "b .Lialend" slot is never executed (ldr pc always branches).
	 */
143	ldr	pc, [pc, r6, lsl #2]
144	b	.Lialend
145	.word	.Lialend
146	.word	.Lial3
147	.word	.Lial2
148	.word	.Lial1
	/* Entry points fall through: entering at .Lial3 copies 3 bytes, etc. */
149.Lial3:	ldrbt	r6, [r0], #1
150	sub	r2, r2, #1
151	strb	r6, [r1], #1
152.Lial2:	ldrbt	r7, [r0], #1
153	sub	r2, r2, #1
154	strb	r7, [r1], #1
155.Lial1:	ldrbt	r6, [r0], #1
156	sub	r2, r2, #1
157	strb	r6, [r1], #1
158.Lialend:
159
160	/*
161	 * If few bytes left, finish slow.
162	 */
163	cmp	r2, #0x08
164	blt	.Licleanup
165
166	/*
167	 * If source is not aligned, finish slow.
168	 */
169	ands	r3, r0, #0x03
170	bne	.Licleanup
171
172	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
173	blt	.Licleanup8
174
175	/*
176	 * Align destination to cacheline boundary.
177	 * If source and destination are nicely aligned, this can be a big
178	 * win.  If not, it's still cheaper to copy in groups of 32 even if
179	 * we don't get the nice cacheline alignment.
180	 */
181	and	r6, r1, #0x1f
	/*
	 * r6 is already a multiple of 4 (destination is word-aligned), so
	 * it is used as a byte offset into the table without shifting:
	 * 0 -> .Licaligned, 4 -> copy 28 bytes first, ... 28 -> copy 4.
	 */
182	ldr	pc, [pc, r6]
183	b	.Licaligned
184	.word	.Licaligned
185	.word	.Lical28
186	.word	.Lical24
187	.word	.Lical20
188	.word	.Lical16
189	.word	.Lical12
190	.word	.Lical8
191	.word	.Lical4
192.Lical28:ldrt	r6, [r0], #4
193	sub	r2, r2, #4
194	str	r6, [r1], #4
195.Lical24:ldrt	r7, [r0], #4
196	sub	r2, r2, #4
197	str	r7, [r1], #4
198.Lical20:ldrt	r6, [r0], #4
199	sub	r2, r2, #4
200	str	r6, [r1], #4
201.Lical16:ldrt	r7, [r0], #4
202	sub	r2, r2, #4
203	str	r7, [r1], #4
204.Lical12:ldrt	r6, [r0], #4
205	sub	r2, r2, #4
206	str	r6, [r1], #4
207.Lical8:ldrt	r7, [r0], #4
208	sub	r2, r2, #4
209	str	r7, [r1], #4
210.Lical4:ldrt	r6, [r0], #4
211	sub	r2, r2, #4
212	str	r6, [r1], #4
213
214	/*
215	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
216	 * part of the code, and we may have knocked that down by as much
217	 * as 0x1c getting aligned).
218	 *
219	 * This loop basically works out to:
220	 * do {
221	 * 	prefetch-next-cacheline(s)
222	 *	bytes -= 0x20;
223	 *	copy cacheline
224	 * } while (bytes >= 0x40);
225	 * bytes -= 0x20;
226	 * copy cacheline
227	 */
228.Licaligned:
229	PREFETCH(r0, 32)
230	PREFETCH(r1, 32)
231
232	sub	r2, r2, #0x20
233
	/* User side is read with ldrt; kernel stores use plain stmia. */
234	/* Copy a cacheline */
235	ldrt	r10, [r0], #4
236	ldrt	r11, [r0], #4
237	ldrt	r6, [r0], #4
238	ldrt	r7, [r0], #4
239	ldrt	r8, [r0], #4
240	ldrt	r9, [r0], #4
241	stmia	r1!, {r10-r11}
242	ldrt	r10, [r0], #4
243	ldrt	r11, [r0], #4
244	stmia	r1!, {r6-r11}
245
246	cmp	r2, #0x40
247	bge	.Licaligned
248
249	sub	r2, r2, #0x20
250
251	/* Copy a cacheline */
252	ldrt	r10, [r0], #4
253	ldrt	r11, [r0], #4
254	ldrt	r6, [r0], #4
255	ldrt	r7, [r0], #4
256	ldrt	r8, [r0], #4
257	ldrt	r9, [r0], #4
258	stmia	r1!, {r10-r11}
259	ldrt	r10, [r0], #4
260	ldrt	r11, [r0], #4
261	stmia	r1!, {r6-r11}
262
263	cmp	r2, #0x08
264	blt	.Liprecleanup
265
	/* Copy remaining word pairs, 8 bytes per iteration. */
266.Licleanup8:
267	ldrt	r8, [r0], #4
268	ldrt	r9, [r0], #4
269	sub	r2, r2, #8
270	stmia	r1!, {r8, r9}
271	cmp	r2, #8
272	bge	.Licleanup8
273
274.Liprecleanup:
275	/*
276	 * If we're done, bail.
277	 */
278	cmp	r2, #0
279	beq	.Lout			/* copyout's exit; identical code to .Liout */
280
281.Licleanup:
	/*
	 * Byte-copy tail: r6 = len & 3 selects the entry point so the pass
	 * ends exactly at zero; remaining whole words loop through .Lic4.
	 */
282	and	r6, r2, #0x3
283	ldr	pc, [pc, r6, lsl #2]
284	b	.Licend
285	.word	.Lic4
286	.word	.Lic1
287	.word	.Lic2
288	.word	.Lic3
289.Lic4:	ldrbt	r6, [r0], #1
290	sub	r2, r2, #1
291	strb	r6, [r1], #1
292.Lic3:	ldrbt	r7, [r0], #1
293	sub	r2, r2, #1
294	strb	r7, [r1], #1
295.Lic2:	ldrbt	r6, [r0], #1
296	sub	r2, r2, #1
297	strb	r6, [r1], #1
298.Lic1:	ldrbt	r7, [r0], #1
299	subs	r2, r2, #1		/* sets flags: Z when nothing remains */
300	strb	r7, [r1], #1
301.Licend:
302	bne	.Licleanup
303
304.Liout:
305	mov	r0, #0
306
	/* Success: restore the previous onfault handler and return 0. */
307	str	r5, [r4, #PCB_ONFAULT]
308	RESTORE_REGS
309
310	RET
311
/*
 * Abort landing pad shared by copyin and copyout: the data-abort trap
 * code transfers control here (via pcb_onfault) when a user access
 * faults.  Relies on the register state both callers establish:
 * r4 = curpcb, r5 = previous onfault handler.  Returns EFAULT.
 */
312.Lcopyfault:
313	mov	r0, #14 /* EFAULT */
314	str	r5, [r4, #PCB_ONFAULT]
315	RESTORE_REGS
316
317	RET
318
319/*
320 * r0 = kernel space address
321 * r1 = user space address
322 * r2 = length
323 *
324 * Copies bytes from kernel space to user space
325 *
326 * We save/restore r4-r11:
327 * r4-r11 are scratch
328 */
329
/*
 * int copyout(const void *kaddr, void *uaddr, size_t len)
 *
 * In:   r0 = kernel-space source, r1 = user-space destination, r2 = length
 * Out:  r0 = 0 on success; EFAULT (via .Lcopyfault) if a user access faults.
 *
 * Mirror image of copyin: kernel reads use plain ldr/ldrb/ldmia, while
 * every store to user space uses strt/strbt ("store translated", i.e.
 * performed with user-mode permissions) so a bad user page aborts to the
 * handler installed in pcb_onfault (.Lcopyfault).  On the .Lnormale path:
 * r4 = curpcb, r5 = previously installed pcb_onfault (restored on exit).
 */
330ENTRY(copyout)
331	/* Quick exit if length is zero */
332	teq	r2, #0
333	moveq	r0, #0
334	RETeq
335
	/*
	 * Fast path: large-enough copies go to the optional optimized
	 * _arm_memcpy(dst, src, len, DST_IS_USER), when registered.
	 */
336	ldr	r3, .L_arm_memcpy
337	ldr	r3, [r3]
338	cmp	r3, #0
339	beq	.Lnormale
340	ldr	r3, .L_min_memcpy_size
341	ldr	r3, [r3]
342	cmp	r2, r3
343	blt	.Lnormale
344	stmfd	sp!, {r0-r2, r4, lr}
345	mov     r3, r0			/* swap r0/r1: callee wants (dst, src) */
346	mov     r0, r1
347	mov     r1, r3
348	mov     r3, #1 /* DST_IS_USER */
349	ldr	r4, .L_arm_memcpy
350	mov	lr, pc			/* pre-ARMv5 indirect-call idiom */
351	ldr	pc, [r4]
352	cmp     r0, #0			/* test callee's result; flags survive the ldmfd */
353	ldmfd   sp!, {r0-r2, r4, lr}
354	moveq	r0, #0			/* _arm_memcpy succeeded: return 0 */
355	RETeq				/* otherwise fall back to the slow path */
356
357.Lnormale:
358	SAVE_REGS
359#ifdef MULTIPROCESSOR
360	/* XXX Probably not appropriate for non-Hydra SMPs */
361	stmfd	sp!, {r0-r2, r14}
362	bl	_C_LABEL(cpu_number)
363	ldr	r4, .Lcpu_info
364	ldr	r4, [r4, r0, lsl #2]
365	ldr	r4, [r4, #CI_CURPCB]
366	ldmfd	sp!, {r0-r2, r14}
367#else
368	ldr	r4, .Lcurpcb
369	ldr	r4, [r4]
370#endif
371
	/* Save the previous onfault handler and install .Lcopyfault. */
372	ldr	r5, [r4, #PCB_ONFAULT]
373	adr	r3, .Lcopyfault
374	str	r3, [r4, #PCB_ONFAULT]
375
376	PREFETCH(r0, 0)
377	PREFETCH(r1, 0)
378
379	/*
380	 * If not too many bytes, take the slow path.
381	 */
382	cmp	r2, #0x08
383	blt	.Lcleanup
384
385	/*
386	 * Align destination to word boundary.
387	 */
388	and	r6, r1, #0x3
	/*
	 * Computed goto (pc reads as this instruction + 8 = first .word):
	 * 0 -> .Lalend, 1 -> copy 3 bytes, 2 -> copy 2, 3 -> copy 1.
	 */
389	ldr	pc, [pc, r6, lsl #2]
390	b	.Lalend
391	.word	.Lalend
392	.word	.Lal3
393	.word	.Lal2
394	.word	.Lal1
395.Lal3:	ldrb	r6, [r0], #1
396	sub	r2, r2, #1
397	strbt	r6, [r1], #1
398.Lal2:	ldrb	r7, [r0], #1
399	sub	r2, r2, #1
400	strbt	r7, [r1], #1
401.Lal1:	ldrb	r6, [r0], #1
402	sub	r2, r2, #1
403	strbt	r6, [r1], #1
404.Lalend:
405
406	/*
407	 * If few bytes left, finish slow.
408	 */
409	cmp	r2, #0x08
410	blt	.Lcleanup
411
412	/*
413	 * If source is not aligned, finish slow.
414	 */
415	ands	r3, r0, #0x03
416	bne	.Lcleanup
417
418	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
419	blt	.Lcleanup8
420
421	/*
422	 * Align source & destination to cacheline boundary.
423	 */
424	and	r6, r1, #0x1f
	/* r6 is a multiple of 4 here, so it indexes the table unshifted. */
425	ldr	pc, [pc, r6]
426	b	.Lcaligned
427	.word	.Lcaligned
428	.word	.Lcal28
429	.word	.Lcal24
430	.word	.Lcal20
431	.word	.Lcal16
432	.word	.Lcal12
433	.word	.Lcal8
434	.word	.Lcal4
435.Lcal28:ldr	r6, [r0], #4
436	sub	r2, r2, #4
437	strt	r6, [r1], #4
438.Lcal24:ldr	r7, [r0], #4
439	sub	r2, r2, #4
440	strt	r7, [r1], #4
441.Lcal20:ldr	r6, [r0], #4
442	sub	r2, r2, #4
443	strt	r6, [r1], #4
444.Lcal16:ldr	r7, [r0], #4
445	sub	r2, r2, #4
446	strt	r7, [r1], #4
447.Lcal12:ldr	r6, [r0], #4
448	sub	r2, r2, #4
449	strt	r6, [r1], #4
450.Lcal8:	ldr	r7, [r0], #4
451	sub	r2, r2, #4
452	strt	r7, [r1], #4
453.Lcal4:	ldr	r6, [r0], #4
454	sub	r2, r2, #4
455	strt	r6, [r1], #4
456
457	/*
458	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
459	 * part of the code, and we may have knocked that down by as much
460	 * as 0x1c getting aligned).
461	 *
462	 * This loop basically works out to:
463	 * do {
464	 * 	prefetch-next-cacheline(s)
465	 *	bytes -= 0x20;
466	 *	copy cacheline
467	 * } while (bytes >= 0x40);
468	 * bytes -= 0x20;
469	 * copy cacheline
470	 */
471.Lcaligned:
472	PREFETCH(r0, 32)
473	PREFETCH(r1, 32)
474
475	sub	r2, r2, #0x20
476
	/* Kernel side is read with ldmia; every user store uses strt. */
477	/* Copy a cacheline */
478	ldmia	r0!, {r6-r11}
479	strt	r6, [r1], #4
480	strt	r7, [r1], #4
481	ldmia	r0!, {r6-r7}
482	strt	r8, [r1], #4
483	strt	r9, [r1], #4
484	strt	r10, [r1], #4
485	strt	r11, [r1], #4
486	strt	r6, [r1], #4
487	strt	r7, [r1], #4
488
489	cmp	r2, #0x40
490	bge	.Lcaligned
491
492	sub	r2, r2, #0x20
493
494	/* Copy a cacheline */
495	ldmia	r0!, {r6-r11}
496	strt	r6, [r1], #4
497	strt	r7, [r1], #4
498	ldmia	r0!, {r6-r7}
499	strt	r8, [r1], #4
500	strt	r9, [r1], #4
501	strt	r10, [r1], #4
502	strt	r11, [r1], #4
503	strt	r6, [r1], #4
504	strt	r7, [r1], #4
505
506	cmp	r2, #0x08
507	blt	.Lprecleanup
508
	/* Copy remaining word pairs, 8 bytes per iteration. */
509.Lcleanup8:
510	ldmia	r0!, {r8-r9}
511	sub	r2, r2, #8
512	strt	r8, [r1], #4
513	strt	r9, [r1], #4
514	cmp	r2, #8
515	bge	.Lcleanup8
516
517.Lprecleanup:
518	/*
519	 * If we're done, bail.
520	 */
521	cmp	r2, #0
522	beq	.Lout
523
524.Lcleanup:
	/*
	 * Byte-copy tail: r6 = len & 3 selects the entry point so the pass
	 * ends exactly at zero; remaining whole words loop through .Lc4.
	 */
525	and	r6, r2, #0x3
526	ldr	pc, [pc, r6, lsl #2]
527	b	.Lcend
528	.word	.Lc4
529	.word	.Lc1
530	.word	.Lc2
531	.word	.Lc3
532.Lc4:	ldrb	r6, [r0], #1
533	sub	r2, r2, #1
534	strbt	r6, [r1], #1
535.Lc3:	ldrb	r7, [r0], #1
536	sub	r2, r2, #1
537	strbt	r7, [r1], #1
538.Lc2:	ldrb	r6, [r0], #1
539	sub	r2, r2, #1
540	strbt	r6, [r1], #1
541.Lc1:	ldrb	r7, [r0], #1
542	subs	r2, r2, #1		/* sets flags: Z when nothing remains */
543	strbt	r7, [r1], #1
544.Lcend:
545	bne	.Lcleanup
546
547.Lout:
548	mov	r0, #0
549
	/* Success: restore the previous onfault handler and return 0. */
550	str	r5, [r4, #PCB_ONFAULT]
551	RESTORE_REGS
552
553	RET
554#endif
555
556/*
557 * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
558 *
559 * Copies a single 8-bit value from src to dest, returning 0 on success,
560 * else EFAULT if a page fault occurred.
561 */
/*
 * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
 *
 * Probe a single byte at src (r0) and store it at dest (r1).
 * r0 = 0 on success.  A fault on the probe resumes at local label "1"
 * via pcb_onfault (on fault, r0 is presumably set to EFAULT by the
 * abort handler -- NOTE(review): confirm against the trap code).
 * ip holds the previous onfault handler, restored at "1" on both paths.
 */
562ENTRY(badaddr_read_1)
563#ifdef MULTIPROCESSOR
564	/* XXX Probably not appropriate for non-Hydra SMPs */
565	stmfd	sp!, {r0-r1, r14}
566	bl	_C_LABEL(cpu_number)
567	ldr	r2, .Lcpu_info
568	ldr	r2, [r2, r0, lsl #2]
569	ldr	r2, [r2, #CI_CURPCB]
570	ldmfd	sp!, {r0-r1, r14}
571#else
572	ldr	r2, .Lcurpcb
573	ldr	r2, [r2]
574#endif
	/* Install "1f" as the temporary onfault handler. */
575	ldr	ip, [r2, #PCB_ONFAULT]
576	adr	r3, 1f
577	str	r3, [r2, #PCB_ONFAULT]
	/* nop padding around the probe (carried over from NetBSD;
	 * presumably pipeline/abort slop -- TODO confirm). */
578	nop
579	nop
580	nop
581	ldrb	r3, [r0]
582	nop
583	nop
584	nop
585	strb	r3, [r1]
586	mov	r0, #0		/* No fault */
5871:	str	ip, [r2, #PCB_ONFAULT]
588	RET
589
590/*
591 * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
592 *
593 * Copies a single 16-bit value from src to dest, returning 0 on success,
594 * else EFAULT if a page fault occurred.
595 */
/*
 * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
 *
 * Probe a single halfword at src (r0) and store it at dest (r1).
 * r0 = 0 on success.  A fault on the probe resumes at local label "1"
 * via pcb_onfault (on fault, r0 is presumably set to EFAULT by the
 * abort handler -- NOTE(review): confirm against the trap code).
 * ip holds the previous onfault handler, restored at "1" on both paths.
 */
596ENTRY(badaddr_read_2)
597#ifdef MULTIPROCESSOR
598	/* XXX Probably not appropriate for non-Hydra SMPs */
599	stmfd	sp!, {r0-r1, r14}
600	bl	_C_LABEL(cpu_number)
601	ldr	r2, .Lcpu_info
602	ldr	r2, [r2, r0, lsl #2]
603	ldr	r2, [r2, #CI_CURPCB]
604	ldmfd	sp!, {r0-r1, r14}
605#else
606	ldr	r2, .Lcurpcb
607	ldr	r2, [r2]
608#endif
	/* Install "1f" as the temporary onfault handler. */
609	ldr	ip, [r2, #PCB_ONFAULT]
610	adr	r3, 1f
611	str	r3, [r2, #PCB_ONFAULT]
	/* nop padding around the probe (carried over from NetBSD;
	 * presumably pipeline/abort slop -- TODO confirm). */
612	nop
613	nop
614	nop
615	ldrh	r3, [r0]
616	nop
617	nop
618	nop
619	strh	r3, [r1]
620	mov	r0, #0		/* No fault */
6211:	str	ip, [r2, #PCB_ONFAULT]
622	RET
623
624/*
625 * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
626 *
627 * Copies a single 32-bit value from src to dest, returning 0 on success,
628 * else EFAULT if a page fault occurred.
629 */
/*
 * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
 *
 * Probe a single word at src (r0) and store it at dest (r1).
 * r0 = 0 on success.  A fault on the probe resumes at local label "1"
 * via pcb_onfault (on fault, r0 is presumably set to EFAULT by the
 * abort handler -- NOTE(review): confirm against the trap code).
 * ip holds the previous onfault handler, restored at "1" on both paths.
 */
630ENTRY(badaddr_read_4)
631#ifdef MULTIPROCESSOR
632	/* XXX Probably not appropriate for non-Hydra SMPs */
633	stmfd	sp!, {r0-r1, r14}
634	bl	_C_LABEL(cpu_number)
635	ldr	r2, .Lcpu_info
636	ldr	r2, [r2, r0, lsl #2]
637	ldr	r2, [r2, #CI_CURPCB]
638	ldmfd	sp!, {r0-r1, r14}
639#else
640	ldr	r2, .Lcurpcb
641	ldr	r2, [r2]
642#endif
	/* Install "1f" as the temporary onfault handler. */
643	ldr	ip, [r2, #PCB_ONFAULT]
644	adr	r3, 1f
645	str	r3, [r2, #PCB_ONFAULT]
	/* nop padding around the probe (carried over from NetBSD;
	 * presumably pipeline/abort slop -- TODO confirm). */
646	nop
647	nop
648	nop
649	ldr	r3, [r0]
650	nop
651	nop
652	nop
653	str	r3, [r1]
654	mov	r0, #0		/* No fault */
6551:	str	ip, [r2, #PCB_ONFAULT]
656	RET
657
658