xref: /netbsd/sys/arch/arm/arm32/bcopy_page.S (revision c4a72b64)
1/*	$NetBSD: bcopy_page.S,v 1.5 2002/08/17 16:36:33 thorpej Exp $	*/
2
3/*
4 * Copyright (c) 1995 Scott Stevens
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *	This product includes software developed by Scott Stevens.
18 * 4. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 * RiscBSD kernel project
33 *
34 * bcopy_page.S
35 *
36 * page optimised bcopy and bzero routines
37 *
38 * Created      : 08/04/95
39 */
40
41#include <machine/param.h>
42#include <machine/asm.h>
43
44/* #define BIG_LOOPS */
45
46/*
47 * bcopy_page(src, dest)
48 *
49 * Optimised copy page routine.
50 *
51 * On entry:
52 *   r0 - src address
53 *   r1 - dest address
54 *
55 * Requires:
 *   The number of bytes per page (NBPG) must be a multiple of 512 when
 *   BIG_LOOPS is defined, and a multiple of 128 otherwise.
58 */
59
/* Bytes moved by one chunk: eight 32-bit registers per ldmia/stmia pair. */
#define	CHUNK_SIZE	32

/*
 * Cache prefetch hints.  On XScale, where the cache line size conveniently
 * equals CHUNK_SIZE, these expand to PLD instructions; everywhere else
 * they expand to nothing.
 */
#if !defined(__XSCALE__)
#define	PREFETCH_FIRST_CHUNK	/* nothing */
#define	PREFETCH_NEXT_CHUNK	/* nothing */
#else
#define	PREFETCH_FIRST_CHUNK	pld	[r0]
#define	PREFETCH_NEXT_CHUNK	pld	[r0, #(CHUNK_SIZE)]
#endif

/*
 * Default chunk mover: hint the following chunk, then load CHUNK_SIZE
 * bytes from [r0] and store them to [r1], post-incrementing both pointers.
 * A definition supplied before this point takes precedence.
 */
#if !defined(COPY_CHUNK)
#define	COPY_CHUNK \
	PREFETCH_NEXT_CHUNK ; \
	ldmia	r0!, {r3-r8,ip,lr} ; \
	stmia	r1!, {r3-r8,ip,lr}
#endif /* ! COPY_CHUNK */

/*
 * Default entry/exit sequences for bcopy_page.  The restore loads the
 * stacked lr value directly into pc, so it also performs the return.
 * Both are skipped together when SAVE_REGS is already defined.
 */
#if !defined(SAVE_REGS)
#define	SAVE_REGS	stmfd	sp!, {r4-r8, lr}
#define	RESTORE_REGS	ldmfd	sp!, {r4-r8, pc}
#endif /* ! SAVE_REGS */
82
/*
 * Register usage inside bcopy_page (with the default COPY_CHUNK):
 *   r0			source pointer, advanced by every chunk
 *   r1			destination pointer, advanced by every chunk
 *   r2			loop passes still to run
 *   r3-r8, ip, lr	data in flight between the load and the store
 */
ENTRY(bcopy_page)
	PREFETCH_FIRST_CHUNK
	SAVE_REGS

	/*
	 * r2 = number of loop passes.  With the default COPY_CHUNK one pass
	 * moves 512 bytes when BIG_LOOPS is defined and 128 bytes otherwise.
	 */
#ifdef BIG_LOOPS
	mov	r2, #(NBPG >> 9)
#else
	mov	r2, #(NBPG >> 7)
#endif

1:
	/*
	 * Unrolled copy: 16 chunks per pass with BIG_LOOPS, 4 without.
	 * There is little point unrolling any further; unless the cache is
	 * off, the load/store overheads completely dominate this loop.
	 */
#ifdef BIG_LOOPS
	.rept	16
#else
	.rept	4
#endif
	COPY_CHUNK
	.endr

	subs	r2, r2, #1		/* one pass done; Z set on the last */
	bne	1b

	RESTORE_REGS			/* ...and return. */
121
122/*
123 * bzero_page(dest)
124 *
125 * Optimised zero page routine.
126 *
127 * On entry:
128 *   r0 - dest address
129 *
130 * Requires:
 *   The number of bytes per page (NBPG) must be a multiple of 512 when
 *   BIG_LOOPS is defined, and a multiple of 128 otherwise.
133 */
134
/*
 * Register usage inside bzero_page:
 *   r0			destination pointer, advanced by every store
 *   r2			loop passes still to run
 *   r3-r8, ip, lr	all hold zero; one stmia writes them as 32 bytes
 */
ENTRY(bzero_page)
	stmfd	sp!, {r4-r8, lr}

	/* Zero the eight registers that every store multiple writes out. */
	mov	r3, #0
	mov	r4, #0
	mov	r5, #0
	mov	r6, #0
	mov	r7, #0
	mov	r8, #0
	mov	ip, #0
	mov	lr, #0

	/*
	 * r2 = number of loop passes.  One pass clears 512 bytes when
	 * BIG_LOOPS is defined and 128 bytes otherwise.
	 */
#ifdef BIG_LOOPS
	mov	r2, #(NBPG >> 9)
#else
	mov	r2, #(NBPG >> 7)
#endif

1:
	/*
	 * Unrolled clear: 16 stores per pass with BIG_LOOPS, 4 without.
	 * There is little point unrolling any further; unless the cache is
	 * off, the load/store overheads completely dominate this loop.
	 */
#ifdef BIG_LOOPS
	.rept	16
#else
	.rept	4
#endif
	stmia	r0!, {r3-r8,ip,lr}
	.endr

	subs	r2, r2, #1		/* one pass done; Z set on the last */
	bne	1b

	ldmfd	sp!, {r4-r8, pc}	/* restore and return via stacked lr */
182