xref: /netbsd/lib/libc/arch/powerpc/string/bzero.S (revision c4a72b64)
/*	$NetBSD: bzero.S,v 1.5 2002/07/30 06:07:58 matt Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl@netbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>
#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't. slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b %r3, size_t len %r4);
     void * memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/

#define r_dst	%r3
#define r_len	%r4
#define r_val	%r0

		.text
		.align 4
ENTRY(bzero)
		li	r_val, 0		/* Value to stuff in */
		b	cb_memset

ENTRY(memset)
		cmplwi	cr1, %r5, 0
		mr.	%r0, %r4
		mr	%r8, %r3
		beqlr-	cr1			/* Nothing to do */

		rlwimi	%r0, %r4, 8, 16, 23	/* word extend fill value */
		rlwimi	%r0, %r0, 16, 0, 15
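		/* %r0 now holds the fill byte c replicated into all four bytes of the word */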
		mr	%r4, %r5
		bne-	simple_fill		/* != 0, use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		/* First find out cache line size */
#ifdef PIC
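		/* PIC: recover the GOT address into %r10 so we can find cache_info */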
		mflr	%r9
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	%r10
		mtlr	%r9
		lwz	%r5,cache_info@got(%r10)
#else
		lis	%r5,cache_info@h
		ori	%r5,%r5,cache_info@l
#endif
		lwz	%r6, 4(%r5)
		cmpwi	%r6, -1
		bne+	cb_cacheline_known

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1
#define	CPU_CACHEINFO	5

#define STKFRAME_SZ	48
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32

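		/* Query the cache geometry via sysctl(CTL_MACHDEP, CPU_CACHEINFO); */
		/* if that fails, retry below with the older CPU_CACHELINE MIB */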
		mflr	%r6
		stw	%r6, 4(%r1)
		stwu	%r1, -STKFRAME_SZ(%r1)

		stw	%r8, R8_SAVE(%r1)
		stw	%r3, R3_SAVE(%r1)
		stw	%r4, R4_SAVE(%r1)
		stw	%r0, R0_SAVE(%r1)


		li	%r0, CTL_MACHDEP		/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHEINFO
		stw	%r0, MIB+4(%r1)

		li	%r0, 4*4			/* Oldlenp := 4*4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
		/* %r5 already contains &cache_info */
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))

		cmpwi	%r3, 0			/* Check result */
		beq	1f

		/* Failure, try older sysctl */

		li	%r0, CTL_MACHDEP		/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHELINE
		stw	%r0, MIB+4(%r1)

		li	%r0, 4			/* Oldlenp := 4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
#ifdef PIC
		mflr	%r9
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	%r10
		mtlr	%r9
		lwz	%r5,cache_info@got(%r10)
		addi	%r5, %r5, 4
#else
		lis	%r5,cache_info+4@h
		ori	%r5,%r5,cache_info+4@l
#endif
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))
1:
		lwz	%r8, R8_SAVE(%r1)
		lwz	%r3, R3_SAVE(%r1)
		lwz	%r4, R4_SAVE(%r1)
		lwz	%r0, R0_SAVE(%r1)

#ifdef PIC
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	%r10
		lwz	%r9, cache_info@got(%r10)
		lwz	%r9, 4(%r9)
#else
		lis	%r5, cache_info+4@ha
		lwz	%r9, cache_info+4@l(%r5)
#endif
		la	%r1, STKFRAME_SZ(%r1)
		lwz	%r5, 4(%r1)
		mtlr	%r5

		cntlzw	%r6, %r9			/* compute shift value */
		li	%r5, 31
		subf	%r5, %r6, %r5
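		/* %r5 = 31 - cntlzw(line size) = log2(cache line size) (line size assumed to be a power of two) */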

#ifdef PIC
		lwz	%r6, cache_sh@got(%r10)
		stw	%r5, 0(%r6)
#else
		lis	%r6, cache_sh@ha
		stw	%r5, cache_sh@l(%r6)
#endif
/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (%r9) and shift value (%r10) */
cb_cacheline_known:
#ifdef PIC
		lwz	%r5, cache_info@got(%r10)
		lwz	%r9, 4(%r5)
		lwz	%r5, cache_sh@got(%r10)
		lwz	%r10, 0(%r5)
#else
		lis	%r9, cache_info+4@ha
		lwz	%r9, cache_info+4@l(%r9)
		lis	%r10, cache_sh@ha
		lwz	%r10, cache_sh@l(%r10)
#endif

#else /* _KERNEL */
#ifdef	MULTIPROCESSOR
		mfsprg	%r10, 0			/* Get cpu_info pointer */
#else
		lis	%r10, cpu_info_store@ha
		addi	%r10, %r10, cpu_info_store@l
#endif
		lwz	%r9, CPU_CI+4(%r10)	/* Load D$ line size */
		cntlzw	%r10, %r9			/* Calculate shift.. */
		li	%r6, 31
		subf	%r10, %r10, %r6
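		/* %r10 = log2(D-cache line size), computed as in the userland path above */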
#endif /* _KERNEL */
		/* Back in memory filling business */

		cmplwi	cr1, r_len, 0		/* Nothing to do? */
		add	%r5, %r9, %r9
		cmplw	r_len, %r5		/* <= 2*CL bytes to move? */
		beqlr-	cr1			/* then do nothing */

		blt+	simple_fill		/* a trivial fill routine */

		/* Word align the block: fill bytewise until dst is word aligned */

		andi.	%r5, r_dst, 0x03
		li	%r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */

		subf	%r5, %r5, %r6		/* bytes to fill to align4 */
#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

		/* There is guaranteed work to do since r_len was at least 2*CL initially, */
		/* so no need to check for r_len == 0 */

		rlwinm.	%r5, r_dst, 30, 29, 31
		srwi	%r6, %r9, 2
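		/* %r5 = (r_dst >> 2) & 7 (word offset of dst), %r6 = cache line size in words */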
		beq	cb_aligned_cb		/* already on CL boundary? */

		subf	%r5, %r5, %r6		/* words to fill to alignment */
		mtctr	%r5
		slwi	%r5, %r5, 2
		subf	r_len, %r5, r_len

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	%r5, r_len, %r10		/* Number of cache blocks */
		mtctr	%r5
		beq	cblocks_done

		slw	%r5, %r5, %r10
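		/* %r5 = number of bytes the dcbz loop below will clear */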
		subf	r_len, %r5, r_len

1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, %r9
		bdnz	1b

cblocks_done:	/* still CL aligned, but less than CL bytes left */
		cmplwi	cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	cr1, sf_return

		blt-	sf_bytewise		/* <8 remaining? */
		b	sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
		cmplwi	cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	%r5, r_dst, 0x03		/* bytes to fill to align4 */
		blt	cr1, sf_bytewise	/* trivial byte mover */

		li	%r6, 4
		subf	%r5, %r5, %r6
		beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
		mr	%r6, %r0
		mr	%r7, %r0

		srwi	%r5, r_len, 3
		mtctr	%r5

		slwi	%r5, %r5, 3		/* adjust len */
		subf.	r_len, %r5, r_len

1:		stswi	%r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	%r5, r_len, 2		/* words to fill */
		mtctr	%r5

		slwi	%r5, %r5, 2
		subf.	r_len, %r5, r_len	/* adjust len for fill */

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise

sf_return:	mr	%r3, %r8			/* restore orig ptr */
		blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
		mr	%r5, %r0
		mr	%r6, %r0
		mr	%r7, %r0

		mtxer	r_len
		stswx	%r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	%r3, %r8			/* restore orig ptr */
		blr				/* for memset functionality */

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		.data
cache_info:	.long	-1, -1, -1, -1
cache_sh:	.long	0

#endif
/*----------------------------------------------------------------------*/
