xref: /minix/common/lib/libc/arch/sparc64/string/memset.S (revision ebfedea0)
1/*	$NetBSD: memset.S,v 1.2 2013/03/17 02:12:41 christos Exp $	*/
2
3/*
4 * Copyright (c) 1996-2002 Eduardo Horvath
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 */
26#include "strmacros.h"
27#if defined(LIBC_SCCS) && !defined(lint)
28RCSID("$NetBSD: memset.S,v 1.2 2013/03/17 02:12:41 christos Exp $")
29#endif  /* LIBC_SCCS and not lint */
30
31
32/*
33 * XXXXXXXXXXXXXXXXXXXX
34 * We need to make sure that this doesn't use floating point
35 * before our trap handlers are installed or we could panic
36 * XXXXXXXXXXXXXXXXXXXX
37 */
38/*
39 * memset(addr, c, len)
40 *
41 * We want to use VIS instructions if we're clearing out more than
42 * 256 bytes, but to do that we need to properly save and restore the
43 * FP registers.  Unfortunately the code to do that in the kernel needs
44 * to keep track of the current owner of the FPU, hence the different
45 * code.
46 *
47 * XXXXX To produce more efficient code, we do not allow lengths
48 * greater than 0x8000000000000000, which are negative numbers.
49 * This should not really be an issue since the VA hole should
50 * cause any such ranges to fail anyway.
51 */
52#if !defined(_KERNEL) || defined(_RUMPKERNEL)
53ENTRY(bzero)
54	! %o0 = addr, %o1 = len
55	mov	%o1, %o2		! memset wants the length in %o2
56	clr	%o1			! set pattern to zero, then fall through into memset
57#endif
58ENTRY(memset)
59	! %o0 = addr, %o1 = pattern, %o2 = len
60	mov	%o0, %o4		! Save original pointer (memset must return it)
61
62Lmemset_internal:
63	btst	7, %o0			! 8-byte (doubleword) aligned?
64	bz,pn	%xcc, 0f
65	 nop
66	inc	%o0
67	deccc	%o2			! Store up to 7 bytes, one at a time, until aligned
68	bge,a,pt	CCCR, Lmemset_internal
69	 stb	%o1, [%o0 - 1]		! (annulled delay slot: byte stored only if len >= 0)
70
71	retl				! Len ran out while aligning: done.  Duplicate Lmemset_done
72	 mov	%o4, %o0
730:
74	/*
75	 * Duplicate the low byte of the pattern so it fills all 64 bits.
76	 */
77	andcc	%o1, 0x0ff, %o1		! No need to extend zero
78	bz,pt	%icc, 1f
79	 sllx	%o1, 8, %o3		! sigh.  all dependent insns.
80	or	%o1, %o3, %o1		! pattern now 16 bits wide
81	sllx	%o1, 16, %o3
82	or	%o1, %o3, %o1		! pattern now 32 bits wide
83	sllx	%o1, 32, %o3
84	 or	%o1, %o3, %o1		! pattern now fills all 64 bits
851:
86#ifdef USE_BLOCK_STORE_LOAD
87	!! Now we are 64-bit aligned
88	cmp	%o2, 256		! Use block clear if len > 256
89	bge,pt	CCCR, Lmemset_block	! use block store insns
90#endif	/* USE_BLOCK_STORE_LOAD */
91	 deccc	8, %o2			! (delay slot, also fall-through) bias len by -8
92Lmemset_longs:
93	bl,pn	CCCR, Lmemset_cleanup	! Less than 8 bytes left
94	 nop
953:
96	inc	8, %o0
97	deccc	8, %o2
98	bge,pt	CCCR, 3b
99	 stx	%o1, [%o0 - 8]		! Do 1 longword at a time
100
101	/*
102	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
103	 * -6 => two bytes, etc.  Mop up this remainder, if any.
104	 */
105Lmemset_cleanup:
106	btst	4, %o2
107	bz,pt	CCCR, 5f		! if (len & 4) {
108	 nop
109	stw	%o1, [%o0]		!	*(int *)addr = pattern;
110	inc	4, %o0			!	addr += 4;
1115:
112	btst	2, %o2
113	bz,pt	CCCR, 7f		! if (len & 2) {
114	 nop
115	sth	%o1, [%o0]		!	*(short *)addr = pattern;
116	inc	2, %o0			!	addr += 2;
1177:
118	btst	1, %o2
119	bnz,a	%icc, Lmemset_done	! if (len & 1)
120	 stb	%o1, [%o0]		!	*addr = pattern; (annulled delay slot)
121Lmemset_done:
122	retl
123	 mov	%o4, %o0		! Restore pointer for memset (ugh)
124
125#ifdef USE_BLOCK_STORE_LOAD
126Lmemset_block:
127	sethi	%hi(block_disable), %o3
128	ldx	[ %o3 + %lo(block_disable) ], %o3
129	brnz,pn	%o3, Lmemset_longs	! Block ops administratively disabled?
130	!! Make sure our trap table is installed
131	set	_C_LABEL(trapbase), %o5	! NOTE(review): first insn of `set` expansion sits in the
132					! brnz delay slot above; %o5 is scratch, so harmless either way
132a	rdpr	%tba, %o3
133	sub	%o3, %o5, %o3
134	brnz,pn	%o3, Lmemset_longs	! No, then don't use block load/store
135	 nop
136/*
137 * Kernel:
138 *
139 * Here we use VIS instructions to do a block clear of a page.
140 * But before we can do that we need to save and enable the FPU.
141 * The last owner of the FPU registers is fplwp, and
142 * fplwp->l_md.md_fpstate is the current fpstate.  If that's not
143 * null, call savefpstate() with it to store our current fp state.
144 *
145 * Next, allocate an aligned fpstate on the stack.  We will properly
146 * nest calls on a particular stack so this should not be a problem.
147 *
148 * Now we grab either curlwp (or if we're on the interrupt stack
149 * lwp0).  We stash its existing fpstate in a local register and
150 * put our new fpstate in curlwp->p_md.md_fpstate.  We point
151 * fplwp at curlwp (or lwp0) and enable the FPU.
152 *
153 * If we are ever preempted, our FPU state will be saved in our
154 * fpstate.  Then, when we're resumed and we take an FPDISABLED
155 * trap, the trap handler will be able to fish our FPU state out
156 * of curlwp (or lwp0).
157 *
158 * On exiting this routine we undo the damage: restore the original
159 * pointer to curlwp->p_md.md_fpstate, clear our fplwp, and disable
160 * the FPU.
161 *
162 */
163
164	ENABLE_FPU(0)			! presumably opens a register window (%o -> %i)
165					! and enables the FPU -- see strmacros.h
166	!! We are now 8-byte aligned.  We need to become 64-byte aligned.
167	btst	63, %i0
168	bz,pt	CCCR, 2f
169	 nop
1701:
171	stx	%i1, [%i0]		! Store one longword at a time until 64-byte aligned
172	inc	8, %i0
173	btst	63, %i0
174	bnz,pt	%xcc, 1b
175	 dec	8, %i2			! (delay slot) account for the longword just stored
176
1772:
178	brz	%i1, 3f					! Skip the memory op
179	 fzero	%f0					! if pattern is 0
180
181#ifdef _LP64
182	stx	%i1, [%i0]				! Flush this puppy to RAM
183	membar	#StoreLoad
184	ldd	[%i0], %f0				! ...then load the 64-bit pattern into %f0
185#else
186	stw	%i1, [%i0]				! Flush this puppy to RAM
187	membar	#StoreLoad
188	ld	[%i0], %f0
189	fmovsa	%icc, %f0, %f1				! copy low word so %f0:%f1 hold the pattern
190#endif
191
1923:
193	fmovd	%f0, %f2				! Duplicate the pattern
194	fmovd	%f0, %f4				! into %f0-%f14 == one
195	fmovd	%f0, %f6				! full 64-byte block
196	fmovd	%f0, %f8
197	fmovd	%f0, %f10
198	fmovd	%f0, %f12
199	fmovd	%f0, %f14
200
201	!! Remember: we were 8 bytes too far
202	dec	56, %i2					! Go one iteration too far
2035:
204	stda	%f0, [%i0] ASI_STORE			! Store 64 bytes
205	deccc	BLOCK_SIZE, %i2
206	bg,pt	%icc, 5b
207	 inc	BLOCK_SIZE, %i0				! (delay slot) advance the pointer
208
209	membar	#Sync					! Make the block stores globally visible
210/*
211 * We've saved our possible fpstate, now disable the fpu
212 * and continue with life.
213 */
214	RESTORE_FPU
215	addcc	%i2, 56, %i2				! Restore the count
216	ba,pt	%xcc, Lmemset_longs			! Finish up the remainder
217	 restore					! (delay slot) pop the window opened above
218#endif	/* USE_BLOCK_STORE_LOAD */
219