xref: /netbsd/sys/lib/libkern/arch/hppa/bcopy.S (revision c4a72b64)
1/*	$NetBSD: bcopy.S,v 1.1 2002/06/06 20:03:38 fredette Exp $	*/
2
3/*
4 * Copyright (c) 2002 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthew Fredette.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *        This product includes software developed by the NetBSD
21 *        Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 *    contributors may be used to endorse or promote products derived
24 *    from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39/*
40 * Copy routines for NetBSD/hppa.
41 */
42
43#undef _LOCORE
44#define _LOCORE	/* XXX fredette - unfortunate */
45#include <machine/asm.h>
46#include <machine/frame.h>
47
48#if defined(LIBC_SCCS) && !defined(lint)
49        RCSID("$NetBSD: bcopy.S,v 1.1 2002/06/06 20:03:38 fredette Exp $")
50#endif /* LIBC_SCCS and not lint */
51
52/*
53 * The stbys instruction is a little asymmetric.  When (%r2 & 3)
54 * is zero, stbys,b,m %r1, 4(%r2) works like stws,ma.  You
55 * might then wish that when (%r2 & 3) == 0, stbys,e,m %r1, -4(%r2)
56 * worked like stws,mb.  But it doesn't.
57 *
58 * This macro works around this problem.  It requires that %t2
59 * hold the number of bytes that will be written by this store
60 * (meaning that it ranges from one to four).
61 *
62 * Watch the delay-slot trickery here.  The comib is used to set
63 * up which instruction, either the stws or the stbys, is run
64 * in the delay slot of the b instruction.
 *
 * Arguments:
 *	r	 register holding the word to be stored
 *	dst_spc	 space register used for the destination
 *	dst_off	 destination offset register; it is modified by
 *		 whichever store is executed
 *
 * When t2 is 4 the comib does not branch, so the full-word
 * stws,mb that follows the b is the instruction in its delay
 * slot.  When t2 is not 4 the comib branches so that the
 * stbys,e,m is run in the delay slot of the b instead.  Either
 * way exactly one of the two stores is executed.
 *
 * The branch displacements are counted by hand, so no instruction
 * may be added to, removed from, or reordered within this macro.
65 */
66#define _STBYS_E_M(r, dst_spc, dst_off)				  \
67	comib,<>	4, t2, 4				! \
68	b		4					! \
69	stws,mb		r, -4(dst_spc, dst_off)			! \
70	stbys,e,m	r, 0(dst_spc, dst_off)
71
72/*
73 * This macro does a bulk copy with no shifting.  cmplt and m are
74 * the completer and displacement multiplier, respectively, for
75 * the load and store instructions.
 *
 * Arguments:
 *	src_spc, src_off  source space register and offset register
 *	dst_spc, dst_off  destination space register and offset register
 *	count		  register holding the number of bytes that
 *			  have not been stored yet
 *	cmplt, m	  the users in this file pass either (ma, 1)
 *			  for a forward copy or (mb, -1) for a
 *			  reverse copy
 *
 * src_off, dst_off and count are all modified, and t1 through t4
 * are used as scratch registers.
 *
 * The expansion defines the labels _loop16, _skip16, _loop4,
 * _skip4, _do1, _loop1 and _skip1 (each wrapped in _LABEL()), so
 * _LABEL must be defined by the user and the macro can only be
 * expanded once per _LABEL definition.  _LABEL(_do1) is the entry
 * to the byte-at-a-time copier and is also branched to from
 * _COPYS, _COPY_FORWARD and _COPY_REVERSE.
 *
 * The expansion ends with a branch to _LABEL(_done), which the
 * user must define.  The byte loop leaves count at -1 and the
 * delay slot of that final branch adds 1 back, so count is zero
 * on arrival at _LABEL(_done).
76 */
77#define _COPY(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \
78								! \
79	/*							! \
80	 * Loop storing 16 bytes at a time.  Since count 	! \
81	 * may be > INT_MAX, we have to be careful and		! \
82	 * avoid comparisons that treat it as a signed 		! \
83	 * quantity, until after this loop, when count		! \
84	 * is guaranteed to be less than 16.			! \
85	 */							! \
86	comib,>>=,n	15, count, _LABEL(_skip16)		! \
87.label _LABEL(_loop16)						! \
88	addi		-16, count, count			! \
89	ldws,cmplt	m*4(src_spc, src_off), t1		! \
90	ldws,cmplt	m*4(src_spc, src_off), t2		! \
91	ldws,cmplt	m*4(src_spc, src_off), t3		! \
92	ldws,cmplt	m*4(src_spc, src_off), t4		! \
93	stws,cmplt	t1, m*4(dst_spc, dst_off)		! \
94	stws,cmplt	t2, m*4(dst_spc, dst_off)		! \
95	stws,cmplt	t3, m*4(dst_spc, dst_off)		! \
96	comib,<<	15, count, _LABEL(_loop16)		! \
97	stws,cmplt	t4, m*4(dst_spc, dst_off)		! \
98.label _LABEL(_skip16)						! \
99								! \
100	/* Loop storing 4 bytes at a time. */			! \
101	addib,<,n	-4, count, _LABEL(_skip4)		! \
102.label _LABEL(_loop4)						! \
103	ldws,cmplt	m*4(src_spc, src_off), t1		! \
104	addib,>=	-4, count, _LABEL(_loop4)		! \
105	stws,cmplt	t1, m*4(dst_spc, dst_off)		! \
106.label _LABEL(_skip4)						! \
107	/* Restore the correct count. */			! \
108	addi		4, count, count				! \
109								! \
110.label _LABEL(_do1)						! \
111								! \
112	/* Loop storing 1 byte at a time. */			! \
113	addib,<,n	-1, count, _LABEL(_skip1)		! \
114.label _LABEL(_loop1)						! \
115	ldbs,cmplt	m*1(src_spc, src_off), t1		! \
116	addib,>=	-1, count, _LABEL(_loop1)		! \
117	stbs,cmplt	t1, m*1(dst_spc, dst_off)		! \
118.label _LABEL(_skip1)						! \
119	/* Restore the correct count. */			! \
120	b		_LABEL(_done)				! \
121	addi		1, count, count
122
123/*
124 * This macro is definitely strange.  It exists purely to
125 * allow the _COPYS macro to be reused, but because it
126 * requires this long attempt to explain it, I'm starting
127 * to doubt the value of that.
128 *
129 * Part of the expansion of the _COPYS macro below are loops
130 * that copy four words or one word at a time, performing shifts
131 * to get data to line up correctly in the destination buffer.
132 *
133 * The _COPYS macro is used when copying backwards, as well
134 * as forwards.  The 4-word loop always loads into t1, t2, t3,
135 * and t4 in that order.  This means that when copying forward,
136 * t1 will have the word from the lowest address, and t4 will
137 * have the word from the highest address.  When copying
138 * backwards, the opposite is true.
139 *
140 * The shift instructions need pairs of registers with adjacent
141 * words, with the register containing the word from the lowest
142 * address *always* coming first.  It is this asymmetry that
143 * gives rise to this macro - depending on which direction
144 * we're copying in, these ordered pairs are different.
145 *
146 * Fortunately, we can compute those register numbers at compile
147 * time, and assemble them manually into a shift instruction.
148 * That's what this macro does.
149 *
150 * This macro takes three arguments.  n ranges from 0 to 3 and
151 * is the "shift number", i.e., n = 0 means we're doing the
152 * shift for what will be the first store.
153 *
154 * m is the displacement multiplier from the _COPYS macro call.
155 * This is 1 for a forward copy and -1 for a backwards copy.
156 * So, the ((m + 1) / 2) term yields 0 for a backwards copy and
157 * 1 for a forward copy, and the ((m - 1) / 2) term yields
158 * 0 for a forward copy, and -1 for a backwards copy.
159 * These terms are used to discriminate the register computations
160 * below.
 *
 * t is the number of the general register that receives the
 * result of the shift.  It is OR'ed into the low bits of the
 * instruction word as is; _COPYS passes 1 for %r1, 22 for t1,
 * 21 for t2 and 20 for t3.
161 *
162 * When copying forward, then, the first register used with
163 * the first vshd will be 19 + (3 - ((0 - 1) & 3)), or t4,
164 * which matches _COPYS' requirement that the word last loaded
165 * be in t4.  The first register used for the second vshd
166 * will then "wrap" around to 19 + (3 - ((1 - 1) & 3)), or t1.
167 * And so on to t2 and t3.
168 *
169 * When copying forward, the second register used with the first
170 * vshd will be 19 + (3 - ((n + 0) & 3)), or t1.  It will
171 * continue to be t2, then t3, and finally t4.
172 *
173 * When copying backwards, the values for the first and second
174 * register for each vshd are reversed from the forwards case.
175 * (Symmetry reclaimed!)  Proving this is "left as an exercise
176 * for the reader" (remember the different discriminating values!)
 *
 * In the expression below, the number of the first register is
 * shifted left by 16 and the number of the second register is
 * shifted left by 21 before being OR'ed into 0xd0000000.
177 */
178#define _VSHD(n, m, t)						  \
179	.word (0xd0000000					| \
180	((19 + (3 - ((n - 1 * ((m + 1) / 2)) & 3))) << 16)	| \
181	((19 + (3 - ((n + 1 * ((m - 1) / 2)) & 3))) << 21)	| \
182	(t))
183
184/*
185 * This macro does a bulk copy with shifting.  cmplt and m are
186 * the completer and displacement multiplier, respectively, for
187 * the load and store instructions.  It is assumed that the
188 * word last loaded is already in t4.
 *
 * Arguments are the same as for _COPY: source and destination
 * space/offset registers, the count register, and either
 * (ma, 1) for a forward copy or (mb, -1) for a reverse copy.
 *
 * On entry, in addition to t4, %cr11 must already hold the shift
 * amount as a bit count; the users in this file set it with
 * mtctl just before expanding this macro.  It is read back with
 * mfctl at the end to work out how far to back up src_off.
 *
 * src_off, dst_off and count are modified, and t1 through t4 and
 * %r1 are used as scratch registers.
 *
 * The expansion defines the labels S_loop16, S_skip16, S_loop4
 * and S_skip4 (each wrapped in _LABEL()), and ends by branching
 * to _LABEL(_do1), which is defined by _COPY.  _COPY must
 * therefore be expanded under the same _LABEL definition.
 *
 * The "vshd" comments next to the _VSHD uses show the registers
 * for the forward case.
189 */
190#define _COPYS(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \
191								! \
192	/*							! \
193	 * Loop storing 16 bytes at a time.  Since count 	! \
194	 * may be > INT_MAX, we have to be careful and		! \
195	 * avoid comparisons that treat it as a signed 		! \
196	 * quantity, until after this loop, when count		! \
197	 * is guaranteed to be less than 16.			! \
198	 */							! \
199	comib,>>=,n	15, count, _LABEL(S_skip16)		! \
200.label _LABEL(S_loop16)						! \
201	addi		-16, count, count			! \
202	ldws,cmplt	m*4(src_spc, src_off), t1		! \
203	ldws,cmplt	m*4(src_spc, src_off), t2		! \
204	ldws,cmplt	m*4(src_spc, src_off), t3		! \
205	_VSHD(0, m, 1)	/* vshd t4, t1, %r1 */			! \
206	ldws,cmplt	m*4(src_spc, src_off), t4		! \
207	_VSHD(1, m, 22)	/* vshd t1, t2, t1 */			! \
208	_VSHD(2, m, 21)	/* vshd t2, t3, t2 */			! \
209	_VSHD(3, m, 20)	/* vshd t3, t4, t3 */			! \
210	stws,cmplt	%r1, m*4(dst_spc, dst_off)		! \
211	stws,cmplt	t1, m*4(dst_spc, dst_off)		! \
212	stws,cmplt	t2, m*4(dst_spc, dst_off)		! \
213	comib,<<	15, count, _LABEL(S_loop16)		! \
214	stws,cmplt	t3, m*4(dst_spc, dst_off)		! \
215.label _LABEL(S_skip16)						! \
216								! \
217	/* Loop storing 4 bytes at a time. */			! \
218	addib,<,n	-4, count, _LABEL(S_skip4)		! \
219.label _LABEL(S_loop4)						! \
220	ldws,cmplt	m*4(src_spc, src_off), t1		! \
221	_VSHD(0, m, 1)	/* into r1 (1) */			! \
222	copy		t1, t4					! \
223	addib,>=	-4, count, _LABEL(S_loop4)		! \
224	stws,cmplt	%r1, m*4(dst_spc, dst_off)		! \
225.label _LABEL(S_skip4)						! \
226								! \
227	/*							! \
228 	 * We now need to "back up" src_off by the		! \
229	 * number of bytes remaining in the FIFO		! \
230	 * (i.e., the number of bytes remaining in t4),		! \
231	 * because (the correct) count still includes		! \
232	 * these bytes, and we intend to keep it that		! \
233	 * way, and finish with the single-byte copier.		! \
234	 *							! \
235	 * The number of bytes remaining in the FIFO is		! \
236	 * related to the shift count, so recover it,		! \
237	 * restoring the correct count at the same time.	! \
238	 */							! \
239	mfctl	%cr11, t1					! \
240	addi	4, count, count					! \
241	shd	%r0, t1, 3, t1					! \
242								! \
243	/*							! \
244	 * If we're copying forward, the shift count		! \
245	 * is the number of bytes remaining in the		! \
246	 * FIFO, and we want to subtract it from src_off.	! \
247	 * If we're copying backwards, (4 - shift count)	! \
248	 * is the number of bytes remaining in the FIFO,	! \
249	 * and we want to add it to src_off.			! \
250	 *							! \
251	 * We observe that x + (4 - y) = x - (y - 4),		! \
252	 * and introduce this instruction to add -4 when	! \
253	 * m is -1, although this does mean one extra		! \
254	 * instruction in the forward case.			! \
255	 */							! \
256	addi	4*((m - 1) / 2), t1, t1				! \
257								! \
258	/* Now branch to the byte-at-a-time loop. */		! \
259	b	_LABEL(_do1)					! \
260	sub	src_off, t1, src_off
261
262/*
263 * This macro copies a region in the forward direction.
 *
 * Arguments:
 *	src_spc, src_off  source space register and offset register
 *	dst_spc, dst_off  destination space register and offset register
 *	count		  register holding the number of bytes to copy
 *
 * src_off, dst_off and count are modified.  t1 through t4 are
 * used as scratch registers; the shifting path also uses %r1
 * and sets %cr11.
 *
 * Regions shorter than 8 bytes go straight to the byte-at-a-time
 * copier.  Otherwise the first store is a stbys,b,m that covers
 * the destination bytes up to the next 4-byte boundary, and the
 * rest is done by _COPY (source and destination have the same
 * offset within a word) or by _COPYS (they differ).
 *
 * This macro expands both _COPY and _COPYS and defines the
 * labels _shifting and _shiftingrt, so _LABEL must be defined by
 * the user, as must the label _LABEL(_done) that the copy
 * finishes at.
264 */
265#define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count)  \
266								! \
267	/*							! \
268	 * Since in the shifting-left case we will		! \
269	 * load 8 bytes before checking count, to		! \
270	 * keep things simple, branch to the byte 		! \
271	 * copier unless we're copying at least 8.		! \
272	 */							! \
273	comib,>>,n	8, count, _LABEL(_do1)			! \
274								! \
275	/*							! \
276	 * Once we 4-byte align the source offset, 		! \
277	 * figure out how many bytes from the region		! \
278	 * will be in the first 4-byte word we read.		! \
279	 * Ditto for writing the destination offset.		! \
280	 */							! \
281	extru		src_off, 31, 2, t1			! \
282	extru		dst_off, 31, 2, t2			! \
283	subi		4, t1, t1				! \
284	subi		4, t2, t2				! \
285								! \
286	/*							! \
287	 * Calculate the byte shift required.  A 		! \
288	 * positive value means a source 4-byte word 		! \
289	 * has to be shifted to the right to line up 		! \
290	 * as a destination 4-byte word.			! \
291	 */							! \
292	sub		t1, t2, t1				! \
293								! \
294	/* 4-byte align src_off. */				! \
295	depi		0, 31, 2, src_off			! \
296								! \
297	/*							! \
298	 * It's somewhat important to note that this		! \
299	 * code thinks of count as "the number of bytes		! \
300	 * that haven't been stored yet", as opposed to		! \
301	 * "the number of bytes that haven't been copied	! \
302	 * yet".  The distinction is subtle, but becomes	! \
303	 * apparent at the end of the shifting code, where	! \
304	 * we "back up" src_off to correspond to count,		! \
305	 * as opposed to flushing the FIFO.			! \
306	 *							! \
307	 * We calculated above how many bytes our first		! \
308	 * store will store, so update count now.		! \
309	 *							! \
310	 * If the shift is zero, strictly as an optimization	! \
311	 * we use a copy loop that does no shifting.		! \
312	 */							! \
313	comb,<>		%r0, t1, _LABEL(_shifting)		! \
314	sub		count, t2, count			! \
315								! \
316	/* Load and store the first word. */			! \
317	ldws,ma		4(src_spc, src_off), t4			! \
318	stbys,b,m	t4, 4(dst_spc, dst_off)			! \
319								! \
320	/* Do the rest of the copy. */				! \
321	_COPY(src_spc,src_off,dst_spc,dst_off,count,ma,1)	! \
322								! \
323.label _LABEL(_shifting)					! \
324								! \
325	/*							! \
326	 * If shift < 0, we need to shift words to the		! \
327	 * left.  Since we can't do this directly, we		! \
328	 * adjust the shift so it's a shift to the right	! \
329	 * and load the first word into the high word of	! \
330	 * the FIFO.  Otherwise, we load a zero into the	! \
331	 * high word of the FIFO.				! \
332	 */							! \
333	comb,<=		%r0, t1, _LABEL(_shiftingrt)		! \
334	copy		%r0, t3					! \
335	addi		4, t1, t1				! \
336	ldws,ma		4(src_spc, src_off), t3			! \
337.label _LABEL(_shiftingrt)					! \
338								! \
339	/*							! \
340	 * Turn the shift byte count into a bit count,		! \
341	 * load the next word, set the Shift Amount 		! \
342	 * Register, and form and store the first word.		! \
343	 */							! \
344	sh3add		t1, %r0, t1				! \
345	ldws,ma		4(src_spc, src_off), t4			! \
346	mtctl		t1, %cr11				! \
347	vshd		t3, t4, %r1				! \
348	stbys,b,m	%r1, 4(dst_spc, dst_off)		! \
349								! \
350	/* Do the rest of the copy. */				! \
351	_COPYS(src_spc,src_off,dst_spc,dst_off,count,ma,1)
352
353/* This macro copies a region in the reverse direction.
 *
 * Arguments:
 *	src_spc, src_off  source space register and offset register
 *	dst_spc, dst_off  destination space register and offset register
 *	count		  register holding the number of bytes to copy
 *
 * The caller passes the offsets of the start of each region;
 * count is added to both offsets first, and the copy then works
 * downwards from there.
 *
 * src_off, dst_off and count are modified.  t1 through t4 are
 * used as scratch registers (t2 must survive until _STBYS_E_M,
 * which reads it); the shifting path also uses %r1 and sets
 * %cr11.
 *
 * This macro expands both _COPY and _COPYS and defines the label
 * _shifting, so _LABEL must be defined by the user, as must the
 * label _LABEL(_done) that the copy finishes at.
 */
354#define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count)  \
355								! \
356	/* Immediately add count to both offsets. */		! \
357	add	src_off, count, src_off				! \
358	add	dst_off, count, dst_off				! \
359								! \
360	/*							! \
361	 * Since in the shifting-right case we 			! \
362	 * will load 8 bytes before checking 			! \
363	 * count, to keep things simple, branch 		! \
364	 * to the byte copier unless we're 			! \
365	 * copying at least 8 bytes.				! \
366	 */							! \
367	comib,>>,n	8, count, _LABEL(_do1)			! \
368								! \
369	/*							! \
370	 * Once we 4-byte align the source offset, 		! \
371	 * figure out how many bytes from the region		! \
372	 * will be in the first 4-byte word we read.		! \
373	 * Ditto for writing the destination offset.		! \
374	 */							! \
375	extru,<>	src_off, 31, 2, t1			! \
376	ldi		4, t1					! \
377	extru,<>	dst_off, 31, 2, t2			! \
378	ldi		4, t2					! \
379								! \
380	/*							! \
381	 * Calculate the byte shift required.  A 		! \
382	 * positive value means a source 4-byte 		! \
383	 * word has to be shifted to the right to 		! \
384	 * line up as a destination 4-byte word.		! \
385	 */							! \
386	sub		t2, t1, t1				! \
387								! \
388	/*							! \
389	 * 4-byte align src_off, leaving it pointing 		! \
390	 * to the 4-byte word *after* the next word 		! \
391	 * we intend to load.					! \
392	 *							! \
393	 * It's somewhat important to note that this		! \
394	 * code thinks of count as "the number of bytes		! \
395	 * that haven't been stored yet", as opposed to		! \
396	 * "the number of bytes that haven't been copied	! \
397	 * yet".  The distinction is subtle, but becomes	! \
398	 * apparent at the end of the shifting code, where	! \
399	 * we "back up" src_off to correspond to count,		! \
400	 * as opposed to flushing the FIFO.			! \
401	 *							! \
402	 * We calculated above how many bytes our first		! \
403	 * store will store, so update count now.		! \
404	 *							! \
405	 * If the shift is zero, we use a copy loop that	! \
406	 * does no shifting.  NB: unlike the forward case,	! \
407	 * this is NOT strictly an optimization.  If the	! \
408	 * SAR is zero the vshds do NOT do the right thing.	! \
409	 * This is another asymmetry more or less the "fault"	! \
410	 * of vshd.						! \
411	 */							! \
412	addi		3, src_off, src_off			! \
413	sub		count, t2, count			! \
414	comb,<>		%r0, t1, _LABEL(_shifting)		! \
415	depi		0, 31, 2, src_off			! \
416								! \
417	/* Load and store the first word. */			! \
418	ldws,mb		-4(src_spc, src_off), t4		! \
419	_STBYS_E_M(t4, dst_spc, dst_off)			! \
420								! \
421	/* Do the rest of the copy. */				! \
422	_COPY(src_spc,src_off,dst_spc,dst_off,count,mb,-1)	! \
423								! \
424.label _LABEL(_shifting)					! \
425								! \
426	/*							! \
427	 * If shift < 0, we need to shift words to the		! \
428	 * left.  Since we can't do this directly, we		! \
429	 * adjust the shift so it's a shift to the right	! \
430	 * and load a zero in to the low word of the FIFO.	! \
431	 * Otherwise, we load the first word into the		! \
432	 * low word of the FIFO.				! \
433	 *							! \
434	 * Note the nullification trickery here.  We 		! \
435	 * assume that we're shifting to the left, and		! \
436	 * load zero into the low word of the FIFO.  Then	! \
437	 * we nullify the addi if we're shifting to the		! \
438	 * right.  If the addi is not nullified, we are		! \
439 	 * shifting to the left, so we nullify the load.	! \
441	 */							! \
442	copy		%r0, t3					! \
443	comb,<=,n	%r0, t1, 0				! \
444	addi,tr		4, t1, t1				! \
445	ldws,mb		-4(src_spc, src_off), t3		! \
446								! \
447	/*							! \
448	 * Turn the shift byte count into a bit count,		! \
449	 * load the next word, set the Shift Amount 		! \
450	 * Register, and form and store the first word.		! \
451	 */							! \
452	sh3add		t1, %r0, t1				! \
453	ldws,mb		-4(src_spc, src_off), t4		! \
454	mtctl		t1, %cr11				! \
455	vshd		t4, t3, %r1				! \
456	_STBYS_E_M(%r1, dst_spc, dst_off)			! \
457								! \
458	/* Do the rest of the copy. */				! \
459	_COPYS(src_spc,src_off,dst_spc,dst_off,count,mb,-1)
460
461/*
462 * For paranoia, when things aren't going well, enable this
463 * code to assemble byte-at-a-time-only copying.
 *
 * NB: the conditional below currently reads "#if 1", so these
 * definitions ARE in effect: they replace the _COPY_FORWARD and
 * _COPY_REVERSE macros defined above, and the word-at-a-time
 * code is not assembled into any of the entry points.
 *
 * Both replacements take the same arguments as the macros they
 * replace.  They copy one byte per iteration through %r1, modify
 * src_off, dst_off and count, and branch to _LABEL(_done) when
 * count is zero.
 *
 * The addib loops back using a hand-counted displacement of -12
 * rather than a label (presumably landing on the ldbs; check
 * this against the branch-offset rules before editing), so no
 * instruction may be inserted into either loop.
464 */
465#if 1
466#undef _COPY_FORWARD
467#define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count)  \
468	comb,=,n	%r0, count, _LABEL(_done)		! \
469	ldbs,ma		1(src_spc, src_off), %r1		! \
470	addib,<>	-1, count, -12				! \
471	stbs,ma		%r1, 1(dst_spc, dst_off)		! \
472	b,n		_LABEL(_done)
473#undef _COPY_REVERSE
474#define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count)  \
475	comb,=		%r0, count, _LABEL(_done)		! \
476	add		src_off, count, src_off			! \
477	add		dst_off, count, dst_off			! \
478	ldbs,mb		-1(src_spc, src_off), %r1		! \
479	addib,<>	-1, count, -12				! \
480	stbs,mb		%r1, -1(dst_spc, dst_off)		! \
481	b,n		_LABEL(_done)
482#endif
483
484/*
485 * If none of SPCOPY, MEMCPY or MEMMOVE is defined, define BCOPY,
 * so that this file assembles bcopy by default.
486 */
487#if !(defined(SPCOPY) || defined(MEMCPY) || defined(MEMMOVE))
488#define BCOPY
489#endif
490
491#if defined(SPCOPY) && !defined(_STANDALONE)
492#include <sys/errno.h>
493#include "assym.h"
494
495/*
496 * int spcopy(pa_space_t ssp, const void *src, pa_space_t dsp, void *dst,
497 * 	size_t len)
498 *
499 * We assume that the regions do not overlap.
 *
 * The copy is always done in the forward direction.
 *
 * Return value (in %ret0): %ret0 is preloaded with EFAULT and
 * is only cleared to zero once the copy has run to completion.
 * The spcopy_fault entry point returns without clearing it.
 *
 * Register usage:
 *	%arg0	ssp on entry; reloaded with len for the copy
 *	%arg1	src offset
 *	%arg2	dsp
 *	%arg3	dst offset
 *	%r31	pointer loaded from P_ADDR(curproc), used to reach
 *		the U_PCB+PCB_ONFAULT slot
 *	%ret1	saved value of sr2, restored on the way out
 *	sr1	loaded with ssp
 *	sr2	loaded with dsp
500 */
501LEAF_ENTRY(spcopy)
502
503        /*
504	 * Setup the fault handler, and load %ret0
505	 * with EFAULT, assuming the copy will fail.
506	 */
507	.import	curproc, data
508        ldil    L%curproc, %r31
509        ldw     R%curproc(%r31), %r31
510#ifdef	DIAGNOSTIC
	/* Call panic with a message if curproc is NULL. */
511	comb,<>,n %r0, %r31, Lspcopy_curproc_ok
512	ldil	L%panic, %r1
513	ldil	L%Lspcopy_curproc_bad, %arg0
514	ldo	R%panic(%r1), %r1
515	ldo	R%Lspcopy_curproc_bad(%arg0), %arg0
516	.call
517	bv,n    %r0(%r1)
518	nop
519Lspcopy_curproc_bad
520	.asciz	"spcopy: curproc == NULL\n"
521	.align	8
522Lspcopy_curproc_ok
523#endif /* DIAGNOSTIC */
524        ldil    L%spcopy_fault, %r1
525        ldw     P_ADDR(%r31), %r31
526        ldo     R%spcopy_fault(%r1), %r1
527	ldi	EFAULT, %ret0
528        stw     %r1, U_PCB+PCB_ONFAULT(%r31)
529
530	/* Setup the space registers. */
531	mfsp	sr2, %ret1
532	mtsp	%arg0, sr1
533	mtsp	%arg2, sr2
534
535	/* Get the len argument and do the copy. */
536	ldw	HPPA_FRAME_ARG(4)(sp), %arg0
537#define	_LABEL(l) __CONCAT(spcopy,l)
538	_COPY_FORWARD(sr1,%arg1,sr2,%arg3,%arg0)
539_LABEL(_done)
540
541	/* Return. */
542	copy	%r0, %ret0
	/*
	 * The address of spcopy_fault was stored in the
	 * PCB_ONFAULT slot above.  Both the normal path and
	 * this entry clear that slot and restore sr2 (in the
	 * delay slot of the return).  This entry relies on
	 * %ret0, %ret1, %r31 and %rp still holding the values
	 * set up above - presumably whatever transfers
	 * control here preserves them; TODO confirm against
	 * the trap handler.
	 */
543ALTENTRY(spcopy_fault)
544        stw     %r0, U_PCB+PCB_ONFAULT(%r31)
545	bv	%r0(%rp)
546	mtsp	%ret1, sr2
547EXIT(spcopy)
548#endif /* SPCOPY && !_STANDALONE */
549
550#ifdef MEMCPY
551/*
552 * void *memcpy(void * restrict dst, const void * restrict src, size_t len);
553 *
554 * memcpy is specifically restricted to working on
555 * non-overlapping regions, so we can just copy forward.
 *
 * On entry %arg0 is dst, %arg1 is src and %arg2 is len.  The
 * original dst is copied into %ret0 first, because the copy
 * macro modifies %arg0 as it goes; that saved value is what is
 * returned.
556 */
557LEAF_ENTRY(memcpy)
558	copy	%arg0, %ret0
559#define	_LABEL(l) __CONCAT(memcpy,l)
560	_COPY_FORWARD(sr0,%arg1,sr0,%arg0,%arg2)
561_LABEL(_done)
562	bv,n	%r0(%rp)
563	nop
564EXIT(memcpy)
565#endif /* MEMCPY */
566
567#ifdef BCOPY
568/*
569 * void bcopy(const void *src, void *dst, size_t len);
 *
 * bcopy takes its pointer arguments in the opposite order from
 * memmove, so the entry code swaps %arg0 and %arg1 (through
 * %r1), leaving dst in %arg0 and src in %arg1, and then falls
 * through into the common code below.
570 */
571LEAF_ENTRY(bcopy)
572	copy	%arg0, %r1
573	copy	%arg1, %arg0
574	copy	%r1, %arg1
575	/* FALLTHROUGH */
576#define _LABEL_F(l) __CONCAT(bcopy_F,l)
577#define _LABEL_R(l) __CONCAT(bcopy_R,l)
578#endif
579
580#ifdef MEMMOVE
581/*
582 * void *memmove(void *dst, const void *src, size_t len);
 *
 * The original dst is saved in %ret0 before the copy macros
 * modify %arg0, and is the value returned.
583 */
584LEAF_ENTRY(memmove)
585#define _LABEL_F(l) __CONCAT(memmove_F,l)
586#define _LABEL_R(l) __CONCAT(memmove_R,l)
587	copy	%arg0, %ret0
588#endif /* MEMMOVE */
589
590#if defined(BCOPY) || defined(MEMMOVE)
591
592	/*
593	 * If src >= dst or src + len <= dst, we copy
594	 * forward, else we copy in reverse.
	 *
	 * At this point %arg0 is dst, %arg1 is src and %arg2
	 * is len; %r1 is set to src + len.  The first comb has
	 * a displacement of zero: when src >= dst it serves
	 * only to nullify the second comb, so that we fall
	 * into the forward copy.  Otherwise the second comb
	 * branches to the reverse copy when src + len > dst.
595	 */
596	add		%arg1, %arg2, %r1
597	comb,>>=,n	%arg1, %arg0, 0
598	comb,>>,n	%r1, %arg0, _LABEL_R(_go)
599
	/*
	 * _LABEL is redefined around each expansion so that
	 * the forward and reverse copies get distinct label
	 * names.
	 */
600#define _LABEL _LABEL_F
601	_COPY_FORWARD(sr0,%arg1,sr0,%arg0,%arg2)
602#undef _LABEL
603
604_LABEL_R(_go)
605#define _LABEL _LABEL_R
606	_COPY_REVERSE(sr0,%arg1,sr0,%arg0,%arg2)
607#undef _LABEL
608
	/* Both copies finish here and return to the caller. */
609_LABEL_F(_done)
610_LABEL_R(_done)
611	bv,n	%r0(%rp)
612	nop
613#ifdef BCOPY
614EXIT(bcopy)
615#else
616EXIT(memmove)
617#endif
618#endif /* BCOPY || MEMMOVE */
619