/* $NetBSD: bcopy.S,v 1.1 2005/12/20 19:28:49 christos Exp $ */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
 *	   added by Chris Demetriou.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include <machine/asm.h>

/*
 * Build-time selection of the entry point.
 *
 * The same body is assembled three times.  Defining MEMCOPY or MEMMOVE
 * produces memcpy() or memmove(), which take (dst, src, len); with
 * neither defined the result is bcopy(), which takes (src, dst, len).
 * SRCREG and DSTREG hide that difference in argument order from the
 * code below, which only ever names the registers through these macros.
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
#define	FUNCTION	memcpy
#else
#define	FUNCTION	memmove
#endif
#define	SRCREG		a1
#define	DSTREG		a0
#else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */

/* The byte count is the third argument for all three entry points. */
#define	SIZEREG		a2

/*
 * Copy bytes.
 *
 * void bcopy(char *from, char *to, size_t len);
 * char *memcpy(void *to, const void *from, size_t len);
 * char *memmove(void *to, const void *from, size_t len);
 *
 * No matter how invoked, the source and destination registers are used
 * in place for calculation.  There's no point in copying them to
 * "working" registers, since the code uses their values "in place," and
 * copying them would be slower.
 *
 * Register usage:
 *	SRCREG	source pointer (advanced in place by the forward paths)
 *	DSTREG	destination pointer (advanced in place by the forward paths)
 *	SIZEREG	byte count; reduced to the residual count (len mod 8, after
 *		alignment adjustments) once a main loop is entered
 *	v0	return value (original destination) for memcpy/memmove only
 *	a3	one past the last source byte
 *	a4	one past the last destination byte (short and backward paths)
 *	t0-t7	scratch
 *
 * A length that is zero, or negative when viewed as a signed quadword,
 * copies nothing.  All three entry points tolerate overlapping buffers:
 * when the destination starts inside the source region the copy runs
 * backwards from the end.
 *
 * Alpha instruction notes relied on below:
 *	ldq_u/stq_u ignore the low three address bits, so they always
 *	touch the aligned quadword containing the given address.
 *	extql/extqh combine two such quadwords into the eight bytes that
 *	start at an unaligned address; insql/insqh do the inverse shift.
 *	mskql x,n keeps the low n bytes of x; mskqh x,n keeps the rest.
 */

LEAF(FUNCTION,3)

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	mov	DSTREG,v0
#endif

	/* Nothing to do for a zero or (signed) negative length */
	ble	SIZEREG,bcopy_done

	/*
	 * Check for overlap: (dst - src) compared unsigned against len is
	 * true exactly when dst lies in [src, src + len), the one case in
	 * which a forward copy would overwrite source bytes not yet read.
	 */
	subq	DSTREG,SRCREG,t5
	cmpult	t5,SIZEREG,t5
	bne	t5,bcopy_overlap

	/* a3 = end address */
	addq	SRCREG,SIZEREG,a3

	/* Get the first word */
	ldq_u	t2,0(SRCREG)

	/*
	 * Do they have the same alignment?
	 * t0 = (src ^ dst) & 7 is non-zero if the offsets within a
	 * quadword differ; t1 = dst & 7 is the destination's offset.
	 */
	xor	SRCREG,DSTREG,t0
	and	t0,7,t0
	and	DSTREG,7,t1
	bne	t0,bcopy_different_alignment

	/* src & dst have same alignment */
	beq	t1,bcopy_all_aligned

	/*
	 * Same but non-zero offset: build the first quadword to store from
	 * the destination's existing low bytes (below the offset) and the
	 * source's bytes from the offset upwards.  Growing the count by the
	 * offset lets the rest of the path count from the aligned base.
	 */
	ldq_u	t3,0(DSTREG)
	addq	SIZEREG,t1,SIZEREG
	mskqh	t2,SRCREG,t2
	mskql	t3,SRCREG,t3
	or	t2,t3,t2

	/*
	 * From here on the copy moves whole aligned quadwords.  The
	 * pointers may still carry their low offset bits, which ldq_u and
	 * stq_u ignore.  t2 always holds the next quadword to be stored.
	 */

bcopy_all_aligned:
	/*
	 * If less than 8 bytes,skip loop.
	 * t0 = bytes stored by the loop ((count - 1) rounded down to a
	 * multiple of 8, so the final quadword is always left for the tail
	 * code); SIZEREG = count mod 8, the bytes valid in that final
	 * quadword (0 meaning all eight).
	 */
	subq	SIZEREG,1,t0
	and	SIZEREG,7,SIZEREG
	bic	t0,7,t0
	beq	t0,bcopy_samealign_lp_end

bcopy_samealign_lp:
	/* Store the pending quadword, then fetch the following one. */
	stq_u	t2,0(DSTREG)
	addq	DSTREG,8,DSTREG
	ldq_u	t2,8(SRCREG)
	subq	t0,8,t0
	addq	SRCREG,8,SRCREG
	bne	t0,bcopy_samealign_lp

bcopy_samealign_lp_end:
	/*
	 * Final quadword (also the tail of the different-alignment path).
	 * If the residual count is zero the whole quadword belongs to the
	 * copy; store it and exit.
	 */
	bne	SIZEREG,bcopy_small_left
	stq_u	t2,0(DSTREG)
	RET

bcopy_small_left:
	/*
	 * Only the low SIZEREG bytes of t2 belong to the copy; merge them
	 * with the destination's existing high bytes before storing.
	 */
	mskql	t2,SIZEREG,t4
	ldq_u	t3,0(DSTREG)
	mskqh	t3,SIZEREG,t3
	or	t4,t3,t4
	stq_u	t4,0(DSTREG)
	RET

bcopy_different_alignment:
	/*
	 * this is the fun part
	 *
	 * Source and destination sit at different offsets within a
	 * quadword, so every destination quadword is assembled from two
	 * source quadwords.  On entry t2 is the first source quadword and
	 * t1 = dst & 7.  (a3 already holds src + len from above; it is
	 * recomputed here with the same value.)
	 */
	addq	SRCREG,SIZEREG,a3
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_da_finish

	beq	t1,bcopy_da_noentry

	/*
	 * Do the initial partial word.
	 * t0 = (-dst) & 7 = bytes needed to bring dst up to alignment.
	 * Gather eight source bytes starting at src, shift them up to the
	 * destination's offset, and merge them above the destination's
	 * existing low bytes.  Then step both pointers past those t0 bytes
	 * and reload t2 for the (now aligned) destination.
	 */
	subq	zero,DSTREG,t0
	and	t0,7,t0
	ldq_u	t3,7(SRCREG)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t5
	insql	t5,DSTREG,t5
	ldq_u	t6,0(DSTREG)
	mskql	t6,DSTREG,t6
	or	t5,t6,t5
	stq_u	t5,0(DSTREG)
	addq	SRCREG,t0,SRCREG
	addq	DSTREG,t0,DSTREG
	subq	SIZEREG,t0,SIZEREG
	ldq_u	t2,0(SRCREG)

bcopy_da_noentry:
	/*
	 * Destination is aligned.  As in the same-alignment path, t0 is
	 * the byte count handled by the loop (the last quadword is left
	 * for the tail) and SIZEREG becomes the residual count mod 8.
	 */
	subq	SIZEREG,1,t0
	bic	t0,7,t0
	and	SIZEREG,7,SIZEREG
	beq	t0,bcopy_da_finish2

bcopy_da_lp:
	/*
	 * Unrolled twice so that t2 and t3 alternate as "previous" and
	 * "next" source quadword without a register move.  Each half
	 * combines the two and stores one aligned destination quadword.
	 */
	ldq_u	t3,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t2,SRCREG,t4
	extqh	t3,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	/* Leaving mid-iteration: the newest source quadword is in t3. */
	beq	t0,bcopy_da_finish1
	ldq_u	t2,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t3,SRCREG,t4
	extqh	t2,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	bne	t0,bcopy_da_lp

bcopy_da_finish2:
	/*
	 * Do the last new word.  The newest source quadword is in t2 here;
	 * move it to t3, where the shared tail below expects it.
	 */
	mov	t2,t3

bcopy_da_finish1:
	/*
	 * Do the last partial word: combine the newest source quadword
	 * with the one containing the final source byte (a3 - 1), then let
	 * the same-alignment tail store it whole or merged, according to
	 * the residual count in SIZEREG.
	 */
	ldq_u	t2,-1(a3)
	extql	t3,SRCREG,t3
	extqh	t2,SRCREG,t2
	or	t2,t3,t2
	br	zero,bcopy_samealign_lp_end

bcopy_da_finish:
	/*
	 * Whole copy of 1..8 bytes (also used for short overlapping
	 * copies).  Everything is loaded before anything is stored, so
	 * overlap is harmless.  On entry t2 is the quadword containing the
	 * first source byte and a3 = src + len.
	 *
	 * Do the last word in the next source word: t2 becomes the eight
	 * bytes starting at src.
	 */
	ldq_u	t3,-1(a3)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t2
	/* Shift the data to dst's offset: t2 = low quadword, t3 = high. */
	insqh	t2,DSTREG,t3
	insql	t2,DSTREG,t2
	/*
	 * Build a byte mask covering len bytes: all ones cut down to the
	 * low (len & 7) bytes, except that a zero result (len == 8) keeps
	 * the full all-ones mask.  Then shift the mask exactly like the
	 * data: t4 = low-quadword mask, t5 = high-quadword mask.
	 */
	lda	t4,-1(zero)
	mskql	t4,SIZEREG,t5
	cmovne	t5,t5,t4
	insqh	t4,DSTREG,t5
	insql	t4,DSTREG,t4
	/* Fetch both destination quadwords and merge under the masks. */
	addq	DSTREG,SIZEREG,a4
	ldq_u	t6,0(DSTREG)
	ldq_u	t7,-1(a4)
	bic	t6,t4,t6
	bic	t7,t5,t7
	and	t2,t4,t2
	and	t3,t5,t3
	or	t2,t6,t2
	or	t3,t7,t3
	/*
	 * Store high first, low last: when the bytes do not cross a
	 * quadword boundary both stores hit the same quadword, the high
	 * mask is zero, and the low store must be the one that sticks.
	 */
	stq_u	t3,-1(a4)
	stq_u	t2,0(DSTREG)
	RET

bcopy_overlap:
	/*
	 * Basically equivalent to previous case, only backwards.
	 * Not quite as highly optimized
	 *
	 * a3 and a4 are the working end pointers for source and
	 * destination; SRCREG and DSTREG themselves are left untouched on
	 * this path and are used again for the final partial quadword.
	 */
	addq	SRCREG,SIZEREG,a3
	addq	DSTREG,SIZEREG,a4

	/* 8 bytes or less - don't worry about overlap */
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_ov_short

	/*
	 * Possibly do a partial first word.
	 * t4 = dst_end & 7 = bytes of the copy that fall in the last,
	 * partially covered destination quadword.  Back both end pointers
	 * up by t4 (making a4 aligned), gather eight source bytes at a3,
	 * and merge their low t4 bytes with the destination's existing
	 * high bytes.
	 */
	and	a4,7,t4
	beq	t4,bcopy_ov_nostart2
	subq	a3,t4,a3
	subq	a4,t4,a4
	ldq_u	t1,0(a3)
	subq	SIZEREG,t4,SIZEREG
	ldq_u	t2,7(a3)
	ldq	t3,0(a4)
	extql	t1,a3,t1
	extqh	t2,a3,t2
	or	t1,t2,t1
	mskqh	t3,t4,t3
	mskql	t1,t4,t1
	or	t1,t3,t1
	stq	t1,0(a4)

bcopy_ov_nostart2:
	/*
	 * a4 is aligned.  t4 = bytes to move as whole quadwords,
	 * SIZEREG = bytes left over at the very start of the buffers.
	 */
	bic	SIZEREG,7,t4
	and	SIZEREG,7,SIZEREG
	beq	t4,bcopy_ov_lp_end

bcopy_ov_lp:
	/* This could be more pipelined, but it doesn't seem worth it */
	/*
	 * Step both pointers down by 8, gather the eight source bytes now
	 * at a3 from the two quadwords that contain them, and store them
	 * to the aligned destination quadword at a4.
	 */
	ldq_u	t0,-8(a3)
	subq	a4,8,a4
	ldq_u	t1,-1(a3)
	subq	a3,8,a3
	extql	t0,a3,t0
	extqh	t1,a3,t1
	subq	t4,8,t4
	or	t0,t1,t0
	stq	t0,0(a4)
	bne	t4,bcopy_ov_lp

bcopy_ov_lp_end:
	beq	SIZEREG,bcopy_done

	/*
	 * Leading partial quadword: the SIZEREG bytes that remain run from
	 * dst up to the aligned address a4.  Gather eight source bytes at
	 * src, shift them up to dst's offset, and merge them above the
	 * destination's existing low bytes.
	 */
	ldq_u	t0,0(SRCREG)
	ldq_u	t1,7(SRCREG)
	ldq_u	t2,0(DSTREG)
	extql	t0,SRCREG,t0
	extqh	t1,SRCREG,t1
	or	t0,t1,t0
	insql	t0,DSTREG,t0
	mskql	t2,DSTREG,t2
	or	t2,t0,t2
	stq_u	t2,0(DSTREG)

bcopy_done:
	RET

bcopy_ov_short:
	/*
	 * Short overlapping copy: load the first source quadword into t2,
	 * as bcopy_da_finish expects, and reuse its load-everything-then-
	 * store sequence.
	 */
	ldq_u	t2,0(SRCREG)
	br	zero,bcopy_da_finish

	END(FUNCTION)
