xref: /openbsd/lib/libc/arch/alpha/string/bcopy.S (revision 7b36286a)
1/*	$OpenBSD: bcopy.S,v 1.5 2005/08/07 16:40:13 espie Exp $	*/
2/*	$NetBSD: bcopy.S,v 1.3 1996/10/17 03:08:11 cgd Exp $	*/
3
4/*
5 * Copyright (c) 1995 Carnegie-Mellon University.
6 * All rights reserved.
7 *
8 * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
9 *	   added by Chris Demetriou.
10 *
11 * Permission to use, copy, modify and distribute this software and
12 * its documentation is hereby granted, provided that both the copyright
13 * notice and this permission notice appear in all copies of the
14 * software, derivative works or modified versions, and any portions
15 * thereof, and that both notices appear in supporting documentation.
16 *
17 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
18 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
19 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
20 *
21 * Carnegie Mellon requests users of this software to return to
22 *
23 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
24 *  School of Computer Science
25 *  Carnegie Mellon University
26 *  Pittsburgh PA 15213-3890
27 *
28 * any improvements or extensions that they make and grant Carnegie the
29 * rights to redistribute these changes.
30 */
31
32#include <machine/asm.h>
33
34#if defined(MEMCOPY) || defined(MEMMOVE)
	/*
	 * One source file builds three entry points.  Defining MEMCOPY selects
	 * memcpy, defining MEMMOVE selects memmove, and defining neither
	 * selects bcopy.  Only the function name, the argument registers and
	 * the return value setup depend on these macros.
	 */
35#ifdef MEMCOPY
36#define	FUNCTION	memcpy
37#else
38#define FUNCTION	memmove
39#endif
	/* memcpy and memmove are called as (to, from, len): destination in a0, source in a1 */
40#define	SRCREG		a1
41#define	DSTREG		a0
42#else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
43#define	FUNCTION	bcopy
	/* bcopy is called as (from, to, len): source in a0, destination in a1 */
44#define	SRCREG		a0
45#define	DSTREG		a1
46#endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
47
	/* The byte count is the third argument in every variant */
48#define	SIZEREG		a2
49
50/*
51 * Copy bytes.
52 *
53 * void bcopy(const void *from, void *to, size_t len);
54 * void *memcpy(void *to, const void *from, size_t len);
55 * void *memmove(void *to, const void *from, size_t len);
56 *
57 * No matter how invoked, the source and destination registers are used
58 * directly for calculation.  There's no point in copying them to "working"
59 * registers, since the code uses their values "in place," and
60 * copying them would be slower.
61 */
62
63LEAF(FUNCTION,3)
64	/*
	 * Copies SIZEREG bytes from the buffer at SRCREG to the buffer at
	 * DSTREG.  Data moves a quadword (8 bytes) at a time.  At the edges
	 * of the destination the partial quadword is merged with the bytes
	 * already in memory, so bytes outside the destination range are
	 * written back with their existing values.
	 *
	 * Registers as used below:
	 *   SRCREG, DSTREG  current source and destination addresses
	 *   SIZEREG         byte count, later reduced to its low three bits
	 *   a3              source end (src + len), stepped down in the
	 *                   overlap path
	 *   a4              destination end, stepped down in the overlap path
	 *   t0-t7           scratch
	 *   v0              return value (memcpy and memmove builds only)
	 *
	 * Paths: same alignment and different alignment both copy upward.
	 * bcopy_overlap copies downward and is taken when the destination
	 * starts inside the source range.  Copies of 8 bytes or fewer with
	 * differing alignment or overlap go through bcopy_da_finish.
	 */
65#if defined(MEMCOPY) || defined(MEMMOVE)
66	/* set up return value (the destination pointer) before DSTREG is advanced */
67	mov	DSTREG,v0
68#endif
69
70	/* Nothing to copy if the length is zero, or negative when read as signed */
71	ble	SIZEREG,bcopy_done
72
73	/*
	 * Check for overlap.  As an unsigned value, (dst - src) is below len
	 * exactly when dst lies in [src, src + len).  In that case an upward
	 * copy could overwrite source bytes before they are read.
	 */
74	subq	DSTREG,SRCREG,t5	/* t5 = dst - src */
75	cmpult	t5,SIZEREG,t5		/* t5 = 1 if (dst - src) < len, unsigned */
76	bne	t5,bcopy_overlap
77
78	/* a3 = source end address (src + len) */
79	addq	SRCREG,SIZEREG,a3
80
81	/* Get the first word: t2 = aligned quadword containing the first source byte */
82	ldq_u	t2,0(SRCREG)
83
84	/* Do they have the same alignment? */
85	xor	SRCREG,DSTREG,t0
86	and	t0,7,t0			/* nonzero if the low three address bits differ */
87	and	DSTREG,7,t1		/* t1 = destination offset within its quadword */
88	bne	t0,bcopy_different_alignment
89
90	/* src & dst have same alignment */
91	beq	t1,bcopy_all_aligned	/* offset 0: already on a quadword boundary */
92
	/*
	 * Shared nonzero offset.  Build the first quadword in t2 from the
	 * source bytes at and above the offset (mskqh) and the destination
	 * bytes below it (mskql).  The count grows by the offset so that it
	 * is measured from the start of that quadword.
	 */
93	ldq_u	t3,0(DSTREG)
94	addq	SIZEREG,t1,SIZEREG
95	mskqh	t2,SRCREG,t2
96	mskql	t3,SRCREG,t3
97	or	t2,t3,t2
98
99	/*
	 * From here the copy works on whole quadwords.  ldq_u and stq_u
	 * ignore the low three address bits, so SRCREG and DSTREG are not
	 * rounded down.
	 */
100
101bcopy_all_aligned:
102	/*
	 * t0 = (count - 1) rounded down to a multiple of 8, the bytes covered
	 * by the quadwords before the final one.  Skip the loop if there are
	 * none (count of 8 or fewer).
	 */
103	subq	SIZEREG,1,t0
104	and	SIZEREG,7,SIZEREG	/* bytes used in the final quadword, 0 means all 8 */
105	bic	t0,7,t0
106	beq	t0,bcopy_samealign_lp_end
107
108bcopy_samealign_lp:
	/* Store the pending quadword held in t2, then fetch the next source quadword */
109	stq_u	t2,0(DSTREG)
110	addq	DSTREG,8,DSTREG
111	ldq_u	t2,8(SRCREG)
112	subq	t0,8,t0
113	addq	SRCREG,8,SRCREG
114	bne	t0,bcopy_samealign_lp
115
116bcopy_samealign_lp_end:
117	/* t2 holds the final quadword.  If all 8 bytes belong to the copy, store it whole */
118	bne	SIZEREG,bcopy_small_left
119	stq_u	t2,0(DSTREG)
120	RET
121
122bcopy_small_left:
	/* Final partial quadword: low SIZEREG bytes from t2, the remaining bytes from the destination */
123	mskql	t2,SIZEREG,t4
124	ldq_u	t3,0(DSTREG)
125	mskqh	t3,SIZEREG,t3
126	or	t4,t3,t4
127	stq_u	t4,0(DSTREG)
128	RET
129
130bcopy_different_alignment:
131	/*
132	 * this is the fun part: source and destination sit at different
	 * offsets within their quadwords, so each destination quadword is
	 * assembled from two source quadwords with extql and extqh.
	 *
	 * NOTE(review): a3 was already set to src + len before the alignment
	 * test and neither input has changed on this path, so the addq
	 * below looks redundant.
133	 */
134	addq	SRCREG,SIZEREG,a3
135	cmpule	SIZEREG,8,t0		/* 8 bytes or fewer: handled by one merged store pair */
136	bne	t0,bcopy_da_finish
137
138	beq	t1,bcopy_da_noentry	/* destination already quadword aligned */
139
140	/* Do the initial partial word: the t0 bytes up to the next destination quadword boundary */
141	subq	zero,DSTREG,t0
142	and	t0,7,t0			/* t0 = (-dst) & 7 */
143	ldq_u	t3,7(SRCREG)
144	extql	t2,SRCREG,t2
145	extqh	t3,SRCREG,t3
146	or	t2,t3,t5		/* t5 = 8 source bytes starting at src */
147	insql	t5,DSTREG,t5		/* shift them up to the destination offset */
148	ldq_u	t6,0(DSTREG)
149	mskql	t6,DSTREG,t6		/* keep destination bytes below the offset */
150	or	t5,t6,t5
151	stq_u	t5,0(DSTREG)
152	addq	SRCREG,t0,SRCREG
153	addq	DSTREG,t0,DSTREG	/* DSTREG is now quadword aligned */
154	subq	SIZEREG,t0,SIZEREG
155	ldq_u	t2,0(SRCREG)		/* reload the source quadword at the new position */
156
157bcopy_da_noentry:
	/* Same split as the aligned case: t0 counts the bytes before the final quadword */
158	subq	SIZEREG,1,t0
159	bic	t0,7,t0
160	and	SIZEREG,7,SIZEREG	/* bytes used in the final quadword, 0 means all 8 */
161	beq	t0,bcopy_da_finish2
162
163bcopy_da_lp:
	/*
	 * Unrolled twice.  t2 and t3 alternate as the older and newer source
	 * quadword, so each source quadword is loaded once and contributes to
	 * two consecutive destination quadwords.
	 */
164	ldq_u	t3,7(SRCREG)
165	addq	SRCREG,8,SRCREG
166	extql	t2,SRCREG,t4
167	extqh	t3,SRCREG,t5
168	subq	t0,8,t0
169	or	t4,t5,t5
170	stq	t5,0(DSTREG)
171	addq	DSTREG,8,DSTREG
172	beq	t0,bcopy_da_finish1	/* leave with the newest source quadword in t3 */
173	ldq_u	t2,7(SRCREG)
174	addq	SRCREG,8,SRCREG
175	extql	t3,SRCREG,t4
176	extqh	t2,SRCREG,t5
177	subq	t0,8,t0
178	or	t4,t5,t5
179	stq	t5,0(DSTREG)
180	addq	DSTREG,8,DSTREG
181	bne	t0,bcopy_da_lp		/* falls through with the newest source quadword in t2 */
182
183bcopy_da_finish2:
184	/* The newest source quadword is in t2, move it to t3 where the code below expects it */
185	mov	t2,t3
186
187bcopy_da_finish1:
188	/*
	 * Do the last partial word.  Merge t3 with the quadword containing
	 * the last source byte (a3 - 1) into t2, then reuse the tail of the
	 * same-alignment path to store it whole or masked.
	 */
189	ldq_u	t2,-1(a3)
190	extql	t3,SRCREG,t3
191	extqh	t2,SRCREG,t2
192	or	t2,t3,t2
193	br	zero,bcopy_samealign_lp_end
194
195bcopy_da_finish:
196	/*
	 * Copy of 8 bytes or fewer, also entered from bcopy_ov_short.
	 * Expects t2 = quadword containing the first source byte and
	 * a3 = source end.  Every load is issued before either store.
	 */
197	ldq_u	t3,-1(a3)		/* quadword containing the last source byte */
198	extql	t2,SRCREG,t2
199	extqh	t3,SRCREG,t3
200	or	t2,t3,t2		/* t2 = source bytes with the first one in byte 0 */
201	insqh	t2,DSTREG,t3		/* t3 = part that spills into the next destination quadword */
202	insql	t2,DSTREG,t2		/* t2 = part for the first destination quadword */
203	lda	t4,-1(zero)		/* t4 = all ones */
204	mskql	t4,SIZEREG,t5		/* t5 = ones in the low (len & 7) bytes */
205	cmovne	t5,t5,t4		/* t4 = byte mask for len bytes, stays all ones when len is 8 */
206	insqh	t4,DSTREG,t5		/* position the mask the same way as the data */
207	insql	t4,DSTREG,t4
208	addq	DSTREG,SIZEREG,a4	/* a4 = destination end */
209	ldq_u	t6,0(DSTREG)
210	ldq_u	t7,-1(a4)
211	bic	t6,t4,t6		/* clear the destination bytes that will be replaced */
212	bic	t7,t5,t7
213	and	t2,t4,t2		/* drop source bytes beyond len */
214	and	t3,t5,t3
215	or	t2,t6,t2
216	or	t3,t7,t3
217	stq_u	t3,-1(a4)		/* high quadword first */
218	stq_u	t2,0(DSTREG)		/* low quadword last, so it wins if both stores hit the same quadword */
219	RET
220
221bcopy_overlap:
222	/*
223	 * Basically equivalent to previous case, only backwards.
224	 * Not quite as highly optimized: every quadword goes through the
	 * extql and extqh merge, whatever the relative alignment.
225	 */
226	addq	SRCREG,SIZEREG,a3	/* a3 = source end */
227	addq	DSTREG,SIZEREG,a4	/* a4 = destination end */
228
229	/* 8 bytes or fewer: bcopy_da_finish loads everything before storing, so overlap is harmless */
230	cmpule	SIZEREG,8,t0
231	bne	t0,bcopy_ov_short
232
233	/* Possibly do a partial first word: the t4 bytes above the last destination quadword boundary */
234	and	a4,7,t4
235	beq	t4,bcopy_ov_nostart2
236	subq	a3,t4,a3
237	subq	a4,t4,a4		/* a4 is now quadword aligned */
238	ldq_u	t1,0(a3)
239	subq	SIZEREG,t4,SIZEREG
240	ldq_u	t2,7(a3)
241	ldq	t3,0(a4)
242	extql	t1,a3,t1
243	extqh	t2,a3,t2
244	or	t1,t2,t1		/* t1 = 8 source bytes starting at a3 */
245	mskqh	t3,t4,t3		/* keep destination bytes past the end of the copy */
246	mskql	t1,t4,t1		/* keep the low t4 source bytes */
247	or	t1,t3,t1
248	stq	t1,0(a4)
249
250bcopy_ov_nostart2:
	/* t4 = bytes to move as whole quadwords, SIZEREG = bytes left over for the head */
251	bic	SIZEREG,7,t4
252	and	SIZEREG,7,SIZEREG
253	beq	t4,bcopy_ov_lp_end
254
255bcopy_ov_lp:
256	/* This could be more pipelined, but it doesn't seem worth it */
	/* Each pass merges the 8 source bytes ending at a3 and stores them at a4 - 8 */
257	ldq_u	t0,-8(a3)
258	subq	a4,8,a4
259	ldq_u	t1,-1(a3)
260	subq	a3,8,a3
261	extql	t0,a3,t0
262	extqh	t1,a3,t1
263	subq	t4,8,t4
264	or	t0,t1,t0
265	stq	t0,0(a4)
266	bne	t4,bcopy_ov_lp
267
268bcopy_ov_lp_end:
269	beq	SIZEREG,bcopy_done
270
	/*
	 * Head of the buffers.  SRCREG and DSTREG are not modified on the
	 * overlap path, so they still address the first byte.  insql places
	 * the source bytes from the destination offset upward and mskql keeps
	 * the destination bytes below that offset.
	 */
271	ldq_u	t0,0(SRCREG)
272	ldq_u	t1,7(SRCREG)
273	ldq_u	t2,0(DSTREG)
274	extql	t0,SRCREG,t0
275	extqh	t1,SRCREG,t1
276	or	t0,t1,t0		/* t0 = 8 source bytes starting at src */
277	insql	t0,DSTREG,t0
278	mskql	t2,DSTREG,t2
279	or	t2,t0,t2
280	stq_u	t2,0(DSTREG)
281
282bcopy_done:
283	RET
284
285bcopy_ov_short:
	/* bcopy_da_finish expects the first source quadword in t2, a3 was set at bcopy_overlap */
286	ldq_u	t2,0(SRCREG)
287	br	zero,bcopy_da_finish
288
289	END(FUNCTION)
290