xref: /openbsd/lib/libc/arch/alpha/string/bcopy.S (revision 73471bf0)
1/*	$OpenBSD: bcopy.S,v 1.8 2015/08/31 02:53:56 guenther Exp $	*/
2/*	$NetBSD: bcopy.S,v 1.3 1996/10/17 03:08:11 cgd Exp $	*/
3
4/*
5 * Copyright (c) 1995 Carnegie-Mellon University.
6 * All rights reserved.
7 *
8 * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
9 *	   added by Chris Demetriou.
10 *
11 * Permission to use, copy, modify and distribute this software and
12 * its documentation is hereby granted, provided that both the copyright
13 * notice and this permission notice appear in all copies of the
14 * software, derivative works or modified versions, and any portions
15 * thereof, and that both notices appear in supporting documentation.
16 *
17 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
18 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
19 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
20 *
21 * Carnegie Mellon requests users of this software to return to
22 *
23 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
24 *  School of Computer Science
25 *  Carnegie Mellon University
26 *  Pittsburgh PA 15213-3890
27 *
28 * any improvements or extensions that they make and grant Carnegie the
29 * rights to redistribute these changes.
30 */
31
32#include "SYS.h"
33
/*
 * Role names for the argument registers.  Following the argument order
 * of the prototype bcopy(from, to, len): the source pointer is a0, the
 * destination pointer is a1 and the byte count is a2.  The routine
 * updates all three in place while copying.
 */
34#define	SRCREG		a0
35#define	DSTREG		a1
36#define	SIZEREG		a2
37
38/*
39 * Copy bytes.
40 *
41 * void bcopy(char *from, char *to, size_t len);
42 *
43 * No matter how invoked, the source and destination registers
44 * are used for calculation.  There's no point in copying them to "working"
45 * registers, since the code uses their values "in place," and
46 * copying them would be slower.
47 */
48
/*
 * bcopy: copy SIZEREG bytes from SRCREG to DSTREG.  The regions may
 * overlap.  No result register (v0) is written.
 *
 * "Word" in the comments below means an 8-byte quadword.  ldq_u/stq_u
 * ignore the low three address bits, so they always access the aligned
 * quadword that contains the given address.
 *
 * Paths through the routine:
 *   entry                      zero-length check, overlap check
 *   bcopy_all_aligned          src and dst have the same offset within
 *                              a quadword: whole-word forward copy
 *   bcopy_different_alignment  offsets differ: each dst word is built
 *                              from two src words with extql/extqh
 *   bcopy_da_finish            8 bytes or fewer: one merged store pair
 *   bcopy_overlap              dst starts inside the source region:
 *                              copy from the end towards the start
 *
 * Registers written: a0-a4 and t0-t7.
 * LEAF, RET and END_WEAK are macros from the included headers (their
 * definitions are not visible in this file).
 */
49LEAF(bcopy,3)
50	/* Check for zero length */
51	beq	SIZEREG,bcopy_done
52
53	/* Check for overlap */
	/*
	 * t5 = dst - src as an unsigned value.  If it is below len, dst
	 * starts inside [src, src+len) (this includes dst == src), so the
	 * backwards copy is used.
	 */
54	subq	DSTREG,SRCREG,t5
55	cmpult	t5,SIZEREG,t5
56	bne	t5,bcopy_overlap
57
58	/* a3 = end address */
	/* i.e. src + len, one past the last source byte */
59	addq	SRCREG,SIZEREG,a3
60
61	/* Get the first word */
	/* t2 = the quadword containing the first source byte */
62	ldq_u	t2,0(SRCREG)
63
64	/* Do they have the same alignment? */
	/* t0 = (src ^ dst) & 7 is nonzero when the offsets differ; t1 = dst & 7 */
65	xor	SRCREG,DSTREG,t0
66	and	t0,7,t0
67	and	DSTREG,7,t1
68	bne	t0,bcopy_different_alignment
69
70	/* src & dst have same alignment */
71	beq	t1,bcopy_all_aligned
72
	/*
	 * Shared nonzero offset: treat the copy as starting at the quadword
	 * boundary below it.  len grows by the offset, and the first word to
	 * store is built from the existing dst bytes below the offset (mskql)
	 * and the src bytes from the offset upward (mskqh).
	 */
73	ldq_u	t3,0(DSTREG)
74	addq	SIZEREG,t1,SIZEREG
75	mskqh	t2,SRCREG,t2
76	mskql	t3,SRCREG,t3
77	or	t2,t3,t2
78
79	/* Dst is 8-byte aligned */
80
81bcopy_all_aligned:
82	/* If at most 8 bytes remain, skip the loop */
	/*
	 * t0 = (len - 1) & ~7: bytes stored by the loop (every word except
	 * the last one).  SIZEREG = len & 7: bytes used in the last word,
	 * where 0 means the last word is a full 8 bytes.
	 */
83	subq	SIZEREG,1,t0
84	and	SIZEREG,7,SIZEREG
85	bic	t0,7,t0
86	beq	t0,bcopy_samealign_lp_end
87
88bcopy_samealign_lp:
	/* On entry to each iteration t2 holds the next word to store */
89	stq_u	t2,0(DSTREG)
90	addq	DSTREG,8,DSTREG
91	ldq_u	t2,8(SRCREG)
92	subq	t0,8,t0
93	addq	SRCREG,8,SRCREG
94	bne	t0,bcopy_samealign_lp
95
96bcopy_samealign_lp_end:
97	/* Partial last word: merge it below; full last word: store it as is */
	/*
	 * Also the shared tail for bcopy_da_finish1, which arrives with the
	 * last word in t2 and the same meaning of DSTREG and SIZEREG.
	 */
98	bne	SIZEREG,bcopy_small_left
99	stq_u	t2,0(DSTREG)
100	RET
101
102bcopy_small_left:
	/*
	 * Store only the low SIZEREG bytes of t2: t4 keeps those bytes,
	 * t3 keeps the existing dst bytes from position SIZEREG upward.
	 */
103	mskql	t2,SIZEREG,t4
104	ldq_u	t3,0(DSTREG)
105	mskqh	t3,SIZEREG,t3
106	or	t4,t3,t4
107	stq_u	t4,0(DSTREG)
108	RET
109
110bcopy_different_alignment:
111	/*
112	 * this is the fun part
113	 */
	/*
	 * a3 = src + len again (same value as computed at entry; neither
	 * register has changed on this path).  8 bytes or fewer go to
	 * bcopy_da_finish.  t1 is still dst & 7; if it is zero dst is
	 * already aligned and the head fix-up is skipped.
	 */
114	addq	SRCREG,SIZEREG,a3
115	cmpule	SIZEREG,8,t0
116	bne	t0,bcopy_da_finish
117
118	beq	t1,bcopy_da_noentry
119
120	/* Do the initial partial word */
	/*
	 * t0 = (-dst) & 7: bytes from dst up to the next quadword boundary.
	 * extql/extqh on the two source words yield the 8 bytes starting at
	 * src; insql shifts them up to dst's offset, mskql keeps the dst
	 * bytes below that offset, and the merged word is stored.  Then src
	 * and dst advance by t0, len shrinks by t0, and t2 is reloaded with
	 * the word containing the new src.
	 */
121	subq	zero,DSTREG,t0
122	and	t0,7,t0
123	ldq_u	t3,7(SRCREG)
124	extql	t2,SRCREG,t2
125	extqh	t3,SRCREG,t3
126	or	t2,t3,t5
127	insql	t5,DSTREG,t5
128	ldq_u	t6,0(DSTREG)
129	mskql	t6,DSTREG,t6
130	or	t5,t6,t5
131	stq_u	t5,0(DSTREG)
132	addq	SRCREG,t0,SRCREG
133	addq	DSTREG,t0,DSTREG
134	subq	SIZEREG,t0,SIZEREG
135	ldq_u	t2,0(SRCREG)
136
137bcopy_da_noentry:
	/*
	 * Same counters as the aligned case: t0 = (len - 1) & ~7 bytes for
	 * the loop, SIZEREG = len & 7 bytes in the last word (0 = full word).
	 */
138	subq	SIZEREG,1,t0
139	bic	t0,7,t0
140	and	SIZEREG,7,SIZEREG
141	beq	t0,bcopy_da_finish2
142
143bcopy_da_lp:
	/*
	 * Two copies of the loop body per pass.  Each one combines the
	 * previously loaded source word with a newly loaded one, so t2 and
	 * t3 swap roles between the two halves.  SRCREG is advanced by 8
	 * before the extracts; its low three bits, which select the shift,
	 * do not change.  dst is aligned here, hence plain stq.
	 */
144	ldq_u	t3,7(SRCREG)
145	addq	SRCREG,8,SRCREG
146	extql	t2,SRCREG,t4
147	extqh	t3,SRCREG,t5
148	subq	t0,8,t0
149	or	t4,t5,t5
150	stq	t5,0(DSTREG)
151	addq	DSTREG,8,DSTREG
	/* Leaving here, the most recently loaded source word is in t3 */
152	beq	t0,bcopy_da_finish1
153	ldq_u	t2,7(SRCREG)
154	addq	SRCREG,8,SRCREG
155	extql	t3,SRCREG,t4
156	extqh	t2,SRCREG,t5
157	subq	t0,8,t0
158	or	t4,t5,t5
159	stq	t5,0(DSTREG)
160	addq	DSTREG,8,DSTREG
161	bne	t0,bcopy_da_lp
162
163bcopy_da_finish2:
164	/* Do the last new word */
	/*
	 * Reached with the most recently loaded source word in t2 (loop not
	 * entered, or left after its second half); move it to t3, where
	 * bcopy_da_finish1 expects it.
	 */
165	mov	t2,t3
166
167bcopy_da_finish1:
168	/* Do the last partial word */
	/*
	 * t2 = word containing the last source byte (a3 - 1).  Combine it
	 * with t3 to form the final dst word in t2, then use the common
	 * tail, which stores either all of it or its low SIZEREG bytes.
	 */
169	ldq_u	t2,-1(a3)
170	extql	t3,SRCREG,t3
171	extqh	t2,SRCREG,t2
172	or	t2,t3,t2
173	br	zero,bcopy_samealign_lp_end
174
175bcopy_da_finish:
176	/* Do the last word in the next source word */
	/*
	 * Copy of 1..8 bytes (zero length was rejected at entry).  On entry
	 * t2 holds the word containing the first source byte.  All source
	 * loads happen before the two stores at the end, which is why the
	 * short overlapping case (bcopy_ov_short) can use this path too.
	 *
	 * t2 = the 8 bytes starting at src, then split for dst's offset:
	 *   t2 = insql part (lands in dst's first quadword)
	 *   t3 = insqh part (spills into the following quadword)
	 */
177	ldq_u	t3,-1(a3)
178	extql	t2,SRCREG,t2
179	extqh	t3,SRCREG,t3
180	or	t2,t3,t2
181	insqh	t2,DSTREG,t3
182	insql	t2,DSTREG,t2
	/*
	 * Byte mask for the copy: start from all ones, t5 = its low
	 * (len & 7) bytes.  If t5 is nonzero it becomes the mask; otherwise
	 * len is 8 and the all-ones mask is kept.  The mask is then split
	 * the same way as the data: t4 = low part, t5 = high part.
	 */
183	lda	t4,-1(zero)
184	mskql	t4,SIZEREG,t5
185	cmovne	t5,t5,t4
186	insqh	t4,DSTREG,t5
187	insql	t4,DSTREG,t4
	/*
	 * a4 = dst + len.  t6 = dst's first quadword, t7 = the quadword
	 * containing the last dst byte.  Clear the bytes being replaced in
	 * each, restrict the data to the masks, and merge.
	 */
188	addq	DSTREG,SIZEREG,a4
189	ldq_u	t6,0(DSTREG)
190	ldq_u	t7,-1(a4)
191	bic	t6,t4,t6
192	bic	t7,t5,t7
193	and	t2,t4,t2
194	and	t3,t5,t3
195	or	t2,t6,t2
196	or	t3,t7,t3
	/*
	 * The store at DSTREG is issued last, so if both addresses name the
	 * same quadword its value is the one left in memory.
	 */
197	stq_u	t3,-1(a4)
198	stq_u	t2,0(DSTREG)
199	RET
200
201bcopy_overlap:
202	/*
203	 * Basically equivalent to previous case, only backwards.
204	 * Not quite as highly optimized
205	 */
	/* a3 = src + len and a4 = dst + len: one past the end of each region */
206	addq	SRCREG,SIZEREG,a3
207	addq	DSTREG,SIZEREG,a4
208
209	/* 8 bytes or fewer - don't worry about overlap */
210	cmpule	SIZEREG,8,t0
211	bne	t0,bcopy_ov_short
212
213	/* Possibly do a partial first word */
	/*
	 * "First" in copy order, i.e. the tail of the region.  t4 = a4 & 7
	 * is the number of dst bytes past the last quadword boundary.  Both
	 * end pointers drop by t4 (a4 becomes aligned) and len shrinks by
	 * t4.  t1 = the 8 source bytes at a3; its low t4 bytes are merged
	 * with the existing dst bytes from position t4 upward and stored.
	 */
214	and	a4,7,t4
215	beq	t4,bcopy_ov_nostart2
216	subq	a3,t4,a3
217	subq	a4,t4,a4
218	ldq_u	t1,0(a3)
219	subq	SIZEREG,t4,SIZEREG
220	ldq_u	t2,7(a3)
221	ldq	t3,0(a4)
222	extql	t1,a3,t1
223	extqh	t2,a3,t2
224	or	t1,t2,t1
225	mskqh	t3,t4,t3
226	mskql	t1,t4,t1
227	or	t1,t3,t1
228	stq	t1,0(a4)
229
230bcopy_ov_nostart2:
	/*
	 * t4 = len & ~7: bytes the loop copies as whole words.
	 * SIZEREG = len & 7: bytes left over at the start of the region.
	 */
231	bic	SIZEREG,7,t4
232	and	SIZEREG,7,SIZEREG
233	beq	t4,bcopy_ov_lp_end
234
235bcopy_ov_lp:
236	/* This could be more pipelined, but it doesn't seem worth it */
	/*
	 * One word per iteration, moving downward.  The two loads cover the
	 * 8 source bytes just below a3; after a3 is decremented, extql/extqh
	 * use its new value to align them, and the result goes to the
	 * aligned address a4.
	 */
237	ldq_u	t0,-8(a3)
238	subq	a4,8,a4
239	ldq_u	t1,-1(a3)
240	subq	a3,8,a3
241	extql	t0,a3,t0
242	extqh	t1,a3,t1
243	subq	t4,8,t4
244	or	t0,t1,t0
245	stq	t0,0(a4)
246	bne	t4,bcopy_ov_lp
247
248bcopy_ov_lp_end:
249	beq	SIZEREG,bcopy_done
250
	/*
	 * Leading bytes.  a4 is 8-byte aligned by now, so what is left to
	 * copy runs from dst up to the next quadword boundary.  t0 = the 8
	 * bytes starting at src, shifted up to dst's offset by insql; t2
	 * keeps the dst bytes below that offset.  SRCREG and DSTREG still
	 * hold their entry values on this path.
	 */
251	ldq_u	t0,0(SRCREG)
252	ldq_u	t1,7(SRCREG)
253	ldq_u	t2,0(DSTREG)
254	extql	t0,SRCREG,t0
255	extqh	t1,SRCREG,t1
256	or	t0,t1,t0
257	insql	t0,DSTREG,t0
258	mskql	t2,DSTREG,t2
259	or	t2,t0,t2
260	stq_u	t2,0(DSTREG)
261
262bcopy_done:
263	RET
264
265bcopy_ov_short:
	/*
	 * Overlapping copy of 8 bytes or fewer: load the first source word
	 * into t2, as bcopy_da_finish expects, and finish there.
	 */
266	ldq_u	t2,0(SRCREG)
267	br	zero,bcopy_da_finish
268
269	END_WEAK(bcopy)
270