/* xref: /openbsd/sys/lib/libkern/arch/sh/memcpy.S (revision 898184e3) */
/*	$NetBSD: memcpy.S,v 1.2 2006/04/22 23:53:47 uwe Exp $	*/

/*
 * Copyright (c) 2000 SHIMIZU Ryo <ryo@misakimix.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * This one source builds memcpy, memmove or bcopy depending on which of
 * MEMCOPY, MEMMOVE or BCOPY is defined; with none defined it builds memcpy.
 */
#if !defined(MEMCOPY) && !defined(MEMMOVE) && !defined(BCOPY)
#define MEMCOPY
#endif

/*
 * Register roles.  memcpy/memmove take (dst, src, len) and bcopy takes
 * (src, dst, len), so the source and destination registers swap between
 * the two flavours.  REG_DST0 exists only for memcpy/memmove: it keeps the
 * original dst so that it can be handed back in r0 on return.
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#define	REG_DST0	r3
#define	REG_SRC		r5
#define	REG_DST		r4
#else
#define	REG_SRC		r4
#define	REG_DST		r5
#endif

#define	REG_LEN		r6	/* byte count */

/*
 * memcpy / memmove / bcopy (with ovbcopy as an alternate entry).
 *
 * In:   REG_SRC = source, REG_DST = destination, REG_LEN = byte count.
 * Out:  when REG_DST0 is defined (memcpy/memmove) r0 = original destination;
 *       otherwise (bcopy) nothing is returned.
 * Uses: r0, r1 as scratch, REG_DST0 when defined, the T bit; REG_SRC,
 *       REG_DST and REG_LEN are consumed.
 *
 * Strategy:
 *   - src == dst: return immediately.
 *   - src >  dst: copy forward from the start.
 *   - src <  dst: copy backward from the end (bcopy_overlap), so an
 *     overlapping destination never clobbers source bytes not yet read.
 *   - (src ^ dst) & 3 == 0: byte/word copies bring src (and therefore dst)
 *     to a 4-byte boundary, then 32-bit copies, then a byte tail.
 *   - (src ^ dst) & 1 == 0: a byte copy brings both to a 2-byte boundary,
 *     then 16-bit copies, then a byte tail.
 *   - otherwise: plain byte loop.
 *
 * Note: bt/s, bf/s, bra and rts are delayed branches; the instruction that
 * follows each of them (marked "delay slot" below) executes before the
 * branch takes effect.  The instruction order here depends on that.
 */
#if defined(MEMCOPY)
ENTRY(memcpy)
#elif defined(MEMMOVE)
ENTRY(memmove)
#elif defined(BCOPY)
ENTRY(bcopy)
ALTENTRY(ovbcopy)
#endif
#ifdef REG_DST0
	mov	REG_DST,REG_DST0	/* keep original dst for the return value */
#endif
	cmp/eq	REG_DST,REG_SRC	/* if ( src == dst ) return; */
	bt/s	bcopy_return
	cmp/hi	REG_DST,REG_SRC	/* delay slot: T = ( src > dst ), unsigned */
	bf/s	bcopy_overlap	/* if ( src < dst ) copy backward */
	mov	REG_SRC,r0	/* delay slot */
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1		/* r1 = (src ^ dst) & 3, reused at word_align */
	tst	r0,r0		/* (src ^ dst) & 3         */
	bf/s	word_align

longword_align:
	tst	REG_LEN,REG_LEN	/* delay slot of bf/s above; if ( len==0 ) return; */
	bt/s	bcopy_return
	mov	REG_SRC,r0	/* delay slot */
	tst	#1,r0		/* if ( src & 1 )          */
	bt	1f
	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
	add	#-1,REG_LEN
	mov.b	r0,@REG_DST
	add	#1,REG_DST
1:

	mov	#1,r0
	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
	bf/s	1f
	mov	REG_SRC,r0	/* delay slot */
	tst	#2,r0		/*      (src & 2) {        */
	bt	1f
	mov.w	@REG_SRC+,r0	/*        *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
	add	#-2,REG_LEN	/*        len -= 2;                                              */
	mov.w	r0,@REG_DST
	add	#2,REG_DST	/* }                       */
1:

	mov	#3,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ) for no_align_delay */
2:
	mov.l	@REG_SRC+,r0	/*   *((unsigned long*)dst)++ = *((unsigned long*)src)++;        */
	add	#-4,REG_LEN	/*   len -= 4;                                                   */
	mov.l	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	add	#4,REG_DST	/* delay slot */
				/* }                       */

	bra	no_align_delay	/* copy the remaining 0..3 bytes */
	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ) */


word_align:
	mov	r1,r0		/* r0 = (src ^ dst) & 3    */
	tst	#1,r0
	bf/s	no_align_delay	/* parities differ: byte copy only */
	tst	REG_LEN,REG_LEN	/* delay slot: if ( len == 0 ) return; */
	bt	bcopy_return

	mov	REG_SRC,r0	/* if ( src & 1 )          */
	tst	#1,r0
	bt	1f
	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
	add	#-1,REG_LEN
	mov.b	r0,@REG_DST
	add	#1,REG_DST
1:

	mov	#1,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ) for no_align_delay */
2:
	mov.w	@REG_SRC+,r0	/*   *((unsigned short*)dst)++ = *((unsigned short*)src)++;      */
	add	#-2,REG_LEN	/*   len -= 2;                                                   */
	mov.w	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	add	#2,REG_DST	/* delay slot */
				/* }                       */


no_align:
	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
no_align_delay:			/* entered with T = ( len == 0 ) already set */
	bt	bcopy_return
1:
	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
	add	#-1,REG_LEN	/*    len--;               */
	mov.b	r0,@REG_DST
	tst	REG_LEN,REG_LEN
	bf/s	1b
	add	#1,REG_DST	/* delay slot */
				/* }                       */
bcopy_return:
	rts
#ifdef REG_DST0
	mov	REG_DST0,r0	/* delay slot: return the original dst */
#else
	nop			/* delay slot */
#endif


/*
 * Backward copy, used when src < dst.  Both pointers are first moved to
 * one past the end of their buffers and then walked downward.
 */
bcopy_overlap:
	add	REG_LEN,REG_SRC	/* src += len;             */
	add	REG_LEN,REG_DST	/* dst += len;             */

	mov	REG_SRC,r0
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1		/* r1 = (src ^ dst) & 3, reused at ov_word_align */
	tst	r0,r0		/* (src ^ dst) & 3         */
	bf/s	ov_word_align

ov_longword_align:
	tst	REG_LEN,REG_LEN	/* delay slot of bf/s above; if ( len==0 ) return; */
	bt/s	bcopy_return
	mov	REG_SRC,r0	/* delay slot */
	tst	#1,r0		/* if ( src & 1 )          */
	bt	1f
	add	#-1,REG_SRC	/*    *--dst = *--src;     */
	mov.b	@REG_SRC,r0
	mov.b	r0,@-REG_DST
	add	#-1,REG_LEN
1:

	mov	#1,r0
	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
	bf/s	1f
	mov	REG_SRC,r0	/* delay slot */
	tst	#2,r0		/*      (src & 2) {        */
	bt	1f
	add	#-2,REG_SRC	/*        *--((unsigned short*)dst) = *--((unsigned short*)src); */
	mov.w	@REG_SRC,r0
	add	#-2,REG_LEN	/*        len -= 2;                                              */
	mov.w	r0,@-REG_DST	/* }                       */
1:

	mov	#3,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ) for ov_no_align_delay */
2:
	add	#-4,REG_SRC
	mov.l	@REG_SRC,r0	/*   *--((unsigned long*)dst) = *--((unsigned long*)src);        */
	add	#-4,REG_LEN	/*   len -= 4;                                                   */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	mov.l	r0,@-REG_DST	/* delay slot: the store half of the copy */
				/* }                       */

	bra	ov_no_align_delay	/* copy the remaining 0..3 bytes */
	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ) */


ov_word_align:
	mov	r1,r0		/* r0 = (src ^ dst) & 3    */
	tst	#1,r0
	bf/s	ov_no_align_delay	/* parities differ: byte copy only */
	tst	REG_LEN,REG_LEN	/* delay slot: if ( len == 0 ) return; */
	bt	bcopy_return

	mov	REG_SRC,r0	/* if ( src & 1 )          */
	tst	#1,r0
	bt	1f
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
	add	#-1,REG_LEN
	mov.b	r0,@-REG_DST
1:

	mov	#1,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ) for ov_no_align_delay */
2:
	add	#-2,REG_SRC
	mov.w	@REG_SRC,r0	/*   *--((unsigned short*)dst) = *--((unsigned short*)src);      */
	add	#-2,REG_LEN	/*   len -= 2;                                                   */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	mov.w	r0,@-REG_DST	/* delay slot: the store half of the copy */
				/* }                       */


ov_no_align:
	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
ov_no_align_delay:		/* entered with T = ( len == 0 ) already set */
	bt	9f
1:
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
	add	#-1,REG_LEN	/*    len--;               */
	tst	REG_LEN,REG_LEN
	bf/s	1b
	mov.b	r0,@-REG_DST	/* delay slot: the store half of the copy */
				/* }                       */
9:
	rts
#ifdef REG_DST0
	mov	REG_DST0,r0	/* delay slot: return the original dst */
#else
	nop			/* delay slot */
#endif