/* xref: /openbsd/sys/lib/libkern/arch/sh/memmove.S (revision 4cfece93) */
/*	$NetBSD: memcpy.S,v 1.2 2006/04/22 23:53:47 uwe Exp $	*/

/*
 * Copyright (c) 2000 SHIMIZU Ryo <ryo@misakimix.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * Register roles used by bcopy, memmove and memcpy in this file.
 * r4, r5 and r6 are read as-is on entry to memmove/memcpy (bcopy swaps
 * r4 and r5 first); r0 and r1 are used as scratch by the copy loops.
 */
#define	REG_DST0	r3	/* saved copy of the incoming dst; moved to r0 on return */
#define	REG_SRC		r5	/* source pointer, stepped as data is read */
#define	REG_DST		r4	/* destination pointer, stepped as data is written */
#define	REG_LEN		r6	/* number of bytes still to copy */

/*
 * bcopy entry point.
 *
 * Exchanges the contents of REG_DST (r4) and REG_SRC (r5) so that they
 * match the operand order memmove expects, then falls straight through
 * into memmove.  REG_LEN (r6) is left untouched.
 */
ENTRY(bcopy)
	/* swap registers, use DST0 as a temporary */
	mov	REG_DST,REG_DST0	/* tmp = r4 */
	mov	REG_SRC,REG_DST		/* r4  = r5 */
	mov	REG_DST0,REG_SRC	/* r5  = tmp */
	/* FALLTHROUGH into memmove, which reloads REG_DST0 itself */

/*
 * memmove entry point: pick the copy direction.
 *
 * In:  REG_DST = destination, REG_SRC = source, REG_LEN = byte count.
 *
 * If src is above dst (unsigned compare) execution continues into memcpy,
 * which copies with ascending addresses.  Otherwise (src <= dst, which
 * includes src == dst) control goes to bcopy_overlap, which copies with
 * descending addresses.
 */
ENTRY(memmove)
	mov	REG_DST,REG_DST0	/* keep the original dst for the return value */
	cmp/hi	REG_DST,REG_SRC		/* T = ( src > dst ), unsigned */
	bf/s	bcopy_overlap		/* T clear: src <= dst, copy backward */
	/*
	 * bf/s is a delayed branch and no instruction is placed after it
	 * here on purpose: its delay slot is whatever is assembled next.
	 * Presumably that is memcpy's first instruction
	 * (mov REG_DST,REG_DST0), which is harmless on either path --
	 * NOTE(review): confirm that ENTRY() in <machine/asm.h> emits
	 * nothing harmful (padding, prologue) ahead of it.
	 */

/*
 * memcpy entry point, and the forward half of memmove/bcopy.
 *
 * Copies REG_LEN bytes from REG_SRC to REG_DST with ascending addresses.
 * The incoming dst is saved in REG_DST0 and handed back in r0 at
 * bcopy_return.  r0 and r1 are scratch.
 *
 * Strategy, chosen from (src ^ dst) & 3:
 *   == 0          src and dst can be brought to the same 4-byte
 *                 alignment: copy a leading byte and/or halfword, then
 *                 longwords, then the tail bytes (longword_align).
 *   bit 0 clear   (value 2) only 2-byte alignment can be shared:
 *                 leading byte, then halfwords, then tail (word_align).
 *   bit 0 set     no shared alignment: byte loop only (no_align).
 *
 * bt/s, bf/s, bra and rts are delayed branches: the instruction written
 * after them runs whether or not the branch is taken.  Several delay
 * slots below hold the first instruction of the fall-through path or the
 * test consumed at the branch target, so the instruction order must not
 * be changed.
 */
ENTRY(memcpy)
	mov	REG_DST,REG_DST0	/* keep the original dst for the return value */
	cmp/eq	REG_DST,REG_SRC	/* if ( src == dst ) return; */
	bt/s	bcopy_return
	mov	REG_SRC,r0		/* (delay slot) start of r0 = src ^ dst */
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1			/* r1 = (src ^ dst) & 3, reused at word_align */
	tst	r0,r0		/* (src ^ dst) & 3         */
	bf/s	word_align		/* non-zero: alignments differ */
					/* delay slot is the tst at longword_align */

longword_align:
	tst	REG_LEN,REG_LEN	/* if ( len==0 ) return;   */
	bt/s	bcopy_return


	mov	REG_SRC,r0		/* (delay slot) r0 = src for the bit tests */
	tst	#1,r0		/* if ( src & 1 )          */
	bt	1f
	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
	add	#-1,REG_LEN
	mov.b	r0,@REG_DST
	add	#1,REG_DST
1:

	mov	#1,r0
	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
	bf/s	1f
	mov	REG_SRC,r0		/* (delay slot) r0 = src */
	tst	#2,r0		/*      (src & 2) {        */
	bt	1f
	mov.w	@REG_SRC+,r0	/*        *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
	add	#-2,REG_LEN	/*        len -= 2;                                              */
	mov.w	r0,@REG_DST
	add	#2,REG_DST	/* }                       */
1:


	mov	#3,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) for no_align_delay */
2:
	mov.l	@REG_SRC+,r0	/*   *((unsigned long*)dst)++ = *((unsigned long*)src)++;        */
	add	#-4,REG_LEN	/*   len -= 4;                                                   */
	mov.l	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	add	#4,REG_DST	/* }                       */

	bra	no_align_delay		/* copy the remaining 0..3 bytes */
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) */


word_align:
	mov	r1,r0			/* r0 = (src ^ dst) & 3, known non-zero here */
	tst	#1,r0			/* T = ( bit 0 clear ) */
	bf/s	no_align_delay		/* bit 0 set: byte copy only */
	tst	REG_LEN,REG_LEN	/* if ( len == 0 ) return; */
	bt	bcopy_return


	mov	REG_SRC,r0	/* if ( src & 1 )          */
	tst	#1,r0
	bt	1f
	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
	add	#-1,REG_LEN
	mov.b	r0,@REG_DST
	add	#1,REG_DST
1:


	mov	#1,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) for no_align_delay */
2:
	mov.w	@REG_SRC+,r0	/*   *((unsigned short*)dst)++ = *((unsigned short*)src)++;      */
	add	#-2,REG_LEN	/*   len -= 2;                                                   */
	mov.w	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	add	#2,REG_DST	/* }                       */


no_align:
	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
no_align_delay:
	/* entered with T = ( len == 0 ), set by the tst just executed */
	bt	bcopy_return
1:
	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
	add	#-1,REG_LEN	/*    len--;               */
	mov.b	r0,@REG_DST
	tst	REG_LEN,REG_LEN
	bf/s	1b
	add	#1,REG_DST	/* }                       */
bcopy_return:
	rts
	mov	REG_DST0,r0		/* (delay slot) return the original dst in r0 */

/*
 * Backward half of memmove/bcopy, entered from memmove when src <= dst.
 *
 * Both pointers are first moved to one past the end of their buffers and
 * the data is then copied with descending addresses.  Loads step REG_SRC
 * down with an explicit add before reading; stores use the pre-decrement
 * form @-REG_DST.  On exit r0 holds REG_DST0, the dst value saved by
 * memmove.  r0 and r1 are scratch.
 *
 * The alignment strategy mirrors the forward path, but is keyed on the
 * end addresses: (src ^ dst) & 3 == 0 selects the longword loop, value 2
 * the halfword loop, and an odd value the plain byte loop.
 *
 * bt/s, bf/s, bra and rts are delayed branches: the instruction written
 * after them runs whether or not the branch is taken, so the instruction
 * order below must not be changed.
 */
bcopy_overlap:
	add	REG_LEN,REG_SRC		/* src += len (one past the end) */
	add	REG_LEN,REG_DST		/* dst += len (one past the end) */

	mov	REG_SRC,r0
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1			/* r1 = (src ^ dst) & 3, reused at ov_word_align */
	tst	r0,r0		/* (src ^ dst) & 3         */
	bf/s	ov_word_align		/* non-zero: alignments differ */
					/* delay slot is the tst at ov_longword_align */

ov_longword_align:
	tst	REG_LEN,REG_LEN	/* if ( len==0 ) return;   */
	bt/s	bcopy_return


	mov	REG_SRC,r0		/* (delay slot) r0 = src for the bit tests */
	tst	#1,r0		/* if ( src & 1 )          */
	bt	1f
	add	#-1,REG_SRC	/*    *--dst = *--src;     */
	mov.b	@REG_SRC,r0
	mov.b	r0,@-REG_DST
	add	#-1,REG_LEN
1:


	mov	#1,r0
	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
	bf/s	1f
	mov	REG_SRC,r0		/* (delay slot) r0 = src */
	tst	#2,r0		/*      (src & 2) {        */
	bt	1f
	add	#-2,REG_SRC	/*        *--((unsigned short*)dst) = *--((unsigned short*)src); */
	mov.w	@REG_SRC,r0
	add	#-2,REG_LEN	/*        len -= 2;                                              */
	mov.w	r0,@-REG_DST	/* }                       */
1:


	mov	#3,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) for ov_no_align_delay */
2:
	add	#-4,REG_SRC
	mov.l	@REG_SRC,r0	/*   *--((unsigned long*)dst) = *--((unsigned long*)src);        */
	add	#-4,REG_LEN	/*   len -= 4;                                                   */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	mov.l	r0,@-REG_DST	/* }                       */

	bra	ov_no_align_delay	/* copy the remaining 0..3 bytes */
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) */


ov_word_align:
	mov	r1,r0			/* r0 = (src ^ dst) & 3, known non-zero here */
	tst	#1,r0			/* T = ( bit 0 clear ) */
	bf/s	ov_no_align_delay	/* bit 0 set: byte copy only */
	tst	REG_LEN,REG_LEN	/* if ( len == 0 ) return; */
	bt	bcopy_return


	mov	REG_SRC,r0	/* if ( src & 1 )          */
	tst	#1,r0
	bt	1f
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
	add	#-1,REG_LEN
	mov.b	r0,@-REG_DST
1:


	mov	#1,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN		/* (delay slot) T = ( len == 0 ) for ov_no_align_delay */
2:
	add	#-2,REG_SRC
	mov.w	@REG_SRC,r0	/*   *--((unsigned short*)dst) = *--((unsigned short*)src);      */
	add	#-2,REG_LEN	/*   len -= 2;                                                   */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	mov.w	r0,@-REG_DST	/* }                       */


ov_no_align:
	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
ov_no_align_delay:
	/* entered with T = ( len == 0 ), set by the tst just executed */
	bt	9f
1:
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
	add	#-1,REG_LEN	/*    len--;               */
	tst	REG_LEN,REG_LEN
	bf/s	1b
	mov.b	r0,@-REG_DST	/* }                       */
9:
	rts
	mov	REG_DST0,r0		/* (delay slot) return the original dst in r0 */

248