xref: /openbsd/lib/libc/arch/sh/string/bcopy.S (revision cecf84d4)
1/*	$OpenBSD: bcopy.S,v 1.2 2014/11/30 19:43:56 deraadt Exp $	*/
2/*	$NetBSD: memcpy.S,v 1.2 2006/04/22 23:53:47 uwe Exp $	*/
3
4/*
5 * Copyright (c) 2000 SHIMIZU Ryo <ryo@misakimix.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include <machine/asm.h>
32
/*
 * Symbolic names for the three registers bcopy operates on.
 * NOTE(review): r4/r5/r6 are presumably the first three argument
 * registers of the SH calling convention, i.e. bcopy(src, dst, len) --
 * confirm against the ABI document.
 */
33#define	REG_SRC		r4	/* source pointer, advanced as bytes are read */
34#define	REG_DST		r5	/* destination pointer, advanced as bytes are written */
35#define	REG_LEN		r6	/* number of bytes still to copy */
36
/*
 * bcopy: copy REG_LEN bytes from REG_SRC to REG_DST.
 *
 * Direction: returns at once when src == dst; copies forward (ascending
 * addresses) when src > dst (unsigned compare); otherwise jumps to
 * bcopy_overlap, which advances both pointers by len and copies backward
 * (descending addresses).
 *
 * Width: (src ^ dst) & 3 selects the widest unit that both pointers can
 * be aligned to at the same time:
 *   0       -> longword path (optional byte and word head, 4-byte loop,
 *              byte tail)
 *   2       -> word path     (optional byte head, 2-byte loop, byte tail)
 *   1 or 3  -> byte loop only
 *
 * r0 and r1 are used as scratch.  bt/s, bf/s, bra and rts are delayed
 * branches: the instruction written right after them (the delay slot)
 * executes before control reaches the branch target, so the order of
 * instructions around every branch below is significant.
 */
37ENTRY(bcopy)
38	cmp/eq	REG_DST,REG_SRC	/* if ( src == dst ) return; */
39	bt/s	bcopy_return
40	cmp/hi	REG_DST,REG_SRC	/* (delay slot) T = ( src > dst ), unsigned */
41	bf/s	bcopy_overlap	/* if ( !(src > dst) ) copy backward */
42
43	mov	REG_SRC,r0	/* (delay slot) executed on both paths */
44	xor	REG_DST,r0
45	and	#3,r0
46	mov	r0,r1		/* r1 = (src ^ dst) & 3, reread at word_align */
47	tst	r0,r0		/* (src ^ dst) & 3         */
48	bf/s	word_align	/* nonzero: no common longword alignment */
49
50longword_align:
51	tst	REG_LEN,REG_LEN	/* (delay slot of bf/s above) if ( len==0 ) return;   */
52	bt/s	bcopy_return
53
54
55	mov	REG_SRC,r0	/* (delay slot) */
56	tst	#1,r0		/* if ( src & 1 )          */
57	bt	1f		/* src is even: no leading byte to copy */
58	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
59	add	#-1,REG_LEN	/*    len--;               */
60	mov.b	r0,@REG_DST
61	add	#1,REG_DST
621:
63
64
65	mov	#1,r0
66	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
67	bf/s	1f
68	mov	REG_SRC,r0	/* (delay slot) */
69	tst	#2,r0		/*      (src & 2) {        */
70	bt	1f
71	mov.w	@REG_SRC+,r0	/*        *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
72	add	#-2,REG_LEN	/*        len -= 2;                                              */
73	mov.w	r0,@REG_DST
74	add	#2,REG_DST	/* }                       */
751:
76
77
78	mov	#3,r1		/* r1 = 3, loop bound for the longword loop */
79	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
80	bf/s	no_align_delay
81	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) for no_align_delay */
822:
83	mov.l	@REG_SRC+,r0	/*   *((unsigned long*)dst)++ = *((unsigned long*)src)++;        */
84	add	#-4,REG_LEN	/*   len -= 4;                                                   */
85	mov.l	r0,@REG_DST
86	cmp/hi	r1,REG_LEN
87	bt/s	2b
88	add	#4,REG_DST	/* }  (delay slot: runs on every iteration) */
89
90	bra	no_align_delay	/* at most 3 bytes remain: finish bytewise */
91	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) */
92
93
94word_align:
95	mov	r1,r0		/* r0 = (src ^ dst) & 3, nonzero on this path */
96	tst	#1,r0
97	bf/s	no_align_delay	/* bit 0 differs: only a byte copy is possible */
98	tst	REG_LEN,REG_LEN	/* (delay slot) if ( len == 0 ) return; */
99	bt	bcopy_return
100
101
102	mov	REG_SRC,r0	/* if ( src & 1 )          */
103	tst	#1,r0
104	bt	1f
105	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
106	add	#-1,REG_LEN	/*    len--;               */
107	mov.b	r0,@REG_DST
108	add	#1,REG_DST
1091:
110
111
112	mov	#1,r1		/* r1 = 1, loop bound for the word loop */
113	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
114	bf/s	no_align_delay
115	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) for no_align_delay */
1162:
117	mov.w	@REG_SRC+,r0	/*   *((unsigned short*)dst)++ = *((unsigned short*)src)++;      */
118	add	#-2,REG_LEN	/*   len -= 2;                                                   */
119	mov.w	r0,@REG_DST
120	cmp/hi	r1,REG_LEN
121	bt/s	2b
122	add	#2,REG_DST	/* }  (delay slot: runs on every iteration) */
123
124
125no_align:
126	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
/* Every branch to no_align_delay has already set T = ( len == 0 ). */
127no_align_delay:
128	bt	bcopy_return
1291:
130	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
131	add	#-1,REG_LEN	/*    len--;               */
132	mov.b	r0,@REG_DST
133	tst	REG_LEN,REG_LEN
134	bf/s	1b
135	add	#1,REG_DST	/* }  (delay slot: runs on every iteration) */
136bcopy_return:
137	rts
138	nop			/* (delay slot of rts) */
139
140
/*
 * Backward copy, entered when src > dst is false.  Both pointers are first
 * moved to one past the end of their regions; every transfer below then
 * steps the pointers down before (or while) accessing memory.
 */
141bcopy_overlap:
142	add	REG_LEN,REG_SRC	/* src += len; */
143	add	REG_LEN,REG_DST	/* dst += len; */
144
145	mov	REG_SRC,r0
146	xor	REG_DST,r0
147	and	#3,r0
148	mov	r0,r1		/* r1 = (src ^ dst) & 3 of the end pointers */
149	tst	r0,r0		/* (src ^ dst) & 3         */
150	bf/s	ov_word_align	/* nonzero: no common longword alignment */
151
152ov_longword_align:
153	tst	REG_LEN,REG_LEN	/* (delay slot of bf/s above) if ( len==0 ) return;   */
154	bt/s	bcopy_return
155
156
157	mov	REG_SRC,r0	/* (delay slot) */
158	tst	#1,r0		/* if ( src & 1 )          */
159	bt	1f
160	add	#-1,REG_SRC	/*    *--dst = *--src;     */
161	mov.b	@REG_SRC,r0
162	mov.b	r0,@-REG_DST
163	add	#-1,REG_LEN	/*    len--;               */
1641:
165
166
167	mov	#1,r0
168	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
169	bf/s	1f
170	mov	REG_SRC,r0	/* (delay slot) */
171	tst	#2,r0		/*      (src & 2) {        */
172	bt	1f
173	add	#-2,REG_SRC	/*        *--((unsigned short*)dst) = *--((unsigned short*)src); */
174	mov.w	@REG_SRC,r0
175	add	#-2,REG_LEN	/*        len -= 2;                                              */
176	mov.w	r0,@-REG_DST	/* }                       */
1771:
178
179
180	mov	#3,r1		/* r1 = 3, loop bound for the longword loop */
181	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
182	bf/s	ov_no_align_delay
183	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) for ov_no_align_delay */
1842:
185	add	#-4,REG_SRC
186	mov.l	@REG_SRC,r0	/*   *--((unsigned long*)dst) = *--((unsigned long*)src);        */
187	add	#-4,REG_LEN	/*   len -= 4;                                                   */
188	cmp/hi	r1,REG_LEN
189	bt/s	2b
190	mov.l	r0,@-REG_DST	/* }  (delay slot: the store runs on every iteration) */
191
192	bra	ov_no_align_delay	/* at most 3 bytes remain: finish bytewise */
193	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) */
194
195
196ov_word_align:
197	mov	r1,r0		/* r0 = (src ^ dst) & 3, nonzero on this path */
198	tst	#1,r0
199	bf/s	ov_no_align_delay	/* bit 0 differs: only a byte copy is possible */
200	tst	REG_LEN,REG_LEN	/* (delay slot) if ( len == 0 ) return; */
201	bt	bcopy_return
202
203
204	mov	REG_SRC,r0	/* if ( src & 1 )          */
205	tst	#1,r0
206	bt	1f
207	add	#-1,REG_SRC
208	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
209	add	#-1,REG_LEN	/*    len--;               */
210	mov.b	r0,@-REG_DST
2111:
212
213
214	mov	#1,r1		/* r1 = 1, loop bound for the word loop */
215	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
216	bf/s	ov_no_align_delay
217	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) for ov_no_align_delay */
2182:
219	add	#-2,REG_SRC
220	mov.w	@REG_SRC,r0	/*   *--((unsigned short*)dst) = *--((unsigned short*)src);      */
221	add	#-2,REG_LEN	/*   len -= 2;                                                   */
222	cmp/hi	r1,REG_LEN
223	bt/s	2b
224	mov.w	r0,@-REG_DST	/* }  (delay slot: the store runs on every iteration) */
225
226
227ov_no_align:
228	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
/* Every branch to ov_no_align_delay has already set T = ( len == 0 ). */
229ov_no_align_delay:
230	bt	9f
2311:
232	add	#-1,REG_SRC
233	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
234	add	#-1,REG_LEN	/*    len--;               */
235	tst	REG_LEN,REG_LEN
236	bf/s	1b
237	mov.b	r0,@-REG_DST	/* }  (delay slot: the store runs on every iteration) */
2389:
239	rts
240	nop			/* (delay slot of rts) */
241