/* xref: /minix/common/lib/libc/arch/arm/string/memmove.S (revision 0a6a1f1d) */
/*	$NetBSD: memmove.S,v 1.9 2015/03/26 13:34:51 justin Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers */
	eor     r0, r1, r0
	eor     r1, r0, r1
	eor     r0, r1, r0
#endif
	/*
	 * On entry (after the bcopy swap above): r0 = dst, r1 = src,
	 * r2 = len.  Non-overlapping buffers are handed off to memcpy;
	 * otherwise the copy direction is chosen so the overlap is safe
	 * (forward when src > dst, backward when dst > src).
	 */
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETc(eq)		/* Bail now if src/dst are the same */
	subhs	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	sublo	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 >= len) we have an overlap */
	bhs	PLT_SYM(_C_LABEL(memcpy))

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards

	moveq	r0, #0			/* Quick abort for len=0 */
	RETc(eq)

	push	{r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	push	{r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	pop	{r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	popeq	{r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	pop	{r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * r12 = src & 3 (1, 2 or 3).  Word-align the source pointer, keep
	 * the partial word in lr, and merge successive words with paired
	 * shifts (shift amounts depend on endianness via __ARMEB__).
	 */
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* undo word-align: restore byte pos */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* undo word-align: restore byte pos */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* undo word-align: restore byte pos */
	b	.Lmemmove_fl4

	/*
	 * Backward copy: dst > src, so walk down from the ends of both
	 * buffers.  Mirror image of the forward path using ldmdb/stmdb
	 * and pre-decrement addressing.
	 */
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	push	{r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	pop	{r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETc(eq)

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * Backward analogue of .Lmemmove_fsrcul: r3 carries the partial
	 * word and the shift directions are reversed relative to the
	 * forward case.
	 */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	push	{r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	pop	{r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* undo word-align: restore byte pos */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	push	{r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	pop	{r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* undo word-align: restore byte pos */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	push	{r4, r5, lr}

.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	pop	{r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* undo word-align: restore byte pos */
	b	.Lmemmove_bl4
#ifndef _BCOPY
END(memmove)
#else
END(bcopy)
#endif

/*
 * ARM EABI run-time ABI entry points: __aeabi_memmove{,4,8} are the
 * compiler-generated names for memmove (the 4/8 variants promise
 * 4-/8-byte-aligned pointers, which this implementation handles anyway).
 * NOTE(review): this guard tests BCOPY while the entry-point selection
 * above tests _BCOPY — confirm which macro the bcopy build defines.
 */
#if defined(__ARM_EABI__) && !defined(BCOPY) && !defined(_RUMPKERNEL)
STRONG_ALIAS(__aeabi_memmove, memmove)
STRONG_ALIAS(__aeabi_memmove4, memmove)
STRONG_ALIAS(__aeabi_memmove8, memmove)
#endif
