/*-
 * Copyright (c) 2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: strcpy_arm.S,v 1.6 2017/01/14 03:35:21 christos Exp $")

#ifdef STRLCPY
#ifdef _LIBC
WEAK_ALIAS(strlcpy, _strlcpy)
#  define	FUNCNAME	_strlcpy
# else
#  define	FUNCNAME	strlcpy
# endif
#elif defined(STRNCPY)
# ifdef _LIBC
WEAK_ALIAS(strncpy, _strncpy)
#  define	FUNCNAME	_strncpy
# else
#  define	FUNCNAME	strncpy
# endif
#else
# ifdef _LIBC
WEAK_ALIAS(strcpy, _strcpy)
#  define	FUNCNAME	_strcpy
# else
#  define	FUNCNAME	strcpy
# endif
#endif

#ifdef __ARMEL__
#define	lslo	lsr		/* shift to lower address */
#define	lshi	lsl		/* shift to higher address */
#define	BYTE0	0x000000ff
#define	BYTE1	0x0000ff00
#define	BYTE2	0x00ff0000
#define	BYTE3	0xff000000
#else
#define	lslo	lsl		/* shift to lower address */
#define	lshi	lsr		/* shift to higher address */
#define	BYTE0	0xff000000
#define	BYTE1	0x00ff0000
#define	BYTE2	0x0000ff00
#define	BYTE3	0x000000ff
#endif

/*
 * On armv6 and later, to quickly determine if a word contains a NUL (0) byte,
 * we add 254 to each byte using the UQADD8 (unsigned saturating add 8)
 * instruction.  For every non-NUL byte, the result for that byte will become
 * 255.  For NUL, it will be 254.  When we complement the result of all 4 adds,
 * if the result is non-0 then we must have encountered a NUL.
 *
 * For earlier architectures, we just use tst on all 4 bytes.  There are other
 * algorithms to detect NULs but they take longer and use more instructions.
 */
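/*
 * A rough C sketch of the armv6 test above, kept here only as documentation
 * (it is not part of the build).  uqadd8() stands in for the UQADD8
 * instruction and the helper name is made up for illustration:
 *
 *	static inline int
 *	word_has_nul(uint32_t w)
 *	{
 *		return ~uqadd8(w, 0xfefefefe) != 0;
 *	}
 *
 * Every non-NUL byte saturates to 0xff and a NUL byte becomes 0xfe, so the
 * complement is non-zero exactly when the word contains a NUL; CLZ of the
 * (byte-reversed, on little-endian) complement then gives the bit position
 * of that NUL byte.
 */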

/*
 * char *strcpy(char *dst, const char *src);
 * char *strncpy(char *dst, const char *src, size_t len);
 * size_t strlcpy(char *dst, const char *src, size_t len);
 */

	.text
ENTRY(FUNCNAME)
#if defined(STRLCPY)
	cmp	r2, #1			/* is length 1 or less? */
	bhi	1f			/*   no, do normal */
	moveq	r3, #0			/*   = 1? load NUL */
	strbeq	r3, [r0]		/*   = 1? write NUL to dst */
	mov	r0, r1			/* move src to r0 */
	b	PLT_SYM(_C_LABEL(strlen)) /* and tailcall strlen */
1:
	sub	r2, r2, #1		/* leave one byte for NUL */
#endif
#if defined(STRNCPY)
	cmp	r2, #0			/* 0 length? */
	RETc(eq)			/*   yes, just return */
#endif
	push	{r4-r9}			/* save some registers */
#ifdef _ARM_ARCH_6
#ifdef _ARM_ARCH_7
	movw	r7, #0xfefe		/* magic constant; 254 in each byte */
#else
	mov	r7, #0xfe		/* put 254 in low byte */
	orr	r7, r7, r7, lsl #8	/* move to next byte */
#endif
	orr	r7, r7, r7, lsl #16	/* move to next halfword */
#endif

#if defined(STRLCPY)
	add	r6, r1, #1		/* save for return (deal with NUL) */
#else
	mov	r6, r0			/* save for return */
#endif

.Ldst_align:
	tst	r0, #3			/* check for dst alignment */
	beq	.Ldst_aligned		/*   ok, proceed to next check */
	ldrb	r5, [r1], #1		/* load a byte */
#if defined(STRNCPY)
	subs	r2, r2, #1		/* subtract out from count */
	bmi	.Ldst_full		/*   zero? the dst has no more room */
#endif
	strb	r5, [r0], #1		/* store a byte */
	teq	r5, #0			/* was it a NUL? */
	beq	.Lend_of_string		/*   yes, we are done */
#if defined(STRLCPY)
	subs	r2, r2, #1		/* subtract one from count */
	strbeq	r2, [r0], #1		/*    zero? write trailing NUL */
	beq	.Ldst_full		/*    zero? the dst has no more room */
#endif
	b	.Ldst_align		/* loop around for next byte */
.Ldst_aligned:
	tst	r1, #3			/* get the misalignment of src */
	bne	.Lincongruent		/*  !=? incongruent (slower) */

	/*   =?   congruent (faster) */

.Lcongruent:
#if defined(STRLCPY)
	add	r6, r6, #3		/* compensate for word post-inc */
#endif
	b	.Lcongruent_mainloop_load
.Lcongruent_mainloop:
#if defined(STRLCPY) || defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from the count */
	bmi	.Lno_more_room
#endif
	str	r5, [r0], #4		/* store word into dst */
#if defined(STRLCPY)
	beq	.Lno_more_room		/*   count is 0? no room in dst */
#endif
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/*   count is 0? no room in dst */
#endif
.Lcongruent_mainloop_load:
	ldr	r5, [r1], #4		/* load word from source */
#if defined(_ARM_ARCH_6)
	uqadd8	r3, r5, r7		/* magic happens here */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lcongruent_mainloop	/*    yes, no NULs, do it again */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/*   no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/*   no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/*   no, does byte 3 contain a NUL? */
	bne	.Lcongruent_mainloop	/*    no, no NULs, do it again */
#endif
#if defined(STRLCPY) && 0
	sub	r1, r1, #3		/* back up src pointer */
#endif
#if defined(_ARM_ARCH_6)
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ needs BE data */
#endif
	clz	r3, r3			/* count leading zeros */
#else
	mov	r3, #0			/* assume NUL is in byte 0 */
	tst	r5, #BYTE0		/* is NUL in byte 0? */
	beq	.Lcongruent_last_bytes	/*   yes, done searching. */
	mov	r3, #8			/* assume NUL is in byte 1 */
	tst	r5, #BYTE1		/* is NUL in byte 1? */
	beq	.Lcongruent_last_bytes	/*   yes, done searching. */
	mov	r3, #16			/* assume NUL is in byte 2 */
	tst	r5, #BYTE2		/* is NUL in byte 2? */
#if !defined(STRLCPY)
	beq	.Lcongruent_last_bytes	/*   yes, done searching. */
	mov	r3, #24			/* NUL must be in byte 3 */
#else
	movne	r3, #24			/*    no, then NUL is in byte 3 */
#endif
#endif /* _ARM_ARCH_6 */
#if defined(STRLCPY)
.Lcongruent_last_bytes:
#endif
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* position to point at NUL + 4 */
#endif
	b	.Llast_bytes		/* store the last bytes */


.Lincongruent:
	/*
	 * At this point dst is word aligned but src is not.  Read bytes
	 * from src until it is read aligned.
	 */
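	/*
	 * A rough little-endian sketch of the merge done below (big-endian
	 * mirrors it with the opposite shifts); the names here are only for
	 * illustration and do not appear in the code:
	 *
	 *	m    = src & 3;                       // misalignment in bytes
	 *	keep = word >> (8 * m);               // bytes still needed (lslo)
	 *	out  = keep | (next << (32 - 8 * m)); // splice in next load (lshi)
	 *
	 * so each aligned load from src supplies the tail of one dst word
	 * and the head of the next one.
	 */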
	and	r3, r1, #3		/* extract misalignment */
	mov	r9, r3, lsl #3		/* calculate discard shift */
	rsb	r8, r9, #32		/* calculate insertion shift */
#if defined(STRLCPY)
	add	r6, r6, #3		/* compensate for word post-inc */
#endif
	bic	r1, r1, #3		/* word align src */
	ldr	r5, [r1], #4		/* load word from src */
	mov	r4, r5, lslo r9		/* discard lo bytes from src */
	tst	r4, #BYTE0		/* does byte 0 contain a NUL? */
#if defined(STRNCPY)
	beq	.Lend_of_string		/*   yes, zero fill rest of string */
#else
	moveq	r3, r9			/*   yes, set offset */
	beq	.Lincongruent_end_of_string /*   yes, deal with the last bytes */
#endif
	/*
	 * So that our test for NULs below does not generate false positives,
	 * fill the bytes in the word we don't want to match with all 1s.
	 */
	mvn	r3, #0			/* create a mask */
	mov	r3, r3, lslo r8		/* zero out bytes being kept */
	orr	r5, r5, r3		/* merge src and mask */
#ifdef _ARM_ARCH_6
	uqadd8	r3, r5, r7		/* NUL detection magic happens */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lincongruent_mainloop_load /*   yes, no NUL encountered! */
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ wants BE input */
#endif
	clz	r3, r3			/* count leading zeros */
#else
	/*
	 * We already tested for byte 0 above so we don't need to do it again.
	 */
	mov	r3, #24			/* assume NUL is in byte 3 */
	tst	r5, #BYTE1		/* did we find a NUL in byte 1? */
	subeq	r3, r3, #8		/*   yes, decrement byte position */
	tstne	r5, #BYTE2		/*   no, did we find a NUL in byte 2? */
	subeq	r3, r3, #8		/*   yes, decrement byte position */
	tstne	r5, #BYTE3		/*   no, did we find a NUL in byte 3? */
	bne	.Lincongruent_mainloop_load /*   no, no NUL encountered! */
#endif
	mov	r5, r4			/* discard already dealt with bytes */
.Lincongruent_end_of_string:
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* then add offset to NUL */
#endif
	sub	r3, r3, r9		/* adjust NUL offset */
	b	.Llast_bytes		/* NUL encountered! finish up */

#if defined(STRLCPY) || defined(STRNCPY)
.Lincongruent_no_more_room:
	mov	r5, r4			/* move data to be stored to r5 */
	b	.Lno_more_room		/* fill remaining space */
#endif /* STRLCPY || STRNCPY */

	/*
	 * At this point both dst and src are word aligned and r4 contains
	 * partial contents from src.
	 */
.Lincongruent_mainloop:
	orr	r4, r4, r5, lshi r8	/* put new src data into dst word */
#if defined(STRLCPY) || defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bmi	.Lincongruent_no_more_room /*   count < 0? dst will be full */
#endif
	str	r4, [r0], #4		/* store word in dst */
#if defined(STRLCPY)
	beq	.Lno_more_room		/*   space left is 0? stop copy */
#endif
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/*   space left is 0? stop copy */
#endif
	mov	r4, r5, lslo r9		/* move rest of src into dst word */
.Lincongruent_mainloop_load:
	ldr	r5, [r1], #4		/* read src */
#ifdef _ARM_ARCH_6
	uqadd8	r3, r5, r7		/* magic happens here */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lincongruent_mainloop	/*   yes, no NUL encountered! */
	/*
	 * We fall into this since we encountered a NUL.  At this point we
	 * have from 1-5 bytes (excluding the trailing NUL) to write.
	 */
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ works on BE data */
#endif
	clz	r3, r3			/* count leading zeroes */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/*   no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/*   no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/*   no, does byte 3 contain a NUL? */
	bne	.Lincongruent_mainloop	/*   no, no NUL encountered! */
	/*
	 * We fall into this since we encountered a NUL.  At this point we
	 * have from 1-5 bytes (excluding the trailing NUL) to write.
	 */
	mov	r3, #0			/* assume a NUL is in byte 0 */
	tst	r5, #BYTE0		/* is there a NUL in byte 0? */
	beq	1f			/*   yes, found a NUL! */
	mov	r3, #8			/* assume a NUL is in byte 1 */
	tst	r5, #BYTE1		/* is there a NUL in byte 1? */
	beq	1f			/*   yes, found a NUL! */
	tst	r5, #BYTE2		/* is there a NUL in byte 2? */
	moveq	r3, #16			/*   yes, mark its position */
	movne	r3, #24			/*   no, it must be in byte 3 */
1:
#endif
	orr	r4, r4, r5, lshi r8	/* merge new and old src words */
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* adjust src to point to NUL */
#endif
	add	r3, r3, r8		/* add remainder bytes worth */
	cmp	r3, #32			/* do we have at least one word to write? */
	movlt	r5, r4			/*   no, move source bytes to expected reg */
	blt	.Llast_bytes		/*   no, deal with them */
#if defined(STRLCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bpl	1f			/*   we have space for at least 4 */
	/*
	 * Since the space just went minus, we don't have enough room to
	 * write all 4 bytes.  In fact, the most we can write is 3 so just
	 * lie and say we have 3 bytes to write and discard the rest.
	 */
	add	r2, r2, #4		/* add 4 back */
	mov	r3, #24			/* say we have 3 bytes */
	mov	r5, r4			/* discard the bytes we can't store */
	b	.Llast_bytes		/* and treat this as our last word */
1:
#elif defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bmi	.Lincongruent_no_more_room /*   count < 0? dst will be full */
#endif
	str	r4, [r0], #4		/* store dst word */
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/*   space left is 0? stop copy */
#endif
#if defined(STRLCPY)
	bne	1f			/* we still have space remaining */
	strb	r2, [r0]		/* write final NUL */
	b	.Lend_of_string		/* we are done */
1:
#endif
	/*
	 * Subtract the 32 bits just written from the number of bits left
	 * to write.  If 0 bits are left and not doing strncpy, just write
	 * the trailing NUL and be done.
	 */
	subs	r3, r3, #32		/* we wrote one word */
#if !defined(STRNCPY)
	bne	1f			/* no more data? */
	strb	r3, [r0]		/* write final NUL */
	b	.Lend_of_string		/* we are done */
1:
#endif
	/*
	 * At this point after writing 4 bytes, we have 0 or 1 bytes left to
	 * write (excluding the trailing NUL).
	 */
	mov	r5, r5, lslo r9		/* get remainder of src */

	/* fall into .Llast_bytes */

#if !defined(STRLCPY)
.Lcongruent_last_bytes:
#endif
.Llast_bytes:
	/*
	 * r5 contains the last word and is in host byte order.
	 * r3 contains number of bits left to copy (0..31).
	 * r1 should point to the NUL + 4.
	 */
	bics	ip, r3, #7		/* truncate bits, is result 0? */
#if !defined(STRNCPY)
	bne	1f			/*   no, have to write some bytes */
	strb	ip, [r0]		/*   yes, write trailing NUL */
	b	.Lend_of_string		/*   yes, and we are at the end */
1:
#endif
#if defined(STRLCPY) || defined(STRNCPY)
	cmp	r2, ip, lsr #3		/* is there enough room? */
	movlt	ip, r2, lsl #3		/*   no, only fill remaining space */
#endif
	mvn	r3, #0			/* create a mask */
	mov	r3, r3, lshi ip		/* clear leading bytes */
	bic	r5, r5, r3		/* clear trailing bytes */
#if defined(STRNCPY)
	cmp	r2, #4			/* room for 4 bytes? */
	movge	ip, #32			/*   yes, we will write 4 bytes */
	bge	2f			/*   yes, and go do it */
	mvn	r3, #0			/* create a mask (again) */
	mov	ip, r2, lsl #3		/* remaining space bytes -> bits */
	mov	r3, r3, lshi ip		/* clear remaining bytes */
#elif defined(STRLCPY)
	cmp	r2, #3			/* do we have room for 3 bytes & NUL? */
	bge	2f			/*   yes, just clear out dst */
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#else
	cmp	ip, #24			/* are we writing 3 bytes & a NUL? */
	bge	2f			/*   yes, just overwrite dst */
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#endif /* !STRNCPY */
	ldr	r4, [r0]		/* fetch dst word */
	and	r4, r4, r3		/* preserve trailing bytes */
	orr	r5, r5, r4		/* merge dst with src */
2:	str	r5, [r0], #4		/* store last word */
#if defined(STRNCPY)
	subs	r2, r2, ip, lsr #3	/* subtract bytes cleared from count */
	beq	.Ldst_full_word_aligned
#endif
	b	.Lend_of_string

#if defined(STRLCPY) || defined(STRNCPY)
.Lno_more_room:
#if defined(STRLCPY)
	cmp	r2, #-1			/* tried to write 3 bytes? */
	blt	1f			/*   less, partial word write */
	cmp	r2, #0			/* no space left? */
	strbeq	r2, [r0]		/* write the final NUL */
	bicne	r5, r5, #BYTE3		/* clear trailing NUL */
	strne	r5, [r0]		/* write last word */
	b	.Ldst_full_word_aligned	/* the dst buffer is full */
1:
#endif /* STRLCPY */
	add	r2, r2, #4		/* restore remaining space */
	ldr	r4, [r0]		/* load dst */
	mvn	r3, #0			/* create a mask */
	mov	r2, r2, lsl #3		/* bytes -> bits */
	mov	r3, r3, lshi r2		/* clear leading bytes */
	bic	r5, r5, r3		/* clear trailing bytes from src */
#if defined(STRLCPY)
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#endif /* STRLCPY */
	and	r4, r4, r3		/* preserve trailing bytes in dst */
	orr	r4, r4, r5		/* merge src with dst */
	str	r4, [r0], #4		/* write last word */
	b	.Ldst_full_word_aligned
#endif /* STRLCPY || STRNCPY */

#if defined(STRLCPY)
	/*
	 * Destination was filled (and NUL terminated).
	 * All that's left is to count the number of bytes left in src.
	 */
.Ldst_full:
1:	tst	r1, #3			/* src word aligned? */
	beq	2f			/*   yes, so do it word by word */
	ldrb	r5, [r1], #1		/* load next byte */
	teq	r5, #0			/* is it a NUL? */
	bne	1b			/*   no, check alignment */
	b	.Lend_of_string		/* and return */
2:	add	r6, r6, #3		/* compensate for post-inc */
.Ldst_full_word_aligned:
3:	ldr	r5, [r1], #4		/* load word from src */
#ifdef _ARM_ARCH_6
	uqadd8	r5, r5, r7		/* perform NUL magic */
	mvns	r5, r5			/* is the complemented result 0? */
	beq	3b			/*   yes, no NUL so get next word */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/*   no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/*   no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/*   no, does byte 3 contain a NUL? */
	bne	3b			/*   no, no NUL encountered! */
#endif
#ifdef _ARM_ARCH_6
#ifdef __ARMEL__
	rev	r5, r5			/* CLZ needs BE data */
#endif
	clz	r5, r5			/* count leading zeros */
	add	r1, r1, r5, lsr #3	/* add offset of NUL to src pointer */
#else
	tst	r5, #BYTE0		/* is there a NUL in byte 0? */
	beq	4f			/*   yes, don't check any further */
	add	r1, r1, #1		/*   no, advance src pointer by 1 */
	tst	r5, #BYTE1		/* is there a NUL in byte 1? */
	beq	4f			/*   yes, don't check any further */
	add	r1, r1, #1		/*   no, advance src pointer by 1 */
	tst	r5, #BYTE2		/* is there a NUL in byte 2? */
	addne	r1, r1, #1		/*   no, it must be in byte 3 */
4:
#endif /* _ARM_ARCH_6 */
.Lend_of_string:
	sub	r0, r1, r6		/* subtract start from finish */
	pop	{r4-r9}			/* restore registers */
	RET
#elif defined(STRNCPY)
.Lend_of_string:
	teq	r2, #0			/* any bytes left to zero? */
	beq	3f			/*   no, just return. */
	mov	r1, #0			/*   yes, prepare to zero */
	cmp	r2, #16			/* some, but not a lot? */
	ble	1f
	mov	r4, lr			/* preserve lr */
	bl	PLT_SYM(_C_LABEL(memset)) /*   yes, and let memset do it */
	mov	lr, r4			/* restore lr */
	b	3f			/* return */
1:	add	ip, r0, r2		/* calculate stopping point */
2:	strb	r1, [r0], #1		/* clear a byte */
	cmp	r0, ip			/* done? */
	blt	2b			/*   no, clear next byte */
3:	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
.Ldst_full:
.Ldst_full_word_aligned:
	/*
	 * Destination was filled (but not NUL terminated).
	 * All that's left is to return the start of dst.
	 */
	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
#else
.Lend_of_string:
	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
#endif
END(FUNCNAME)