xref: /illumos-gate/usr/src/lib/libc/sparc/gen/strncpy.S (revision 55fea89d)
1*5d9d9091SRichard Lowe/*
2*5d9d9091SRichard Lowe * CDDL HEADER START
3*5d9d9091SRichard Lowe *
4*5d9d9091SRichard Lowe * The contents of this file are subject to the terms of the
5*5d9d9091SRichard Lowe * Common Development and Distribution License (the "License").
6*5d9d9091SRichard Lowe * You may not use this file except in compliance with the License.
7*5d9d9091SRichard Lowe *
8*5d9d9091SRichard Lowe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*5d9d9091SRichard Lowe * or http://www.opensolaris.org/os/licensing.
10*5d9d9091SRichard Lowe * See the License for the specific language governing permissions
11*5d9d9091SRichard Lowe * and limitations under the License.
12*5d9d9091SRichard Lowe *
13*5d9d9091SRichard Lowe * When distributing Covered Code, include this CDDL HEADER in each
14*5d9d9091SRichard Lowe * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*5d9d9091SRichard Lowe * If applicable, add the following below this CDDL HEADER, with the
16*5d9d9091SRichard Lowe * fields enclosed by brackets "[]" replaced with your own identifying
17*5d9d9091SRichard Lowe * information: Portions Copyright [yyyy] [name of copyright owner]
18*5d9d9091SRichard Lowe *
19*5d9d9091SRichard Lowe * CDDL HEADER END
20*5d9d9091SRichard Lowe */
21*5d9d9091SRichard Lowe
22*5d9d9091SRichard Lowe/*
23*5d9d9091SRichard Lowe * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24*5d9d9091SRichard Lowe * Use is subject to license terms.
25*5d9d9091SRichard Lowe */
26*5d9d9091SRichard Lowe
27*5d9d9091SRichard Lowe	.file	"strncpy.s"
28*5d9d9091SRichard Lowe
29*5d9d9091SRichard Lowe/*
30*5d9d9091SRichard Lowe * strncpy(s1, s2, n)
31*5d9d9091SRichard Lowe *
32*5d9d9091SRichard Lowe * Copy string s2 to s1, truncating or null-padding to always copy n bytes;
33*5d9d9091SRichard Lowe * return s1.
34*5d9d9091SRichard Lowe *
35*5d9d9091SRichard Lowe * Fast assembler language version of the following C-program for strncpy
36*5d9d9091SRichard Lowe * which represents the `standard' for the C-library.
37*5d9d9091SRichard Lowe *
38*5d9d9091SRichard Lowe *	char *
39*5d9d9091SRichard Lowe *	strncpy(char *s1, const char *s2, size_t n)
40*5d9d9091SRichard Lowe *	{
41*5d9d9091SRichard Lowe *		char *os1 = s1;
42*5d9d9091SRichard Lowe *
43*5d9d9091SRichard Lowe *		n++;
44*5d9d9091SRichard Lowe *		while ((--n != 0) &&  ((*s1++ = *s2++) != '\0'))
45*5d9d9091SRichard Lowe *			;
46*5d9d9091SRichard Lowe *		if (n != 0)
47*5d9d9091SRichard Lowe *			while (--n != 0)
48*5d9d9091SRichard Lowe *				*s1++ = '\0';
49*5d9d9091SRichard Lowe *		return (os1);
50*5d9d9091SRichard Lowe *	}
51*5d9d9091SRichard Lowe */
52*5d9d9091SRichard Lowe
53*5d9d9091SRichard Lowe#include <sys/asm_linkage.h>
54*5d9d9091SRichard Lowe
55*5d9d9091SRichard Lowe	! strncpy works similarly to strcpy, except that n bytes of s2
56*5d9d9091SRichard Lowe	! are copied to s1. If a null character is reached in s2 yet more
57*5d9d9091SRichard Lowe	! bytes remain to be copied, strncpy will copy null bytes into
58*5d9d9091SRichard Lowe	! the destination string.
59*5d9d9091SRichard Lowe	!
60*5d9d9091SRichard Lowe	! This implementation works by first aligning the src ptr and
61*5d9d9091SRichard Lowe	! performing small copies until it is aligned.  Then, the string
62*5d9d9091SRichard Lowe	! is copied based upon destination alignment.  (byte, half-word,
63*5d9d9091SRichard Lowe	! word, etc.)
64*5d9d9091SRichard Lowe
65*5d9d9091SRichard Lowe	ENTRY(strncpy)
66*5d9d9091SRichard Lowe
	! char *strncpy(char *s1, const char *s2, size_t n)
	!
	! In:  %o0 = s1 (dst), %o1 = s2 (src), %o2 = n
	! Out: %o0 = s1
	!
	! All loops index from the END of both buffers with a negative
	! byte count that runs up towards zero, so one register serves as
	! src index, dst index and remaining-length counter at once.
	!
	! Short path (n < 7): leaf code, no register window is created.
	!   %o2 = s1 + n
	!   %o3 = s2 + n
	!   %o4 = -(bytes left)
	!   %o5 = byte just copied
	!
	! Long path, after the save below (the caller's %oN become %iN):
	!   %i0 = s1, handed back to the caller by the final restore
	!   %i1 = src byte / word just loaded
	!   %i2 = s1 + n (s1 + n - 4 once .wordaligned has run)
	!   %i3 = s2 + n
	!   %i4 = -(bytes left)
	!   %i5 = -(bytes until src is word aligned), later magic2 0x80808080
	!   %l0 = scratch / running dst pointer while padding
	!   %l1 = magic1 0x01010101, later the word-fill counter
	!   %g1 = scratch
	!
	! Branch delay slots are used throughout: the instruction following
	! a branch is executed before the branch target.  For a conditional
	! branch with ",a" it is executed only when the branch is taken.
	! A ".empty" after a branch marks a delay slot that is deliberately
	! filled by the first instruction of the code that follows it.
67*5d9d9091SRichard Lowe	.align 32
68*5d9d9091SRichard Lowe	subcc	%g0, %o2, %o4		! %o4 = -n (negative count, runs up to 0)
69*5d9d9091SRichard Lowe	bz	.doneshort		! if n == 0, done
70*5d9d9091SRichard Lowe	cmp	%o2, 7			! n < 7 ? (delay slot; flags used by blu)
71*5d9d9091SRichard Lowe	add	%o1, %o2, %o3		! %o3 = src + n (does not touch flags)
72*5d9d9091SRichard Lowe	blu	.shortcpy		! n < 7, use byte-wise copy
73*5d9d9091SRichard Lowe	add	%o0, %o2, %o2		! %o2 = dst + n (delay slot)
74*5d9d9091SRichard Lowe	andcc	%o1, 3, %o5		! src word aligned ?
75*5d9d9091SRichard Lowe	bz	.wordaligned		! yup
76*5d9d9091SRichard Lowe	save	%sp, -0x40, %sp		! create new register window (delay slot)
77*5d9d9091SRichard Lowe	sub	%i5, 4, %i5		! %i5 = -(bytes until src aligned)
78*5d9d9091SRichard Lowe	nop				! align loop on 16-byte boundary
79*5d9d9091SRichard Lowe	nop				! align loop on 16-byte boundary
80*5d9d9091SRichard Lowe
81*5d9d9091SRichard Lowe.alignsrc:
82*5d9d9091SRichard Lowe	ldub	[%i3 + %i4], %i1	! src[]
83*5d9d9091SRichard Lowe	stb	%i1, [%i2 + %i4]	! dst[] = src[]
84*5d9d9091SRichard Lowe	inccc	%i4			! src++, dst++, n--
85*5d9d9091SRichard Lowe	bz	.done			! n == 0, done
86*5d9d9091SRichard Lowe	tst     %i1			! end of src reached (null byte) ?
87*5d9d9091SRichard Lowe	bz,a	.bytepad		! yes, at least one byte to pad here
88*5d9d9091SRichard Lowe	add 	%i2, %i4, %l0		! need single dest pointer for fill
89*5d9d9091SRichard Lowe	inccc	%i5			! src aligned now?
90*5d9d9091SRichard Lowe	bnz	.alignsrc		! no, copy another byte
91*5d9d9091SRichard Lowe	.empty				! delay slot = first insn of .wordaligned
92*5d9d9091SRichard Lowe
93*5d9d9091SRichard Lowe.wordaligned:
94*5d9d9091SRichard Lowe	add	%i2, %i4, %l0		! %l0 = current dst pointer
95*5d9d9091SRichard Lowe	sethi	%hi(0x01010101), %l1	! Alan Mycroft's magic1
96*5d9d9091SRichard Lowe	sub	%i2, 4, %i2		! adjust for dest pre-incr in cpy loops
97*5d9d9091SRichard Lowe	or	%l1, %lo(0x01010101),%l1!  finish loading magic1
98*5d9d9091SRichard Lowe	andcc	%l0, 3, %g1		! destination word aligned ?
99*5d9d9091SRichard Lowe	bnz	.dstnotaligned		! nope
100*5d9d9091SRichard Lowe	sll	%l1, 7, %i5		! create Alan Mycroft's magic2 (0x80808080)
101*5d9d9091SRichard Lowe
102*5d9d9091SRichard Lowe.storeword:
103*5d9d9091SRichard Lowe	lduw	[%i3 + %i4], %i1	! load next src word
104*5d9d9091SRichard Lowe	addcc	%i4, 4, %i4		! count += 4, i.e. src += 4, dst += 4
105*5d9d9091SRichard Lowe	bcs	.lastword		! counter wraps (<= 4 bytes left): last word
106*5d9d9091SRichard Lowe	andn	%i5, %i1, %g1		! ~word & 0x80808080
107*5d9d9091SRichard Lowe	sub	%i1, %l1, %l0		! word - 0x01010101
108*5d9d9091SRichard Lowe	andcc	%l0, %g1, %g0		! ((word - 0x01010101) & ~word & 0x80808080)
109*5d9d9091SRichard Lowe	bz,a	.storeword		! no zero byte if magic expression == 0
110*5d9d9091SRichard Lowe	stw	%i1, [%i2 + %i4]	! store word to dst (address pre-incremented)
111*5d9d9091SRichard Lowe
112*5d9d9091SRichard Lowe	! n has not expired, but src is at the end. We need to push out the
113*5d9d9091SRichard Lowe	! remaining src bytes and then start padding with null bytes.
114*5d9d9091SRichard Lowe
115*5d9d9091SRichard Lowe.zerobyte:
116*5d9d9091SRichard Lowe	add	%i2, %i4, %l0		! pointer to dest string
117*5d9d9091SRichard Lowe	srl	%i1, 24, %g1		! first byte
118*5d9d9091SRichard Lowe	stb	%g1, [%l0]		! store it
119*5d9d9091SRichard Lowe	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
120*5d9d9091SRichard Lowe	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
121*5d9d9091SRichard Lowe	andn	%i1, %g1, %i1		! if byte == 0, start padding with null bytes
122*5d9d9091SRichard Lowe	srl	%i1, 16, %g1		! second byte
123*5d9d9091SRichard Lowe	stb	%g1, [%l0 + 1]		! store it
124*5d9d9091SRichard Lowe	and	%g1, 0xff, %g1		! isolate byte
125*5d9d9091SRichard Lowe	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
126*5d9d9091SRichard Lowe	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
127*5d9d9091SRichard Lowe	andn	%i1, %g1, %i1		! if byte == 0, start padding with null bytes
128*5d9d9091SRichard Lowe	srl	%i1, 8, %g1		! third byte
129*5d9d9091SRichard Lowe	stb	%g1, [%l0 + 2]		! store it
130*5d9d9091SRichard Lowe	and	%g1, 0xff, %g1		! isolate byte
131*5d9d9091SRichard Lowe	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
132*5d9d9091SRichard Lowe	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
133*5d9d9091SRichard Lowe	andn	%i1, %g1, %i1		! if byte == 0, start padding with null bytes
134*5d9d9091SRichard Lowe	stb	%i1, [%l0 + 3]		! store fourth byte
135*5d9d9091SRichard Lowe	addcc	%i4, 8, %g0		! number of pad bytes <= 8 ? (carry set)
136*5d9d9091SRichard Lowe	bcs	.bytepad		! yes, do simple byte wise fill
137*5d9d9091SRichard Lowe	add	%l0, 4, %l0		! dst += 4
138*5d9d9091SRichard Lowe	andcc	%l0, 3, %l1		! dst offset relative to word boundary
139*5d9d9091SRichard Lowe	bz	.fillaligned		! dst already word aligned
140*5d9d9091SRichard Lowe
141*5d9d9091SRichard Lowe	! here there is at least one more byte to zero out: otherwise we would
142*5d9d9091SRichard Lowe	! have exited through label .lastword
143*5d9d9091SRichard Lowe
144*5d9d9091SRichard Lowe	sub	%l1, 4, %l1		! -(bytes to align dst); delay slot of bz above
145*5d9d9091SRichard Lowe.makealigned:
146*5d9d9091SRichard Lowe	stb	%g0, [%l0]		! dst[] = 0
147*5d9d9091SRichard Lowe	addcc	%i4, 1, %i4		! n--
148*5d9d9091SRichard Lowe	bz	.done			! n == 0, we are done
149*5d9d9091SRichard Lowe	addcc	%l1, 1, %l1		! any more byte needed to align
150*5d9d9091SRichard Lowe	bnz	.makealigned		! yup, pad another byte
151*5d9d9091SRichard Lowe	add	%l0, 1, %l0		! dst++
152*5d9d9091SRichard Lowe	nop				! pad to align copy loop below
153*5d9d9091SRichard Lowe
154*5d9d9091SRichard Lowe	! here we know that there are at least another 4 bytes to pad, since
155*5d9d9091SRichard Lowe	! we don't get here unless there were more than 8 bytes to pad to begin
156*5d9d9091SRichard Lowe	! with, and we have padded at most 3 bytes during dst aligning
157*5d9d9091SRichard Lowe
158*5d9d9091SRichard Lowe.fillaligned:
159*5d9d9091SRichard Lowe	add	%i4, 3, %i2		! bias negative count for rounding
160*5d9d9091SRichard Lowe	and	%i2, -4, %l1		! %l1 = -(4 * number of whole words left)
161*5d9d9091SRichard Lowe	and	%i2, 4, %i2		! word count odd ? 4 : 0
162*5d9d9091SRichard Lowe	stw	%g0, [%l0]		! store first word
163*5d9d9091SRichard Lowe	addcc	%l1, %i2, %l1		! drop the odd word; zero if word count == 1
164*5d9d9091SRichard Lowe	add	%i4, %i2, %i4		! if word count odd, n -= 4
165*5d9d9091SRichard Lowe	bz	.bytepad		! if word count == 1, pad bytes left
166*5d9d9091SRichard Lowe	add	%l0, %i2, %l0		! bump dst if word count odd
167*5d9d9091SRichard Lowe
168*5d9d9091SRichard Lowe.fillword:
169*5d9d9091SRichard Lowe	addcc	%l1, 8, %l1		! count -= 8
170*5d9d9091SRichard Lowe	stw	%g0, [%l0]		! dst[n] = 0
171*5d9d9091SRichard Lowe	stw	%g0, [%l0 + 4]		! dst[n+4] = 0
172*5d9d9091SRichard Lowe	add	%l0, 8, %l0		! dst += 8
173*5d9d9091SRichard Lowe	bcc	.fillword		! fill words until count == 0
174*5d9d9091SRichard Lowe	addcc	%i4, 8, %i4		! n -= 8 (delay slot; flags used by bz below)
175*5d9d9091SRichard Lowe	bz	.done			! if n == 0, we are done
176*5d9d9091SRichard Lowe	.empty				! delay slot = first insn of .bytepad
177*5d9d9091SRichard Lowe
178*5d9d9091SRichard Lowe.bytepad:
179*5d9d9091SRichard Lowe	and	%i4, 1, %i2		! byte count odd ? 1 : 0
180*5d9d9091SRichard Lowe	stb	%g0, [%l0]		! store first byte
181*5d9d9091SRichard Lowe	addcc	%i4, %i2, %i4		! byte count == 1 ?
182*5d9d9091SRichard Lowe	bz	.done			! yup, we are done
183*5d9d9091SRichard Lowe	add	%l0, %i2, %l0		! bump pointer if odd
184*5d9d9091SRichard Lowe
185*5d9d9091SRichard Lowe.fillbyte:
186*5d9d9091SRichard Lowe	addcc	%i4, 2, %i4		! n -= 2
187*5d9d9091SRichard Lowe	stb	%g0, [%l0]		! dst[n] = 0
188*5d9d9091SRichard Lowe	stb	%g0, [%l0 + 1]		! dst[n+1] = 0
189*5d9d9091SRichard Lowe	bnz	.fillbyte		! fill until n == 0
190*5d9d9091SRichard Lowe	add	%l0, 2, %l0		! dst += 2
191*5d9d9091SRichard Lowe
192*5d9d9091SRichard Lowe.done:
193*5d9d9091SRichard Lowe	ret				! done
194*5d9d9091SRichard Lowe	restore	%i0, %g0, %o0		! restore reg window, return dst
195*5d9d9091SRichard Lowe
196*5d9d9091SRichard Lowe	! this is the last word. It may contain null bytes. Store bytes
197*5d9d9091SRichard Lowe	! until n == 0. Once a null byte is seen, the rest are stored as null.
198*5d9d9091SRichard Lowe
199*5d9d9091SRichard Lowe.lastword:
200*5d9d9091SRichard Lowe	sub	%i4, 4, %i4		! undo counter pre-increment
201*5d9d9091SRichard Lowe	add	%i2, 4, %i2		! adjust dst for counter un-bumping
202*5d9d9091SRichard Lowe
203*5d9d9091SRichard Lowe	srl	%i1, 24, %g1		! first byte
204*5d9d9091SRichard Lowe	stb	%g1, [%i2 + %i4]	! store it
205*5d9d9091SRichard Lowe	inccc	%i4			! n--
206*5d9d9091SRichard Lowe	bz	.done			! if n == 0, we're done
207*5d9d9091SRichard Lowe	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
208*5d9d9091SRichard Lowe	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
209*5d9d9091SRichard Lowe	andn	%i1, %g1, %i1		! if byte == 0, start padding with null
210*5d9d9091SRichard Lowe	srl	%i1, 16, %g1		! second byte
211*5d9d9091SRichard Lowe	stb	%g1, [%i2 + %i4]	! store it
212*5d9d9091SRichard Lowe	inccc	%i4			! n--
213*5d9d9091SRichard Lowe	bz	.done			! if n == 0, we're done
214*5d9d9091SRichard Lowe	and	%g1, 0xff, %g1		! isolate byte
215*5d9d9091SRichard Lowe	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
216*5d9d9091SRichard Lowe	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
217*5d9d9091SRichard Lowe	andn	%i1, %g1, %i1		! if byte == 0, start padding with null
218*5d9d9091SRichard Lowe	srl	%i1, 8, %g1		! third byte
219*5d9d9091SRichard Lowe	stb	%g1, [%i2 + %i4]	! store it
220*5d9d9091SRichard Lowe	inccc	%i4			! n--
221*5d9d9091SRichard Lowe	bz	.done			! if n == 0, we're done
222*5d9d9091SRichard Lowe	and	%g1, 0xff, %g1		! isolate byte
223*5d9d9091SRichard Lowe	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
224*5d9d9091SRichard Lowe	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
225*5d9d9091SRichard Lowe	andn	%i1, %g1, %i1		! if byte == 0, start padding with null
226*5d9d9091SRichard Lowe	ba	.done			! after this store n is zero, we are done
227*5d9d9091SRichard Lowe	stb	%i1, [%i2 + %i4]	! store fourth byte
228*5d9d9091SRichard Lowe
229*5d9d9091SRichard Lowe.dstnotaligned:
230*5d9d9091SRichard Lowe	cmp	%g1, 2			! dst half word aligned?
231*5d9d9091SRichard Lowe	be	.storehalfword2		! yup, store half word at a time
232*5d9d9091SRichard Lowe	.empty				! delay slot = lduw at .storebyte
233*5d9d9091SRichard Lowe.storebyte:
234*5d9d9091SRichard Lowe	lduw	[%i3 + %i4], %i1	! x = src[]
235*5d9d9091SRichard Lowe	addcc	%i4, 4, %i4		! src += 4, dst += 4, n -= 4
236*5d9d9091SRichard Lowe	bcs	.lastword		! if counter wraps, last word
237*5d9d9091SRichard Lowe	andn	%i5, %i1, %g1		! ~x & 0x80808080
238*5d9d9091SRichard Lowe	sub	%i1, %l1, %l0		! x - 0x01010101
239*5d9d9091SRichard Lowe	andcc	%l0, %g1, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
240*5d9d9091SRichard Lowe	bnz	.zerobyte		! end of src found, may need to pad
241*5d9d9091SRichard Lowe	add	%i2, %i4, %l0		! dst (in pointer form)
242*5d9d9091SRichard Lowe	srl	%i1, 24, %g1		! %g1<7:0> = 1st byte; half-word aligned now
243*5d9d9091SRichard Lowe	stb	%g1, [%l0]		! store first byte
244*5d9d9091SRichard Lowe	srl	%i1, 8, %g1		! %g1<15:0> = bytes 2, 3
245*5d9d9091SRichard Lowe	sth	%g1, [%l0 + 1]		! store bytes 2, 3
246*5d9d9091SRichard Lowe	ba	.storebyte		! next word
247*5d9d9091SRichard Lowe	stb	%i1, [%l0 + 3]		! store fourth byte
248*5d9d9091SRichard Lowe	nop
249*5d9d9091SRichard Lowe	nop
250*5d9d9091SRichard Lowe
251*5d9d9091SRichard Lowe.storehalfword:
252*5d9d9091SRichard Lowe	lduw	[%i3 + %i4], %i1	! x = src[]
253*5d9d9091SRichard Lowe.storehalfword2:
254*5d9d9091SRichard Lowe	addcc	%i4, 4, %i4		! src += 4, dst += 4, n -= 4
255*5d9d9091SRichard Lowe	bcs	.lastword		! if counter wraps, last word
256*5d9d9091SRichard Lowe	andn	%i5, %i1, %g1		! ~x & 0x80808080
257*5d9d9091SRichard Lowe	sub	%i1, %l1, %l0		! x - 0x01010101
258*5d9d9091SRichard Lowe	andcc	%l0, %g1, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
259*5d9d9091SRichard Lowe	bnz	.zerobyte		! x has zero byte, handle end cases
260*5d9d9091SRichard Lowe	add	%i2, %i4, %l0		! dst (in pointer form)
261*5d9d9091SRichard Lowe	srl	%i1, 16, %g1		! %g1<15:0> = bytes 1, 2
262*5d9d9091SRichard Lowe	sth	%g1, [%l0]		! store bytes 1, 2
263*5d9d9091SRichard Lowe	ba	.storehalfword		! next word
264*5d9d9091SRichard Lowe	sth	%i1, [%l0 + 2]		! store bytes 3, 4
265*5d9d9091SRichard Lowe
266*5d9d9091SRichard Lowe.shortcpy:
267*5d9d9091SRichard Lowe	ldub	[%o3 + %o4], %o5	! src[]
268*5d9d9091SRichard Lowe	stb	%o5, [%o2 + %o4]	! dst[] = src[]
269*5d9d9091SRichard Lowe	inccc	%o4			! src++, dst++, n--
270*5d9d9091SRichard Lowe	bz	.doneshort		! if n == 0, done
271*5d9d9091SRichard Lowe	tst	%o5			! src[] == 0 ?
272*5d9d9091SRichard Lowe	bnz,a	.shortcpy		! nope, next byte
273*5d9d9091SRichard Lowe	nop				! empty delay slot
274*5d9d9091SRichard Lowe
275*5d9d9091SRichard Lowe.padbyte:
276*5d9d9091SRichard Lowe	stb	%g0, [%o2 + %o4]	! dst[] = 0
277*5d9d9091SRichard Lowe.padbyte2:
278*5d9d9091SRichard Lowe	addcc	%o4, 1, %o4		! dst++, n--
279*5d9d9091SRichard Lowe	bnz,a	.padbyte2		! if n != 0, next byte
280*5d9d9091SRichard Lowe	stb	%g0, [%o2 + %o4]	! dst[] = 0 (annulled once n == 0)
281*5d9d9091SRichard Lowe	nop				! align label below to 16-byte boundary
282*5d9d9091SRichard Lowe
283*5d9d9091SRichard Lowe.doneshort:
284*5d9d9091SRichard Lowe	retl				! return from leaf; %o0 still holds s1
285*5d9d9091SRichard Lowe	nop				! empty delay slot
286*5d9d9091SRichard Lowe	SET_SIZE(strncpy)
287