/* xref: /netbsd/common/lib/libc/arch/x86_64/string/strcat.S (revision 6550d01e) */
/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

/* ENTRY, _ALIGN_TEXT and RCSID used in this file are not defined here. */
#include <machine/asm.h>

/* Embed the revision identification string only when LIBC_SCCS is defined. */
#if defined(LIBC_SCCS)
	RCSID("$NetBSD: strcat.S,v 1.1 2005/12/20 19:28:51 christos Exp $")
#endif

/*
 * char *strcat(char *dst, const char *src)
 *
 * Appends the NUL-terminated string at src, including its terminator,
 * to the end of the NUL-terminated string at dst.
 *
 * In:      %rdi = dst, %rsi = src
 * Out:     %rax = dst (the pointer as passed in)
 * Scratch: %rcx, %rdx, %r8, %r9, flags
 *
 * Each phase steps a byte at a time until the pointer it reads through
 * is 8-byte aligned, then a quadword at a time.  The quadword test
 *
 *	(word - 0x0101010101010101) & 0x8080808080808080
 *
 * is non-zero whenever the word contains a zero byte, but it is also
 * non-zero for any byte of 0x81 or above, so every hit is confirmed
 * byte by byte before it is acted on.
 *
 * The aligned quadword loads may read bytes that lie after a terminator,
 * but only within the aligned 8-byte word that contains it.  Nothing is
 * stored past the copied terminator: a source word is stored whole only
 * when the test proves it has no zero byte, otherwise it is stored one
 * byte at a time.
 */
ENTRY(strcat)
	movq	%rdi,%rax			/* return value: original dst */
	movabsq	$0x0101010101010101,%r8		/* 0x01 in every byte */
	movabsq	$0x8080808080808080,%r9		/* 0x80 in every byte */

	/*
	 * Phase 1: find the terminator of dst.
	 * Align destination to word boundary.
	 * Consider unrolling loop?
	 */
.Lscan:
.Lscan_align:
	testb	$7,%dil
	je	.Lscan_aligned
	cmpb	$0,(%rdi)
	je	.Lcopy			/* %rdi is at dst's terminator */
	incq	%rdi
	jmp	.Lscan_align

	_ALIGN_TEXT
.Lscan_aligned:
.Lscan_loop:
	movq	(%rdi),%rdx
	addq	$8,%rdi			/* %rdi is now 8 past the loaded word */
	subq	%r8,%rdx
	testq	%r9,%rdx
	je	.Lscan_loop		/* no zero byte in this word */

	/*
	 * The test fired, either for a real zero byte or for a byte
	 * of 0x81 or above.  Check the eight bytes in memory order;
	 * %rdi has already been advanced, hence the negative offsets.
	 * If none of them is zero, return to the loop.
	 */

	cmpb	$0,-8(%rdi)	/* 1st byte == 0? */
	jne	1f
	subq	$8,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-7(%rdi)	/* 2nd byte == 0? */
	jne	1f
	subq	$7,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-6(%rdi)	/* 3rd byte == 0? */
	jne	1f
	subq	$6,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-5(%rdi)	/* 4th byte == 0? */
	jne	1f
	subq	$5,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-4(%rdi)	/* 5th byte == 0? */
	jne	1f
	subq	$4,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-3(%rdi)	/* 6th byte == 0? */
	jne	1f
	subq	$3,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-2(%rdi)	/* 7th byte == 0? */
	jne	1f
	subq	$2,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-1(%rdi)	/* 8th byte == 0? */
	jne	.Lscan_loop	/* false hit; %rdi already at next word */
	subq	$1,%rdi

	/*
	 * Phase 2: copy src to the terminator found above (%rdi).
	 * Align source to a word boundary.
	 * Consider unrolling loop?
	 */
.Lcopy:
.Lcopy_align:
	testb	$7,%sil
	je	.Lcopy_aligned
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lcopy_align
	ret			/* terminator copied while aligning */

	/*
	 * The loop is entered at .Lcopy_aligned, so the store at the
	 * top always writes a word that has already been loaded and
	 * shown to contain no zero byte.  Only %rsi is known to be
	 * aligned here; the store through %rdi may be unaligned.
	 */
	_ALIGN_TEXT
.Lcopy_loop:
	movq	%rdx,(%rdi)
	addq	$8,%rdi
.Lcopy_aligned:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx	/* test a copy; %rdx keeps the data */
	addq	$8,%rsi
	subq	%r8,%rcx
	testq	%r9,%rcx
	je	.Lcopy_loop

	/*
	 * The test fired, so this word may hold the terminator.
	 * Store it one byte at a time, lowest byte first (that is the
	 * byte at the lowest address), shifting the next byte into
	 * %dl each step, and stop after storing a zero byte.  If none
	 * of the bytes is zero, return to the loop.
	 */

	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 1st byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 2nd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 3rd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 4th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 5th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 6th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 7th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 8th byte == 0? */
	jne	.Lcopy_aligned	/* false hit; all 8 bytes stored, go on */

.Ldone:
	ret
