/* xref: /dragonfly/lib/libc/x86_64/string/strcat.S (revision 0fe46dc6) */
/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 *
 * $NetBSD: strcat.S,v 1.4 2004/07/26 18:51:21 drochner Exp $
 * $FreeBSD: src/lib/libc/amd64/string/strcat.S,v 1.2 2008/11/02 01:10:54 peter Exp $
 */
8
#include <machine/asm.h>
10
/*
 * char *strcat(char *dst, const char *src)
 *
 * Appends the NUL-terminated string at src (including its terminator)
 * to the end of the NUL-terminated string at dst and returns dst.
 *
 * Convention: arguments arrive in %rdi/%rsi and the result is returned
 *             in %rax (System V AMD64); leaf function, no stack use.
 * In:         %rdi = dst, %rsi = src
 * Out:        %rax = dst, exactly as passed in
 * Clobbers:   %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
 *
 * Both phases (find the end of dst, then copy src) work a byte at a
 * time until the pointer being read is 8-byte aligned and then switch
 * to 8-byte words.  An aligned 8-byte load never straddles a page
 * boundary, so reading the whole word that contains the terminator
 * cannot fault even though it may look at bytes past the NUL.
 *
 * Word-at-a-time NUL test: (word - 0x0101010101010101) has the top bit
 * of a byte lane set whenever that byte was 0.  Bytes of 0x81 and above
 * set it too, so a hit only means "possibly contains a NUL" and is
 * always confirmed byte by byte.
 */
ENTRY(strcat)
	movq	%rdi,%rax			/* return value: original dst */
	movabsq	$0x0101010101010101,%r8		/* 0x01 in every byte lane */
	movabsq	$0x8080808080808080,%r9		/* 0x80 in every byte lane */

	/*
	 * Phase 1a: walk dst one byte at a time until %rdi is 8-byte
	 * aligned, jumping straight to the copy if the NUL shows up first.
	 */
.Lscan:
.Lscan_align:
	testb	$7,%dil
	je	.Lscan_aligned
	cmpb	$0,(%rdi)
	je	.Lcopy
	incq	%rdi
	jmp	.Lscan_align

	/*
	 * 16-byte align the hot loop.  (.align 4 would only give 4-byte
	 * alignment on x86 ELF.)  The padding follows an unconditional
	 * jmp and is never executed.
	 */
	.p2align 4
	/*
	 * Phase 1b: scan dst a word at a time.  %rdi is advanced before
	 * the test, so on exit the candidate word is at -8(%rdi).
	 */
.Lscan_aligned:
.Lscan_loop:
	movq	(%rdi),%rdx
	addq	$8,%rdi
	subq	%r8,%rdx
	testq	%r9,%rdx
	je	.Lscan_loop

	/*
	 * The word may contain a NUL, or the test may have fired on a
	 * byte >= 0x81.  Check each byte in memory order; on the first
	 * NUL rewind %rdi to point at it.  If none is NUL, resume the
	 * word loop.
	 */

	cmpb	$0,-8(%rdi)	/* 1st byte == 0? */
	jne	1f
	subq	$8,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-7(%rdi)	/* 2nd byte == 0? */
	jne	1f
	subq	$7,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-6(%rdi)	/* 3rd byte == 0? */
	jne	1f
	subq	$6,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-5(%rdi)	/* 4th byte == 0? */
	jne	1f
	subq	$5,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-4(%rdi)	/* 5th byte == 0? */
	jne	1f
	subq	$4,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-3(%rdi)	/* 6th byte == 0? */
	jne	1f
	subq	$3,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-2(%rdi)	/* 7th byte == 0? */
	jne	1f
	subq	$2,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-1(%rdi)	/* 8th byte == 0? */
	jne	.Lscan_loop	/* false alarm: keep scanning */
	subq	$1,%rdi

	/*
	 * Phase 2a: %rdi now points at dst's terminator.  Copy src one
	 * byte at a time until %rsi is 8-byte aligned; return at once if
	 * the byte just stored was the NUL.
	 */
.Lcopy:
.Lcopy_align:
	testb	$7,%sil
	je	.Lcopy_aligned
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lcopy_align
	ret

	/* 16-byte align the hot loop; padding follows a ret. */
	.p2align 4
	/*
	 * Phase 2b: word copy.  The loop is entered at .Lcopy_aligned:
	 * load and test first, and only store a word (at .Lcopy_loop)
	 * once the test says none of its bytes can be NUL.  Only the
	 * source is known to be aligned; the store to (%rdi) may be
	 * unaligned.
	 */
.Lcopy_loop:
	movq	%rdx,(%rdi)
	addq	$8,%rdi
.Lcopy_aligned:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx	/* test a copy; %rdx keeps the data */
	addq	$8,%rsi
	subq	%r8,%rcx
	testq	%r9,%rcx
	je	.Lcopy_loop

	/*
	 * The word in %rdx may contain the terminator (or the test fired
	 * on a byte >= 0x81).  Store it one byte at a time, lowest byte
	 * first, shifting the next byte into %dl, and stop right after
	 * the NUL has been written.  If all eight bytes turn out to be
	 * non-NUL, go back to the word loop.
	 */

	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 1st byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 2nd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 3rd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 4th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 5th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 6th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 7th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 8th byte == 0? */
	jne	.Lcopy_aligned	/* false alarm: keep copying words */

.Ldone:
	ret
END(strcat)
165
166	.section .note.GNU-stack,"",%progbits
167