/* Source: FreeBSD lib/libc/amd64/string/strchrnul.S (revision 1d386b48) */
/*-
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE
 */

#include <machine/asm.h>

#include "amd64_archlevel.h"

/*
 * Pad with NOP (0x90) bytes up to the next 16-byte boundary.  Used in
 * front of the main-loop heads below.
 */
#define ALIGN_TEXT	.p2align 4,0x90

/*
 * Export strchrnul as a weak alias of __strchrnul, so that the public
 * name resolves to whatever __strchrnul is bound to.
 */
	.weak	strchrnul
	.set	strchrnul, __strchrnul

/*
 * List of the __strchrnul variants implemented in this file.
 * ARCHFUNCS/ARCHFUNC/ENDARCHFUNCS are provided by amd64_archlevel.h;
 * presumably they build the table from which one variant is selected
 * according to the CPU's architecture level -- confirm against that
 * header.
 */
ARCHFUNCS(__strchrnul)
	ARCHFUNC(__strchrnul, scalar)
	ARCHFUNC(__strchrnul, baseline)
ENDARCHFUNCS(__strchrnul)

/*
 * char *strchrnul(const char *str, int c)
 *
 * Scalar variant.  This is implemented like strlen(str), but we check
 * for the presence of both NUL and c in each iteration, processing one
 * aligned 8-byte word at a time.
 *
 * In:   %rdi = str, %sil = c (the upper bits of %esi are ignored)
 * Out:  %rax = pointer to the first byte of str equal to c or to NUL
 * Uses: %rcx, %rdx, %rsi, %rdi, %r8-%r11, flags
 *
 * Zero bytes in a word x are found with
 *
 *	(x - 0x01..01) & ~x & 0x80..80
 *
 * which sets bit 7 of every zero byte.  A zero byte makes the
 * subtraction borrow from the next higher byte, so bytes above the
 * first zero byte may be flagged spuriously; the lowest flagged byte
 * is always a genuine match, and that is the only one we use.
 * Applying the same expression to x ^ cc..cc finds bytes equal to c.
 *
 * Register roles once set up:
 *	%rsi = c replicated into all 8 bytes
 *	%r8  = -0x0101010101010101 (so that lea can do the subtraction)
 *	%r9  = 0x8080808080808080
 *	%rdi = address of the next word to load
 */
ARCHENTRY(__strchrnul, scalar)
	mov	%edi, %ecx		# keep the low address bits (misalignment)
	and	$~7, %rdi		# align to 8 byte
	movzbl	%sil, %esi		# clear stray high bits
	movabs	$0x0101010101010101, %r8
	mov	(%rdi), %rax		# load first word
	imul	%r8, %rsi		# replicate char 8 times
	movabs	$0x8080808080808080, %r9

	/*
	 * Unaligned input: the first word may begin with bytes that
	 * precede the string.  These must neither be reported as
	 * matches nor be allowed to influence the result for the
	 * bytes of the string.  Masking the final result alone is not
	 * enough: a NUL or c just before the string would borrow into
	 * the first string byte during the subtraction and flag it
	 * spuriously if it is 0x01 (resp. c ^ 1).  Instead, or 0x01
	 * into every byte before the string, both in str and in
	 * str ^ c.  A byte with its least significant bit set is
	 * nonzero, so it neither matches nor generates a borrow.
	 */
	shl	$3, %ecx		# byte offset -> bit offset (shl uses cl mod 64)
	mov	%r8, %r10
	add	$8, %rdi		# advance past the head word
	shl	%cl, %r10		# 0x01 where the string is
	xor	%r8, %r10		# 0x01 where it is not
	neg	%r8			# negate 01..01 so we can use lea

	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	or	%r10, %rax		# str, no NUL before the string
	or	%r10, %rcx		# str ^ c, no match before the string
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	lea	(%rcx, %r8, 1), %r11	# (str ^ c) - 0x01..01
	not	%rax			# ~str
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r11, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	or	%rcx, %rax		# matches for both
	and	%r9, %rax		# not including junk bits
	jnz	1f

	/* main loop unrolled twice */
	ALIGN_TEXT
0:	mov	(%rdi), %rax		# str
	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	lea	(%rcx, %r8, 1), %r11	# (str ^ c) - 0x01..01
	not	%rax			# ~str
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r11, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	or	%rcx, %rax		# matches for both
	and	%r9, %rax		# not including junk bits
	jnz	2f			# match in the word at (%rdi)

	mov	8(%rdi), %rax		# str
	add	$16, %rdi		# both words of this iteration consumed
	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	lea	(%rcx, %r8, 1), %r11	# (str ^ c) - 0x01..01
	not	%rax			# ~str
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r11, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	or	%rcx, %rax		# matches for both
	and	%r9, %rax		# not including junk bits
	jz	0b

	/*
	 * NUL or c found.  At 1, %rdi already points past the word
	 * holding the match; at 2, %rdi points at that word.
	 */
1:	sub	$8, %rdi		# undo advance past buffer
2:	tzcnt	%rax, %rax		# first NUL or c byte match
	shr	$3, %eax		# scale from bit to byte index
	add	%rdi, %rax		# pointer to found c or NUL
	ret
ARCHEND(__strchrnul, scalar)

/*
 * char *strchrnul(const char *str, int c)
 *
 * SSE2 variant: same structure as the scalar code, but 16 bytes are
 * examined per step using byte-wise vector compares.
 *
 * In:   %rdi = str, %sil = c (the upper bits of %esi are ignored)
 * Out:  %rax = pointer to the first byte of str equal to c or to NUL
 * Uses: %rcx, %rdx, %rdi, %xmm0-%xmm2, flags
 *
 * Register roles once set up:
 *	%xmm0 = c replicated into all 16 bytes
 *	%rdi  = address of the next 16-byte chunk to load
 *	%edx  = head mask: bits set for byte positions inside the string
 *
 * All loads are 16-byte aligned (movdqa), so the head chunk may
 * include bytes that precede the string; they are discarded by
 * masking the pmovmskb result, which has one independent bit per
 * byte.
 */
ARCHENTRY(__strchrnul, baseline)
	mov		%edi, %ecx
	and		$~0xf, %rdi		# align to 16 byte
	movdqa		(%rdi), %xmm1		# load head chunk
	movd		%esi, %xmm0
	and		$0xf, %ecx		# distance from (%rdi) to start of string
	pxor		%xmm2, %xmm2		# all-zero vector for the NUL compare
	mov		$-1, %edx
	punpcklbw	%xmm0, %xmm0		# c -> cc
	shl		%cl, %edx		# bits corresponding to bytes in the string
	punpcklwd	%xmm0, %xmm0		# cc -> cccc
	add		$16, %rdi		# advance past the head chunk

	/* check for match in head */
	pcmpeqb		%xmm1, %xmm2		# NUL bytes present?
	pshufd		$0, %xmm0, %xmm0	# cccc -> cccccccccccccccc
	pcmpeqb		%xmm0, %xmm1		# c present?
	por		%xmm2, %xmm1		# either present?
	pmovmskb	%xmm1, %eax		# one bit per byte
	and		%edx, %eax		# match in the string?
	jnz		1f

	/* main loop unrolled twice */
	ALIGN_TEXT
0:	movdqa		(%rdi), %xmm1
	pxor		%xmm2, %xmm2
	pcmpeqb		%xmm1, %xmm2		# NUL bytes present?
	pcmpeqb		%xmm0, %xmm1		# c present?
	por		%xmm2, %xmm1		# either present?
	pmovmskb	%xmm1, %eax
	test		%eax, %eax		# match in the string?
	jnz		2f			# match in the chunk at (%rdi)

	movdqa		16(%rdi), %xmm1
	add		$32, %rdi		# both chunks of this iteration consumed
	pxor		%xmm2, %xmm2
	pcmpeqb		%xmm1, %xmm2		# NUL bytes present?
	pcmpeqb		%xmm0, %xmm1		# c present?
	por		%xmm2, %xmm1		# either present?
	pmovmskb	%xmm1, %eax
	test		%eax, %eax		# match in the string?
	jz		0b

	/*
	 * NUL or c found; %eax is nonzero here.  At 1, %rdi already
	 * points past the chunk holding the match; at 2, %rdi points
	 * at that chunk.
	 */
1:	sub		$16, %rdi		# undo advance past buffer
2:	tzcnt		%eax, %eax		# where is the match?
	add		%rdi, %rax		# pointer to found c or NUL
	ret
ARCHEND(__strchrnul, baseline)

167	.section .note.GNU-stack,"",%progbits
168