/* xref: /openbsd/sys/lib/libkern/arch/amd64/strrchr.S (revision 28c67577) */
/*	$OpenBSD: strrchr.S,v 1.6 2022/12/07 19:26:39 guenther Exp $	*/
/*	$NetBSD: strrchr.S,v 1.3 2014/03/22 19:16:34 jakllsch Exp $	*/

/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

STRONG_ALIAS(rindex, strrchr)

/*
 * char *strrchr(const char *s, int ch)
 *
 * Return a pointer to the last byte in the NUL-terminated string s that
 * equals (char)ch, or NULL if there is none.  The terminator is compared
 * too, so ch == 0 returns a pointer to the terminating NUL.
 *
 * In:		%rdi = s, %sil = ch (only the low byte is used)
 * Out:		%rax = address of the last match, or 0
 * Modifies:	%rcx, %rdx, %rsi, %rdi, %r8, %r9, flags.
 *		%r10 is the register handed to the RETGUARD_* macros.
 *
 * Register roles in the body:
 *	%rdi	scan cursor
 *	%cl	ch; after .Lword_aligned %rcx holds ch in all eight bytes
 *	%rax	best match so far (0 until one is seen)
 *	%r8	0x0101010101010101
 *	%r9	0x8080808080808080
 *	%rdx,%rsi  scratch
 *
 * The string is scanned byte-wise until the cursor is 8-byte aligned and
 * then one aligned quadword at a time.  An aligned quadword load may read
 * up to 7 bytes beyond the terminator, but never beyond the aligned
 * 8-byte unit that contains it.
 */
ENTRY(strrchr)
	RETGUARD_SETUP(strrchr, r10)
	movzbq	%sil,%rcx		/* %rcx = (unsigned char)ch */

	/* zero return value (32-bit xor clears all of %rax) */
	xorl	%eax,%eax

	/*
	 * Align to word boundary.
	 * Consider unrolling loop?
	 */
.Lalign:
	testb	$7,%dil
	je	.Lword_aligned
	movb	(%rdi),%dl
	cmpb	%cl,%dl
	cmoveq	%rdi,%rax		/* remember this position on a match */
	incq	%rdi
	testb	%dl,%dl			/* was that byte the terminator? */
	jne	.Lalign
	jmp	.Ldone

.Lword_aligned:
	movabsq	$0x0101010101010101,%r8
	movabsq	$0x8080808080808080,%r9

	/*
	 * Copy char to all bytes in word.  %rcx is a zero-extended byte,
	 * so multiplying by 0x0101010101010101 cannot carry between bytes.
	 * %cl still holds ch afterwards for the byte compares below.
	 */
	imulq	%r8,%rcx

	/*
	 * Check whether any byte in the word is equal to ch or 0.
	 *
	 * For w = word and x = word ^ %rcx, a byte of w that is 0 or a
	 * byte of x that is 0 (i.e. a byte of w equal to ch) always leaves
	 * its high bit set in (w - %r8) | (x - %r8).
	 */
	_ALIGN_TEXT
.Lloop:
	movq	(%rdi),%rdx
	addq	$8,%rdi			/* cursor now points past this word */
	movq	%rdx,%rsi
	subq	%r8,%rdx
	xorq	%rcx,%rsi
	subq	%r8,%rsi
	orq	%rsi,%rdx
	testq	%r9,%rdx
	je	.Lloop

	/*
	 * In rare cases, the above loop may exit prematurely (the test
	 * also fires for some bytes that already have the high bit set).
	 * We must return to the loop if none of the bytes in the word
	 * match ch or are equal to 0.
	 *
	 * The eight bytes are examined in address order, so %rax always
	 * ends up at the last match that precedes (or is) the terminator.
	 */

	movb	-8(%rdi),%dl
	cmpb	%cl,%dl		/* 1st byte == ch? */
	jne	1f
	leaq	-8(%rdi),%rax
1:	testb	%dl,%dl		/* 1st byte == 0? */
	je	.Ldone

	movb	-7(%rdi),%dl
	cmpb	%cl,%dl		/* 2nd byte == ch? */
	jne	1f
	leaq	-7(%rdi),%rax
1:	testb	%dl,%dl		/* 2nd byte == 0? */
	je	.Ldone

	movb	-6(%rdi),%dl
	cmpb	%cl,%dl		/* 3rd byte == ch? */
	jne	1f
	leaq	-6(%rdi),%rax
1:	testb	%dl,%dl		/* 3rd byte == 0? */
	je	.Ldone

	movb	-5(%rdi),%dl
	cmpb	%cl,%dl		/* 4th byte == ch? */
	jne	1f
	leaq	-5(%rdi),%rax
1:	testb	%dl,%dl		/* 4th byte == 0? */
	je	.Ldone

	movb	-4(%rdi),%dl
	cmpb	%cl,%dl		/* 5th byte == ch? */
	jne	1f
	leaq	-4(%rdi),%rax
1:	testb	%dl,%dl		/* 5th byte == 0? */
	je	.Ldone

	movb	-3(%rdi),%dl
	cmpb	%cl,%dl		/* 6th byte == ch? */
	jne	1f
	leaq	-3(%rdi),%rax
1:	testb	%dl,%dl		/* 6th byte == 0? */
	je	.Ldone

	movb	-2(%rdi),%dl
	cmpb	%cl,%dl		/* 7th byte == ch? */
	jne	1f
	leaq	-2(%rdi),%rax
1:	testb	%dl,%dl		/* 7th byte == 0? */
	je	.Ldone

	movb	-1(%rdi),%dl
	cmpb	%cl,%dl		/* 8th byte == ch? */
	jne	1f
	leaq	-1(%rdi),%rax
1:	testb	%dl,%dl		/* 8th byte == 0? */
	jne	.Lloop		/* no terminator in this word: keep scanning */

.Ldone:
	RETGUARD_CHECK(strrchr, r10)
	ret
	/*
	 * NOTE(review): not reachable architecturally; presumably a
	 * barrier against speculative execution past the ret - confirm.
	 */
	lfence
END(strrchr)