/* xref: /freebsd/lib/libc/amd64/string/memcmp.S (revision 6419bb52) */
/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * int memcmp(const void *b1, const void *b2, size_t len)
 *
 * Syntax: AT&T (GNU as), run through the C preprocessor.
 * ABI:    System V AMD64; leaf routine, the stack is not touched.
 * In:     %rdi = b1, %rsi = b2, %rdx = len
 * Out:    %eax = 0 if no byte differs, otherwise (b1 byte - b2 byte) for
 *         the first differing position, both bytes zero-extended.
 * Clobb:  %rdi, %rsi, %rdx, %r8, %r9, flags
 *
 * Strategy: pick a size class, compare with the widest loads that fit and,
 * for the tail, re-read the last word ending exactly at b+len (it may
 * overlap bytes already checked).  Nothing outside [b, b+len) is read.
 * Only once some word differs do we narrow 16 -> 8 -> 4 bytes and finish
 * with a byte-by-byte scan that yields the return value.
 *
 * Labels are numeric locals.  10XXYY is the entry for lengths in the
 * XX..YY class (103200 is the 32-bytes-per-iteration loop); labels with a
 * further two-digit suffix are the fix-up stubs that point %rdi/%rsi at
 * the word in which that class detected a mismatch.
 */
ENTRY(memcmp)
	xorl	%eax,%eax		/* return value for every "equal" exit */
10:
	cmpq	$16,%rdx
	ja	101632f

	/*
	 * len <= 16 from here to 100000, so %dl carries the whole length.
	 * Lengths are unsigned: branch with jb, not jl.
	 */
100816:
	cmpb	$8,%dl
	jb	100408f
	movq	(%rdi),%r8		/* first 8 bytes */
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	-8(%rdi,%rdx),%r8	/* last 8 bytes, may overlap the first */
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10081608f
	ret
100408:
	cmpb	$4,%dl
	jb	100204f
	movl	(%rdi),%r8d		/* first 4 bytes */
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	80f			/* 80: re-checks these 4 bytes, then hits 1: */
	movl	-4(%rdi,%rdx),%r8d	/* last 4 bytes, may overlap the first */
	movl	-4(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	10040804f
	ret
100204:
	cmpb	$2,%dl
	jb	100001f
	movzwl	(%rdi),%r8d		/* first 2 bytes */
	movzwl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	movzwl	-2(%rdi,%rdx),%r8d	/* last 2 bytes */
	movzwl	-2(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	/*
	 * The pointers are left at the start on purpose: len is 2 or 3, so
	 * the differing byte is within bytes 0..2, which 1: scans in order.
	 */
	jne	1f
	ret
100001:
	cmpb	$1,%dl
	jb	100000f			/* len == 0: %eax is still 0 */
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	subl	%r8d,%eax
100000:
	ret

	/* 17..32 bytes: two qwords from the head, two ending at the tail. */
	ALIGN_TEXT
101632:
	cmpq	$32,%rdx
	ja	103200f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	8(%rdi),%r8
	movq	8(%rsi),%r9
	cmpq	%r8,%r9
	jne	10163208f
	movq	-16(%rdi,%rdx),%r8
	movq	-16(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163216f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163224f
	ret

	/*
	 * More than 32 bytes: 32 bytes per iteration, as two 16-byte halves.
	 * Each half is tested by OR-ing the two qword differences; non-zero
	 * means at least one byte in that half differs.
	 */
	ALIGN_TEXT
103200:
	movq	(%rdi),%r8
	movq	8(%rdi),%r9
	subq	(%rsi),%r8
	subq	8(%rsi),%r9
	orq	%r8,%r9
	jnz	10320000f

	movq	16(%rdi),%r8
	movq	24(%rdi),%r9
	subq	16(%rsi),%r8
	subq	24(%rsi),%r9
	orq	%r8,%r9
	jnz	10320016f

	leaq	32(%rdi),%rdi
	leaq	32(%rsi),%rsi
	subq	$32,%rdx
	cmpq	$32,%rdx
	jae	103200b
	/* Fewer than 32 bytes remain, so %dl holds the whole remainder. */
	testb	%dl,%dl
	jne	10b			/* finish the tail via the small-size paths */
	ret

/*
 * Mismatch was found.
 *
 * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
 */
	ALIGN_TEXT
10320016:				/* mismatch in the second 16-byte half */
	leaq	16(%rdi),%rdi
	leaq	16(%rsi),%rsi
10320000:				/* mismatch within 16 bytes at (%rdi) */
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	leaq	8(%rdi),%rdi		/* first qword equal: it is in the second */
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10081608:				/* 8..16 and 17..32: the last qword differs */
10163224:
	leaq	-8(%rdi,%rdx),%rdi
	leaq	-8(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163216:				/* 17..32: the qword at len-16 differs */
	leaq	-16(%rdi,%rdx),%rdi
	leaq	-16(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163208:				/* 17..32: the qword at offset 8 differs */
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10040804:				/* 4..7: the last dword differs */
	leaq	-4(%rdi,%rdx),%rdi
	leaq	-4(%rsi,%rdx),%rsi
	jmp	1f

/*
 * The first differing byte is within the next 8 bytes.  If the first dword
 * already differs the second one is never read.
 */
	ALIGN_TEXT
80:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	leaq	4(%rdi),%rdi
	leaq	4(%rsi),%rsi

/*
 * We have up to 4 bytes to inspect.
 *
 * Every caller guarantees one of them differs, so byte 3 is only loaded
 * when bytes 0..2 matched and therefore needs no compare of its own.
 */
1:
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	1(%rdi),%eax
	movzbl	1(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	2(%rdi),%eax
	movzbl	2(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	3(%rdi),%eax
	movzbl	3(%rsi),%r8d
2:
	subl	%r8d,%eax		/* unsigned-byte difference, b1 - b2 */
	ret
END(memcmp)

	.section .note.GNU-stack,"",%progbits
