1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "go_asm.h"
6#include "textflag.h"
7
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Register-ABI entry point. Arguments arrive as:
//   AX = a
//   BX = b
//   CX = size
// memeqbody expects a in SI, b in DI and the byte count in BX, so the
// arguments are shuffled into place before tail-jumping to it.
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
	CMPQ	AX, BX
	JEQ	same_ptr	// identical pointers: nothing to compare
	MOVQ	BX, DI		// b; must be copied out before BX is overwritten
	MOVQ	CX, BX		// size
	MOVQ	AX, SI		// a
	JMP	memeqbody<>(SB)
same_ptr:
	MOVQ	$1, AX		// return true
	RET
22
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Register-ABI entry point. Arguments arrive as:
//   AX    = a
//   BX    = b
//   8(DX) = size, read from the closure context that DX points to
// memeqbody expects a in SI, b in DI and the byte count in BX, so the
// arguments are shuffled into place before tail-jumping to it.
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
	CMPQ	AX, BX
	JEQ	same_ptr	// identical pointers: nothing to compare
	MOVQ	BX, DI		// b; must be copied out before BX is overwritten
	MOVQ	8(DX), BX	// compiler stores size at offset 8 in the closure
	MOVQ	AX, SI		// a
	JMP	memeqbody<>(SB)
same_ptr:
	MOVQ	$1, AX		// return true
	RET
37
// memeqbody compares two byte ranges of equal length for equality.
//
// Input:
//   a in SI
//   b in DI
//   count in BX
// Output:
//   result in the low byte of AX (1 if all count bytes match, 0 otherwise)
// Overwrites:
//   BX, CX, DX, SI, DI, flags, and X0-X7 (SSE path) or Y0-Y6 (AVX2 path).
//
// Strategy, chosen by count:
//   count <  8 : "small"   - one (possibly shifted) 8-byte load per side
//   count < 64 : "bigloop" - 8 bytes per iteration, then an overlapping tail
//   otherwise  : "hugeloop"/"hugeloop_avx2" - 64 bytes per iteration, then
//                fall into bigloop for whatever is left.
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	CMPQ	BX, $8
	JB	small
	CMPQ	BX, $64
	JB	bigloop
	// Pick the 64-byte loop based on the runtime AVX2 feature flag.
	// The package path separator in the symbol is U+2215 (∕), not '/'.
	CMPB	internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
	JE	hugeloop_avx2

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	// Byte-wise compare each 16-byte pair; equal bytes become 0xff.
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	// AND the four results together so a mismatch anywhere clears a byte in X0.
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX		// DX = one bit per byte of X0
	// Advance before testing DX: ADDQ/SUBQ overwrite the flags.
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff	// all 16 mask bits set => all 64 bytes matched
	JEQ	hugeloop
	XORQ	AX, AX	// return 0
	RET

	// 64 bytes at a time using ymm registers
hugeloop_avx2:
	CMPQ	BX, $64
	JB	bigloop_avx2
	VMOVDQU	(SI), Y0
	VMOVDQU	(DI), Y1
	VMOVDQU	32(SI), Y2
	VMOVDQU	32(DI), Y3
	VPCMPEQB	Y1, Y0, Y4	// first 32 bytes: 0xff where equal
	VPCMPEQB	Y2, Y3, Y5	// second 32 bytes: 0xff where equal
	VPAND	Y4, Y5, Y6
	VPMOVMSKB Y6, DX		// DX = one bit per byte of Y6
	// Advance before testing DX: ADDQ/SUBQ overwrite the flags.
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffffffff	// all 32 mask bits set => all 64 bytes matched
	JEQ	hugeloop_avx2
	VZEROUPPER		// clear upper ymm state before leaving AVX code
	XORQ	AX, AX	// return 0
	RET

bigloop_avx2:
	VZEROUPPER		// clear upper ymm state before the scalar tail

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover	// 8 or fewer bytes remain: finish with one tail load
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	XORQ	AX, AX	// return 0
	RET

	// remaining 0-8 bytes
	// Compare the last 8 bytes of each range. When fewer than 8 bytes
	// remain, this load reaches back over bytes that were already compared
	// (and found equal). That is safe because every path to this label
	// started with count >= 8.
leftover:
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	-8(DI)(BX*1), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

	// fewer than 8 bytes in total
small:
	// For count == 0 the flags from this compare (ZF set) are what the
	// SETEQ at "equal" consumes, so nothing between here and the jump may
	// disturb them.
	CMPQ	BX, $0
	JEQ	equal

	// CX = -8*count. Shifts use only the low 6 bits of the count, so this
	// acts as a shift by 64 - 8*count bits (8..56 for count 1..7).
	LEAQ	0(BX*8), CX
	NEGQ	CX

	// Only the low byte of the address is examined: if it is at most 0xf8,
	// an 8-byte load starting there stays within the same 256-byte-aligned
	// region.
	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	MOVQ	-8(SI)(BX*1), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(BX*1), DI
	SHRQ	CX, DI
di_finish:

	// The low count bytes of SI and DI now hold the data; higher bytes are
	// unrelated memory. The low bytes of the difference are zero exactly
	// when the low bytes of the operands match, and the left shift discards
	// the unrelated high bytes, leaving ZF set iff the ranges are equal.
	SUBQ	SI, DI
	SHLQ	CX, DI
equal:
	SETEQ	AX
	RET
160