1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "go_asm.h"
6#include "textflag.h"
7
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes at a and the size bytes at b are identical.
// When a and b are the same pointer the answer is stored immediately;
// otherwise the arguments are moved into the registers memeqbody<> expects
// (SI = a, DI = b, BX = size, AX = address of the result byte) and control
// is transferred there with a JMP, so memeqbody<> returns to our caller.
TEXT runtime·memequal(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JNE	compare
	// Same address: the regions are trivially equal.
	MOVB	$1, ret+12(FP)
	RET
compare:
	MOVL	size+8(FP), BX
	LEAL	ret+12(FP), AX
	JMP	memeqbody<>(SB)
20
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Variant of memequal whose length is not passed as an argument: it is read
// from offset 4 of the closure addressed by DX. Identical pointers are
// answered immediately; otherwise SI = a, DI = b, BX = size and
// AX = address of the result byte are set up and control is transferred to
// memeqbody<> with a JMP, so memeqbody<> returns to our caller.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JNE	compare
	// Same address: the regions are trivially equal.
	MOVB	$1, ret+8(FP)
	RET
compare:
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX
	JMP	memeqbody<>(SB)
33
// memeqbody<> is the comparison core shared by memequal and memequal_varlen.
// Both reach it with a JMP, so the RETs below return to their caller.
//
// Inputs:
//	SI = a
//	DI = b
//	BX = count (number of bytes to compare)
//	AX = address of the result byte (1 is stored for equal, 0 otherwise)
//
// Clobbers BX, CX, DX, SI, DI and the flags, plus X0-X7 when the 64-byte
// loop runs. AX is left untouched.
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	tiny

	// 64 bytes per iteration using xmm registers.
loop64:
	CMPL	BX, $64
	JB	loop4
#ifdef GO386_softfloat
	// No SSE in softfloat builds: always use the 4-byte loop.
	JMP	loop4
#endif
	MOVOU	(SI), X0
	MOVOU	16(SI), X2
	MOVOU	32(SI), X4
	MOVOU	48(SI), X6
	MOVOU	(DI), X1
	MOVOU	16(DI), X3
	MOVOU	32(DI), X5
	MOVOU	48(DI), X7
	// Advance now; the flags these set are overwritten by the CMPL below.
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	// Byte-wise equality masks, ANDed together into X0.
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	// All 16 mask bits set means all 64 bytes matched.
	PMOVMSKB X0, DX
	CMPL	DX, $0xffff
	JEQ	loop64
differ:
	// Shared exit for a mismatch found in either loop.
	MOVB	$0, (AX)
	RET

	// 4 bytes per iteration using 32-bit registers.
loop4:
	CMPL	BX, $4
	JBE	tail
	MOVL	(SI), CX
	MOVL	(DI), DX
	SUBL	$4, BX
	ADDL	$4, SI
	ADDL	$4, DI
	CMPL	CX, DX
	JEQ	loop4
	JMP	differ

	// Remaining 0-4 bytes. This path is only reached when the original count
	// was at least 4, so the four bytes ending at SI+BX / DI+BX lie inside
	// the buffers; they may overlap bytes that were already compared.
tail:
	MOVL	-4(DI)(BX*1), DX
	MOVL	-4(SI)(BX*1), CX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

	// count < 4.
tiny:
	TESTL	BX, BX
	JEQ	done		// count == 0: ZF is set, so done stores 1

	// CX = -8*count. Shift instructions use only the low 5 bits of the
	// count, so shifting by CX shifts by 32-8*count bits.
	LEAL	0(BX*8), CX
	NEGL	CX

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	a_from_end

	// Low address byte is at most 0xfc: a 4-byte load at SI won't cross a
	// page boundary. Bytes past count are garbage and are shifted out later.
	MOVL	(SI), SI
	JMP	a_loaded
a_from_end:
	// Low address byte is 0xfd-0xff. Load the four bytes that end at the last
	// byte we want, then shift the wanted bytes down to the low positions.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
a_loaded:

	// Same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	b_from_end
	MOVL	(DI), DI
	JMP	b_loaded
b_from_end:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
b_loaded:

	// Keep only the low count bytes of the difference: the left shift drops
	// the rest and sets ZF exactly when those bytes are all equal.
	SUBL	SI, DI
	SHLL	CX, DI
done:
	SETEQ	(AX)
	RET
131