1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build !plan9
6
7#include "go_asm.h"
8#include "textflag.h"
9
10// NOTE: Windows externalthreadhandler expects memclr to preserve DX.
11
// func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
//
// Writes n zero bytes starting at ptr.
//
// Frame: $0-8 (no locals; two 4-byte arguments, no results).
//
// Register roles inside this routine:
//	DI = address of the next byte to clear
//	BX = number of bytes still to clear
//	AX = 0 (source for the byte/word/long stores and for STOSL)
//	X0 = 0 (source for the 16-byte MOVOU stores; SSE2 path only)
//	CX = long-word count for REP STOSL (non-SSE2 path only)
// DX is never read or written here.
TEXT runtime·memclrNoHeapPointers(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), DI
	MOVL	n+4(FP), BX
	XORL	AX, AX

	// MOVOU seems always faster than REP STOSL.
tail:
	// Dispatch on the remaining length in BX. All comparisons are unsigned.
	// This label is re-entered after the 256-byte loop and after the
	// REP STOSL fallback to finish whatever is left (possibly nothing).
	// BSR+branch table make almost all memmove/memclr benchmarks worse. Not worth doing.
	TESTL	BX, BX
	JEQ	_0
	CMPL	BX, $2
	JBE	_1or2
	CMPL	BX, $4
	JB	_3
	JE	_4
	CMPL	BX, $8
	JBE	_5through8
	CMPL	BX, $16
	JBE	_9through16
	// More than 16 bytes: use 16-byte SSE2 stores if the CPU has SSE2,
	// otherwise fall back to REP STOSL.
	// The package path separator must be the division slash (U+2215),
	// which the assembler rewrites to '/', so this names internal/cpu.X86.
	CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
	JNE	nosse2
	PXOR	X0, X0
	CMPL	BX, $32
	JBE	_17through32
	CMPL	BX, $64
	JBE	_33through64
	CMPL	BX, $128
	JBE	_65through128
	CMPL	BX, $256
	JBE	_129through256

loop:
	// BX >= 256 here: clear one 256-byte chunk per iteration.
	MOVOU	X0, 0(DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, 64(DI)
	MOVOU	X0, 80(DI)
	MOVOU	X0, 96(DI)
	MOVOU	X0, 112(DI)
	MOVOU	X0, 128(DI)
	MOVOU	X0, 144(DI)
	MOVOU	X0, 160(DI)
	MOVOU	X0, 176(DI)
	MOVOU	X0, 192(DI)
	MOVOU	X0, 208(DI)
	MOVOU	X0, 224(DI)
	MOVOU	X0, 240(DI)
	SUBL	$256, BX
	ADDL	$256, DI
	CMPL	BX, $256
	JAE	loop
	// Fewer than 256 bytes remain (maybe zero); re-dispatch on the rest.
	JMP	tail

	// The fixed-size cases below store from the start of the region and
	// from its end (addressed as DI+BX-k). For lengths inside a case's
	// range the two groups of stores overlap, so every byte is covered
	// without a loop or a further branch.
_1or2:
	MOVB	AX, (DI)
	MOVB	AX, -1(DI)(BX*1)
	RET
_0:
	RET
_3:
	MOVW	AX, (DI)
	MOVB	AX, 2(DI)
	RET
_4:
	// We need a separate case for 4 to make sure we clear pointers atomically.
	MOVL	AX, (DI)
	RET
_5through8:
	MOVL	AX, (DI)
	MOVL	AX, -4(DI)(BX*1)
	RET
_9through16:
	MOVL	AX, (DI)
	MOVL	AX, 4(DI)
	MOVL	AX, -8(DI)(BX*1)
	MOVL	AX, -4(DI)(BX*1)
	RET
_17through32:
	MOVOU	X0, (DI)
	MOVOU	X0, -16(DI)(BX*1)
	RET
_33through64:
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET
_65through128:
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, -64(DI)(BX*1)
	MOVOU	X0, -48(DI)(BX*1)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET
_129through256:
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, 64(DI)
	MOVOU	X0, 80(DI)
	MOVOU	X0, 96(DI)
	MOVOU	X0, 112(DI)
	MOVOU	X0, -128(DI)(BX*1)
	MOVOU	X0, -112(DI)(BX*1)
	MOVOU	X0, -96(DI)(BX*1)
	MOVOU	X0, -80(DI)(BX*1)
	MOVOU	X0, -64(DI)(BX*1)
	MOVOU	X0, -48(DI)(BX*1)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET
nosse2:
	// No SSE2 and BX > 16: store BX/4 zero long words with REP STOSL.
	// STOSL writes AX at (DI) and steps DI by 4; this assumes the
	// direction flag is clear so DI moves forward.
	MOVL	BX, CX
	SHRL	$2, CX
	REP
	STOSL
	// 0..3 bytes may remain; ANDL sets ZF when there are none.
	ANDL	$3, BX
	JNE	tail
	RET
137