/*	$NetBSD: lock_stubs.S,v 1.26 2016/04/11 14:14:27 bouyer Exp $	*/

/*-
 * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * AMD64 lock stubs.  Calling convention:
 *
 * %rdi		arg 1
 * %rsi		arg 2
 * %rdx		arg 3
 * %rax		return value
 */

#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"

#include <machine/asm.h>
#include <machine/frameasm.h>

#include "assym.h"

#define	ENDLABEL(name,a) .align	a; LABEL(name)
#define	LOCK(num)	.Lpatch ## num: lock
#define	RET(num)	.Lret ## num: ret; nop; nop; ret
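
/*
 * LOCK() and RET() expand to labelled patch points (.LpatchN, .LretN).
 * The labels are collected in the x86_lockpatch and x86_retpatch tables
 * at the end of this file so the instructions at those points can be
 * rewritten at boot time when ncpu == 1 (see "Patchpoints" below).
 */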

#ifndef LOCKDEBUG

/*
 * void mutex_enter(kmutex_t *mtx);
 *
 * Acquire a mutex and post a load fence.
 */
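/*
 * A rough C sketch of the fast path below, assuming an atomic_cas_ptr()
 * style compare-and-swap and treating the owner word as the first field
 * of kmutex_t (called mtx_owner here; names simplified).  The contended
 * case is handled by mutex_vector_enter():
 *
 *	void
 *	mutex_enter(kmutex_t *mtx)
 *	{
 *		// A free adaptive mutex has a NULL owner: try to install
 *		// curlwp as the owner with one locked cmpxchg.
 *		if (atomic_cas_ptr(&mtx->mtx_owner, NULL, curlwp) != NULL)
 *			mutex_vector_enter(mtx);
 *	}
 */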
	.align	64

ENTRY(mutex_enter)
	movq	CPUVAR(CURLWP), %rcx
	xorq	%rax, %rax
	LOCK(1)
	cmpxchgq %rcx, (%rdi)
	jnz	1f
	RET(1)
1:
	jmp	_C_LABEL(mutex_vector_enter)
END(mutex_enter)

/*
 * void mutex_exit(kmutex_t *mtx);
 *
 * Release a mutex and post a store fence.
 *
 * See comments in mutex_vector_enter() about doing this operation unlocked
 * on multiprocessor systems, and comments in arch/x86/include/lock.h about
 * memory ordering on Intel x86 systems.
 */
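/*
 * Roughly, in C (same simplifications as the mutex_enter() sketch);
 * note that the cmpxchg below is deliberately not LOCK-prefixed, per
 * the comments referenced above:
 *
 *	void
 *	mutex_exit(kmutex_t *mtx)
 *	{
 *		// Fast path: curlwp owns the mutex and nobody waits, so
 *		// swap the owner word back to NULL.
 *		if (atomic_cas_ptr(&mtx->mtx_owner, curlwp, NULL) != curlwp)
 *			mutex_vector_exit(mtx);
 *	}
 */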
ENTRY(mutex_exit)
	movq	CPUVAR(CURLWP), %rax
	xorq	%rdx, %rdx
	cmpxchgq %rdx, (%rdi)
	jnz	1f
	ret
1:
	jmp	_C_LABEL(mutex_vector_exit)
END(mutex_exit)

/*
 * void mutex_spin_enter(kmutex_t *mtx);
 *
 * Acquire a spin mutex and post a load fence.
 */
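/*
 * Approximately, in C (field names and SPL types simplified; compare
 * MUTEX_SPIN_SPLRAISE() in kern_mutex.c).  The contended case is
 * handled by mutex_spin_retry():
 *
 *	void
 *	mutex_spin_enter(kmutex_t *mtx)
 *	{
 *		int s = splraiseipl(mtx->mtx_ipl);
 *		// ci_mtx_count counts down; remember the SPL to restore
 *		// when the outermost spin mutex is released.
 *		if (curcpu()->ci_mtx_count-- == 0)
 *			curcpu()->ci_mtx_oldspl = s;
 *		if (!__cpu_simple_lock_try(&mtx->mtx_lock))
 *			mutex_spin_retry(mtx);
 *	}
 */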
ENTRY(mutex_spin_enter)
	movl	$1, %eax
	movl	CPUVAR(ILEVEL), %esi
	movzbl	MTX_IPL(%rdi), %ecx		/* new SPL */
	cmpl	%ecx, %esi			/* higher? */
	cmovgl	%esi, %ecx
	movl	%ecx, CPUVAR(ILEVEL)		/* splraiseipl() */
	subl	%eax, CPUVAR(MTX_COUNT)		/* decl doesn't set CF */
	cmovncl	CPUVAR(MTX_OLDSPL), %esi
	movl	%esi, CPUVAR(MTX_OLDSPL)
	xchgb	%al, MTX_LOCK(%rdi)		/* lock */
#ifdef MULTIPROCESSOR	/* XXX for xen */
	testb	%al, %al
	jnz	1f
#endif
	RET(2)
1:
	jmp	_C_LABEL(mutex_spin_retry)	/* failed; hard case */
END(mutex_spin_enter)

/*
 * void mutex_spin_exit(kmutex_t *mtx);
 *
 * Release a spin mutex and post a store fence.
 */
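/*
 * Approximately, in C (compare MUTEX_SPIN_SPLRESTORE() in kern_mutex.c;
 * names simplified).  The DIAGNOSTIC variant below additionally checks
 * that the lock byte was actually held before releasing it:
 *
 *	void
 *	mutex_spin_exit(kmutex_t *mtx)
 *	{
 *		int s = curcpu()->ci_mtx_oldspl;
 *		__cpu_simple_unlock(&mtx->mtx_lock);
 *		// Only releasing the outermost spin mutex held by this
 *		// CPU lowers the SPL again (inlined splx() below).
 *		if (++curcpu()->ci_mtx_count == 0)
 *			splx(s);
 *	}
 */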
ENTRY(mutex_spin_exit)
#ifdef DIAGNOSTIC

	movl	$0x0001, %eax			/* new + expected value */
	movq	CPUVAR(SELF), %r8
	cmpxchgb %ah, MTX_LOCK(%rdi)		/* unlock */
	jnz	_C_LABEL(mutex_vector_exit)	/* hard case if problems */
	movl	CPU_INFO_MTX_OLDSPL(%r8), %edi
	incl	CPU_INFO_MTX_COUNT(%r8)
	jnz	1f
	cmpl	CPU_INFO_ILEVEL(%r8), %edi
	jae	1f
	movl	CPU_INFO_IUNMASK(%r8,%rdi,4), %esi
	CLI(ax)
	testl	CPU_INFO_IPENDING(%r8), %esi
	jnz	_C_LABEL(Xspllower)
	movl	%edi, CPU_INFO_ILEVEL(%r8)
	STI(ax)
1:	rep					/* double byte ret as branch */
	ret					/* target: see AMD docs */

#else	/* DIAGNOSTIC */

	movq	CPUVAR(SELF), %rsi
	movb	$0x00, MTX_LOCK(%rdi)
	movl	CPU_INFO_MTX_OLDSPL(%rsi), %ecx
	incl	CPU_INFO_MTX_COUNT(%rsi)
	movl	CPU_INFO_ILEVEL(%rsi),%edx
	cmovnzl	%edx,%ecx
	pushq	%rbx
	cmpl	%edx,%ecx			/* new level is lower? */
	jae	2f
1:
	movl	CPU_INFO_IPENDING(%rsi),%eax
	testl	%eax,CPU_INFO_IUNMASK(%rsi,%rcx,4)/* deferred interrupts? */
	jnz	3f
	movl	%eax,%ebx
	cmpxchg8b CPU_INFO_ISTATE(%rsi)		/* swap in new ilevel */
	jnz	4f
2:
	popq	%rbx
	ret
3:
	popq	%rbx
	movl	%ecx, %edi
	jmp	_C_LABEL(Xspllower)
4:
	jmp	1b

#endif	/* DIAGNOSTIC */

END(mutex_spin_exit)

/*
 * void	rw_enter(krwlock_t *rwl, krw_t op);
 *
 * Acquire one hold on a RW lock.
 */
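/*
 * A rough C sketch of the fast paths below, assuming atomic_cas_ulong()
 * style compare-and-swap and the owner word as the first field of
 * krwlock_t (called rw_owner here; names and casts simplified).
 * Contention goes to rw_vector_enter():
 *
 *	void
 *	rw_enter(krwlock_t *rwl, krw_t op)
 *	{
 *		uintptr_t owner = rwl->rw_owner, prev;
 *
 *		if (op == RW_READER) {
 *			// Reader: bump the hold count unless a writer
 *			// holds or wants the lock; retry on CAS failure.
 *			while ((owner & (RW_WRITE_LOCKED|RW_WRITE_WANTED)) == 0) {
 *				prev = atomic_cas_ulong(&rwl->rw_owner,
 *				    owner, owner + RW_READ_INCR);
 *				if (prev == owner)
 *					return;
 *				owner = prev;
 *			}
 *		} else {
 *			// Writer: one CAS of 0 -> curlwp|RW_WRITE_LOCKED,
 *			// no retry on failure.
 *			if (atomic_cas_ulong(&rwl->rw_owner, 0,
 *			    (uintptr_t)curlwp | RW_WRITE_LOCKED) == 0)
 *				return;
 *		}
 *		rw_vector_enter(rwl, op);
 *	}
 */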
ENTRY(rw_enter)
	cmpl	$RW_READER, %esi
	jne	2f

	/*
	 * Reader: this is the most common case.
	 */
	movq	(%rdi), %rax
0:
	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
	jnz	3f
	leaq	RW_READ_INCR(%rax), %rdx
	LOCK(2)
	cmpxchgq %rdx, (%rdi)
	jnz	1f
	RET(3)
1:
	jmp	0b

	/*
	 * Writer: if the compare-and-set fails, don't bother retrying.
	 */
2:	movq	CPUVAR(CURLWP), %rcx
	xorq	%rax, %rax
	orq	$RW_WRITE_LOCKED, %rcx
	LOCK(3)
	cmpxchgq %rcx, (%rdi)
	jnz	3f
	RET(4)
3:
	jmp	_C_LABEL(rw_vector_enter)
END(rw_enter)

/*
 * void	rw_exit(krwlock_t *rwl);
 *
 * Release one hold on a RW lock.
 */
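/*
 * Roughly, in C (same simplifications as the rw_enter() sketch);
 * waiters and error cases go to rw_vector_exit():
 *
 *	void
 *	rw_exit(krwlock_t *rwl)
 *	{
 *		uintptr_t owner = rwl->rw_owner, prev;
 *
 *		if ((owner & RW_WRITE_LOCKED) == 0) {
 *			// Reader: drop one hold, unless there are waiters
 *			// to wake or the hold count is already zero.
 *			while ((owner & RW_HAS_WAITERS) == 0 &&
 *			    owner >= RW_READ_INCR) {
 *				prev = atomic_cas_ulong(&rwl->rw_owner,
 *				    owner, owner - RW_READ_INCR);
 *				if (prev == owner)
 *					return;
 *				owner = prev;
 *			}
 *		} else {
 *			// Writer: must be owned by curlwp with no waiters;
 *			// clear the owner word in one CAS, no retry.
 *			if (owner == ((uintptr_t)curlwp | RW_WRITE_LOCKED) &&
 *			    atomic_cas_ulong(&rwl->rw_owner, owner, 0) == owner)
 *				return;
 *		}
 *		rw_vector_exit(rwl);
 *	}
 */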
ENTRY(rw_exit)
	movq	(%rdi), %rax
	testb	$RW_WRITE_LOCKED, %al
	jnz	2f

	/*
	 * Reader
	 */
0:	testb	$RW_HAS_WAITERS, %al
	jnz	3f
	cmpq	$RW_READ_INCR, %rax
	jb	3f
	leaq	-RW_READ_INCR(%rax), %rdx
	LOCK(4)
	cmpxchgq %rdx, (%rdi)
	jnz	1f
	ret
1:
	jmp	0b

	/*
	 * Writer
	 */
2:	leaq	-RW_WRITE_LOCKED(%rax), %rdx
	subq	CPUVAR(CURLWP), %rdx
	jnz	3f
	LOCK(5)
	cmpxchgq %rdx, (%rdi)
	jnz	3f
	ret

3:	jmp	_C_LABEL(rw_vector_exit)
END(rw_exit)

/*
 * int	rw_tryenter(krwlock_t *rwl, krw_t op);
 *
 * Try to acquire one hold on a RW lock.
 */
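/*
 * Roughly, in C (same simplifications as above): the rw_enter() fast
 * paths, but returning 0 on failure instead of falling back to
 * rw_vector_enter():
 *
 *	int
 *	rw_tryenter(krwlock_t *rwl, krw_t op)
 *	{
 *		uintptr_t owner = rwl->rw_owner, prev;
 *
 *		if (op == RW_READER) {
 *			while ((owner & (RW_WRITE_LOCKED|RW_WRITE_WANTED)) == 0) {
 *				prev = atomic_cas_ulong(&rwl->rw_owner,
 *				    owner, owner + RW_READ_INCR);
 *				if (prev == owner)
 *					return 1;	// got a read hold
 *				owner = prev;
 *			}
 *			return 0;
 *		}
 *		// Writer: a single CAS attempt, no retry.
 *		return atomic_cas_ulong(&rwl->rw_owner, 0,
 *		    (uintptr_t)curlwp | RW_WRITE_LOCKED) == 0;
 *	}
 */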
ENTRY(rw_tryenter)
	cmpl	$RW_READER, %esi
	jne	2f

	/*
	 * Reader: this is the most common case.
	 */
	movq	(%rdi), %rax
0:
	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
	jnz	4f
	leaq	RW_READ_INCR(%rax), %rdx
	LOCK(8)
	cmpxchgq %rdx, (%rdi)
	jnz	1f
	movl	%edx, %eax			/* nonzero */
	RET(5)
1:
	jmp	0b

	/*
	 * Writer: if the compare-and-set fails, don't bother retrying.
	 */
2:	movq	CPUVAR(CURLWP), %rcx
	xorq	%rax, %rax
	orq	$RW_WRITE_LOCKED, %rcx
	LOCK(9)
	cmpxchgq %rcx, (%rdi)
	movl	$0, %eax
	setz	%al
3:
	RET(6)
	ret
4:
	xorl	%eax, %eax
	jmp	3b
END(rw_tryenter)

#endif	/* LOCKDEBUG */

/*
 * Spinlocks.
 */
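/*
 * A rough C sketch of the spinlock stubs below; cas8() is a hypothetical
 * byte-wide compare-and-swap returning the previous value, standing in
 * for the LOCK-prefixed cmpxchgb (the byte holds 0 when free, 0x01 when
 * held):
 *
 *	int
 *	__cpu_simple_lock_try(__cpu_simple_lock_t *l)
 *	{
 *		// One locked CAS of 0 -> 0x01; nonzero return on success.
 *		return cas8(l, 0, 0x01) == 0;
 *	}
 *
 *	void
 *	__cpu_simple_lock(__cpu_simple_lock_t *l)
 *	{
 *		while (!__cpu_simple_lock_try(l)) {
 *			// Spin with plain reads (the stub adds a PAUSE
 *			// hint) until the byte looks free, then retry.
 *			while (*l != 0)
 *				continue;
 *		}
 *	}
 *
 *	void
 *	__cpu_simple_unlock(__cpu_simple_lock_t *l)
 *	{
 *		*l = 0;		// plain byte store releases the lock
 *	}
 */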
ENTRY(__cpu_simple_lock_init)
	movb	$0, (%rdi)
	ret
END(__cpu_simple_lock_init)

NENTRY(__cpu_simple_lock)
	movl	$0x0100, %eax
1:
	LOCK(6)
	cmpxchgb %ah, (%rdi)
	jnz	2f
	RET(7)
2:
	movl	$0x0100, %eax
	pause
	nop
	nop
	cmpb	$0, (%rdi)
	je	1b
	jmp	2b
END(__cpu_simple_lock)

NENTRY(__cpu_simple_unlock)
	movb	$0, (%rdi)
	ret
END(__cpu_simple_unlock)

ENTRY(__cpu_simple_lock_try)
	movl	$0x0100, %eax
	LOCK(7)
	cmpxchgb %ah, (%rdi)
	movl	$0, %eax
	setz	%al
	RET(8)
END(__cpu_simple_lock_try)

/*
 * Patchpoints to replace with NOP when ncpu == 1.
 */
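/*
 * Each table is a zero-terminated list of patch addresses: x86_lockpatch
 * lists the LOCK() prefixes and x86_retpatch the RET() return points
 * defined above, for use by the boot-time patching code (see x86_patch()
 * in arch/x86/x86/patch.c).
 */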
#ifndef LOCKDEBUG
	.type	_C_LABEL(x86_lockpatch), @object
LABEL(x86_lockpatch)
	.quad	.Lpatch1, .Lpatch2, .Lpatch3, .Lpatch4
	.quad	.Lpatch5, .Lpatch6, .Lpatch7, .Lpatch8
	.quad	.Lpatch9
	.quad	0
END(x86_lockpatch)
#endif

	.type	_C_LABEL(x86_retpatch), @object
LABEL(x86_retpatch)
#ifndef LOCKDEBUG
	.quad	.Lret1, .Lret2, .Lret3, .Lret4, .Lret5, .Lret6
#endif
	.quad	.Lret7, .Lret8
	.quad	0
END(x86_retpatch)