/* xref: /freebsd/sys/amd64/amd64/cpu_switch.S (revision 39beb93c) */
/*-
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <machine/asmacros.h>
#include <machine/specialreg.h>

#include "assym.s"
#include "opt_sched.h"

/*****************************************************************************/
/* Scheduling                                                                */
/*****************************************************************************/

	.text

/*
 * LK expands to a "lock" prefix on SMP kernels so the btrl/btsl updates
 * of pmap->pm_active below are atomic across CPUs; on UP kernels the
 * prefix is unnecessary and LK expands to nothing.
 */
#ifdef SMP
#define LK	lock ;
#else
#define LK
#endif

/*
 * SETLK stores the mtx into td_lock to release the old thread.  With
 * SCHED_ULE on SMP the release must be a serializing store (xchgq) so it
 * pairs with the blocked_lock spin loop at sw1; a plain movq suffices
 * otherwise.
 */
#if defined(SCHED_ULE) && defined(SMP)
#define	SETLK	xchgq
#else
#define	SETLK	movq
#endif

/*
 * cpu_throw(oldtd, newtd)
 *
 * This is the second half of cpu_switch(). It is used when the current
 * thread is either a dummy or slated to die, and we no longer care
 * about its state.  This is only a slight optimization and is probably
 * not worth it anymore.  Note that we need to clear the pm_active bits so
 * we do need the old proc if it still exists.
 * %rdi = oldtd (may be NULL; the per-CPU idle thread is used instead)
 * %rsi = newtd
 */
ENTRY(cpu_throw)
	testq	%rdi,%rdi			/* no old thread? */
	jnz	1f
	movq	PCPU(IDLETHREAD),%rdi		/* use idle thread as stand-in */
1:
	movq	TD_PCB(%rdi),%r8		/* Old pcb */
	movl	PCPU(CPUID), %eax
	movq	PCB_FSBASE(%r8),%r9		/* %r9/%r10 = old fs/gs bases, */
	movq	PCB_GSBASE(%r8),%r10		/* consumed by swact path */
	/* release bit from old pm_active */
	movq	TD_PROC(%rdi), %rdx		/* oldtd->td_proc */
	movq	P_VMSPACE(%rdx), %rdx		/* proc->p_vmspace */
	LK btrl	%eax, VM_PMAP+PM_ACTIVE(%rdx)	/* clear old */
	movq	TD_PCB(%rsi),%r8		/* newtd->td_pcb */
	movq	PCB_CR3(%r8),%rdx
	movq	%rdx,%cr3			/* new address space */
	jmp	swact				/* join cpu_switch's load path */
END(cpu_throw)

/*
 * cpu_switch(old, new, mtx)
 *
 * Save the current thread state, then select the next thread to run
 * and load its state.
 * %rdi = oldtd
 * %rsi = newtd
 * %rdx = mtx (stored into oldtd->td_lock to release the old thread)
 */
ENTRY(cpu_switch)
	/* Switch to new thread.  First, save context. */
	movq	TD_PCB(%rdi),%r8

	movq	(%rsp),%rax			/* Hardware registers */
	movq	%r15,PCB_R15(%r8)
	movq	%r14,PCB_R14(%r8)
	movq	%r13,PCB_R13(%r8)
	movq	%r12,PCB_R12(%r8)
	movq	%rbp,PCB_RBP(%r8)
	movq	%rsp,PCB_RSP(%r8)
	movq	%rbx,PCB_RBX(%r8)
	movq	%rax,PCB_RIP(%r8)		/* return address = resume point */

	/*
	 * Reread fs and gs bases. Explicit fs segment register load
	 * by the usermode code may change actual fs base without
	 * updating pcb_{fs,gs}base.
	 *
	 * %rdx still contains the mtx, save %rdx around rdmsr
	 * (rdmsr returns its result in %edx:%eax).
	 */
	movq	%rdx,%r11
	movl	$MSR_FSBASE,%ecx
	rdmsr
	shlq	$32,%rdx
	leaq	(%rax,%rdx),%r9			/* %r9 = current fs base */
	movl	$MSR_KGSBASE,%ecx
	rdmsr
	shlq	$32,%rdx
	leaq	(%rax,%rdx),%r10		/* %r10 = current user gs base */
	movq	%r11,%rdx			/* restore mtx */

	testl	$PCB_32BIT,PCB_FLAGS(%r8)
	jnz	store_seg
done_store_seg:

	testl	$PCB_DBREGS,PCB_FLAGS(%r8)
	jnz	store_dr			/* static predict not taken */
done_store_dr:

	/* have we used fp, and need a save? */
	cmpq	%rdi,PCPU(FPCURTHREAD)
	jne	1f
	addq	$PCB_SAVEFPU,%r8		/* %r8 = &pcb->pcb_savefpu */
	clts					/* allow fxsave despite CR0.TS */
	fxsave	(%r8)
	smsw	%ax				/* re-set CR0.TS so the next */
	orb	$CR0_TS,%al			/* FP use traps and reloads */
	lmsw	%ax
	xorl	%eax,%eax
	movq	%rax,PCPU(FPCURTHREAD)		/* no thread owns the FPU now */
1:

	/* Save is done.  Now fire up new thread. Leave old vmspace. */
	movq	TD_PCB(%rsi),%r8

	/* switch address space */
	movq	PCB_CR3(%r8),%rcx
	movq	%cr3,%rax
	cmpq	%rcx,%rax			/* Same address space? */
	jne	swinact
	SETLK	%rdx, TD_LOCK(%rdi)		/* Release the old thread */
	jmp	sw1
swinact:
	movq	%rcx,%cr3			/* new address space */
	movl	PCPU(CPUID), %eax
	/* Release bit from old pmap->pm_active */
	movq	TD_PROC(%rdi), %rcx		/* oldproc */
	movq	P_VMSPACE(%rcx), %rcx
	LK btrl	%eax, VM_PMAP+PM_ACTIVE(%rcx)	/* clear old */
	SETLK	%rdx, TD_LOCK(%rdi)		/* Release the old thread */
swact:
	/* Set bit in new pmap->pm_active */
	movq	TD_PROC(%rsi),%rdx		/* newproc */
	movq	P_VMSPACE(%rdx), %rdx
	LK btsl	%eax, VM_PMAP+PM_ACTIVE(%rdx)	/* set new */

sw1:
#if defined(SCHED_ULE) && defined(SMP)
	/* Wait for the new thread to become unblocked */
	movq	$blocked_lock, %rdx
1:
	movq	TD_LOCK(%rsi),%rcx
	cmpq	%rcx, %rdx
	pause					/* spin-wait hint; no flag change */
	je	1b
#endif
	/*
	 * At this point, we've switched address spaces and are ready
	 * to load up the rest of the next context.
	 */

	/* Skip loading user fsbase/gsbase for kthreads */
	testl	$TDP_KTHREAD,TD_PFLAGS(%rsi)
	jnz	do_kthread

	testl	$PCB_32BIT,PCB_FLAGS(%r8)
	jnz	load_seg
done_load_seg:

	cmpq	PCB_FSBASE(%r8),%r9		/* skip wrmsr if base unchanged */
	jz	1f
	/* Restore userland %fs */
restore_fsbase:
	movl	$MSR_FSBASE,%ecx
	movl	PCB_FSBASE(%r8),%eax		/* wrmsr takes %edx:%eax */
	movl	PCB_FSBASE+4(%r8),%edx
	wrmsr
1:
	cmpq	PCB_GSBASE(%r8),%r10		/* skip wrmsr if base unchanged */
	jz	2f
	/* Restore userland %gs */
	movl	$MSR_KGSBASE,%ecx
	movl	PCB_GSBASE(%r8),%eax
	movl	PCB_GSBASE+4(%r8),%edx
	wrmsr
2:

do_tss:
	/* Update the TSS_RSP0 pointer for the next interrupt */
	movq	PCPU(TSSP), %rax
	movq	%r8, PCPU(RSP0)			/* new pcb doubles as ring-0 sp */
	movq	%r8, PCPU(CURPCB)
	addq	$COMMON_TSS_RSP0, %rax
	movq	%rsi, PCPU(CURTHREAD)		/* into next thread */
	movq	%r8, (%rax)

	/* Test if debug registers should be restored. */
	testl	$PCB_DBREGS,PCB_FLAGS(%r8)
	jnz	load_dr				/* static predict not taken */
done_load_dr:

	/* Restore context. */
	movq	PCB_R15(%r8),%r15
	movq	PCB_R14(%r8),%r14
	movq	PCB_R13(%r8),%r13
	movq	PCB_R12(%r8),%r12
	movq	PCB_RBP(%r8),%rbp
	movq	PCB_RSP(%r8),%rsp
	movq	PCB_RBX(%r8),%rbx
	movq	PCB_RIP(%r8),%rax
	movq	%rax,(%rsp)			/* resume at saved pcb_rip */
	ret

	/*
	 * We order these strangely for several reasons.
	 * 1: I wanted to use static branch prediction hints
	 * 2: Most athlon64/opteron cpus don't have them.  They define
	 *    a forward branch as 'predict not taken'.  Intel cores have
	 *    the 'rep' prefix to invert this.
	 * So, to make it work on both forms of cpu we do the detour.
	 * We use jumps rather than call in order to avoid the stack.
	 */

do_kthread:
	/*
	 * Copy old fs/gsbase to new kthread pcb for future switches
	 * This maintains curpcb->pcb_[fg]sbase as caches of the MSR
	 */
	movq	%r9,PCB_FSBASE(%r8)
	movq	%r10,PCB_GSBASE(%r8)
	jmp	do_tss

store_seg:
	mov	%gs,PCB_GS(%r8)
	testl	$PCB_GS32BIT,PCB_FLAGS(%r8)
	jnz	2f
1:	mov	%ds,PCB_DS(%r8)
	mov	%es,PCB_ES(%r8)
	mov	%fs,PCB_FS(%r8)
	jmp	done_store_seg
	/* 32-bit %gs in use: also save its descriptor from the GDT slot */
2:	movq	PCPU(GS32P),%rax
	movq	(%rax),%rax
	movq	%rax,PCB_GS32SD(%r8)
	jmp	1b

load_seg:
	testl	$PCB_GS32BIT,PCB_FLAGS(%r8)
	jnz	2f
	/*
	 * Loading %gs clobbers MSR_GSBASE; bracket it with rdmsr/wrmsr
	 * to preserve the kernel gsbase across the selector load.
	 */
1:	movl	$MSR_GSBASE,%ecx
	rdmsr
	mov	PCB_GS(%r8),%gs
	wrmsr
	mov	PCB_DS(%r8),%ds
	mov	PCB_ES(%r8),%es
	mov	PCB_FS(%r8),%fs
	jmp	restore_fsbase
	/* Restore userland %gs while preserving kernel gsbase */
2:	movq	PCPU(GS32P),%rax
	movq	PCB_GS32SD(%r8),%rcx
	movq	%rcx,(%rax)
	jmp	1b

store_dr:
	movq	%dr7,%rax			/* yes, do the save */
	movq	%dr0,%r15
	movq	%dr1,%r14
	movq	%dr2,%r13
	movq	%dr3,%r12
	movq	%dr6,%r11
	andq	$0x0000fc00, %rax		/* disable all watchpoints */
	movq	%r15,PCB_DR0(%r8)
	movq	%r14,PCB_DR1(%r8)
	movq	%r13,PCB_DR2(%r8)
	movq	%r12,PCB_DR3(%r8)
	movq	%r11,PCB_DR6(%r8)
	movq	%rax,PCB_DR7(%r8)
	movq	%rax,%dr7			/* watchpoints off while switched */
	jmp	done_store_dr

load_dr:
	movq	%dr7,%rax
	movq	PCB_DR0(%r8),%r15
	movq	PCB_DR1(%r8),%r14
	movq	PCB_DR2(%r8),%r13
	movq	PCB_DR3(%r8),%r12
	movq	PCB_DR6(%r8),%r11
	movq	PCB_DR7(%r8),%rcx
	movq	%r15,%dr0
	movq	%r14,%dr1
	/* Preserve reserved bits in %dr7 */
	andq	$0x0000fc00,%rax		/* keep current reserved bits */
	andq	$~0x0000fc00,%rcx		/* take the rest from the pcb */
	movq	%r13,%dr2
	movq	%r12,%dr3
	orq	%rcx,%rax
	movq	%r11,%dr6
	movq	%rax,%dr7
	jmp	done_load_dr

END(cpu_switch)

/*
 * savectx(pcb)
 * Update pcb, saving current processor state.
 * %rdi = pcb to fill in; the callee-saved register set, %rsp, %cr3 and
 * the caller's return address (as pcb_rip) are recorded, plus the FPU
 * state when some thread currently owns the FPU.
 */
ENTRY(savectx)
	/* Fetch PCB. */
	movq	%rdi,%rcx			/* free %rdi for the bcopy call */

	/* Save caller's return address. */
	movq	(%rsp),%rax
	movq	%rax,PCB_RIP(%rcx)

	movq	%cr3,%rax
	movq	%rax,PCB_CR3(%rcx)

	movq	%rbx,PCB_RBX(%rcx)
	movq	%rsp,PCB_RSP(%rcx)
	movq	%rbp,PCB_RBP(%rcx)
	movq	%r12,PCB_R12(%rcx)
	movq	%r13,PCB_R13(%rcx)
	movq	%r14,PCB_R14(%rcx)
	movq	%r15,PCB_R15(%rcx)

	/*
	 * If fpcurthread == NULL, then the fpu h/w state is irrelevant and the
	 * state had better already be in the pcb.  This is true for forks
	 * but not for dumps (the old book-keeping with FP flags in the pcb
	 * always lost for dumps because the dump pcb has 0 flags).
	 *
	 * If fpcurthread != NULL, then we have to save the fpu h/w state to
	 * fpcurthread's pcb and copy it to the requested pcb, or save to the
	 * requested pcb and reload.  Copying is easier because we would
	 * have to handle h/w bugs for reloading.  We used to lose the
	 * parent's fpu state for forks by forgetting to reload.
	 */
	pushfq					/* save IF, then disable */
	cli					/* interrupts around the FP save */
	movq	PCPU(FPCURTHREAD),%rax
	testq	%rax,%rax
	je	1f				/* nobody owns the FPU: done */

	movq	TD_PCB(%rax),%rdi
	leaq	PCB_SAVEFPU(%rdi),%rdi
	clts					/* allow fxsave despite CR0.TS */
	fxsave	(%rdi)
	smsw	%ax				/* re-set CR0.TS afterwards */
	orb	$CR0_TS,%al
	lmsw	%ax

	movq	$PCB_SAVEFPU_SIZE,%rdx	/* arg 3 */
	leaq	PCB_SAVEFPU(%rcx),%rsi	/* arg 2 */
	/* arg 1 (%rdi) already loaded */
	call	bcopy				/* copy owner's FP area into pcb */
1:
	popfq					/* restore interrupt state */

	ret
END(savectx)