1/*- 2 * Copyright (c) 2003 Peter Wemm. 3 * Copyright (c) 1990 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
 *
 * $FreeBSD$
 */

/*
 * amd64 low-level context switch (AT&T syntax).  Thread/proc/pcb field
 * offsets (TD_*, P_*, PCB_*) and PCPU accessors come from assym.s.
 */
#include <machine/asmacros.h>
#include <machine/specialreg.h>

#include "assym.s"
#include "opt_sched.h"

/*****************************************************************************/
/* Scheduling                                                                */
/*****************************************************************************/

	.text

#ifdef SMP
#define LK	lock ;			/* lock prefix needed for MP-safe bit ops */
#else
#define LK
#endif

#if defined(SCHED_ULE) && defined(SMP)
#define	SETLK	xchgq			/* serializing store: release visible to spinners */
#else
#define	SETLK	movq
#endif

/*
 * cpu_throw()
 *
 * This is the second half of cpu_switch(). It is used when the current
 * thread is either a dummy or slated to die, and we no longer care
 * about its state.  This is only a slight optimization and is probably
 * not worth it anymore.  Note that we need to clear the pm_active bits so
 * we do need the old proc if it still exists.
 * %rdi = oldtd
 * %rsi = newtd
 */
ENTRY(cpu_throw)
	testq	%rdi,%rdi		/* no old thread? */
	jnz	1f
	movq	PCPU(IDLETHREAD),%rdi	/* use the idle thread's state instead */
1:
	movq	TD_PCB(%rdi),%r8	/* Old pcb */
	movl	PCPU(CPUID), %eax
	/* %r9/%r10 carry the saved user fs/gs bases into swact/do_kthread */
	movq	PCB_FSBASE(%r8),%r9
	movq	PCB_GSBASE(%r8),%r10
	/* release bit from old pm_active */
	movq	TD_PROC(%rdi), %rdx	/* oldtd->td_proc */
	movq	P_VMSPACE(%rdx), %rdx	/* proc->p_vmspace */
	LK btrl	%eax, VM_PMAP+PM_ACTIVE(%rdx)	/* clear old */
	movq	TD_PCB(%rsi),%r8	/* newtd->td_pcb */
	movq	PCB_CR3(%r8),%rdx
	movq	%rdx,%cr3		/* new address space */
	jmp	swact			/* join cpu_switch() at "set new pm_active" */
END(cpu_throw)

/*
 * cpu_switch(old, new, mtx)
 *
 * Save the current thread state, then select the next thread to run
 * and load its state.
 * %rdi = oldtd
 * %rsi = newtd
 * %rdx = mtx (stored into oldtd->td_lock to release it)
 */
ENTRY(cpu_switch)
	/* Switch to new thread.  First, save context. */
	movq	TD_PCB(%rdi),%r8

	movq	(%rsp),%rax		/* Hardware registers: return address */
	/* Only callee-saved registers need saving; caller-saved are dead here. */
	movq	%r15,PCB_R15(%r8)
	movq	%r14,PCB_R14(%r8)
	movq	%r13,PCB_R13(%r8)
	movq	%r12,PCB_R12(%r8)
	movq	%rbp,PCB_RBP(%r8)
	movq	%rsp,PCB_RSP(%r8)
	movq	%rbx,PCB_RBX(%r8)
	movq	%rax,PCB_RIP(%r8)

	/*
	 * Reread fs and gs bases. Explicit fs segment register load
	 * by the usermode code may change actual fs base without
	 * updating pcb_{fs,gs}base.
	 *
	 * %rdx still contains the mtx, save %rdx around rdmsr
	 * (rdmsr returns its result in %edx:%eax).
	 */
	movq	%rdx,%r11
	movl	$MSR_FSBASE,%ecx
	rdmsr
	shlq	$32,%rdx
	leaq	(%rax,%rdx),%r9		/* %r9 = live user fsbase */
	movl	$MSR_KGSBASE,%ecx	/* KGSBASE: user gsbase while in kernel */
	rdmsr
	shlq	$32,%rdx
	leaq	(%rax,%rdx),%r10	/* %r10 = live user gsbase */
	movq	%r11,%rdx		/* restore mtx */

	testl	$PCB_32BIT,PCB_FLAGS(%r8)
	jnz	store_seg		/* 32-bit process: save segment registers too */
done_store_seg:

	testl	$PCB_DBREGS,PCB_FLAGS(%r8)
	jnz	store_dr		/* static predict not taken */
done_store_dr:

	/* have we used fp, and need a save? */
	cmpq	%rdi,PCPU(FPCURTHREAD)
	jne	1f
	addq	$PCB_SAVEFPU,%r8	/* point at the pcb's FPU save area */
	clts				/* clear CR0_TS so fxsave won't trap */
	fxsave	(%r8)
	/* re-set CR0_TS via smsw/lmsw so the next FPU use traps for lazy restore */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	xorl	%eax,%eax
	movq	%rax,PCPU(FPCURTHREAD)	/* no thread owns the FPU now */
1:

	/* Save is done.  Now fire up new thread.  Leave old vmspace. */
	movq	TD_PCB(%rsi),%r8

	/* switch address space */
	movq	PCB_CR3(%r8),%rcx
	movq	%cr3,%rax
	cmpq	%rcx,%rax		/* Same address space? */
	jne	swinact
	/* Same vmspace: no cr3 reload, no pm_active bookkeeping needed. */
	SETLK	%rdx, TD_LOCK(%rdi)	/* Release the old thread */
	jmp	sw1
swinact:
	movq	%rcx,%cr3		/* new address space */
	movl	PCPU(CPUID), %eax
	/* Release bit from old pmap->pm_active */
	movq	TD_PROC(%rdi), %rcx	/* oldproc */
	movq	P_VMSPACE(%rcx), %rcx
	LK btrl	%eax, VM_PMAP+PM_ACTIVE(%rcx)	/* clear old */
	SETLK	%rdx, TD_LOCK(%rdi)	/* Release the old thread */
swact:
	/* Set bit in new pmap->pm_active (cpu_throw joins here, %eax = cpuid) */
	movq	TD_PROC(%rsi),%rdx	/* newproc */
	movq	P_VMSPACE(%rdx), %rdx
	LK btsl	%eax, VM_PMAP+PM_ACTIVE(%rdx)	/* set new */

sw1:
#if defined(SCHED_ULE) && defined(SMP)
	/* Wait for the new thread to become unblocked */
	movq	$blocked_lock, %rdx
1:
	movq	TD_LOCK(%rsi),%rcx
	cmpq	%rcx, %rdx
	pause				/* pause does not modify flags, so cmpq's
					   result is still valid for the je below */
	je	1b
#endif
	/*
	 * At this point, we've switched address spaces and are ready
	 * to load up the rest of the next context.
	 */

	/* Skip loading user fsbase/gsbase for kthreads */
	testl	$TDP_KTHREAD,TD_PFLAGS(%rsi)
	jnz	do_kthread

	testl	$PCB_32BIT,PCB_FLAGS(%r8)
	jnz	load_seg
done_load_seg:

	/* Only write the MSR when the saved base differs from the live one. */
	cmpq	PCB_FSBASE(%r8),%r9
	jz	1f
	/* Restore userland %fs */
restore_fsbase:
	movl	$MSR_FSBASE,%ecx
	movl	PCB_FSBASE(%r8),%eax	/* wrmsr takes the value in %edx:%eax */
	movl	PCB_FSBASE+4(%r8),%edx
	wrmsr
1:
	cmpq	PCB_GSBASE(%r8),%r10
	jz	2f
	/* Restore userland %gs */
	movl	$MSR_KGSBASE,%ecx
	movl	PCB_GSBASE(%r8),%eax
	movl	PCB_GSBASE+4(%r8),%edx
	wrmsr
2:

do_tss:
	/* Update the TSS_RSP0 pointer for the next interrupt */
	movq	PCPU(TSSP), %rax
	movq	%r8, PCPU(RSP0)		/* pcb sits at the top of the kstack */
	movq	%r8, PCPU(CURPCB)
	addq	$COMMON_TSS_RSP0, %rax
	movq	%rsi, PCPU(CURTHREAD)	/* into next thread */
	movq	%r8, (%rax)

	/* Test if debug registers should be restored. */
	testl	$PCB_DBREGS,PCB_FLAGS(%r8)
	jnz	load_dr			/* static predict not taken */
done_load_dr:

	/* Restore context. */
	movq	PCB_R15(%r8),%r15
	movq	PCB_R14(%r8),%r14
	movq	PCB_R13(%r8),%r13
	movq	PCB_R12(%r8),%r12
	movq	PCB_RBP(%r8),%rbp
	movq	PCB_RSP(%r8),%rsp
	movq	PCB_RBX(%r8),%rbx
	movq	PCB_RIP(%r8),%rax
	movq	%rax,(%rsp)		/* ret will resume at the saved rip */
	ret

	/*
	 * We order these strangely for several reasons.
	 * 1: I wanted to use static branch prediction hints
	 * 2: Most athlon64/opteron cpus don't have them.  They define
	 *    a forward branch as 'predict not taken'.  Intel cores have
	 *    the 'rep' prefix to invert this.
	 * So, to make it work on both forms of cpu we do the detour.
	 * We use jumps rather than call in order to avoid the stack.
	 */

do_kthread:
	/*
	 * Copy old fs/gsbase to new kthread pcb for future switches
	 * This maintains curpcb->pcb_[fg]sbase as caches of the MSR
	 */
	movq	%r9,PCB_FSBASE(%r8)
	movq	%r10,PCB_GSBASE(%r8)
	jmp	do_tss

store_seg:
	/* 32-bit pcb: the segment selectors themselves carry state; save them. */
	mov	%gs,PCB_GS(%r8)
	testl	$PCB_GS32BIT,PCB_FLAGS(%r8)
	jnz	2f			/* also snapshot the %gs descriptor */
1:	mov	%ds,PCB_DS(%r8)
	mov	%es,PCB_ES(%r8)
	mov	%fs,PCB_FS(%r8)
	jmp	done_store_seg
2:	movq	PCPU(GS32P),%rax	/* per-cpu pointer to the 32-bit %gs GDT slot */
	movq	(%rax),%rax
	movq	%rax,PCB_GS32SD(%r8)	/* stash the raw descriptor in the pcb */
	jmp	1b

load_seg:
	testl	$PCB_GS32BIT,PCB_FLAGS(%r8)
	jnz	2f			/* install saved %gs descriptor first */
	/*
	 * Loading %gs clobbers the kernel's GSBASE MSR, so bracket it:
	 * rdmsr fetches the kernel gsbase into %edx:%eax, the mov %gs
	 * trashes it, and wrmsr immediately puts it back.
	 */
1:	movl	$MSR_GSBASE,%ecx
	rdmsr
	mov	PCB_GS(%r8),%gs
	wrmsr
	mov	PCB_DS(%r8),%ds
	mov	PCB_ES(%r8),%es
	mov	PCB_FS(%r8),%fs
	jmp	restore_fsbase
	/* Restore userland %gs while preserving kernel gsbase */
2:	movq	PCPU(GS32P),%rax
	movq	PCB_GS32SD(%r8),%rcx
	movq	%rcx,(%rax)		/* write descriptor into the GDT slot */
	jmp	1b

store_dr:
	movq	%dr7,%rax		/* yes, do the save */
	movq	%dr0,%r15
	movq	%dr1,%r14
	movq	%dr2,%r13
	movq	%dr3,%r12
	movq	%dr6,%r11
	andq	$0x0000fc00, %rax	/* disable all watchpoints */
	movq	%r15,PCB_DR0(%r8)
	movq	%r14,PCB_DR1(%r8)
	movq	%r13,PCB_DR2(%r8)
	movq	%r12,PCB_DR3(%r8)
	movq	%r11,PCB_DR6(%r8)
	movq	%rax,PCB_DR7(%r8)
	movq	%rax,%dr7		/* watchpoints now off on this cpu */
	jmp	done_store_dr

load_dr:
	movq	%dr7,%rax		/* current %dr7: source of reserved bits */
	movq	PCB_DR0(%r8),%r15
	movq	PCB_DR1(%r8),%r14
	movq	PCB_DR2(%r8),%r13
	movq	PCB_DR3(%r8),%r12
	movq	PCB_DR6(%r8),%r11
	movq	PCB_DR7(%r8),%rcx
	movq	%r15,%dr0
	movq	%r14,%dr1
	/* Preserve reserved bits in %dr7 */
	andq	$0x0000fc00,%rax	/* keep reserved bits from live %dr7 */
	andq	$~0x0000fc00,%rcx	/* take enable/control bits from the pcb */
	movq	%r13,%dr2
	movq	%r12,%dr3
	orq	%rcx,%rax
	movq	%r11,%dr6
	movq	%rax,%dr7
	jmp	done_load_dr

END(cpu_switch)

/*
 * savectx(pcb)
 * Update pcb, saving current processor state.
 * %rdi = pcb to fill in
 */
ENTRY(savectx)
	/* Fetch PCB. */
	movq	%rdi,%rcx		/* keep pcb in %rcx; %rdi is reused below */

	/* Save caller's return address. */
	movq	(%rsp),%rax
	movq	%rax,PCB_RIP(%rcx)

	movq	%cr3,%rax
	movq	%rax,PCB_CR3(%rcx)

	movq	%rbx,PCB_RBX(%rcx)
	movq	%rsp,PCB_RSP(%rcx)
	movq	%rbp,PCB_RBP(%rcx)
	movq	%r12,PCB_R12(%rcx)
	movq	%r13,PCB_R13(%rcx)
	movq	%r14,PCB_R14(%rcx)
	movq	%r15,PCB_R15(%rcx)

	/*
	 * If fpcurthread == NULL, then the fpu h/w state is irrelevant and the
	 * state had better already be in the pcb.  This is true for forks
	 * but not for dumps (the old book-keeping with FP flags in the pcb
	 * always lost for dumps because the dump pcb has 0 flags).
	 *
	 * If fpcurthread != NULL, then we have to save the fpu h/w state to
	 * fpcurthread's pcb and copy it to the requested pcb, or save to the
	 * requested pcb and reload.  Copying is easier because we would
	 * have to handle h/w bugs for reloading.  We used to lose the
	 * parent's fpu state for forks by forgetting to reload.
	 */
	pushfq				/* save IF so popfq restores it */
	cli				/* keep fpcurthread stable while we save */
	movq	PCPU(FPCURTHREAD),%rax
	testq	%rax,%rax
	je	1f			/* nothing live in the FPU hardware */

	movq	TD_PCB(%rax),%rdi
	leaq	PCB_SAVEFPU(%rdi),%rdi
	clts				/* allow fxsave without a #NM trap */
	fxsave	(%rdi)
	smsw	%ax			/* re-arm CR0_TS for lazy FPU switching */
	orb	$CR0_TS,%al
	lmsw	%ax

	movq	$PCB_SAVEFPU_SIZE,%rdx	/* arg 3 */
	leaq	PCB_SAVEFPU(%rcx),%rsi	/* arg 2 */
	/* arg 1 (%rdi) already loaded */
	call	bcopy			/* copy owner's FPU state into target pcb */
1:
	popfq				/* restore interrupt state */

	ret
END(savectx)