1 /*- 2 * Copyright (c) 2003 Peter Wemm. 3 * Copyright (c) 1992 Terrence R. Lambert. 4 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * William Jolitz. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 39 */ 40 41 #include <sys/cdefs.h> 42 __FBSDID("$FreeBSD$"); 43 44 #include "opt_atpic.h" 45 #include "opt_compat.h" 46 #include "opt_cpu.h" 47 #include "opt_ddb.h" 48 #include "opt_inet.h" 49 #include "opt_isa.h" 50 #include "opt_kstack_pages.h" 51 #include "opt_maxmem.h" 52 #include "opt_mp_watchdog.h" 53 #include "opt_platform.h" 54 #include "opt_sched.h" 55 56 #include <sys/param.h> 57 #include <sys/proc.h> 58 #include <sys/systm.h> 59 #include <sys/bio.h> 60 #include <sys/buf.h> 61 #include <sys/bus.h> 62 #include <sys/callout.h> 63 #include <sys/cons.h> 64 #include <sys/cpu.h> 65 #include <sys/efi.h> 66 #include <sys/eventhandler.h> 67 #include <sys/exec.h> 68 #include <sys/imgact.h> 69 #include <sys/kdb.h> 70 #include <sys/kernel.h> 71 #include <sys/ktr.h> 72 #include <sys/linker.h> 73 #include <sys/lock.h> 74 #include <sys/malloc.h> 75 #include <sys/memrange.h> 76 #include <sys/msgbuf.h> 77 #include <sys/mutex.h> 78 #include <sys/pcpu.h> 79 #include <sys/ptrace.h> 80 #include <sys/reboot.h> 81 #include <sys/rwlock.h> 82 #include <sys/sched.h> 83 #include <sys/signalvar.h> 84 #ifdef SMP 85 #include <sys/smp.h> 86 #endif 87 #include <sys/syscallsubr.h> 88 #include <sys/sysctl.h> 89 #include <sys/sysent.h> 90 #include <sys/sysproto.h> 91 #include <sys/ucontext.h> 92 #include <sys/vmmeter.h> 93 94 #include <vm/vm.h> 95 #include <vm/vm_extern.h> 96 #include <vm/vm_kern.h> 97 #include <vm/vm_page.h> 98 #include <vm/vm_map.h> 99 #include <vm/vm_object.h> 100 #include <vm/vm_pager.h> 101 #include <vm/vm_param.h> 102 103 #ifdef DDB 104 #ifndef KDB 105 #error KDB must be enabled in order for DDB to work! 106 #endif 107 #include <ddb/ddb.h> 108 #include <ddb/db_sym.h> 109 #endif 110 111 #include <net/netisr.h> 112 113 #include <machine/clock.h> 114 #include <machine/cpu.h> 115 #include <machine/cputypes.h> 116 #include <machine/intr_machdep.h> 117 #include <x86/mca.h> 118 #include <machine/md_var.h> 119 #include <machine/metadata.h> 120 #include <machine/mp_watchdog.h> 121 #include <machine/pc/bios.h> 122 #include <machine/pcb.h> 123 #include <machine/proc.h> 124 #include <machine/reg.h> 125 #include <machine/sigframe.h> 126 #include <machine/specialreg.h> 127 #include <machine/tss.h> 128 #ifdef SMP 129 #include <machine/smp.h> 130 #endif 131 #ifdef FDT 132 #include <x86/fdt.h> 133 #endif 134 135 #ifdef DEV_ATPIC 136 #include <x86/isa/icu.h> 137 #else 138 #include <x86/apicvar.h> 139 #endif 140 141 #include <isa/isareg.h> 142 #include <isa/rtc.h> 143 #include <x86/init.h> 144 145 /* Sanity check for __curthread() */ 146 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); 147 148 extern u_int64_t hammer_time(u_int64_t, u_int64_t); 149 150 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 151 #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 152 153 static void cpu_startup(void *); 154 static void get_fpcontext(struct thread *td, mcontext_t *mcp, 155 char *xfpusave, size_t xfpusave_len); 156 static int set_fpcontext(struct thread *td, mcontext_t *mcp, 157 char *xfpustate, size_t xfpustate_len); 158 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); 159 160 /* Preload data parse function */ 161 static caddr_t native_parse_preload_data(u_int64_t); 162 163 /* Native function to fetch and parse the e820 map */ 164 static void native_parse_memmap(caddr_t, vm_paddr_t *, int *); 165 166 /* Default init_ops implementation. */ 167 struct init_ops init_ops = { 168 .parse_preload_data = native_parse_preload_data, 169 .early_clock_source_init = i8254_init, 170 .early_delay = i8254_delay, 171 .parse_memmap = native_parse_memmap, 172 #ifdef SMP 173 .mp_bootaddress = mp_bootaddress, 174 .start_all_aps = native_start_all_aps, 175 #endif 176 .msi_init = msi_init, 177 }; 178 179 /* 180 * The file "conf/ldscript.amd64" defines the symbol "kernphys". Its value is 181 * the physical address at which the kernel is loaded. 182 */ 183 extern char kernphys[]; 184 185 struct msgbuf *msgbufp; 186 187 /* 188 * Physical address of the EFI System Table. Stashed from the metadata hints 189 * passed into the kernel and used by the EFI code to call runtime services. 190 */ 191 vm_paddr_t efi_systbl_phys; 192 193 /* Intel ICH registers */ 194 #define ICH_PMBASE 0x400 195 #define ICH_SMI_EN ICH_PMBASE + 0x30 196 197 int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; 198 199 int cold = 1; 200 201 long Maxmem = 0; 202 long realmem = 0; 203 204 /* 205 * The number of PHYSMAP entries must be one less than the number of 206 * PHYSSEG entries because the PHYSMAP entry that spans the largest 207 * physical address that is accessible by ISA DMA is split into two 208 * PHYSSEG entries. 209 */ 210 #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) 211 212 vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; 213 vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; 214 215 /* must be 2 less so 0 0 can signal end of chunks */ 216 #define PHYS_AVAIL_ARRAY_END (nitems(phys_avail) - 2) 217 #define DUMP_AVAIL_ARRAY_END (nitems(dump_avail) - 2) 218 219 struct kva_md_info kmi; 220 221 static struct trapframe proc0_tf; 222 struct region_descriptor r_gdt, r_idt; 223 224 struct pcpu __pcpu[MAXCPU]; 225 226 struct mtx icu_lock; 227 228 struct mem_range_softc mem_range_softc; 229 230 struct mtx dt_lock; /* lock for GDT and LDT */ 231 232 void (*vmm_resume_p)(void); 233 234 static void 235 cpu_startup(dummy) 236 void *dummy; 237 { 238 uintmax_t memsize; 239 char *sysenv; 240 241 /* 242 * On MacBooks, we need to disallow the legacy USB circuit to 243 * generate an SMI# because this can cause several problems, 244 * namely: incorrect CPU frequency detection and failure to 245 * start the APs. 246 * We do this by disabling a bit in the SMI_EN (SMI Control and 247 * Enable register) of the Intel ICH LPC Interface Bridge. 248 */ 249 sysenv = kern_getenv("smbios.system.product"); 250 if (sysenv != NULL) { 251 if (strncmp(sysenv, "MacBook1,1", 10) == 0 || 252 strncmp(sysenv, "MacBook3,1", 10) == 0 || 253 strncmp(sysenv, "MacBook4,1", 10) == 0 || 254 strncmp(sysenv, "MacBookPro1,1", 13) == 0 || 255 strncmp(sysenv, "MacBookPro1,2", 13) == 0 || 256 strncmp(sysenv, "MacBookPro3,1", 13) == 0 || 257 strncmp(sysenv, "MacBookPro4,1", 13) == 0 || 258 strncmp(sysenv, "Macmini1,1", 10) == 0) { 259 if (bootverbose) 260 printf("Disabling LEGACY_USB_EN bit on " 261 "Intel ICH.\n"); 262 outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); 263 } 264 freeenv(sysenv); 265 } 266 267 /* 268 * Good {morning,afternoon,evening,night}. 269 */ 270 startrtclock(); 271 printcpuinfo(); 272 273 /* 274 * Display physical memory if SMBIOS reports reasonable amount. 275 */ 276 memsize = 0; 277 sysenv = kern_getenv("smbios.memory.enabled"); 278 if (sysenv != NULL) { 279 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; 280 freeenv(sysenv); 281 } 282 if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count)) 283 memsize = ptoa((uintmax_t)Maxmem); 284 printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); 285 realmem = atop(memsize); 286 287 /* 288 * Display any holes after the first chunk of extended memory. 289 */ 290 if (bootverbose) { 291 int indx; 292 293 printf("Physical memory chunk(s):\n"); 294 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { 295 vm_paddr_t size; 296 297 size = phys_avail[indx + 1] - phys_avail[indx]; 298 printf( 299 "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", 300 (uintmax_t)phys_avail[indx], 301 (uintmax_t)phys_avail[indx + 1] - 1, 302 (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); 303 } 304 } 305 306 vm_ksubmap_init(&kmi); 307 308 printf("avail memory = %ju (%ju MB)\n", 309 ptoa((uintmax_t)vm_cnt.v_free_count), 310 ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); 311 312 /* 313 * Set up buffers, so they can be used to read disk labels. 314 */ 315 bufinit(); 316 vm_pager_bufferinit(); 317 318 cpu_setregs(); 319 } 320 321 /* 322 * Send an interrupt to process. 323 * 324 * Stack is set up to allow sigcode stored 325 * at top to call routine, followed by call 326 * to sigreturn routine below. After sigreturn 327 * resets the signal mask, the stack, and the 328 * frame pointer, it returns to the user 329 * specified pc, psl. 330 */ 331 void 332 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 333 { 334 struct sigframe sf, *sfp; 335 struct pcb *pcb; 336 struct proc *p; 337 struct thread *td; 338 struct sigacts *psp; 339 char *sp; 340 struct trapframe *regs; 341 char *xfpusave; 342 size_t xfpusave_len; 343 int sig; 344 int oonstack; 345 346 td = curthread; 347 pcb = td->td_pcb; 348 p = td->td_proc; 349 PROC_LOCK_ASSERT(p, MA_OWNED); 350 sig = ksi->ksi_signo; 351 psp = p->p_sigacts; 352 mtx_assert(&psp->ps_mtx, MA_OWNED); 353 regs = td->td_frame; 354 oonstack = sigonstack(regs->tf_rsp); 355 356 if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { 357 xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); 358 xfpusave = __builtin_alloca(xfpusave_len); 359 } else { 360 xfpusave_len = 0; 361 xfpusave = NULL; 362 } 363 364 /* Save user context. */ 365 bzero(&sf, sizeof(sf)); 366 sf.sf_uc.uc_sigmask = *mask; 367 sf.sf_uc.uc_stack = td->td_sigstk; 368 sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 369 ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; 370 sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; 371 bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs)); 372 sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ 373 get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); 374 fpstate_drop(td); 375 sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; 376 sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase; 377 bzero(sf.sf_uc.uc_mcontext.mc_spare, 378 sizeof(sf.sf_uc.uc_mcontext.mc_spare)); 379 bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); 380 381 /* Allocate space for the signal handler context. */ 382 if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && 383 SIGISMEMBER(psp->ps_sigonstack, sig)) { 384 sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size; 385 #if defined(COMPAT_43) 386 td->td_sigstk.ss_flags |= SS_ONSTACK; 387 #endif 388 } else 389 sp = (char *)regs->tf_rsp - 128; 390 if (xfpusave != NULL) { 391 sp -= xfpusave_len; 392 sp = (char *)((unsigned long)sp & ~0x3Ful); 393 sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; 394 } 395 sp -= sizeof(struct sigframe); 396 /* Align to 16 bytes. */ 397 sfp = (struct sigframe *)((unsigned long)sp & ~0xFul); 398 399 /* Build the argument list for the signal handler. */ 400 regs->tf_rdi = sig; /* arg 1 in %rdi */ 401 regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */ 402 bzero(&sf.sf_si, sizeof(sf.sf_si)); 403 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 404 /* Signal handler installed with SA_SIGINFO. */ 405 regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ 406 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; 407 408 /* Fill in POSIX parts */ 409 sf.sf_si = ksi->ksi_info; 410 sf.sf_si.si_signo = sig; /* maybe a translated signal */ 411 regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ 412 } else { 413 /* Old FreeBSD-style arguments. */ 414 regs->tf_rsi = ksi->ksi_code; /* arg 2 in %rsi */ 415 regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ 416 sf.sf_ahu.sf_handler = catcher; 417 } 418 mtx_unlock(&psp->ps_mtx); 419 PROC_UNLOCK(p); 420 421 /* 422 * Copy the sigframe out to the user's stack. 423 */ 424 if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || 425 (xfpusave != NULL && copyout(xfpusave, 426 (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) 427 != 0)) { 428 #ifdef DEBUG 429 printf("process %ld has trashed its stack\n", (long)p->p_pid); 430 #endif 431 PROC_LOCK(p); 432 sigexit(td, SIGILL); 433 } 434 435 regs->tf_rsp = (long)sfp; 436 regs->tf_rip = p->p_sysent->sv_sigcode_base; 437 regs->tf_rflags &= ~(PSL_T | PSL_D); 438 regs->tf_cs = _ucodesel; 439 regs->tf_ds = _udatasel; 440 regs->tf_ss = _udatasel; 441 regs->tf_es = _udatasel; 442 regs->tf_fs = _ufssel; 443 regs->tf_gs = _ugssel; 444 regs->tf_flags = TF_HASSEGS; 445 set_pcb_flags(pcb, PCB_FULL_IRET); 446 PROC_LOCK(p); 447 mtx_lock(&psp->ps_mtx); 448 } 449 450 /* 451 * System call to cleanup state after a signal 452 * has been taken. Reset signal mask and 453 * stack state from context left by sendsig (above). 454 * Return to previous pc and psl as specified by 455 * context left by sendsig. Check carefully to 456 * make sure that the user has not modified the 457 * state to gain improper privileges. 458 * 459 * MPSAFE 460 */ 461 int 462 sys_sigreturn(td, uap) 463 struct thread *td; 464 struct sigreturn_args /* { 465 const struct __ucontext *sigcntxp; 466 } */ *uap; 467 { 468 ucontext_t uc; 469 struct pcb *pcb; 470 struct proc *p; 471 struct trapframe *regs; 472 ucontext_t *ucp; 473 char *xfpustate; 474 size_t xfpustate_len; 475 long rflags; 476 int cs, error, ret; 477 ksiginfo_t ksi; 478 479 pcb = td->td_pcb; 480 p = td->td_proc; 481 482 error = copyin(uap->sigcntxp, &uc, sizeof(uc)); 483 if (error != 0) { 484 uprintf("pid %d (%s): sigreturn copyin failed\n", 485 p->p_pid, td->td_name); 486 return (error); 487 } 488 ucp = &uc; 489 if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { 490 uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid, 491 td->td_name, ucp->uc_mcontext.mc_flags); 492 return (EINVAL); 493 } 494 regs = td->td_frame; 495 rflags = ucp->uc_mcontext.mc_rflags; 496 /* 497 * Don't allow users to change privileged or reserved flags. 498 */ 499 if (!EFL_SECURE(rflags, regs->tf_rflags)) { 500 uprintf("pid %d (%s): sigreturn rflags = 0x%lx\n", p->p_pid, 501 td->td_name, rflags); 502 return (EINVAL); 503 } 504 505 /* 506 * Don't allow users to load a valid privileged %cs. Let the 507 * hardware check for invalid selectors, excess privilege in 508 * other selectors, invalid %eip's and invalid %esp's. 509 */ 510 cs = ucp->uc_mcontext.mc_cs; 511 if (!CS_SECURE(cs)) { 512 uprintf("pid %d (%s): sigreturn cs = 0x%x\n", p->p_pid, 513 td->td_name, cs); 514 ksiginfo_init_trap(&ksi); 515 ksi.ksi_signo = SIGBUS; 516 ksi.ksi_code = BUS_OBJERR; 517 ksi.ksi_trapno = T_PROTFLT; 518 ksi.ksi_addr = (void *)regs->tf_rip; 519 trapsignal(td, &ksi); 520 return (EINVAL); 521 } 522 523 if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { 524 xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; 525 if (xfpustate_len > cpu_max_ext_state_size - 526 sizeof(struct savefpu)) { 527 uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", 528 p->p_pid, td->td_name, xfpustate_len); 529 return (EINVAL); 530 } 531 xfpustate = __builtin_alloca(xfpustate_len); 532 error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, 533 xfpustate, xfpustate_len); 534 if (error != 0) { 535 uprintf( 536 "pid %d (%s): sigreturn copying xfpustate failed\n", 537 p->p_pid, td->td_name); 538 return (error); 539 } 540 } else { 541 xfpustate = NULL; 542 xfpustate_len = 0; 543 } 544 ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); 545 if (ret != 0) { 546 uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", 547 p->p_pid, td->td_name, ret); 548 return (ret); 549 } 550 bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); 551 pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase; 552 pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase; 553 554 #if defined(COMPAT_43) 555 if (ucp->uc_mcontext.mc_onstack & 1) 556 td->td_sigstk.ss_flags |= SS_ONSTACK; 557 else 558 td->td_sigstk.ss_flags &= ~SS_ONSTACK; 559 #endif 560 561 kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); 562 set_pcb_flags(pcb, PCB_FULL_IRET); 563 return (EJUSTRETURN); 564 } 565 566 #ifdef COMPAT_FREEBSD4 567 int 568 freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) 569 { 570 571 return sys_sigreturn(td, (struct sigreturn_args *)uap); 572 } 573 #endif 574 575 /* 576 * Reset registers to default values on exec. 577 */ 578 void 579 exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) 580 { 581 struct trapframe *regs = td->td_frame; 582 struct pcb *pcb = td->td_pcb; 583 584 mtx_lock(&dt_lock); 585 if (td->td_proc->p_md.md_ldt != NULL) 586 user_ldt_free(td); 587 else 588 mtx_unlock(&dt_lock); 589 590 pcb->pcb_fsbase = 0; 591 pcb->pcb_gsbase = 0; 592 clear_pcb_flags(pcb, PCB_32BIT); 593 pcb->pcb_initial_fpucw = __INITIAL_FPUCW__; 594 set_pcb_flags(pcb, PCB_FULL_IRET); 595 596 bzero((char *)regs, sizeof(struct trapframe)); 597 regs->tf_rip = imgp->entry_addr; 598 regs->tf_rsp = ((stack - 8) & ~0xFul) + 8; 599 regs->tf_rdi = stack; /* argv */ 600 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 601 regs->tf_ss = _udatasel; 602 regs->tf_cs = _ucodesel; 603 regs->tf_ds = _udatasel; 604 regs->tf_es = _udatasel; 605 regs->tf_fs = _ufssel; 606 regs->tf_gs = _ugssel; 607 regs->tf_flags = TF_HASSEGS; 608 td->td_retval[1] = 0; 609 610 /* 611 * Reset the hardware debug registers if they were in use. 612 * They won't have any meaning for the newly exec'd process. 613 */ 614 if (pcb->pcb_flags & PCB_DBREGS) { 615 pcb->pcb_dr0 = 0; 616 pcb->pcb_dr1 = 0; 617 pcb->pcb_dr2 = 0; 618 pcb->pcb_dr3 = 0; 619 pcb->pcb_dr6 = 0; 620 pcb->pcb_dr7 = 0; 621 if (pcb == curpcb) { 622 /* 623 * Clear the debug registers on the running 624 * CPU, otherwise they will end up affecting 625 * the next process we switch to. 626 */ 627 reset_dbregs(); 628 } 629 clear_pcb_flags(pcb, PCB_DBREGS); 630 } 631 632 /* 633 * Drop the FP state if we hold it, so that the process gets a 634 * clean FP state if it uses the FPU again. 635 */ 636 fpstate_drop(td); 637 } 638 639 void 640 cpu_setregs(void) 641 { 642 register_t cr0; 643 644 cr0 = rcr0(); 645 /* 646 * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the 647 * BSP. See the comments there about why we set them. 648 */ 649 cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; 650 load_cr0(cr0); 651 } 652 653 /* 654 * Initialize amd64 and configure to run kernel 655 */ 656 657 /* 658 * Initialize segments & interrupt table 659 */ 660 661 struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor tables */ 662 static struct gate_descriptor idt0[NIDT]; 663 struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ 664 665 static char dblfault_stack[PAGE_SIZE] __aligned(16); 666 667 static char nmi0_stack[PAGE_SIZE] __aligned(16); 668 CTASSERT(sizeof(struct nmi_pcpu) == 16); 669 670 struct amd64tss common_tss[MAXCPU]; 671 672 /* 673 * Software prototypes -- in more palatable form. 674 * 675 * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same 676 * slots as corresponding segments for i386 kernel. 677 */ 678 struct soft_segment_descriptor gdt_segs[] = { 679 /* GNULL_SEL 0 Null Descriptor */ 680 { .ssd_base = 0x0, 681 .ssd_limit = 0x0, 682 .ssd_type = 0, 683 .ssd_dpl = 0, 684 .ssd_p = 0, 685 .ssd_long = 0, 686 .ssd_def32 = 0, 687 .ssd_gran = 0 }, 688 /* GNULL2_SEL 1 Null Descriptor */ 689 { .ssd_base = 0x0, 690 .ssd_limit = 0x0, 691 .ssd_type = 0, 692 .ssd_dpl = 0, 693 .ssd_p = 0, 694 .ssd_long = 0, 695 .ssd_def32 = 0, 696 .ssd_gran = 0 }, 697 /* GUFS32_SEL 2 32 bit %gs Descriptor for user */ 698 { .ssd_base = 0x0, 699 .ssd_limit = 0xfffff, 700 .ssd_type = SDT_MEMRWA, 701 .ssd_dpl = SEL_UPL, 702 .ssd_p = 1, 703 .ssd_long = 0, 704 .ssd_def32 = 1, 705 .ssd_gran = 1 }, 706 /* GUGS32_SEL 3 32 bit %fs Descriptor for user */ 707 { .ssd_base = 0x0, 708 .ssd_limit = 0xfffff, 709 .ssd_type = SDT_MEMRWA, 710 .ssd_dpl = SEL_UPL, 711 .ssd_p = 1, 712 .ssd_long = 0, 713 .ssd_def32 = 1, 714 .ssd_gran = 1 }, 715 /* GCODE_SEL 4 Code Descriptor for kernel */ 716 { .ssd_base = 0x0, 717 .ssd_limit = 0xfffff, 718 .ssd_type = SDT_MEMERA, 719 .ssd_dpl = SEL_KPL, 720 .ssd_p = 1, 721 .ssd_long = 1, 722 .ssd_def32 = 0, 723 .ssd_gran = 1 }, 724 /* GDATA_SEL 5 Data Descriptor for kernel */ 725 { .ssd_base = 0x0, 726 .ssd_limit = 0xfffff, 727 .ssd_type = SDT_MEMRWA, 728 .ssd_dpl = SEL_KPL, 729 .ssd_p = 1, 730 .ssd_long = 1, 731 .ssd_def32 = 0, 732 .ssd_gran = 1 }, 733 /* GUCODE32_SEL 6 32 bit Code Descriptor for user */ 734 { .ssd_base = 0x0, 735 .ssd_limit = 0xfffff, 736 .ssd_type = SDT_MEMERA, 737 .ssd_dpl = SEL_UPL, 738 .ssd_p = 1, 739 .ssd_long = 0, 740 .ssd_def32 = 1, 741 .ssd_gran = 1 }, 742 /* GUDATA_SEL 7 32/64 bit Data Descriptor for user */ 743 { .ssd_base = 0x0, 744 .ssd_limit = 0xfffff, 745 .ssd_type = SDT_MEMRWA, 746 .ssd_dpl = SEL_UPL, 747 .ssd_p = 1, 748 .ssd_long = 0, 749 .ssd_def32 = 1, 750 .ssd_gran = 1 }, 751 /* GUCODE_SEL 8 64 bit Code Descriptor for user */ 752 { .ssd_base = 0x0, 753 .ssd_limit = 0xfffff, 754 .ssd_type = SDT_MEMERA, 755 .ssd_dpl = SEL_UPL, 756 .ssd_p = 1, 757 .ssd_long = 1, 758 .ssd_def32 = 0, 759 .ssd_gran = 1 }, 760 /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ 761 { .ssd_base = 0x0, 762 .ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1, 763 .ssd_type = SDT_SYSTSS, 764 .ssd_dpl = SEL_KPL, 765 .ssd_p = 1, 766 .ssd_long = 0, 767 .ssd_def32 = 0, 768 .ssd_gran = 0 }, 769 /* Actually, the TSS is a system descriptor which is double size */ 770 { .ssd_base = 0x0, 771 .ssd_limit = 0x0, 772 .ssd_type = 0, 773 .ssd_dpl = 0, 774 .ssd_p = 0, 775 .ssd_long = 0, 776 .ssd_def32 = 0, 777 .ssd_gran = 0 }, 778 /* GUSERLDT_SEL 11 LDT Descriptor */ 779 { .ssd_base = 0x0, 780 .ssd_limit = 0x0, 781 .ssd_type = 0, 782 .ssd_dpl = 0, 783 .ssd_p = 0, 784 .ssd_long = 0, 785 .ssd_def32 = 0, 786 .ssd_gran = 0 }, 787 /* GUSERLDT_SEL 12 LDT Descriptor, double size */ 788 { .ssd_base = 0x0, 789 .ssd_limit = 0x0, 790 .ssd_type = 0, 791 .ssd_dpl = 0, 792 .ssd_p = 0, 793 .ssd_long = 0, 794 .ssd_def32 = 0, 795 .ssd_gran = 0 }, 796 }; 797 798 void 799 setidt(int idx, inthand_t *func, int typ, int dpl, int ist) 800 { 801 struct gate_descriptor *ip; 802 803 ip = idt + idx; 804 ip->gd_looffset = (uintptr_t)func; 805 ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL); 806 ip->gd_ist = ist; 807 ip->gd_xx = 0; 808 ip->gd_type = typ; 809 ip->gd_dpl = dpl; 810 ip->gd_p = 1; 811 ip->gd_hioffset = ((uintptr_t)func)>>16 ; 812 } 813 814 extern inthand_t 815 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), 816 IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), 817 IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), 818 IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), 819 IDTVEC(xmm), IDTVEC(dblfault), 820 #ifdef KDTRACE_HOOKS 821 IDTVEC(dtrace_ret), 822 #endif 823 #ifdef XENHVM 824 IDTVEC(xen_intr_upcall), 825 #endif 826 IDTVEC(fast_syscall), IDTVEC(fast_syscall32); 827 828 #ifdef DDB 829 /* 830 * Display the index and function name of any IDT entries that don't use 831 * the default 'rsvd' entry point. 832 */ 833 DB_SHOW_COMMAND(idt, db_show_idt) 834 { 835 struct gate_descriptor *ip; 836 int idx; 837 uintptr_t func; 838 839 ip = idt; 840 for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { 841 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); 842 if (func != (uintptr_t)&IDTVEC(rsvd)) { 843 db_printf("%3d\t", idx); 844 db_printsym(func, DB_STGY_PROC); 845 db_printf("\n"); 846 } 847 ip++; 848 } 849 } 850 851 /* Show privileged registers. */ 852 DB_SHOW_COMMAND(sysregs, db_show_sysregs) 853 { 854 struct { 855 uint16_t limit; 856 uint64_t base; 857 } __packed idtr, gdtr; 858 uint16_t ldt, tr; 859 860 __asm __volatile("sidt %0" : "=m" (idtr)); 861 db_printf("idtr\t0x%016lx/%04x\n", 862 (u_long)idtr.base, (u_int)idtr.limit); 863 __asm __volatile("sgdt %0" : "=m" (gdtr)); 864 db_printf("gdtr\t0x%016lx/%04x\n", 865 (u_long)gdtr.base, (u_int)gdtr.limit); 866 __asm __volatile("sldt %0" : "=r" (ldt)); 867 db_printf("ldtr\t0x%04x\n", ldt); 868 __asm __volatile("str %0" : "=r" (tr)); 869 db_printf("tr\t0x%04x\n", tr); 870 db_printf("cr0\t0x%016lx\n", rcr0()); 871 db_printf("cr2\t0x%016lx\n", rcr2()); 872 db_printf("cr3\t0x%016lx\n", rcr3()); 873 db_printf("cr4\t0x%016lx\n", rcr4()); 874 if (rcr4() & CR4_XSAVE) 875 db_printf("xcr0\t0x%016lx\n", rxcr(0)); 876 db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER)); 877 if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX)) 878 db_printf("FEATURES_CTL\t%016lx\n", 879 rdmsr(MSR_IA32_FEATURE_CONTROL)); 880 db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR)); 881 db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT)); 882 db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE)); 883 } 884 885 DB_SHOW_COMMAND(dbregs, db_show_dbregs) 886 { 887 888 db_printf("dr0\t0x%016lx\n", rdr0()); 889 db_printf("dr1\t0x%016lx\n", rdr1()); 890 db_printf("dr2\t0x%016lx\n", rdr2()); 891 db_printf("dr3\t0x%016lx\n", rdr3()); 892 db_printf("dr6\t0x%016lx\n", rdr6()); 893 db_printf("dr7\t0x%016lx\n", rdr7()); 894 } 895 #endif 896 897 void 898 sdtossd(sd, ssd) 899 struct user_segment_descriptor *sd; 900 struct soft_segment_descriptor *ssd; 901 { 902 903 ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; 904 ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; 905 ssd->ssd_type = sd->sd_type; 906 ssd->ssd_dpl = sd->sd_dpl; 907 ssd->ssd_p = sd->sd_p; 908 ssd->ssd_long = sd->sd_long; 909 ssd->ssd_def32 = sd->sd_def32; 910 ssd->ssd_gran = sd->sd_gran; 911 } 912 913 void 914 ssdtosd(ssd, sd) 915 struct soft_segment_descriptor *ssd; 916 struct user_segment_descriptor *sd; 917 { 918 919 sd->sd_lobase = (ssd->ssd_base) & 0xffffff; 920 sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff; 921 sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; 922 sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; 923 sd->sd_type = ssd->ssd_type; 924 sd->sd_dpl = ssd->ssd_dpl; 925 sd->sd_p = ssd->ssd_p; 926 sd->sd_long = ssd->ssd_long; 927 sd->sd_def32 = ssd->ssd_def32; 928 sd->sd_gran = ssd->ssd_gran; 929 } 930 931 void 932 ssdtosyssd(ssd, sd) 933 struct soft_segment_descriptor *ssd; 934 struct system_segment_descriptor *sd; 935 { 936 937 sd->sd_lobase = (ssd->ssd_base) & 0xffffff; 938 sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful; 939 sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; 940 sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; 941 sd->sd_type = ssd->ssd_type; 942 sd->sd_dpl = ssd->ssd_dpl; 943 sd->sd_p = ssd->ssd_p; 944 sd->sd_gran = ssd->ssd_gran; 945 } 946 947 #if !defined(DEV_ATPIC) && defined(DEV_ISA) 948 #include <isa/isavar.h> 949 #include <isa/isareg.h> 950 /* 951 * Return a bitmap of the current interrupt requests. This is 8259-specific 952 * and is only suitable for use at probe time. 953 * This is only here to pacify sio. It is NOT FATAL if this doesn't work. 954 * It shouldn't be here. There should probably be an APIC centric 955 * implementation in the apic driver code, if at all. 956 */ 957 intrmask_t 958 isa_irq_pending(void) 959 { 960 u_char irr1; 961 u_char irr2; 962 963 irr1 = inb(IO_ICU1); 964 irr2 = inb(IO_ICU2); 965 return ((irr2 << 8) | irr1); 966 } 967 #endif 968 969 u_int basemem; 970 971 static int 972 add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, 973 int *physmap_idxp) 974 { 975 int i, insert_idx, physmap_idx; 976 977 physmap_idx = *physmap_idxp; 978 979 if (length == 0) 980 return (1); 981 982 /* 983 * Find insertion point while checking for overlap. Start off by 984 * assuming the new entry will be added to the end. 985 * 986 * NB: physmap_idx points to the next free slot. 987 */ 988 insert_idx = physmap_idx; 989 for (i = 0; i <= physmap_idx; i += 2) { 990 if (base < physmap[i + 1]) { 991 if (base + length <= physmap[i]) { 992 insert_idx = i; 993 break; 994 } 995 if (boothowto & RB_VERBOSE) 996 printf( 997 "Overlapping memory regions, ignoring second region\n"); 998 return (1); 999 } 1000 } 1001 1002 /* See if we can prepend to the next entry. */ 1003 if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { 1004 physmap[insert_idx] = base; 1005 return (1); 1006 } 1007 1008 /* See if we can append to the previous entry. */ 1009 if (insert_idx > 0 && base == physmap[insert_idx - 1]) { 1010 physmap[insert_idx - 1] += length; 1011 return (1); 1012 } 1013 1014 physmap_idx += 2; 1015 *physmap_idxp = physmap_idx; 1016 if (physmap_idx == PHYSMAP_SIZE) { 1017 printf( 1018 "Too many segments in the physical address map, giving up\n"); 1019 return (0); 1020 } 1021 1022 /* 1023 * Move the last 'N' entries down to make room for the new 1024 * entry if needed. 1025 */ 1026 for (i = (physmap_idx - 2); i > insert_idx; i -= 2) { 1027 physmap[i] = physmap[i - 2]; 1028 physmap[i + 1] = physmap[i - 1]; 1029 } 1030 1031 /* Insert the new entry. */ 1032 physmap[insert_idx] = base; 1033 physmap[insert_idx + 1] = base + length; 1034 return (1); 1035 } 1036 1037 void 1038 bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize, 1039 vm_paddr_t *physmap, int *physmap_idx) 1040 { 1041 struct bios_smap *smap, *smapend; 1042 1043 smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); 1044 1045 for (smap = smapbase; smap < smapend; smap++) { 1046 if (boothowto & RB_VERBOSE) 1047 printf("SMAP type=%02x base=%016lx len=%016lx\n", 1048 smap->type, smap->base, smap->length); 1049 1050 if (smap->type != SMAP_TYPE_MEMORY) 1051 continue; 1052 1053 if (!add_physmap_entry(smap->base, smap->length, physmap, 1054 physmap_idx)) 1055 break; 1056 } 1057 } 1058 1059 static void 1060 add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, 1061 int *physmap_idx) 1062 { 1063 struct efi_md *map, *p; 1064 const char *type; 1065 size_t efisz; 1066 int ndesc, i; 1067 1068 static const char *types[] = { 1069 "Reserved", 1070 "LoaderCode", 1071 "LoaderData", 1072 "BootServicesCode", 1073 "BootServicesData", 1074 "RuntimeServicesCode", 1075 "RuntimeServicesData", 1076 "ConventionalMemory", 1077 "UnusableMemory", 1078 "ACPIReclaimMemory", 1079 "ACPIMemoryNVS", 1080 "MemoryMappedIO", 1081 "MemoryMappedIOPortSpace", 1082 "PalCode", 1083 "PersistentMemory" 1084 }; 1085 1086 /* 1087 * Memory map data provided by UEFI via the GetMemoryMap 1088 * Boot Services API. 1089 */ 1090 efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; 1091 map = (struct efi_md *)((uint8_t *)efihdr + efisz); 1092 1093 if (efihdr->descriptor_size == 0) 1094 return; 1095 ndesc = efihdr->memory_size / efihdr->descriptor_size; 1096 1097 if (boothowto & RB_VERBOSE) 1098 printf("%23s %12s %12s %8s %4s\n", 1099 "Type", "Physical", "Virtual", "#Pages", "Attr"); 1100 1101 for (i = 0, p = map; i < ndesc; i++, 1102 p = efi_next_descriptor(p, efihdr->descriptor_size)) { 1103 if (boothowto & RB_VERBOSE) { 1104 if (p->md_type < nitems(types)) 1105 type = types[p->md_type]; 1106 else 1107 type = "<INVALID>"; 1108 printf("%23s %012lx %12p %08lx ", type, p->md_phys, 1109 p->md_virt, p->md_pages); 1110 if (p->md_attr & EFI_MD_ATTR_UC) 1111 printf("UC "); 1112 if (p->md_attr & EFI_MD_ATTR_WC) 1113 printf("WC "); 1114 if (p->md_attr & EFI_MD_ATTR_WT) 1115 printf("WT "); 1116 if (p->md_attr & EFI_MD_ATTR_WB) 1117 printf("WB "); 1118 if (p->md_attr & EFI_MD_ATTR_UCE) 1119 printf("UCE "); 1120 if (p->md_attr & EFI_MD_ATTR_WP) 1121 printf("WP "); 1122 if (p->md_attr & EFI_MD_ATTR_RP) 1123 printf("RP "); 1124 if (p->md_attr & EFI_MD_ATTR_XP) 1125 printf("XP "); 1126 if (p->md_attr & EFI_MD_ATTR_NV) 1127 printf("NV "); 1128 if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE) 1129 printf("MORE_RELIABLE "); 1130 if (p->md_attr & EFI_MD_ATTR_RO) 1131 printf("RO "); 1132 if (p->md_attr & EFI_MD_ATTR_RT) 1133 printf("RUNTIME"); 1134 printf("\n"); 1135 } 1136 1137 switch (p->md_type) { 1138 case EFI_MD_TYPE_CODE: 1139 case EFI_MD_TYPE_DATA: 1140 case EFI_MD_TYPE_BS_CODE: 1141 case EFI_MD_TYPE_BS_DATA: 1142 case EFI_MD_TYPE_FREE: 1143 /* 1144 * We're allowed to use any entry with these types. 1145 */ 1146 break; 1147 default: 1148 continue; 1149 } 1150 1151 if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE), 1152 physmap, physmap_idx)) 1153 break; 1154 } 1155 } 1156 1157 static char bootmethod[16] = ""; 1158 SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0, 1159 "System firmware boot method"); 1160 1161 static void 1162 native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx) 1163 { 1164 struct bios_smap *smap; 1165 struct efi_map_header *efihdr; 1166 u_int32_t size; 1167 1168 /* 1169 * Memory map from INT 15:E820. 1170 * 1171 * subr_module.c says: 1172 * "Consumer may safely assume that size value precedes data." 1173 * ie: an int32_t immediately precedes smap. 1174 */ 1175 1176 efihdr = (struct efi_map_header *)preload_search_info(kmdp, 1177 MODINFO_METADATA | MODINFOMD_EFI_MAP); 1178 smap = (struct bios_smap *)preload_search_info(kmdp, 1179 MODINFO_METADATA | MODINFOMD_SMAP); 1180 if (efihdr == NULL && smap == NULL) 1181 panic("No BIOS smap or EFI map info from loader!"); 1182 1183 if (efihdr != NULL) { 1184 add_efi_map_entries(efihdr, physmap, physmap_idx); 1185 strlcpy(bootmethod, "UEFI", sizeof(bootmethod)); 1186 } else { 1187 size = *((u_int32_t *)smap - 1); 1188 bios_add_smap_entries(smap, size, physmap, physmap_idx); 1189 strlcpy(bootmethod, "BIOS", sizeof(bootmethod)); 1190 } 1191 } 1192 1193 #define PAGES_PER_GB (1024 * 1024 * 1024 / PAGE_SIZE) 1194 1195 /* 1196 * Populate the (physmap) array with base/bound pairs describing the 1197 * available physical memory in the system, then test this memory and 1198 * build the phys_avail array describing the actually-available memory. 1199 * 1200 * Total memory size may be set by the kernel environment variable 1201 * hw.physmem or the compile-time define MAXMEM. 1202 * 1203 * XXX first should be vm_paddr_t. 1204 */ 1205 static void 1206 getmemsize(caddr_t kmdp, u_int64_t first) 1207 { 1208 int i, physmap_idx, pa_indx, da_indx; 1209 vm_paddr_t pa, physmap[PHYSMAP_SIZE]; 1210 u_long physmem_start, physmem_tunable, memtest; 1211 pt_entry_t *pte; 1212 quad_t dcons_addr, dcons_size; 1213 int page_counter; 1214 1215 bzero(physmap, sizeof(physmap)); 1216 physmap_idx = 0; 1217 1218 init_ops.parse_memmap(kmdp, physmap, &physmap_idx); 1219 physmap_idx -= 2; 1220 1221 /* 1222 * Find the 'base memory' segment for SMP 1223 */ 1224 basemem = 0; 1225 for (i = 0; i <= physmap_idx; i += 2) { 1226 if (physmap[i] <= 0xA0000) { 1227 basemem = physmap[i + 1] / 1024; 1228 break; 1229 } 1230 } 1231 if (basemem == 0 || basemem > 640) { 1232 if (bootverbose) 1233 printf( 1234 "Memory map doesn't contain a basemem segment, faking it"); 1235 basemem = 640; 1236 } 1237 1238 /* 1239 * Make hole for "AP -> long mode" bootstrap code. The 1240 * mp_bootaddress vector is only available when the kernel 1241 * is configured to support APs and APs for the system start 1242 * in 32bit mode (e.g. SMP bare metal). 1243 */ 1244 if (init_ops.mp_bootaddress) { 1245 if (physmap[1] >= 0x100000000) 1246 panic( 1247 "Basemem segment is not suitable for AP bootstrap code!"); 1248 physmap[1] = init_ops.mp_bootaddress(physmap[1] / 1024); 1249 } 1250 1251 /* 1252 * Maxmem isn't the "maximum memory", it's one larger than the 1253 * highest page of the physical address space. It should be 1254 * called something like "Maxphyspage". We may adjust this 1255 * based on ``hw.physmem'' and the results of the memory test. 1256 */ 1257 Maxmem = atop(physmap[physmap_idx + 1]); 1258 1259 #ifdef MAXMEM 1260 Maxmem = MAXMEM / 4; 1261 #endif 1262 1263 if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) 1264 Maxmem = atop(physmem_tunable); 1265 1266 /* 1267 * The boot memory test is disabled by default, as it takes a 1268 * significant amount of time on large-memory systems, and is 1269 * unfriendly to virtual machines as it unnecessarily touches all 1270 * pages. 1271 * 1272 * A general name is used as the code may be extended to support 1273 * additional tests beyond the current "page present" test. 1274 */ 1275 memtest = 0; 1276 TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); 1277 1278 /* 1279 * Don't allow MAXMEM or hw.physmem to extend the amount of memory 1280 * in the system. 1281 */ 1282 if (Maxmem > atop(physmap[physmap_idx + 1])) 1283 Maxmem = atop(physmap[physmap_idx + 1]); 1284 1285 if (atop(physmap[physmap_idx + 1]) != Maxmem && 1286 (boothowto & RB_VERBOSE)) 1287 printf("Physical memory use set to %ldK\n", Maxmem * 4); 1288 1289 /* call pmap initialization to make new kernel address space */ 1290 pmap_bootstrap(&first); 1291 1292 /* 1293 * Size up each available chunk of physical memory. 1294 * 1295 * XXX Some BIOSes corrupt low 64KB between suspend and resume. 1296 * By default, mask off the first 16 pages unless we appear to be 1297 * running in a VM. 1298 */ 1299 physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT; 1300 TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start); 1301 if (physmap[0] < physmem_start) { 1302 if (physmem_start < PAGE_SIZE) 1303 physmap[0] = PAGE_SIZE; 1304 else if (physmem_start >= physmap[1]) 1305 physmap[0] = round_page(physmap[1] - PAGE_SIZE); 1306 else 1307 physmap[0] = round_page(physmem_start); 1308 } 1309 pa_indx = 0; 1310 da_indx = 1; 1311 phys_avail[pa_indx++] = physmap[0]; 1312 phys_avail[pa_indx] = physmap[0]; 1313 dump_avail[da_indx] = physmap[0]; 1314 pte = CMAP1; 1315 1316 /* 1317 * Get dcons buffer address 1318 */ 1319 if (getenv_quad("dcons.addr", &dcons_addr) == 0 || 1320 getenv_quad("dcons.size", &dcons_size) == 0) 1321 dcons_addr = 0; 1322 1323 /* 1324 * physmap is in bytes, so when converting to page boundaries, 1325 * round up the start address and round down the end address. 1326 */ 1327 page_counter = 0; 1328 if (memtest != 0) 1329 printf("Testing system memory"); 1330 for (i = 0; i <= physmap_idx; i += 2) { 1331 vm_paddr_t end; 1332 1333 end = ptoa((vm_paddr_t)Maxmem); 1334 if (physmap[i + 1] < end) 1335 end = trunc_page(physmap[i + 1]); 1336 for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { 1337 int tmp, page_bad, full; 1338 int *ptr = (int *)CADDR1; 1339 1340 full = FALSE; 1341 /* 1342 * block out kernel memory as not available. 1343 */ 1344 if (pa >= (vm_paddr_t)kernphys && pa < first) 1345 goto do_dump_avail; 1346 1347 /* 1348 * block out dcons buffer 1349 */ 1350 if (dcons_addr > 0 1351 && pa >= trunc_page(dcons_addr) 1352 && pa < dcons_addr + dcons_size) 1353 goto do_dump_avail; 1354 1355 page_bad = FALSE; 1356 if (memtest == 0) 1357 goto skip_memtest; 1358 1359 /* 1360 * Print a "." every GB to show we're making 1361 * progress. 1362 */ 1363 page_counter++; 1364 if ((page_counter % PAGES_PER_GB) == 0) 1365 printf("."); 1366 1367 /* 1368 * map page into kernel: valid, read/write,non-cacheable 1369 */ 1370 *pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD; 1371 invltlb(); 1372 1373 tmp = *(int *)ptr; 1374 /* 1375 * Test for alternating 1's and 0's 1376 */ 1377 *(volatile int *)ptr = 0xaaaaaaaa; 1378 if (*(volatile int *)ptr != 0xaaaaaaaa) 1379 page_bad = TRUE; 1380 /* 1381 * Test for alternating 0's and 1's 1382 */ 1383 *(volatile int *)ptr = 0x55555555; 1384 if (*(volatile int *)ptr != 0x55555555) 1385 page_bad = TRUE; 1386 /* 1387 * Test for all 1's 1388 */ 1389 *(volatile int *)ptr = 0xffffffff; 1390 if (*(volatile int *)ptr != 0xffffffff) 1391 page_bad = TRUE; 1392 /* 1393 * Test for all 0's 1394 */ 1395 *(volatile int *)ptr = 0x0; 1396 if (*(volatile int *)ptr != 0x0) 1397 page_bad = TRUE; 1398 /* 1399 * Restore original value. 1400 */ 1401 *(int *)ptr = tmp; 1402 1403 skip_memtest: 1404 /* 1405 * Adjust array of valid/good pages. 1406 */ 1407 if (page_bad == TRUE) 1408 continue; 1409 /* 1410 * If this good page is a continuation of the 1411 * previous set of good pages, then just increase 1412 * the end pointer. Otherwise start a new chunk. 1413 * Note that "end" points one higher than end, 1414 * making the range >= start and < end. 1415 * If we're also doing a speculative memory 1416 * test and we at or past the end, bump up Maxmem 1417 * so that we keep going. The first bad page 1418 * will terminate the loop. 1419 */ 1420 if (phys_avail[pa_indx] == pa) { 1421 phys_avail[pa_indx] += PAGE_SIZE; 1422 } else { 1423 pa_indx++; 1424 if (pa_indx == PHYS_AVAIL_ARRAY_END) { 1425 printf( 1426 "Too many holes in the physical address space, giving up\n"); 1427 pa_indx--; 1428 full = TRUE; 1429 goto do_dump_avail; 1430 } 1431 phys_avail[pa_indx++] = pa; /* start */ 1432 phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ 1433 } 1434 physmem++; 1435 do_dump_avail: 1436 if (dump_avail[da_indx] == pa) { 1437 dump_avail[da_indx] += PAGE_SIZE; 1438 } else { 1439 da_indx++; 1440 if (da_indx == DUMP_AVAIL_ARRAY_END) { 1441 da_indx--; 1442 goto do_next; 1443 } 1444 dump_avail[da_indx++] = pa; /* start */ 1445 dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ 1446 } 1447 do_next: 1448 if (full) 1449 break; 1450 } 1451 } 1452 *pte = 0; 1453 invltlb(); 1454 if (memtest != 0) 1455 printf("\n"); 1456 1457 /* 1458 * XXX 1459 * The last chunk must contain at least one page plus the message 1460 * buffer to avoid complicating other code (message buffer address 1461 * calculation, etc.). 1462 */ 1463 while (phys_avail[pa_indx - 1] + PAGE_SIZE + 1464 round_page(msgbufsize) >= phys_avail[pa_indx]) { 1465 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); 1466 phys_avail[pa_indx--] = 0; 1467 phys_avail[pa_indx--] = 0; 1468 } 1469 1470 Maxmem = atop(phys_avail[pa_indx]); 1471 1472 /* Trim off space for the message buffer. */ 1473 phys_avail[pa_indx] -= round_page(msgbufsize); 1474 1475 /* Map the message buffer. */ 1476 msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]); 1477 } 1478 1479 static caddr_t 1480 native_parse_preload_data(u_int64_t modulep) 1481 { 1482 caddr_t kmdp; 1483 char *envp; 1484 #ifdef DDB 1485 vm_offset_t ksym_start; 1486 vm_offset_t ksym_end; 1487 #endif 1488 1489 preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE); 1490 preload_bootstrap_relocate(KERNBASE); 1491 kmdp = preload_search_by_type("elf kernel"); 1492 if (kmdp == NULL) 1493 kmdp = preload_search_by_type("elf64 kernel"); 1494 boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); 1495 envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); 1496 if (envp != NULL) 1497 envp += KERNBASE; 1498 init_static_kenv(envp, 0); 1499 #ifdef DDB 1500 ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); 1501 ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); 1502 db_fetch_ksymtab(ksym_start, ksym_end); 1503 #endif 1504 efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t); 1505 1506 return (kmdp); 1507 } 1508 1509 static void 1510 amd64_kdb_init(void) 1511 { 1512 kdb_init(); 1513 #ifdef KDB 1514 if (boothowto & RB_KDB) 1515 kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); 1516 #endif 1517 } 1518 1519 u_int64_t 1520 hammer_time(u_int64_t modulep, u_int64_t physfree) 1521 { 1522 caddr_t kmdp; 1523 int gsel_tss, x; 1524 struct pcpu *pc; 1525 struct nmi_pcpu *np; 1526 struct xstate_hdr *xhdr; 1527 u_int64_t msr; 1528 char *env; 1529 size_t kstack0_sz; 1530 int late_console; 1531 1532 /* 1533 * This may be done better later if it gets more high level 1534 * components in it. If so just link td->td_proc here. 1535 */ 1536 proc_linkup0(&proc0, &thread0); 1537 1538 kmdp = init_ops.parse_preload_data(modulep); 1539 1540 /* Init basic tunables, hz etc */ 1541 init_param1(); 1542 1543 thread0.td_kstack = physfree + KERNBASE; 1544 thread0.td_kstack_pages = kstack_pages; 1545 kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; 1546 bzero((void *)thread0.td_kstack, kstack0_sz); 1547 physfree += kstack0_sz; 1548 1549 /* 1550 * make gdt memory segments 1551 */ 1552 for (x = 0; x < NGDT; x++) { 1553 if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && 1554 x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1) 1555 ssdtosd(&gdt_segs[x], &gdt[x]); 1556 } 1557 gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; 1558 ssdtosyssd(&gdt_segs[GPROC0_SEL], 1559 (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); 1560 1561 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; 1562 r_gdt.rd_base = (long) gdt; 1563 lgdt(&r_gdt); 1564 pc = &__pcpu[0]; 1565 1566 wrmsr(MSR_FSBASE, 0); /* User value */ 1567 wrmsr(MSR_GSBASE, (u_int64_t)pc); 1568 wrmsr(MSR_KGSBASE, 0); /* User value while in the kernel */ 1569 1570 pcpu_init(pc, 0, sizeof(struct pcpu)); 1571 dpcpu_init((void *)(physfree + KERNBASE), 0); 1572 physfree += DPCPU_SIZE; 1573 PCPU_SET(prvspace, pc); 1574 PCPU_SET(curthread, &thread0); 1575 /* Non-late cninit() and printf() can be moved up to here. */ 1576 PCPU_SET(tssp, &common_tss[0]); 1577 PCPU_SET(commontssp, &common_tss[0]); 1578 PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); 1579 PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]); 1580 PCPU_SET(fs32p, &gdt[GUFS32_SEL]); 1581 PCPU_SET(gs32p, &gdt[GUGS32_SEL]); 1582 1583 /* 1584 * Initialize mutexes. 1585 * 1586 * icu_lock: in order to allow an interrupt to occur in a critical 1587 * section, to set pcpu->ipending (etc...) properly, we 1588 * must be able to get the icu lock, so it can't be 1589 * under witness. 1590 */ 1591 mutex_init(); 1592 mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); 1593 mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF); 1594 1595 /* exceptions */ 1596 for (x = 0; x < NIDT; x++) 1597 setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); 1598 setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); 1599 setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); 1600 setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); 1601 setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); 1602 setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0); 1603 setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); 1604 setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0); 1605 setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0); 1606 setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1); 1607 setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0); 1608 setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0); 1609 setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0); 1610 setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0); 1611 setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0); 1612 setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0); 1613 setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0); 1614 setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0); 1615 setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0); 1616 setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0); 1617 #ifdef KDTRACE_HOOKS 1618 setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0); 1619 #endif 1620 #ifdef XENHVM 1621 setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_UPL, 0); 1622 #endif 1623 1624 r_idt.rd_limit = sizeof(idt0) - 1; 1625 r_idt.rd_base = (long) idt; 1626 lidt(&r_idt); 1627 1628 /* 1629 * Initialize the clock before the console so that console 1630 * initialization can use DELAY(). 1631 */ 1632 clock_init(); 1633 1634 /* 1635 * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4) 1636 * transition). 1637 * Once bootblocks have updated, we can test directly for 1638 * efi_systbl != NULL here... 1639 */ 1640 if (preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP) 1641 != NULL) 1642 vty_set_preferred(VTY_VT); 1643 1644 identify_cpu(); /* Final stage of CPU initialization */ 1645 initializecpu(); /* Initialize CPU registers */ 1646 initializecpucache(); 1647 1648 /* doublefault stack space, runs on ist1 */ 1649 common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; 1650 1651 /* 1652 * NMI stack, runs on ist2. The pcpu pointer is stored just 1653 * above the start of the ist2 stack. 1654 */ 1655 np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1; 1656 np->np_pcpu = (register_t) pc; 1657 common_tss[0].tss_ist2 = (long) np; 1658 1659 /* Set the IO permission bitmap (empty due to tss seg limit) */ 1660 common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE; 1661 1662 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 1663 ltr(gsel_tss); 1664 1665 /* Set up the fast syscall stuff */ 1666 msr = rdmsr(MSR_EFER) | EFER_SCE; 1667 wrmsr(MSR_EFER, msr); 1668 wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); 1669 wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); 1670 msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | 1671 ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); 1672 wrmsr(MSR_STAR, msr); 1673 wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); 1674 1675 /* 1676 * Temporary forge some valid pointer to PCB, for exception 1677 * handlers. It is reinitialized properly below after FPU is 1678 * set up. Also set up td_critnest to short-cut the page 1679 * fault handler. 1680 */ 1681 cpu_max_ext_state_size = sizeof(struct savefpu); 1682 thread0.td_pcb = get_pcb_td(&thread0); 1683 thread0.td_critnest = 1; 1684 1685 /* 1686 * The console and kdb should be initialized even earlier than here, 1687 * but some console drivers don't work until after getmemsize(). 1688 * Default to late console initialization to support these drivers. 1689 * This loses mainly printf()s in getmemsize() and early debugging. 1690 */ 1691 late_console = 1; 1692 TUNABLE_INT_FETCH("debug.late_console", &late_console); 1693 if (!late_console) { 1694 cninit(); 1695 amd64_kdb_init(); 1696 } 1697 1698 getmemsize(kmdp, physfree); 1699 init_param2(physmem); 1700 1701 /* now running on new page tables, configured,and u/iom is accessible */ 1702 1703 if (late_console) 1704 cninit(); 1705 1706 #ifdef DEV_ISA 1707 #ifdef DEV_ATPIC 1708 elcr_probe(); 1709 atpic_startup(); 1710 #else 1711 /* Reset and mask the atpics and leave them shut down. */ 1712 atpic_reset(); 1713 1714 /* 1715 * Point the ICU spurious interrupt vectors at the APIC spurious 1716 * interrupt handler. 1717 */ 1718 setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); 1719 setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); 1720 #endif 1721 #else 1722 #error "have you forgotten the isa device?"; 1723 #endif 1724 1725 if (late_console) 1726 amd64_kdb_init(); 1727 1728 msgbufinit(msgbufp, msgbufsize); 1729 fpuinit(); 1730 1731 /* 1732 * Set up thread0 pcb after fpuinit calculated pcb + fpu save 1733 * area size. Zero out the extended state header in fpu save 1734 * area. 1735 */ 1736 thread0.td_pcb = get_pcb_td(&thread0); 1737 thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0); 1738 bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); 1739 if (use_xsave) { 1740 xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1741 1); 1742 xhdr->xstate_bv = xsave_mask; 1743 } 1744 /* make an initial tss so cpu can get interrupt stack on syscall! */ 1745 common_tss[0].tss_rsp0 = (vm_offset_t)thread0.td_pcb; 1746 /* Ensure the stack is aligned to 16 bytes */ 1747 common_tss[0].tss_rsp0 &= ~0xFul; 1748 PCPU_SET(rsp0, common_tss[0].tss_rsp0); 1749 PCPU_SET(curpcb, thread0.td_pcb); 1750 1751 /* transfer to user mode */ 1752 1753 _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); 1754 _udatasel = GSEL(GUDATA_SEL, SEL_UPL); 1755 _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); 1756 _ufssel = GSEL(GUFS32_SEL, SEL_UPL); 1757 _ugssel = GSEL(GUGS32_SEL, SEL_UPL); 1758 1759 load_ds(_udatasel); 1760 load_es(_udatasel); 1761 load_fs(_ufssel); 1762 1763 /* setup proc 0's pcb */ 1764 thread0.td_pcb->pcb_flags = 0; 1765 thread0.td_frame = &proc0_tf; 1766 1767 env = kern_getenv("kernelname"); 1768 if (env != NULL) 1769 strlcpy(kernelname, env, sizeof(kernelname)); 1770 1771 cpu_probe_amdc1e(); 1772 1773 #ifdef FDT 1774 x86_init_fdt(); 1775 #endif 1776 thread0.td_critnest = 0; 1777 1778 /* Location of kernel stack for locore */ 1779 return ((u_int64_t)thread0.td_pcb); 1780 } 1781 1782 void 1783 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) 1784 { 1785 1786 pcpu->pc_acpi_id = 0xffffffff; 1787 } 1788 1789 static int 1790 smap_sysctl_handler(SYSCTL_HANDLER_ARGS) 1791 { 1792 struct bios_smap *smapbase; 1793 struct bios_smap_xattr smap; 1794 caddr_t kmdp; 1795 uint32_t *smapattr; 1796 int count, error, i; 1797 1798 /* Retrieve the system memory map from the loader. */ 1799 kmdp = preload_search_by_type("elf kernel"); 1800 if (kmdp == NULL) 1801 kmdp = preload_search_by_type("elf64 kernel"); 1802 smapbase = (struct bios_smap *)preload_search_info(kmdp, 1803 MODINFO_METADATA | MODINFOMD_SMAP); 1804 if (smapbase == NULL) 1805 return (0); 1806 smapattr = (uint32_t *)preload_search_info(kmdp, 1807 MODINFO_METADATA | MODINFOMD_SMAP_XATTR); 1808 count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase); 1809 error = 0; 1810 for (i = 0; i < count; i++) { 1811 smap.base = smapbase[i].base; 1812 smap.length = smapbase[i].length; 1813 smap.type = smapbase[i].type; 1814 if (smapattr != NULL) 1815 smap.xattr = smapattr[i]; 1816 else 1817 smap.xattr = 0; 1818 error = SYSCTL_OUT(req, &smap, sizeof(smap)); 1819 } 1820 return (error); 1821 } 1822 SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, 1823 smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data"); 1824 1825 static int 1826 efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS) 1827 { 1828 struct efi_map_header *efihdr; 1829 caddr_t kmdp; 1830 uint32_t efisize; 1831 1832 kmdp = preload_search_by_type("elf kernel"); 1833 if (kmdp == NULL) 1834 kmdp = preload_search_by_type("elf64 kernel"); 1835 efihdr = (struct efi_map_header *)preload_search_info(kmdp, 1836 MODINFO_METADATA | MODINFOMD_EFI_MAP); 1837 if (efihdr == NULL) 1838 return (0); 1839 efisize = *((uint32_t *)efihdr - 1); 1840 return (SYSCTL_OUT(req, efihdr, efisize)); 1841 } 1842 SYSCTL_PROC(_machdep, OID_AUTO, efi_map, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, 1843 efi_map_sysctl_handler, "S,efi_map_header", "Raw EFI Memory Map"); 1844 1845 void 1846 spinlock_enter(void) 1847 { 1848 struct thread *td; 1849 register_t flags; 1850 1851 td = curthread; 1852 if (td->td_md.md_spinlock_count == 0) { 1853 flags = intr_disable(); 1854 td->td_md.md_spinlock_count = 1; 1855 td->td_md.md_saved_flags = flags; 1856 } else 1857 td->td_md.md_spinlock_count++; 1858 critical_enter(); 1859 } 1860 1861 void 1862 spinlock_exit(void) 1863 { 1864 struct thread *td; 1865 register_t flags; 1866 1867 td = curthread; 1868 critical_exit(); 1869 flags = td->td_md.md_saved_flags; 1870 td->td_md.md_spinlock_count--; 1871 if (td->td_md.md_spinlock_count == 0) 1872 intr_restore(flags); 1873 } 1874 1875 /* 1876 * Construct a PCB from a trapframe. This is called from kdb_trap() where 1877 * we want to start a backtrace from the function that caused us to enter 1878 * the debugger. We have the context in the trapframe, but base the trace 1879 * on the PCB. The PCB doesn't have to be perfect, as long as it contains 1880 * enough for a backtrace. 1881 */ 1882 void 1883 makectx(struct trapframe *tf, struct pcb *pcb) 1884 { 1885 1886 pcb->pcb_r12 = tf->tf_r12; 1887 pcb->pcb_r13 = tf->tf_r13; 1888 pcb->pcb_r14 = tf->tf_r14; 1889 pcb->pcb_r15 = tf->tf_r15; 1890 pcb->pcb_rbp = tf->tf_rbp; 1891 pcb->pcb_rbx = tf->tf_rbx; 1892 pcb->pcb_rip = tf->tf_rip; 1893 pcb->pcb_rsp = tf->tf_rsp; 1894 } 1895 1896 int 1897 ptrace_set_pc(struct thread *td, unsigned long addr) 1898 { 1899 1900 td->td_frame->tf_rip = addr; 1901 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 1902 return (0); 1903 } 1904 1905 int 1906 ptrace_single_step(struct thread *td) 1907 { 1908 td->td_frame->tf_rflags |= PSL_T; 1909 return (0); 1910 } 1911 1912 int 1913 ptrace_clear_single_step(struct thread *td) 1914 { 1915 td->td_frame->tf_rflags &= ~PSL_T; 1916 return (0); 1917 } 1918 1919 int 1920 fill_regs(struct thread *td, struct reg *regs) 1921 { 1922 struct trapframe *tp; 1923 1924 tp = td->td_frame; 1925 return (fill_frame_regs(tp, regs)); 1926 } 1927 1928 int 1929 fill_frame_regs(struct trapframe *tp, struct reg *regs) 1930 { 1931 regs->r_r15 = tp->tf_r15; 1932 regs->r_r14 = tp->tf_r14; 1933 regs->r_r13 = tp->tf_r13; 1934 regs->r_r12 = tp->tf_r12; 1935 regs->r_r11 = tp->tf_r11; 1936 regs->r_r10 = tp->tf_r10; 1937 regs->r_r9 = tp->tf_r9; 1938 regs->r_r8 = tp->tf_r8; 1939 regs->r_rdi = tp->tf_rdi; 1940 regs->r_rsi = tp->tf_rsi; 1941 regs->r_rbp = tp->tf_rbp; 1942 regs->r_rbx = tp->tf_rbx; 1943 regs->r_rdx = tp->tf_rdx; 1944 regs->r_rcx = tp->tf_rcx; 1945 regs->r_rax = tp->tf_rax; 1946 regs->r_rip = tp->tf_rip; 1947 regs->r_cs = tp->tf_cs; 1948 regs->r_rflags = tp->tf_rflags; 1949 regs->r_rsp = tp->tf_rsp; 1950 regs->r_ss = tp->tf_ss; 1951 if (tp->tf_flags & TF_HASSEGS) { 1952 regs->r_ds = tp->tf_ds; 1953 regs->r_es = tp->tf_es; 1954 regs->r_fs = tp->tf_fs; 1955 regs->r_gs = tp->tf_gs; 1956 } else { 1957 regs->r_ds = 0; 1958 regs->r_es = 0; 1959 regs->r_fs = 0; 1960 regs->r_gs = 0; 1961 } 1962 return (0); 1963 } 1964 1965 int 1966 set_regs(struct thread *td, struct reg *regs) 1967 { 1968 struct trapframe *tp; 1969 register_t rflags; 1970 1971 tp = td->td_frame; 1972 rflags = regs->r_rflags & 0xffffffff; 1973 if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs)) 1974 return (EINVAL); 1975 tp->tf_r15 = regs->r_r15; 1976 tp->tf_r14 = regs->r_r14; 1977 tp->tf_r13 = regs->r_r13; 1978 tp->tf_r12 = regs->r_r12; 1979 tp->tf_r11 = regs->r_r11; 1980 tp->tf_r10 = regs->r_r10; 1981 tp->tf_r9 = regs->r_r9; 1982 tp->tf_r8 = regs->r_r8; 1983 tp->tf_rdi = regs->r_rdi; 1984 tp->tf_rsi = regs->r_rsi; 1985 tp->tf_rbp = regs->r_rbp; 1986 tp->tf_rbx = regs->r_rbx; 1987 tp->tf_rdx = regs->r_rdx; 1988 tp->tf_rcx = regs->r_rcx; 1989 tp->tf_rax = regs->r_rax; 1990 tp->tf_rip = regs->r_rip; 1991 tp->tf_cs = regs->r_cs; 1992 tp->tf_rflags = rflags; 1993 tp->tf_rsp = regs->r_rsp; 1994 tp->tf_ss = regs->r_ss; 1995 if (0) { /* XXXKIB */ 1996 tp->tf_ds = regs->r_ds; 1997 tp->tf_es = regs->r_es; 1998 tp->tf_fs = regs->r_fs; 1999 tp->tf_gs = regs->r_gs; 2000 tp->tf_flags = TF_HASSEGS; 2001 } 2002 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 2003 return (0); 2004 } 2005 2006 /* XXX check all this stuff! */ 2007 /* externalize from sv_xmm */ 2008 static void 2009 fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs) 2010 { 2011 struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; 2012 struct envxmm *penv_xmm = &sv_xmm->sv_env; 2013 int i; 2014 2015 /* pcb -> fpregs */ 2016 bzero(fpregs, sizeof(*fpregs)); 2017 2018 /* FPU control/status */ 2019 penv_fpreg->en_cw = penv_xmm->en_cw; 2020 penv_fpreg->en_sw = penv_xmm->en_sw; 2021 penv_fpreg->en_tw = penv_xmm->en_tw; 2022 penv_fpreg->en_opcode = penv_xmm->en_opcode; 2023 penv_fpreg->en_rip = penv_xmm->en_rip; 2024 penv_fpreg->en_rdp = penv_xmm->en_rdp; 2025 penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr; 2026 penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask; 2027 2028 /* FPU registers */ 2029 for (i = 0; i < 8; ++i) 2030 bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10); 2031 2032 /* SSE registers */ 2033 for (i = 0; i < 16; ++i) 2034 bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16); 2035 } 2036 2037 /* internalize from fpregs into sv_xmm */ 2038 static void 2039 set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm) 2040 { 2041 struct envxmm *penv_xmm = &sv_xmm->sv_env; 2042 struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; 2043 int i; 2044 2045 /* fpregs -> pcb */ 2046 /* FPU control/status */ 2047 penv_xmm->en_cw = penv_fpreg->en_cw; 2048 penv_xmm->en_sw = penv_fpreg->en_sw; 2049 penv_xmm->en_tw = penv_fpreg->en_tw; 2050 penv_xmm->en_opcode = penv_fpreg->en_opcode; 2051 penv_xmm->en_rip = penv_fpreg->en_rip; 2052 penv_xmm->en_rdp = penv_fpreg->en_rdp; 2053 penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr; 2054 penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask; 2055 2056 /* FPU registers */ 2057 for (i = 0; i < 8; ++i) 2058 bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10); 2059 2060 /* SSE registers */ 2061 for (i = 0; i < 16; ++i) 2062 bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16); 2063 } 2064 2065 /* externalize from td->pcb */ 2066 int 2067 fill_fpregs(struct thread *td, struct fpreg *fpregs) 2068 { 2069 2070 KASSERT(td == curthread || TD_IS_SUSPENDED(td) || 2071 P_SHOULDSTOP(td->td_proc), 2072 ("not suspended thread %p", td)); 2073 fpugetregs(td); 2074 fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs); 2075 return (0); 2076 } 2077 2078 /* internalize to td->pcb */ 2079 int 2080 set_fpregs(struct thread *td, struct fpreg *fpregs) 2081 { 2082 2083 set_fpregs_xmm(fpregs, get_pcb_user_save_td(td)); 2084 fpuuserinited(td); 2085 return (0); 2086 } 2087 2088 /* 2089 * Get machine context. 2090 */ 2091 int 2092 get_mcontext(struct thread *td, mcontext_t *mcp, int flags) 2093 { 2094 struct pcb *pcb; 2095 struct trapframe *tp; 2096 2097 pcb = td->td_pcb; 2098 tp = td->td_frame; 2099 PROC_LOCK(curthread->td_proc); 2100 mcp->mc_onstack = sigonstack(tp->tf_rsp); 2101 PROC_UNLOCK(curthread->td_proc); 2102 mcp->mc_r15 = tp->tf_r15; 2103 mcp->mc_r14 = tp->tf_r14; 2104 mcp->mc_r13 = tp->tf_r13; 2105 mcp->mc_r12 = tp->tf_r12; 2106 mcp->mc_r11 = tp->tf_r11; 2107 mcp->mc_r10 = tp->tf_r10; 2108 mcp->mc_r9 = tp->tf_r9; 2109 mcp->mc_r8 = tp->tf_r8; 2110 mcp->mc_rdi = tp->tf_rdi; 2111 mcp->mc_rsi = tp->tf_rsi; 2112 mcp->mc_rbp = tp->tf_rbp; 2113 mcp->mc_rbx = tp->tf_rbx; 2114 mcp->mc_rcx = tp->tf_rcx; 2115 mcp->mc_rflags = tp->tf_rflags; 2116 if (flags & GET_MC_CLEAR_RET) { 2117 mcp->mc_rax = 0; 2118 mcp->mc_rdx = 0; 2119 mcp->mc_rflags &= ~PSL_C; 2120 } else { 2121 mcp->mc_rax = tp->tf_rax; 2122 mcp->mc_rdx = tp->tf_rdx; 2123 } 2124 mcp->mc_rip = tp->tf_rip; 2125 mcp->mc_cs = tp->tf_cs; 2126 mcp->mc_rsp = tp->tf_rsp; 2127 mcp->mc_ss = tp->tf_ss; 2128 mcp->mc_ds = tp->tf_ds; 2129 mcp->mc_es = tp->tf_es; 2130 mcp->mc_fs = tp->tf_fs; 2131 mcp->mc_gs = tp->tf_gs; 2132 mcp->mc_flags = tp->tf_flags; 2133 mcp->mc_len = sizeof(*mcp); 2134 get_fpcontext(td, mcp, NULL, 0); 2135 mcp->mc_fsbase = pcb->pcb_fsbase; 2136 mcp->mc_gsbase = pcb->pcb_gsbase; 2137 mcp->mc_xfpustate = 0; 2138 mcp->mc_xfpustate_len = 0; 2139 bzero(mcp->mc_spare, sizeof(mcp->mc_spare)); 2140 return (0); 2141 } 2142 2143 /* 2144 * Set machine context. 2145 * 2146 * However, we don't set any but the user modifiable flags, and we won't 2147 * touch the cs selector. 2148 */ 2149 int 2150 set_mcontext(struct thread *td, mcontext_t *mcp) 2151 { 2152 struct pcb *pcb; 2153 struct trapframe *tp; 2154 char *xfpustate; 2155 long rflags; 2156 int ret; 2157 2158 pcb = td->td_pcb; 2159 tp = td->td_frame; 2160 if (mcp->mc_len != sizeof(*mcp) || 2161 (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) 2162 return (EINVAL); 2163 rflags = (mcp->mc_rflags & PSL_USERCHANGE) | 2164 (tp->tf_rflags & ~PSL_USERCHANGE); 2165 if (mcp->mc_flags & _MC_HASFPXSTATE) { 2166 if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - 2167 sizeof(struct savefpu)) 2168 return (EINVAL); 2169 xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); 2170 ret = copyin((void *)mcp->mc_xfpustate, xfpustate, 2171 mcp->mc_xfpustate_len); 2172 if (ret != 0) 2173 return (ret); 2174 } else 2175 xfpustate = NULL; 2176 ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); 2177 if (ret != 0) 2178 return (ret); 2179 tp->tf_r15 = mcp->mc_r15; 2180 tp->tf_r14 = mcp->mc_r14; 2181 tp->tf_r13 = mcp->mc_r13; 2182 tp->tf_r12 = mcp->mc_r12; 2183 tp->tf_r11 = mcp->mc_r11; 2184 tp->tf_r10 = mcp->mc_r10; 2185 tp->tf_r9 = mcp->mc_r9; 2186 tp->tf_r8 = mcp->mc_r8; 2187 tp->tf_rdi = mcp->mc_rdi; 2188 tp->tf_rsi = mcp->mc_rsi; 2189 tp->tf_rbp = mcp->mc_rbp; 2190 tp->tf_rbx = mcp->mc_rbx; 2191 tp->tf_rdx = mcp->mc_rdx; 2192 tp->tf_rcx = mcp->mc_rcx; 2193 tp->tf_rax = mcp->mc_rax; 2194 tp->tf_rip = mcp->mc_rip; 2195 tp->tf_rflags = rflags; 2196 tp->tf_rsp = mcp->mc_rsp; 2197 tp->tf_ss = mcp->mc_ss; 2198 tp->tf_flags = mcp->mc_flags; 2199 if (tp->tf_flags & TF_HASSEGS) { 2200 tp->tf_ds = mcp->mc_ds; 2201 tp->tf_es = mcp->mc_es; 2202 tp->tf_fs = mcp->mc_fs; 2203 tp->tf_gs = mcp->mc_gs; 2204 } 2205 if (mcp->mc_flags & _MC_HASBASES) { 2206 pcb->pcb_fsbase = mcp->mc_fsbase; 2207 pcb->pcb_gsbase = mcp->mc_gsbase; 2208 } 2209 set_pcb_flags(pcb, PCB_FULL_IRET); 2210 return (0); 2211 } 2212 2213 static void 2214 get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, 2215 size_t xfpusave_len) 2216 { 2217 size_t max_len, len; 2218 2219 mcp->mc_ownedfp = fpugetregs(td); 2220 bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], 2221 sizeof(mcp->mc_fpstate)); 2222 mcp->mc_fpformat = fpuformat(); 2223 if (!use_xsave || xfpusave_len == 0) 2224 return; 2225 max_len = cpu_max_ext_state_size - sizeof(struct savefpu); 2226 len = xfpusave_len; 2227 if (len > max_len) { 2228 len = max_len; 2229 bzero(xfpusave + max_len, len - max_len); 2230 } 2231 mcp->mc_flags |= _MC_HASFPXSTATE; 2232 mcp->mc_xfpustate_len = len; 2233 bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); 2234 } 2235 2236 static int 2237 set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, 2238 size_t xfpustate_len) 2239 { 2240 struct savefpu *fpstate; 2241 int error; 2242 2243 if (mcp->mc_fpformat == _MC_FPFMT_NODEV) 2244 return (0); 2245 else if (mcp->mc_fpformat != _MC_FPFMT_XMM) 2246 return (EINVAL); 2247 else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { 2248 /* We don't care what state is left in the FPU or PCB. */ 2249 fpstate_drop(td); 2250 error = 0; 2251 } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || 2252 mcp->mc_ownedfp == _MC_FPOWNED_PCB) { 2253 fpstate = (struct savefpu *)&mcp->mc_fpstate; 2254 fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; 2255 error = fpusetregs(td, fpstate, xfpustate, xfpustate_len); 2256 } else 2257 return (EINVAL); 2258 return (error); 2259 } 2260 2261 void 2262 fpstate_drop(struct thread *td) 2263 { 2264 2265 KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); 2266 critical_enter(); 2267 if (PCPU_GET(fpcurthread) == td) 2268 fpudrop(); 2269 /* 2270 * XXX force a full drop of the fpu. The above only drops it if we 2271 * owned it. 2272 * 2273 * XXX I don't much like fpugetuserregs()'s semantics of doing a full 2274 * drop. Dropping only to the pcb matches fnsave's behaviour. 2275 * We only need to drop to !PCB_INITDONE in sendsig(). But 2276 * sendsig() is the only caller of fpugetuserregs()... perhaps we just 2277 * have too many layers. 2278 */ 2279 clear_pcb_flags(curthread->td_pcb, 2280 PCB_FPUINITDONE | PCB_USERFPUINITDONE); 2281 critical_exit(); 2282 } 2283 2284 int 2285 fill_dbregs(struct thread *td, struct dbreg *dbregs) 2286 { 2287 struct pcb *pcb; 2288 2289 if (td == NULL) { 2290 dbregs->dr[0] = rdr0(); 2291 dbregs->dr[1] = rdr1(); 2292 dbregs->dr[2] = rdr2(); 2293 dbregs->dr[3] = rdr3(); 2294 dbregs->dr[6] = rdr6(); 2295 dbregs->dr[7] = rdr7(); 2296 } else { 2297 pcb = td->td_pcb; 2298 dbregs->dr[0] = pcb->pcb_dr0; 2299 dbregs->dr[1] = pcb->pcb_dr1; 2300 dbregs->dr[2] = pcb->pcb_dr2; 2301 dbregs->dr[3] = pcb->pcb_dr3; 2302 dbregs->dr[6] = pcb->pcb_dr6; 2303 dbregs->dr[7] = pcb->pcb_dr7; 2304 } 2305 dbregs->dr[4] = 0; 2306 dbregs->dr[5] = 0; 2307 dbregs->dr[8] = 0; 2308 dbregs->dr[9] = 0; 2309 dbregs->dr[10] = 0; 2310 dbregs->dr[11] = 0; 2311 dbregs->dr[12] = 0; 2312 dbregs->dr[13] = 0; 2313 dbregs->dr[14] = 0; 2314 dbregs->dr[15] = 0; 2315 return (0); 2316 } 2317 2318 int 2319 set_dbregs(struct thread *td, struct dbreg *dbregs) 2320 { 2321 struct pcb *pcb; 2322 int i; 2323 2324 if (td == NULL) { 2325 load_dr0(dbregs->dr[0]); 2326 load_dr1(dbregs->dr[1]); 2327 load_dr2(dbregs->dr[2]); 2328 load_dr3(dbregs->dr[3]); 2329 load_dr6(dbregs->dr[6]); 2330 load_dr7(dbregs->dr[7]); 2331 } else { 2332 /* 2333 * Don't let an illegal value for dr7 get set. Specifically, 2334 * check for undefined settings. Setting these bit patterns 2335 * result in undefined behaviour and can lead to an unexpected 2336 * TRCTRAP or a general protection fault right here. 2337 * Upper bits of dr6 and dr7 must not be set 2338 */ 2339 for (i = 0; i < 4; i++) { 2340 if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) 2341 return (EINVAL); 2342 if (td->td_frame->tf_cs == _ucode32sel && 2343 DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8) 2344 return (EINVAL); 2345 } 2346 if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 || 2347 (dbregs->dr[7] & 0xffffffff00000000ul) != 0) 2348 return (EINVAL); 2349 2350 pcb = td->td_pcb; 2351 2352 /* 2353 * Don't let a process set a breakpoint that is not within the 2354 * process's address space. If a process could do this, it 2355 * could halt the system by setting a breakpoint in the kernel 2356 * (if ddb was enabled). Thus, we need to check to make sure 2357 * that no breakpoints are being enabled for addresses outside 2358 * process's address space. 2359 * 2360 * XXX - what about when the watched area of the user's 2361 * address space is written into from within the kernel 2362 * ... wouldn't that still cause a breakpoint to be generated 2363 * from within kernel mode? 2364 */ 2365 2366 if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { 2367 /* dr0 is enabled */ 2368 if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) 2369 return (EINVAL); 2370 } 2371 if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { 2372 /* dr1 is enabled */ 2373 if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) 2374 return (EINVAL); 2375 } 2376 if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { 2377 /* dr2 is enabled */ 2378 if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) 2379 return (EINVAL); 2380 } 2381 if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { 2382 /* dr3 is enabled */ 2383 if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) 2384 return (EINVAL); 2385 } 2386 2387 pcb->pcb_dr0 = dbregs->dr[0]; 2388 pcb->pcb_dr1 = dbregs->dr[1]; 2389 pcb->pcb_dr2 = dbregs->dr[2]; 2390 pcb->pcb_dr3 = dbregs->dr[3]; 2391 pcb->pcb_dr6 = dbregs->dr[6]; 2392 pcb->pcb_dr7 = dbregs->dr[7]; 2393 2394 set_pcb_flags(pcb, PCB_DBREGS); 2395 } 2396 2397 return (0); 2398 } 2399 2400 void 2401 reset_dbregs(void) 2402 { 2403 2404 load_dr7(0); /* Turn off the control bits first */ 2405 load_dr0(0); 2406 load_dr1(0); 2407 load_dr2(0); 2408 load_dr3(0); 2409 load_dr6(0); 2410 } 2411 2412 /* 2413 * Return > 0 if a hardware breakpoint has been hit, and the 2414 * breakpoint was in user space. Return 0, otherwise. 2415 */ 2416 int 2417 user_dbreg_trap(void) 2418 { 2419 u_int64_t dr7, dr6; /* debug registers dr6 and dr7 */ 2420 u_int64_t bp; /* breakpoint bits extracted from dr6 */ 2421 int nbp; /* number of breakpoints that triggered */ 2422 caddr_t addr[4]; /* breakpoint addresses */ 2423 int i; 2424 2425 dr7 = rdr7(); 2426 if ((dr7 & 0x000000ff) == 0) { 2427 /* 2428 * all GE and LE bits in the dr7 register are zero, 2429 * thus the trap couldn't have been caused by the 2430 * hardware debug registers 2431 */ 2432 return 0; 2433 } 2434 2435 nbp = 0; 2436 dr6 = rdr6(); 2437 bp = dr6 & 0x0000000f; 2438 2439 if (!bp) { 2440 /* 2441 * None of the breakpoint bits are set meaning this 2442 * trap was not caused by any of the debug registers 2443 */ 2444 return 0; 2445 } 2446 2447 /* 2448 * at least one of the breakpoints were hit, check to see 2449 * which ones and if any of them are user space addresses 2450 */ 2451 2452 if (bp & 0x01) { 2453 addr[nbp++] = (caddr_t)rdr0(); 2454 } 2455 if (bp & 0x02) { 2456 addr[nbp++] = (caddr_t)rdr1(); 2457 } 2458 if (bp & 0x04) { 2459 addr[nbp++] = (caddr_t)rdr2(); 2460 } 2461 if (bp & 0x08) { 2462 addr[nbp++] = (caddr_t)rdr3(); 2463 } 2464 2465 for (i = 0; i < nbp; i++) { 2466 if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { 2467 /* 2468 * addr[i] is in user space 2469 */ 2470 return nbp; 2471 } 2472 } 2473 2474 /* 2475 * None of the breakpoints are in user space. 2476 */ 2477 return 0; 2478 } 2479 2480 #ifdef KDB 2481 2482 /* 2483 * Provide inb() and outb() as functions. They are normally only available as 2484 * inline functions, thus cannot be called from the debugger. 2485 */ 2486 2487 /* silence compiler warnings */ 2488 u_char inb_(u_short); 2489 void outb_(u_short, u_char); 2490 2491 u_char 2492 inb_(u_short port) 2493 { 2494 return inb(port); 2495 } 2496 2497 void 2498 outb_(u_short port, u_char data) 2499 { 2500 outb(port, data); 2501 } 2502 2503 #endif /* KDB */ 2504