1 /* $OpenBSD: machdep.c,v 1.273 2021/03/11 11:16:55 jsg Exp $ */ 2 /* $NetBSD: machdep.c,v 1.3 2003/05/07 22:58:18 fvdl Exp $ */ 3 4 /*- 5 * Copyright (c) 1996, 1997, 1998, 2000 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace 10 * Simulation Facility, NASA Ames Research Center. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /*- 35 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 36 * All rights reserved. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * William Jolitz. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)machdep.c 7.4 (Berkeley) 6/3/91 66 */ 67 68 #include <sys/param.h> 69 #include <sys/systm.h> 70 #include <sys/signal.h> 71 #include <sys/signalvar.h> 72 #include <sys/proc.h> 73 #include <sys/user.h> 74 #include <sys/exec.h> 75 #include <sys/buf.h> 76 #include <sys/reboot.h> 77 #include <sys/conf.h> 78 #include <sys/msgbuf.h> 79 #include <sys/mount.h> 80 #include <sys/extent.h> 81 #include <sys/core.h> 82 #include <sys/kcore.h> 83 #include <sys/syscallargs.h> 84 85 #include <dev/cons.h> 86 #include <stand/boot/bootarg.h> 87 88 #include <net/if.h> 89 #include <uvm/uvm_extern.h> 90 91 #include <sys/sysctl.h> 92 93 #include <machine/cpu_full.h> 94 #include <machine/cpufunc.h> 95 #include <machine/pio.h> 96 #include <machine/psl.h> 97 #include <machine/reg.h> 98 #include <machine/fpu.h> 99 #include <machine/biosvar.h> 100 #include <machine/mpbiosvar.h> 101 #include <machine/kcore.h> 102 #include <machine/tss.h> 103 104 #include <dev/isa/isareg.h> 105 #include <dev/ic/i8042reg.h> 106 107 #ifdef DDB 108 #include <machine/db_machdep.h> 109 #include <ddb/db_extern.h> 110 extern int db_console; 111 #endif 112 113 #include "isa.h" 114 #include "isadma.h" 115 #include "ksyms.h" 116 117 #include "acpi.h" 118 #if NACPI > 0 119 #include <dev/acpi/acpivar.h> 120 #endif 121 122 #include "com.h" 123 #if NCOM > 0 124 #include <sys/tty.h> 125 #include <dev/ic/comvar.h> 126 #include <dev/ic/comreg.h> 127 #endif 128 129 #include "softraid.h" 130 #if NSOFTRAID > 0 131 #include <dev/softraidvar.h> 132 #endif 133 134 #ifdef HIBERNATE 135 #include <machine/hibernate_var.h> 136 #endif /* HIBERNATE */ 137 138 #include "ukbd.h" 139 #include "pckbc.h" 140 #if NPCKBC > 0 && NUKBD > 0 141 #include <dev/ic/pckbcvar.h> 142 #endif 143 144 /* #define MACHDEP_DEBUG */ 145 146 #ifdef MACHDEP_DEBUG 147 #define DPRINTF(x...) do { printf(x); } while(0) 148 #else 149 #define DPRINTF(x...) 150 #endif /* MACHDEP_DEBUG */ 151 152 /* the following is used externally (sysctl_hw) */ 153 char machine[] = MACHINE; 154 155 /* 156 * switchto vectors 157 */ 158 void (*cpu_idle_cycle_fcn)(void) = NULL; 159 160 /* the following is used externally for concurrent handlers */ 161 int setperf_prio = 0; 162 163 #ifdef CPURESET_DELAY 164 int cpureset_delay = CPURESET_DELAY; 165 #else 166 int cpureset_delay = 0; 167 #endif 168 169 int physmem; 170 u_int64_t dumpmem_low; 171 u_int64_t dumpmem_high; 172 extern int boothowto; 173 int cpu_class; 174 175 paddr_t dumpmem_paddr; 176 vaddr_t dumpmem_vaddr; 177 psize_t dumpmem_sz; 178 179 vaddr_t kern_end; 180 181 vaddr_t msgbuf_vaddr; 182 paddr_t msgbuf_paddr; 183 184 vaddr_t idt_vaddr; 185 paddr_t idt_paddr; 186 187 vaddr_t lo32_vaddr; 188 paddr_t lo32_paddr; 189 paddr_t tramp_pdirpa; 190 191 int kbd_reset; 192 int lid_action = 1; 193 int pwr_action = 1; 194 int forceukbd; 195 196 /* 197 * safepri is a safe priority for sleep to set for a spin-wait 198 * during autoconfiguration or after a panic. 199 */ 200 int safepri = 0; 201 202 struct vm_map *exec_map = NULL; 203 struct vm_map *phys_map = NULL; 204 205 /* UVM constraint ranges. */ 206 struct uvm_constraint_range isa_constraint = { 0x0, 0x00ffffffUL }; 207 struct uvm_constraint_range dma_constraint = { 0x0, 0xffffffffUL }; 208 struct uvm_constraint_range *uvm_md_constraints[] = { 209 &isa_constraint, 210 &dma_constraint, 211 NULL, 212 }; 213 214 paddr_t avail_start; 215 paddr_t avail_end; 216 217 void (*delay_func)(int) = i8254_delay; 218 void (*initclock_func)(void) = i8254_initclocks; 219 220 /* 221 * Format of boot information passed to us by 32-bit /boot 222 */ 223 typedef struct _boot_args32 { 224 int ba_type; 225 int ba_size; 226 int ba_nextX; /* a ptr in 32-bit world, but not here */ 227 char ba_arg[1]; 228 } bootarg32_t; 229 230 #define BOOTARGC_MAX NBPG /* one page */ 231 232 bios_bootmac_t *bios_bootmac; 233 234 /* locore copies the arguments from /boot to here for us */ 235 char bootinfo[BOOTARGC_MAX]; 236 int bootinfo_size = BOOTARGC_MAX; 237 238 void getbootinfo(char *, int); 239 240 /* Data passed to us by /boot, filled in by getbootinfo() */ 241 bios_diskinfo_t *bios_diskinfo; 242 bios_memmap_t *bios_memmap; 243 u_int32_t bios_cksumlen; 244 bios_efiinfo_t *bios_efiinfo; 245 bios_ucode_t *bios_ucode; 246 247 /* 248 * Size of memory segments, before any memory is stolen. 249 */ 250 phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX]; 251 int mem_cluster_cnt; 252 253 int cpu_dump(void); 254 int cpu_dumpsize(void); 255 u_long cpu_dump_mempagecnt(void); 256 void dumpsys(void); 257 void cpu_init_extents(void); 258 void map_tramps(void); 259 void init_x86_64(paddr_t); 260 void (*cpuresetfn)(void); 261 void enter_shared_special_pages(void); 262 263 #ifdef APERTURE 264 int allowaperture = 0; 265 #endif 266 267 /* 268 * Machine-dependent startup code 269 */ 270 void 271 cpu_startup(void) 272 { 273 vaddr_t minaddr, maxaddr; 274 275 msgbuf_vaddr = PMAP_DIRECT_MAP(msgbuf_paddr); 276 initmsgbuf((caddr_t)msgbuf_vaddr, round_page(MSGBUFSIZE)); 277 278 printf("%s", version); 279 startclocks(); 280 rtcinit(); 281 282 printf("real mem = %lu (%luMB)\n", ptoa((psize_t)physmem), 283 ptoa((psize_t)physmem)/1024/1024); 284 285 /* 286 * Allocate a submap for exec arguments. This map effectively 287 * limits the number of processes exec'ing at any time. 288 */ 289 minaddr = vm_map_min(kernel_map); 290 exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, 291 16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL); 292 293 /* 294 * Allocate a submap for physio 295 */ 296 minaddr = vm_map_min(kernel_map); 297 phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, 298 VM_PHYS_SIZE, 0, FALSE, NULL); 299 300 printf("avail mem = %lu (%luMB)\n", ptoa((psize_t)uvmexp.free), 301 ptoa((psize_t)uvmexp.free)/1024/1024); 302 303 bufinit(); 304 305 if (boothowto & RB_CONFIG) { 306 #ifdef BOOT_CONFIG 307 user_config(); 308 #else 309 printf("kernel does not support -c; continuing..\n"); 310 #endif 311 } 312 313 /* Safe for i/o port / memory space allocation to use malloc now. */ 314 x86_bus_space_mallocok(); 315 316 #ifndef SMALL_KERNEL 317 cpu_ucode_setup(); 318 cpu_ucode_apply(&cpu_info_primary); 319 #endif 320 cpu_tsx_disable(&cpu_info_primary); 321 322 /* enter the IDT and trampoline code in the u-k maps */ 323 enter_shared_special_pages(); 324 325 /* initialize CPU0's TSS and GDT and put them in the u-k maps */ 326 cpu_enter_pages(&cpu_info_full_primary); 327 } 328 329 /* 330 * enter_shared_special_pages 331 * 332 * Requests mapping of various special pages required in the Intel Meltdown 333 * case (to be entered into the U-K page table): 334 * 335 * 1 IDT page 336 * Various number of pages covering the U-K ".kutext" section. This section 337 * contains code needed during trampoline operation 338 * Various number of pages covering the U-K ".kudata" section. This section 339 * contains data accessed by the trampoline, before switching to U+K 340 * (for example, various shared global variables used by IPIs, etc) 341 * 342 * The linker script places the required symbols in the sections above. 343 * 344 * On CPUs not affected by Meltdown, the calls to pmap_enter_special below 345 * become no-ops. 346 */ 347 void 348 enter_shared_special_pages(void) 349 { 350 extern char __kutext_start[], __kutext_end[], __kernel_kutext_phys[]; 351 extern char __text_page_start[], __text_page_end[]; 352 extern char __kernel_kutext_page_phys[]; 353 extern char __kudata_start[], __kudata_end[], __kernel_kudata_phys[]; 354 vaddr_t va; 355 paddr_t pa; 356 357 /* idt */ 358 pmap_enter_special(idt_vaddr, idt_paddr, PROT_READ); 359 DPRINTF("%s: entered idt page va 0x%llx pa 0x%llx\n", __func__, 360 (uint64_t)idt_vaddr, (uint64_t)idt_paddr); 361 362 /* .kutext section */ 363 va = (vaddr_t)__kutext_start; 364 pa = (paddr_t)__kernel_kutext_phys; 365 while (va < (vaddr_t)__kutext_end) { 366 pmap_enter_special(va, pa, PROT_READ | PROT_EXEC); 367 DPRINTF("%s: entered kutext page va 0x%llx pa 0x%llx\n", 368 __func__, (uint64_t)va, (uint64_t)pa); 369 va += PAGE_SIZE; 370 pa += PAGE_SIZE; 371 } 372 373 /* .kutext.page section */ 374 va = (vaddr_t)__text_page_start; 375 pa = (paddr_t)__kernel_kutext_page_phys; 376 while (va < (vaddr_t)__text_page_end) { 377 pmap_enter_special(va, pa, PROT_READ | PROT_EXEC); 378 DPRINTF("%s: entered kutext.page va 0x%llx pa 0x%llx\n", 379 __func__, (uint64_t)va, (uint64_t)pa); 380 va += PAGE_SIZE; 381 pa += PAGE_SIZE; 382 } 383 384 /* .kudata section */ 385 va = (vaddr_t)__kudata_start; 386 pa = (paddr_t)__kernel_kudata_phys; 387 while (va < (vaddr_t)__kudata_end) { 388 pmap_enter_special(va, pa, PROT_READ | PROT_WRITE); 389 DPRINTF("%s: entered kudata page va 0x%llx pa 0x%llx\n", 390 __func__, (uint64_t)va, (uint64_t)pa); 391 va += PAGE_SIZE; 392 pa += PAGE_SIZE; 393 } 394 } 395 396 /* 397 * Set up proc0's PCB and the cpu's TSS. 398 */ 399 void 400 x86_64_proc0_tss_ldt_init(void) 401 { 402 struct pcb *pcb; 403 404 cpu_info_primary.ci_curpcb = pcb = &proc0.p_addr->u_pcb; 405 pcb->pcb_fsbase = 0; 406 pcb->pcb_kstack = (u_int64_t)proc0.p_addr + USPACE - 16; 407 proc0.p_md.md_regs = (struct trapframe *)pcb->pcb_kstack - 1; 408 409 ltr(GSYSSEL(GPROC0_SEL, SEL_KPL)); 410 lldt(0); 411 } 412 413 bios_diskinfo_t * 414 bios_getdiskinfo(dev_t dev) 415 { 416 bios_diskinfo_t *pdi; 417 418 if (bios_diskinfo == NULL) 419 return NULL; 420 421 for (pdi = bios_diskinfo; pdi->bios_number != -1; pdi++) { 422 if ((dev & B_MAGICMASK) == B_DEVMAGIC) { /* search by bootdev */ 423 if (pdi->bsd_dev == dev) 424 break; 425 } else { 426 if (pdi->bios_number == dev) 427 break; 428 } 429 } 430 431 if (pdi->bios_number == -1) 432 return NULL; 433 else 434 return pdi; 435 } 436 437 int 438 bios_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 439 size_t newlen, struct proc *p) 440 { 441 bios_diskinfo_t *pdi; 442 extern dev_t bootdev; 443 int biosdev; 444 445 /* all sysctl names at this level except diskinfo are terminal */ 446 if (namelen != 1 && name[0] != BIOS_DISKINFO) 447 return (ENOTDIR); /* overloaded */ 448 449 if (!(bootapiver & BAPIV_VECTOR)) 450 return EOPNOTSUPP; 451 452 switch (name[0]) { 453 case BIOS_DEV: 454 if ((pdi = bios_getdiskinfo(bootdev)) == NULL) 455 return ENXIO; 456 biosdev = pdi->bios_number; 457 return sysctl_rdint(oldp, oldlenp, newp, biosdev); 458 case BIOS_DISKINFO: 459 if (namelen != 2) 460 return ENOTDIR; 461 if ((pdi = bios_getdiskinfo(name[1])) == NULL) 462 return ENXIO; 463 return sysctl_rdstruct(oldp, oldlenp, newp, pdi, sizeof(*pdi)); 464 case BIOS_CKSUMLEN: 465 return sysctl_rdint(oldp, oldlenp, newp, bios_cksumlen); 466 default: 467 return EOPNOTSUPP; 468 } 469 /* NOTREACHED */ 470 } 471 472 extern int tsc_is_invariant; 473 extern int amd64_has_xcrypt; 474 475 const struct sysctl_bounded_args cpuctl_vars[] = { 476 { CPU_LIDACTION, &lid_action, 0, 2 }, 477 { CPU_PWRACTION, &pwr_action, 0, 2 }, 478 { CPU_CPUID, &cpu_id, 1, 0 }, 479 { CPU_CPUFEATURE, &cpu_feature, 1, 0 }, 480 { CPU_XCRYPT, &amd64_has_xcrypt, 1, 0 }, 481 { CPU_INVARIANTTSC, &tsc_is_invariant, 1, 0 }, 482 }; 483 484 /* 485 * machine dependent system variables. 486 */ 487 int 488 cpu_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 489 size_t newlen, struct proc *p) 490 { 491 extern uint64_t tsc_frequency; 492 dev_t consdev; 493 dev_t dev; 494 495 switch (name[0]) { 496 case CPU_CONSDEV: 497 if (namelen != 1) 498 return (ENOTDIR); /* overloaded */ 499 if (cn_tab != NULL) 500 consdev = cn_tab->cn_dev; 501 else 502 consdev = NODEV; 503 return (sysctl_rdstruct(oldp, oldlenp, newp, &consdev, 504 sizeof consdev)); 505 case CPU_CHR2BLK: 506 if (namelen != 2) 507 return (ENOTDIR); /* overloaded */ 508 dev = chrtoblk((dev_t)name[1]); 509 return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev)); 510 case CPU_BIOS: 511 return bios_sysctl(name + 1, namelen - 1, oldp, oldlenp, 512 newp, newlen, p); 513 case CPU_CPUVENDOR: 514 return (sysctl_rdstring(oldp, oldlenp, newp, cpu_vendor)); 515 case CPU_KBDRESET: 516 if (securelevel > 0) 517 return (sysctl_rdint(oldp, oldlenp, newp, 518 kbd_reset)); 519 else 520 return (sysctl_int(oldp, oldlenp, newp, newlen, 521 &kbd_reset)); 522 case CPU_ALLOWAPERTURE: 523 if (namelen != 1) 524 return (ENOTDIR); /* overloaded */ 525 #ifdef APERTURE 526 if (securelevel > 0) 527 return (sysctl_int_lower(oldp, oldlenp, newp, newlen, 528 &allowaperture)); 529 else 530 return (sysctl_int(oldp, oldlenp, newp, newlen, 531 &allowaperture)); 532 #else 533 return (sysctl_rdint(oldp, oldlenp, newp, 0)); 534 #endif 535 #if NPCKBC > 0 && NUKBD > 0 536 case CPU_FORCEUKBD: 537 { 538 int error; 539 540 if (forceukbd) 541 return (sysctl_rdint(oldp, oldlenp, newp, forceukbd)); 542 543 error = sysctl_int(oldp, oldlenp, newp, newlen, &forceukbd); 544 if (forceukbd) 545 pckbc_release_console(); 546 return (error); 547 } 548 #endif 549 case CPU_TSCFREQ: 550 return (sysctl_rdquad(oldp, oldlenp, newp, tsc_frequency)); 551 default: 552 return (sysctl_bounded_arr(cpuctl_vars, nitems(cpuctl_vars), 553 name, namelen, oldp, oldlenp, newp, newlen)); 554 } 555 /* NOTREACHED */ 556 } 557 558 /* 559 * Send an interrupt to process. 560 * 561 * Stack is set up to allow sigcode to call routine, followed by 562 * syscall to sigreturn routine below. After sigreturn resets the 563 * signal mask, the stack, and the frame pointer, it returns to the 564 * user specified pc. 565 */ 566 int 567 sendsig(sig_t catcher, int sig, sigset_t mask, const siginfo_t *ksip) 568 { 569 struct proc *p = curproc; 570 struct trapframe *tf = p->p_md.md_regs; 571 struct sigacts *psp = p->p_p->ps_sigacts; 572 struct sigcontext ksc; 573 struct savefpu *sfp = &p->p_addr->u_pcb.pcb_savefpu; 574 register_t sp, scp, sip; 575 u_long sss; 576 577 memset(&ksc, 0, sizeof ksc); 578 ksc.sc_rdi = tf->tf_rdi; 579 ksc.sc_rsi = tf->tf_rsi; 580 ksc.sc_rdx = tf->tf_rdx; 581 ksc.sc_rcx = tf->tf_rcx; 582 ksc.sc_r8 = tf->tf_r8; 583 ksc.sc_r9 = tf->tf_r9; 584 ksc.sc_r10 = tf->tf_r10; 585 ksc.sc_r11 = tf->tf_r11; 586 ksc.sc_r12 = tf->tf_r12; 587 ksc.sc_r13 = tf->tf_r13; 588 ksc.sc_r14 = tf->tf_r14; 589 ksc.sc_r15 = tf->tf_r15; 590 ksc.sc_rbx = tf->tf_rbx; 591 ksc.sc_rax = tf->tf_rax; 592 ksc.sc_rbp = tf->tf_rbp; 593 ksc.sc_rip = tf->tf_rip; 594 ksc.sc_cs = tf->tf_cs; 595 ksc.sc_rflags = tf->tf_rflags; 596 ksc.sc_rsp = tf->tf_rsp; 597 ksc.sc_ss = tf->tf_ss; 598 ksc.sc_mask = mask; 599 600 /* Allocate space for the signal handler context. */ 601 if ((p->p_sigstk.ss_flags & SS_DISABLE) == 0 && 602 !sigonstack(tf->tf_rsp) && (psp->ps_sigonstack & sigmask(sig))) 603 sp = trunc_page((vaddr_t)p->p_sigstk.ss_sp + p->p_sigstk.ss_size); 604 else 605 sp = tf->tf_rsp - 128; 606 607 sp &= ~15ULL; /* just in case */ 608 sss = (sizeof(ksc) + 15) & ~15; 609 610 /* Save FPU state to PCB if necessary, then copy it out */ 611 if (curcpu()->ci_flags & CPUF_USERXSTATE) { 612 curcpu()->ci_flags &= ~CPUF_USERXSTATE; 613 fpusavereset(&p->p_addr->u_pcb.pcb_savefpu); 614 } 615 sp -= fpu_save_len; 616 ksc.sc_fpstate = (struct fxsave64 *)sp; 617 if (copyout(sfp, (void *)sp, fpu_save_len)) 618 return 1; 619 620 /* Now reset the FPU state in PCB */ 621 memcpy(&p->p_addr->u_pcb.pcb_savefpu, 622 &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len); 623 624 sip = 0; 625 if (psp->ps_siginfo & sigmask(sig)) { 626 sip = sp - ((sizeof(*ksip) + 15) & ~15); 627 sss += (sizeof(*ksip) + 15) & ~15; 628 629 if (copyout(ksip, (void *)sip, sizeof(*ksip))) 630 return 1; 631 } 632 scp = sp - sss; 633 634 ksc.sc_cookie = (long)scp ^ p->p_p->ps_sigcookie; 635 if (copyout(&ksc, (void *)scp, sizeof(ksc))) 636 return 1; 637 638 /* 639 * Build context to run handler in. 640 */ 641 tf->tf_rax = (u_int64_t)catcher; 642 tf->tf_rdi = sig; 643 tf->tf_rsi = sip; 644 tf->tf_rdx = scp; 645 646 tf->tf_rip = (u_int64_t)p->p_p->ps_sigcode; 647 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 648 tf->tf_rflags &= ~(PSL_T|PSL_D|PSL_VM|PSL_AC); 649 tf->tf_rsp = scp; 650 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 651 652 /* The reset state _is_ the userspace state for this thread now */ 653 curcpu()->ci_flags |= CPUF_USERXSTATE; 654 655 return 0; 656 } 657 658 /* 659 * System call to cleanup state after a signal 660 * has been taken. Reset signal mask and 661 * stack state from context left by sendsig (above). 662 * Return to previous pc and psl as specified by 663 * context left by sendsig. Check carefully to 664 * make sure that the user has not modified the 665 * psl to gain improper privileges or to cause 666 * a machine fault. 667 */ 668 int 669 sys_sigreturn(struct proc *p, void *v, register_t *retval) 670 { 671 struct sys_sigreturn_args /* { 672 syscallarg(struct sigcontext *) sigcntxp; 673 } */ *uap = v; 674 struct sigcontext ksc, *scp = SCARG(uap, sigcntxp); 675 struct trapframe *tf = p->p_md.md_regs; 676 int error; 677 678 if (PROC_PC(p) != p->p_p->ps_sigcoderet) { 679 sigexit(p, SIGILL); 680 return (EPERM); 681 } 682 683 if ((error = copyin((caddr_t)scp, &ksc, sizeof ksc))) 684 return (error); 685 686 if (ksc.sc_cookie != ((long)scp ^ p->p_p->ps_sigcookie)) { 687 sigexit(p, SIGILL); 688 return (EFAULT); 689 } 690 691 /* Prevent reuse of the sigcontext cookie */ 692 ksc.sc_cookie = 0; 693 (void)copyout(&ksc.sc_cookie, (caddr_t)scp + 694 offsetof(struct sigcontext, sc_cookie), sizeof (ksc.sc_cookie)); 695 696 if (((ksc.sc_rflags ^ tf->tf_rflags) & PSL_USERSTATIC) != 0 || 697 !USERMODE(ksc.sc_cs, ksc.sc_eflags)) 698 return (EINVAL); 699 700 /* Current state is obsolete; toss it and force a reload */ 701 if (curcpu()->ci_flags & CPUF_USERXSTATE) { 702 curcpu()->ci_flags &= ~CPUF_USERXSTATE; 703 fpureset(); 704 } 705 706 /* Copy in the FPU state to restore */ 707 if (__predict_true(ksc.sc_fpstate != NULL)) { 708 struct fxsave64 *fx = &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave; 709 710 if ((error = copyin(ksc.sc_fpstate, fx, fpu_save_len))) 711 return (error); 712 fx->fx_mxcsr &= fpu_mxcsr_mask; 713 } else { 714 /* shouldn't happen, but handle it */ 715 memcpy(&p->p_addr->u_pcb.pcb_savefpu, 716 &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len); 717 } 718 719 tf->tf_rdi = ksc.sc_rdi; 720 tf->tf_rsi = ksc.sc_rsi; 721 tf->tf_rdx = ksc.sc_rdx; 722 tf->tf_rcx = ksc.sc_rcx; 723 tf->tf_r8 = ksc.sc_r8; 724 tf->tf_r9 = ksc.sc_r9; 725 tf->tf_r10 = ksc.sc_r10; 726 tf->tf_r11 = ksc.sc_r11; 727 tf->tf_r12 = ksc.sc_r12; 728 tf->tf_r13 = ksc.sc_r13; 729 tf->tf_r14 = ksc.sc_r14; 730 tf->tf_r15 = ksc.sc_r15; 731 tf->tf_rbx = ksc.sc_rbx; 732 tf->tf_rax = ksc.sc_rax; 733 tf->tf_rbp = ksc.sc_rbp; 734 tf->tf_rip = ksc.sc_rip; 735 tf->tf_cs = ksc.sc_cs; 736 tf->tf_rflags = ksc.sc_rflags; 737 tf->tf_rsp = ksc.sc_rsp; 738 tf->tf_ss = ksc.sc_ss; 739 740 /* Restore signal mask. */ 741 p->p_sigmask = ksc.sc_mask & ~sigcantmask; 742 743 /* 744 * sigreturn() needs to return to userspace via the 'iretq' 745 * method, so that if the process was interrupted (by tick, 746 * an IPI, whatever) as opposed to already being in the kernel 747 * when a signal was being delivered, the process will be 748 * completely restored, including the userland %rcx and %r11 749 * registers which the 'sysretq' instruction cannot restore. 750 * Also need to make sure we can handle faulting on xrstor. 751 */ 752 p->p_md.md_flags |= MDP_IRET; 753 754 return (EJUSTRETURN); 755 } 756 757 #ifdef MULTIPROCESSOR 758 /* force a CPU into the kernel, whether or not it's idle */ 759 void 760 cpu_kick(struct cpu_info *ci) 761 { 762 /* only need to kick other CPUs */ 763 if (ci != curcpu()) { 764 if (cpu_mwait_size > 0) { 765 /* 766 * If not idling, then send an IPI, else 767 * just clear the "keep idling" bit. 768 */ 769 if ((ci->ci_mwait & MWAIT_IN_IDLE) == 0) 770 x86_send_ipi(ci, X86_IPI_NOP); 771 else 772 atomic_clearbits_int(&ci->ci_mwait, 773 MWAIT_KEEP_IDLING); 774 } else { 775 /* no mwait, so need an IPI */ 776 x86_send_ipi(ci, X86_IPI_NOP); 777 } 778 } 779 } 780 #endif 781 782 /* 783 * Notify the current process (p) that it has a signal pending, 784 * process as soon as possible. 785 */ 786 void 787 signotify(struct proc *p) 788 { 789 aston(p); 790 cpu_kick(p->p_cpu); 791 } 792 793 #ifdef MULTIPROCESSOR 794 void 795 cpu_unidle(struct cpu_info *ci) 796 { 797 if (cpu_mwait_size > 0 && (ci->ci_mwait & MWAIT_ONLY)) { 798 /* 799 * Just clear the "keep idling" bit; if it wasn't 800 * idling then we didn't need to do anything anyway. 801 */ 802 atomic_clearbits_int(&ci->ci_mwait, MWAIT_KEEP_IDLING); 803 return; 804 } 805 806 if (ci != curcpu()) 807 x86_send_ipi(ci, X86_IPI_NOP); 808 } 809 #endif 810 811 int waittime = -1; 812 struct pcb dumppcb; 813 814 __dead void 815 boot(int howto) 816 { 817 if ((howto & RB_POWERDOWN) != 0) 818 lid_action = 0; 819 820 if ((howto & RB_RESET) != 0) 821 goto doreset; 822 823 if (cold) { 824 if ((howto & RB_USERREQ) == 0) 825 howto |= RB_HALT; 826 goto haltsys; 827 } 828 829 boothowto = howto; 830 if ((howto & RB_NOSYNC) == 0 && waittime < 0) { 831 waittime = 0; 832 vfs_shutdown(curproc); 833 834 if ((howto & RB_TIMEBAD) == 0) { 835 resettodr(); 836 } else { 837 printf("WARNING: not updating battery clock\n"); 838 } 839 } 840 if_downall(); 841 842 uvm_shutdown(); 843 splhigh(); 844 cold = 1; 845 846 if ((howto & RB_DUMP) != 0) 847 dumpsys(); 848 849 haltsys: 850 config_suspend_all(DVACT_POWERDOWN); 851 852 #ifdef MULTIPROCESSOR 853 x86_broadcast_ipi(X86_IPI_HALT); 854 #endif 855 856 if ((howto & RB_HALT) != 0) { 857 #if NACPI > 0 && !defined(SMALL_KERNEL) 858 extern int acpi_enabled; 859 860 if (acpi_enabled) { 861 delay(500000); 862 if ((howto & RB_POWERDOWN) != 0) 863 acpi_powerdown(); 864 } 865 #endif 866 printf("\n"); 867 printf("The operating system has halted.\n"); 868 printf("Please press any key to reboot.\n\n"); 869 cnpollc(1); /* for proper keyboard command handling */ 870 cngetc(); 871 cnpollc(0); 872 } 873 874 doreset: 875 printf("rebooting...\n"); 876 if (cpureset_delay > 0) 877 delay(cpureset_delay * 1000); 878 cpu_reset(); 879 for (;;) 880 continue; 881 /* NOTREACHED */ 882 } 883 884 /* 885 * These variables are needed by /sbin/savecore 886 */ 887 u_long dumpmag = 0x8fca0101; /* magic number */ 888 int dumpsize = 0; /* pages */ 889 long dumplo = 0; /* blocks */ 890 891 /* 892 * cpu_dump: dump the machine-dependent kernel core dump headers. 893 */ 894 int 895 cpu_dump(void) 896 { 897 int (*dump)(dev_t, daddr_t, caddr_t, size_t); 898 char buf[dbtob(1)]; 899 kcore_seg_t *segp; 900 cpu_kcore_hdr_t *cpuhdrp; 901 phys_ram_seg_t *memsegp; 902 caddr_t va; 903 int i; 904 905 dump = bdevsw[major(dumpdev)].d_dump; 906 907 memset(buf, 0, sizeof buf); 908 segp = (kcore_seg_t *)buf; 909 cpuhdrp = (cpu_kcore_hdr_t *)&buf[ALIGN(sizeof(*segp))]; 910 memsegp = (phys_ram_seg_t *)&buf[ALIGN(sizeof(*segp)) + 911 ALIGN(sizeof(*cpuhdrp))]; 912 913 /* 914 * Generate a segment header. 915 */ 916 CORE_SETMAGIC(*segp, KCORE_MAGIC, MID_MACHINE, CORE_CPU); 917 segp->c_size = dbtob(1) - ALIGN(sizeof(*segp)); 918 919 /* 920 * Add the machine-dependent header info. 921 */ 922 cpuhdrp->ptdpaddr = proc0.p_addr->u_pcb.pcb_cr3; 923 cpuhdrp->nmemsegs = mem_cluster_cnt; 924 925 /* 926 * Fill in the memory segment descriptors. 927 */ 928 for (i = 0; i < mem_cluster_cnt; i++) { 929 memsegp[i].start = mem_clusters[i].start; 930 memsegp[i].size = mem_clusters[i].size & ~PAGE_MASK; 931 } 932 933 /* 934 * If we have dump memory then assume the kernel stack is in high 935 * memory and bounce 936 */ 937 if (dumpmem_vaddr != 0) { 938 memcpy((char *)dumpmem_vaddr, buf, sizeof(buf)); 939 va = (caddr_t)dumpmem_vaddr; 940 } else { 941 va = (caddr_t)buf; 942 } 943 return (dump(dumpdev, dumplo, va, dbtob(1))); 944 } 945 946 /* 947 * This is called by main to set dumplo and dumpsize. 948 * Dumps always skip the first PAGE_SIZE of disk space 949 * in case there might be a disk label stored there. 950 * If there is extra space, put dump at the end to 951 * reduce the chance that swapping trashes it. 952 */ 953 void 954 dumpconf(void) 955 { 956 int nblks, dumpblks; /* size of dump area */ 957 958 if (dumpdev == NODEV || 959 (nblks = (bdevsw[major(dumpdev)].d_psize)(dumpdev)) == 0) 960 return; 961 if (nblks <= ctod(1)) 962 return; 963 964 dumpblks = cpu_dumpsize(); 965 if (dumpblks < 0) 966 return; 967 dumpblks += ctod(cpu_dump_mempagecnt()); 968 969 /* If dump won't fit (incl. room for possible label), punt. */ 970 if (dumpblks > (nblks - ctod(1))) 971 return; 972 973 /* Put dump at end of partition */ 974 dumplo = nblks - dumpblks; 975 976 /* dumpsize is in page units, and doesn't include headers. */ 977 dumpsize = cpu_dump_mempagecnt(); 978 } 979 980 /* 981 * Doadump comes here after turning off memory management and 982 * getting on the dump stack, either when called above, or by 983 * the auto-restart code. 984 */ 985 #define BYTES_PER_DUMP MAXPHYS /* must be a multiple of pagesize */ 986 987 void 988 dumpsys(void) 989 { 990 u_long totalbytesleft, bytes, i, n, memseg; 991 u_long maddr; 992 daddr_t blkno; 993 void *va; 994 int (*dump)(dev_t, daddr_t, caddr_t, size_t); 995 int error; 996 997 /* Save registers. */ 998 savectx(&dumppcb); 999 1000 if (dumpdev == NODEV) 1001 return; 1002 1003 /* 1004 * For dumps during autoconfiguration, 1005 * if dump device has already configured... 1006 */ 1007 if (dumpsize == 0) 1008 dumpconf(); 1009 if (dumplo <= 0 || dumpsize == 0) { 1010 printf("\ndump to dev %u,%u not possible\n", major(dumpdev), 1011 minor(dumpdev)); 1012 return; 1013 } 1014 printf("\ndumping to dev %u,%u offset %ld\n", major(dumpdev), 1015 minor(dumpdev), dumplo); 1016 1017 error = (*bdevsw[major(dumpdev)].d_psize)(dumpdev); 1018 printf("dump "); 1019 if (error == -1) { 1020 printf("area unavailable\n"); 1021 return; 1022 } 1023 1024 if ((error = cpu_dump()) != 0) 1025 goto err; 1026 1027 totalbytesleft = ptoa(cpu_dump_mempagecnt()); 1028 blkno = dumplo + cpu_dumpsize(); 1029 dump = bdevsw[major(dumpdev)].d_dump; 1030 error = 0; 1031 1032 for (memseg = 0; memseg < mem_cluster_cnt; memseg++) { 1033 maddr = mem_clusters[memseg].start; 1034 bytes = mem_clusters[memseg].size; 1035 1036 for (i = 0; i < bytes; i += n, totalbytesleft -= n) { 1037 /* Print out how many MBs we have left to go. */ 1038 if ((totalbytesleft % (1024*1024)) < BYTES_PER_DUMP) 1039 printf("%ld ", totalbytesleft / (1024 * 1024)); 1040 1041 /* Limit size for next transfer. */ 1042 n = bytes - i; 1043 if (n > BYTES_PER_DUMP) 1044 n = BYTES_PER_DUMP; 1045 if (maddr > 0xffffffff) { 1046 va = (void *)dumpmem_vaddr; 1047 if (n > dumpmem_sz) 1048 n = dumpmem_sz; 1049 memcpy(va, (void *)PMAP_DIRECT_MAP(maddr), n); 1050 } else { 1051 va = (void *)PMAP_DIRECT_MAP(maddr); 1052 } 1053 1054 error = (*dump)(dumpdev, blkno, va, n); 1055 if (error) 1056 goto err; 1057 maddr += n; 1058 blkno += btodb(n); /* XXX? */ 1059 1060 #if 0 /* XXX this doesn't work. grr. */ 1061 /* operator aborting dump? */ 1062 if (sget() != NULL) { 1063 error = EINTR; 1064 break; 1065 } 1066 #endif 1067 } 1068 } 1069 1070 err: 1071 switch (error) { 1072 1073 case ENXIO: 1074 printf("device bad\n"); 1075 break; 1076 1077 case EFAULT: 1078 printf("device not ready\n"); 1079 break; 1080 1081 case EINVAL: 1082 printf("area improper\n"); 1083 break; 1084 1085 case EIO: 1086 printf("i/o error\n"); 1087 break; 1088 1089 case EINTR: 1090 printf("aborted from console\n"); 1091 break; 1092 1093 case 0: 1094 printf("succeeded\n"); 1095 break; 1096 1097 default: 1098 printf("error %d\n", error); 1099 break; 1100 } 1101 printf("\n\n"); 1102 delay(5000000); /* 5 seconds */ 1103 } 1104 1105 /* 1106 * Force the userspace FS.base to be reloaded from the PCB on return from 1107 * the kernel, and reset the segment registers (%ds, %es, %fs, and %gs) 1108 * to their expected userspace value. 1109 */ 1110 void 1111 reset_segs(void) 1112 { 1113 /* 1114 * This operates like the cpu_switchto() sequence: if we 1115 * haven't reset %[defg]s already, do so now. 1116 */ 1117 if (curcpu()->ci_flags & CPUF_USERSEGS) { 1118 curcpu()->ci_flags &= ~CPUF_USERSEGS; 1119 __asm volatile( 1120 "movw %%ax,%%ds\n\t" 1121 "movw %%ax,%%es\n\t" 1122 "movw %%ax,%%fs\n\t" 1123 "cli\n\t" /* block intr when on user GS.base */ 1124 "swapgs\n\t" /* swap from kernel to user GS.base */ 1125 "movw %%ax,%%gs\n\t"/* set %gs to UDATA and GS.base to 0 */ 1126 "swapgs\n\t" /* back to kernel GS.base */ 1127 "sti" : : "a"(GSEL(GUDATA_SEL, SEL_UPL))); 1128 } 1129 } 1130 1131 /* 1132 * Clear registers on exec 1133 */ 1134 void 1135 setregs(struct proc *p, struct exec_package *pack, u_long stack, 1136 register_t *retval) 1137 { 1138 struct trapframe *tf; 1139 1140 /* Reset FPU state in PCB */ 1141 memcpy(&p->p_addr->u_pcb.pcb_savefpu, 1142 &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len); 1143 1144 if (curcpu()->ci_flags & CPUF_USERXSTATE) { 1145 /* state in CPU is obsolete; reset it */ 1146 fpureset(); 1147 } else { 1148 /* the reset state _is_ the userspace state now */ 1149 curcpu()->ci_flags |= CPUF_USERXSTATE; 1150 } 1151 1152 /* To reset all registers we have to return via iretq */ 1153 p->p_md.md_flags |= MDP_IRET; 1154 1155 reset_segs(); 1156 p->p_addr->u_pcb.pcb_fsbase = 0; 1157 1158 tf = p->p_md.md_regs; 1159 tf->tf_rdi = 0; 1160 tf->tf_rsi = 0; 1161 tf->tf_rbp = 0; 1162 tf->tf_rbx = 0; 1163 tf->tf_rdx = 0; 1164 tf->tf_rcx = 0; 1165 tf->tf_rax = 0; 1166 tf->tf_r8 = 0; 1167 tf->tf_r9 = 0; 1168 tf->tf_r10 = 0; 1169 tf->tf_r11 = 0; 1170 tf->tf_r12 = 0; 1171 tf->tf_r13 = 0; 1172 tf->tf_r14 = 0; 1173 tf->tf_r15 = 0; 1174 tf->tf_rip = pack->ep_entry; 1175 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 1176 tf->tf_rflags = PSL_USERSET; 1177 tf->tf_rsp = stack; 1178 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 1179 1180 retval[1] = 0; 1181 } 1182 1183 /* 1184 * Initialize segments and descriptor tables 1185 */ 1186 1187 struct gate_descriptor *idt; 1188 char idt_allocmap[NIDT]; 1189 extern struct user *proc0paddr; 1190 1191 void 1192 setgate(struct gate_descriptor *gd, void *func, int ist, int type, int dpl, 1193 int sel) 1194 { 1195 gd->gd_looffset = (u_int64_t)func & 0xffff; 1196 gd->gd_selector = sel; 1197 gd->gd_ist = ist; 1198 gd->gd_type = type; 1199 gd->gd_dpl = dpl; 1200 gd->gd_p = 1; 1201 gd->gd_hioffset = (u_int64_t)func >> 16; 1202 gd->gd_zero = 0; 1203 gd->gd_xx1 = 0; 1204 gd->gd_xx2 = 0; 1205 gd->gd_xx3 = 0; 1206 } 1207 1208 void 1209 unsetgate(struct gate_descriptor *gd) 1210 { 1211 memset(gd, 0, sizeof (*gd)); 1212 } 1213 1214 void 1215 setregion(struct region_descriptor *rd, void *base, u_int16_t limit) 1216 { 1217 rd->rd_limit = limit; 1218 rd->rd_base = (u_int64_t)base; 1219 } 1220 1221 /* 1222 * Note that the base and limit fields are ignored in long mode. 1223 */ 1224 void 1225 set_mem_segment(struct mem_segment_descriptor *sd, void *base, size_t limit, 1226 int type, int dpl, int gran, int def32, int is64) 1227 { 1228 sd->sd_lolimit = (unsigned)limit; 1229 sd->sd_lobase = (unsigned long)base; 1230 sd->sd_type = type; 1231 sd->sd_dpl = dpl; 1232 sd->sd_p = 1; 1233 sd->sd_hilimit = (unsigned)limit >> 16; 1234 sd->sd_avl = 0; 1235 sd->sd_long = is64; 1236 sd->sd_def32 = def32; 1237 sd->sd_gran = gran; 1238 sd->sd_hibase = (unsigned long)base >> 24; 1239 } 1240 1241 void 1242 set_sys_segment(struct sys_segment_descriptor *sd, void *base, size_t limit, 1243 int type, int dpl, int gran) 1244 { 1245 memset(sd, 0, sizeof *sd); 1246 sd->sd_lolimit = (unsigned)limit; 1247 sd->sd_lobase = (u_int64_t)base; 1248 sd->sd_type = type; 1249 sd->sd_dpl = dpl; 1250 sd->sd_p = 1; 1251 sd->sd_hilimit = (unsigned)limit >> 16; 1252 sd->sd_gran = gran; 1253 sd->sd_hibase = (u_int64_t)base >> 24; 1254 } 1255 1256 void cpu_init_idt(void) 1257 { 1258 struct region_descriptor region; 1259 1260 setregion(®ion, idt, NIDT * sizeof(idt[0]) - 1); 1261 lidt(®ion); 1262 } 1263 1264 void 1265 cpu_init_extents(void) 1266 { 1267 extern struct extent *iomem_ex; 1268 static int already_done; 1269 int i; 1270 1271 /* We get called for each CPU, only first should do this */ 1272 if (already_done) 1273 return; 1274 1275 /* 1276 * Allocate the physical addresses used by RAM from the iomem 1277 * extent map. 1278 */ 1279 for (i = 0; i < mem_cluster_cnt; i++) { 1280 if (extent_alloc_region(iomem_ex, mem_clusters[i].start, 1281 mem_clusters[i].size, EX_NOWAIT)) { 1282 /* XXX What should we do? */ 1283 printf("WARNING: CAN'T ALLOCATE RAM (%llx-%llx)" 1284 " FROM IOMEM EXTENT MAP!\n", mem_clusters[i].start, 1285 mem_clusters[i].start + mem_clusters[i].size - 1); 1286 } 1287 } 1288 1289 already_done = 1; 1290 } 1291 1292 void 1293 map_tramps(void) 1294 { 1295 #if defined(MULTIPROCESSOR) || \ 1296 (NACPI > 0 && !defined(SMALL_KERNEL)) 1297 struct pmap *kmp = pmap_kernel(); 1298 extern paddr_t tramp_pdirpa; 1299 #ifdef MULTIPROCESSOR 1300 extern u_char cpu_spinup_trampoline[]; 1301 extern u_char cpu_spinup_trampoline_end[]; 1302 extern u_char mp_tramp_data_start[]; 1303 extern u_char mp_tramp_data_end[]; 1304 extern u_int32_t mp_pdirpa; 1305 #endif 1306 1307 /* 1308 * The initial PML4 pointer must be below 4G, so if the 1309 * current one isn't, use a "bounce buffer" and save it 1310 * for tramps to use. 1311 */ 1312 if (kmp->pm_pdirpa > 0xffffffff) { 1313 pmap_kenter_pa(lo32_vaddr, lo32_paddr, PROT_READ | PROT_WRITE); 1314 memcpy((void *)lo32_vaddr, kmp->pm_pdir, PAGE_SIZE); 1315 tramp_pdirpa = lo32_paddr; 1316 pmap_kremove(lo32_vaddr, PAGE_SIZE); 1317 } else 1318 tramp_pdirpa = kmp->pm_pdirpa; 1319 1320 1321 #ifdef MULTIPROCESSOR 1322 /* Map MP tramp code and data pages RW for copy */ 1323 pmap_kenter_pa(MP_TRAMPOLINE, MP_TRAMPOLINE, 1324 PROT_READ | PROT_WRITE); 1325 1326 pmap_kenter_pa(MP_TRAMP_DATA, MP_TRAMP_DATA, 1327 PROT_READ | PROT_WRITE); 1328 1329 memset((caddr_t)MP_TRAMPOLINE, 0xcc, PAGE_SIZE); 1330 memset((caddr_t)MP_TRAMP_DATA, 0xcc, PAGE_SIZE); 1331 1332 memcpy((caddr_t)MP_TRAMPOLINE, 1333 cpu_spinup_trampoline, 1334 cpu_spinup_trampoline_end-cpu_spinup_trampoline); 1335 1336 memcpy((caddr_t)MP_TRAMP_DATA, 1337 mp_tramp_data_start, 1338 mp_tramp_data_end - mp_tramp_data_start); 1339 1340 /* 1341 * We need to patch this after we copy the tramp data, 1342 * the symbol points into the copied tramp data page. 1343 */ 1344 mp_pdirpa = tramp_pdirpa; 1345 1346 /* Unmap, will be remapped in cpu_start_secondary */ 1347 pmap_kremove(MP_TRAMPOLINE, PAGE_SIZE); 1348 pmap_kremove(MP_TRAMP_DATA, PAGE_SIZE); 1349 #endif /* MULTIPROCESSOR */ 1350 #endif 1351 } 1352 1353 #define IDTVEC(name) __CONCAT(X, name) 1354 typedef void (vector)(void); 1355 extern vector *IDTVEC(exceptions)[]; 1356 1357 paddr_t early_pte_pages; 1358 1359 void 1360 init_x86_64(paddr_t first_avail) 1361 { 1362 struct region_descriptor region; 1363 bios_memmap_t *bmp; 1364 int x, ist; 1365 uint64_t max_dm_size = ((uint64_t)512 * NUM_L4_SLOT_DIRECT) << 30; 1366 1367 /* 1368 * locore0 mapped 3 pages for use before the pmap is initialized 1369 * starting at first_avail. These pages are currently used by 1370 * efifb to create early-use VAs for the framebuffer before efifb 1371 * is attached. 1372 */ 1373 early_pte_pages = first_avail; 1374 first_avail += 3 * NBPG; 1375 1376 cpu_init_msrs(&cpu_info_primary); 1377 1378 proc0.p_addr = proc0paddr; 1379 cpu_info_primary.ci_curpcb = &proc0.p_addr->u_pcb; 1380 1381 x86_bus_space_init(); 1382 1383 i8254_startclock(); 1384 1385 /* 1386 * Initialize PAGE_SIZE-dependent variables. 1387 */ 1388 uvm_setpagesize(); 1389 1390 /* 1391 * Boot arguments are in a single page specified by /boot. 1392 * 1393 * We require the "new" vector form, as well as memory ranges 1394 * to be given in bytes rather than KB. 1395 * 1396 * locore copies the data into bootinfo[] for us. 1397 */ 1398 if ((bootapiver & (BAPIV_VECTOR | BAPIV_BMEMMAP)) == 1399 (BAPIV_VECTOR | BAPIV_BMEMMAP)) { 1400 if (bootinfo_size >= sizeof(bootinfo)) 1401 panic("boot args too big"); 1402 1403 getbootinfo(bootinfo, bootinfo_size); 1404 } else 1405 panic("invalid /boot"); 1406 1407 cninit(); 1408 1409 /* 1410 * Memory on the AMD64 port is described by three different things. 1411 * 1412 * 1. biosbasemem - This is outdated, and should really only be used to 1413 * sanitize the other values. This is what we get back from the BIOS 1414 * using the legacy routines, describing memory below 640KB. 1415 * 1416 * 2. bios_memmap[] - This is the memory map as the bios has returned 1417 * it to us. It includes memory the kernel occupies, etc. 1418 * 1419 * 3. mem_cluster[] - This is the massaged free memory segments after 1420 * taking into account the contents of bios_memmap, biosbasemem, 1421 * and locore/machdep/pmap kernel allocations of physical 1422 * pages. 1423 * 1424 * The other thing is that the physical page *RANGE* is described by 1425 * three more variables: 1426 * 1427 * avail_start - This is a physical address of the start of available 1428 * pages, until IOM_BEGIN. This is basically the start 1429 * of the UVM managed range of memory, with some holes... 1430 * 1431 * avail_end - This is the end of physical pages. All physical pages 1432 * that UVM manages are between avail_start and avail_end. 1433 * There are holes... 1434 * 1435 * first_avail - This is the first available physical page after the 1436 * kernel, page tables, etc. 1437 * 1438 * We skip the first few pages for trampolines, hibernate, and to avoid 1439 * buggy SMI implementations that could corrupt the first 64KB. 1440 */ 1441 avail_start = 16*PAGE_SIZE; 1442 1443 #ifdef MULTIPROCESSOR 1444 if (avail_start < MP_TRAMPOLINE + PAGE_SIZE) 1445 avail_start = MP_TRAMPOLINE + PAGE_SIZE; 1446 if (avail_start < MP_TRAMP_DATA + PAGE_SIZE) 1447 avail_start = MP_TRAMP_DATA + PAGE_SIZE; 1448 #endif 1449 1450 #if (NACPI > 0 && !defined(SMALL_KERNEL)) 1451 if (avail_start < ACPI_TRAMPOLINE + PAGE_SIZE) 1452 avail_start = ACPI_TRAMPOLINE + PAGE_SIZE; 1453 if (avail_start < ACPI_TRAMP_DATA + PAGE_SIZE) 1454 avail_start = ACPI_TRAMP_DATA + PAGE_SIZE; 1455 #endif 1456 1457 #ifdef HIBERNATE 1458 if (avail_start < HIBERNATE_HIBALLOC_PAGE + PAGE_SIZE) 1459 avail_start = HIBERNATE_HIBALLOC_PAGE + PAGE_SIZE; 1460 #endif /* HIBERNATE */ 1461 1462 /* 1463 * We need to go through the BIOS memory map given, and 1464 * fill out mem_clusters and mem_cluster_cnt stuff, taking 1465 * into account all the points listed above. 1466 */ 1467 avail_end = mem_cluster_cnt = 0; 1468 for (bmp = bios_memmap; bmp->type != BIOS_MAP_END; bmp++) { 1469 paddr_t s1, s2, e1, e2; 1470 1471 /* Ignore non-free memory */ 1472 if (bmp->type != BIOS_MAP_FREE) 1473 continue; 1474 if (bmp->size < PAGE_SIZE) 1475 continue; 1476 1477 /* Init our segment(s), round/trunc to pages */ 1478 s1 = round_page(bmp->addr); 1479 e1 = trunc_page(bmp->addr + bmp->size); 1480 s2 = e2 = 0; 1481 1482 /* 1483 * XXX Some buggy ACPI BIOSes use memory that they 1484 * declare as free. Typically the affected memory 1485 * areas are small blocks between areas reserved for 1486 * ACPI and other BIOS goo. So skip areas smaller 1487 * than 1 MB above the 16 MB boundary (to avoid 1488 * affecting legacy stuff). 1489 */ 1490 if (s1 > 16*1024*1024 && (e1 - s1) < 1*1024*1024) 1491 continue; 1492 1493 /* Check and adjust our segment(s) */ 1494 /* Nuke low pages */ 1495 if (s1 < avail_start) { 1496 s1 = avail_start; 1497 if (s1 > e1) 1498 continue; 1499 } 1500 1501 /* 1502 * The direct map is limited to 512GB * NUM_L4_SLOT_DIRECT of 1503 * memory, so discard anything above that. 1504 */ 1505 if (e1 >= max_dm_size) { 1506 e1 = max_dm_size; 1507 if (s1 > e1) 1508 continue; 1509 } 1510 1511 /* Crop stuff into "640K hole" */ 1512 if (s1 < IOM_BEGIN && e1 > IOM_BEGIN) 1513 e1 = IOM_BEGIN; 1514 if (s1 < biosbasemem && e1 > biosbasemem) 1515 e1 = biosbasemem; 1516 1517 /* Split any segments straddling the 16MB boundary */ 1518 if (s1 < 16*1024*1024 && e1 > 16*1024*1024) { 1519 e2 = e1; 1520 s2 = e1 = 16*1024*1024; 1521 } 1522 1523 /* Store segment(s) */ 1524 if (e1 - s1 >= PAGE_SIZE) { 1525 mem_clusters[mem_cluster_cnt].start = s1; 1526 mem_clusters[mem_cluster_cnt].size = e1 - s1; 1527 mem_cluster_cnt++; 1528 } 1529 if (e2 - s2 >= PAGE_SIZE) { 1530 mem_clusters[mem_cluster_cnt].start = s2; 1531 mem_clusters[mem_cluster_cnt].size = e2 - s2; 1532 mem_cluster_cnt++; 1533 } 1534 if (avail_end < e1) avail_end = e1; 1535 if (avail_end < e2) avail_end = e2; 1536 } 1537 1538 /* 1539 * Call pmap initialization to make new kernel address space. 1540 * We must do this before loading pages into the VM system. 1541 */ 1542 first_avail = pmap_bootstrap(first_avail, trunc_page(avail_end)); 1543 1544 /* Allocate these out of the 640KB base memory */ 1545 if (avail_start != PAGE_SIZE) 1546 avail_start = pmap_prealloc_lowmem_ptps(avail_start); 1547 1548 cpu_init_extents(); 1549 1550 /* Make sure the end of the space used by the kernel is rounded. */ 1551 first_avail = round_page(first_avail); 1552 kern_end = KERNBASE + first_avail; 1553 1554 /* 1555 * Now, load the memory clusters (which have already been 1556 * flensed) into the VM system. 1557 */ 1558 for (x = 0; x < mem_cluster_cnt; x++) { 1559 paddr_t seg_start = mem_clusters[x].start; 1560 paddr_t seg_end = seg_start + mem_clusters[x].size; 1561 1562 if (seg_start < first_avail) seg_start = first_avail; 1563 if (seg_start > seg_end) continue; 1564 if (seg_end - seg_start < PAGE_SIZE) continue; 1565 1566 physmem += atop(mem_clusters[x].size); 1567 1568 #if DEBUG_MEMLOAD 1569 printf("loading 0x%lx-0x%lx (0x%lx-0x%lx)\n", 1570 seg_start, seg_end, atop(seg_start), atop(seg_end)); 1571 #endif 1572 uvm_page_physload(atop(seg_start), atop(seg_end), 1573 atop(seg_start), atop(seg_end), 0); 1574 } 1575 1576 /* 1577 * Now, load the memory between the end of I/O memory "hole" 1578 * and the kernel. 1579 */ 1580 { 1581 paddr_t seg_start = round_page(IOM_END); 1582 paddr_t seg_end = trunc_page(KERNTEXTOFF - KERNBASE); 1583 1584 if (seg_start < seg_end) { 1585 #if DEBUG_MEMLOAD 1586 printf("loading 0x%lx-0x%lx\n", seg_start, seg_end); 1587 #endif 1588 uvm_page_physload(atop(seg_start), atop(seg_end), 1589 atop(seg_start), atop(seg_end), 0); 1590 } 1591 } 1592 1593 #if DEBUG_MEMLOAD 1594 printf("avail_start = 0x%lx\n", avail_start); 1595 printf("avail_end = 0x%lx\n", avail_end); 1596 printf("first_avail = 0x%lx\n", first_avail); 1597 #endif 1598 1599 /* 1600 * Steal memory for the message buffer (at end of core). 1601 */ 1602 { 1603 struct vm_physseg *vps = NULL; 1604 psize_t sz = round_page(MSGBUFSIZE); 1605 psize_t reqsz = sz; 1606 1607 for (x = 0; x < vm_nphysseg; x++) { 1608 vps = &vm_physmem[x]; 1609 if (ptoa(vps->avail_end) == avail_end) 1610 break; 1611 } 1612 if (x == vm_nphysseg) 1613 panic("init_x86_64: can't find end of memory"); 1614 1615 /* Shrink so it'll fit in the last segment. */ 1616 if ((vps->avail_end - vps->avail_start) < atop(sz)) 1617 sz = ptoa(vps->avail_end - vps->avail_start); 1618 1619 vps->avail_end -= atop(sz); 1620 vps->end -= atop(sz); 1621 msgbuf_paddr = ptoa(vps->avail_end); 1622 1623 /* Remove the last segment if it now has no pages. */ 1624 if (vps->start == vps->end) { 1625 for (vm_nphysseg--; x < vm_nphysseg; x++) 1626 vm_physmem[x] = vm_physmem[x + 1]; 1627 } 1628 1629 /* Now find where the new avail_end is. */ 1630 for (avail_end = 0, x = 0; x < vm_nphysseg; x++) 1631 if (vm_physmem[x].avail_end > avail_end) 1632 avail_end = vm_physmem[x].avail_end; 1633 avail_end = ptoa(avail_end); 1634 1635 /* Warn if the message buffer had to be shrunk. */ 1636 if (sz != reqsz) 1637 printf("WARNING: %ld bytes not available for msgbuf " 1638 "in last cluster (%ld used)\n", reqsz, sz); 1639 } 1640 1641 /* 1642 * Steal some memory for a dump bouncebuffer if we have memory over 1643 * the 32-bit barrier. 1644 */ 1645 if (avail_end > 0xffffffff) { 1646 struct vm_physseg *vps = NULL; 1647 psize_t sz = round_page(MAX(BYTES_PER_DUMP, dbtob(1))); 1648 1649 /* XXX assumes segments are ordered */ 1650 for (x = 0; x < vm_nphysseg; x++) { 1651 vps = &vm_physmem[x]; 1652 /* Find something between 16meg and 4gig */ 1653 if (ptoa(vps->avail_end) <= 0xffffffff && 1654 ptoa(vps->avail_start) >= 0xffffff) 1655 break; 1656 } 1657 if (x == vm_nphysseg) 1658 panic("init_x86_64: no memory between " 1659 "0xffffff-0xffffffff"); 1660 1661 /* Shrink so it'll fit in the segment. */ 1662 if ((vps->avail_end - vps->avail_start) < atop(sz)) 1663 sz = ptoa(vps->avail_end - vps->avail_start); 1664 1665 vps->avail_end -= atop(sz); 1666 vps->end -= atop(sz); 1667 dumpmem_paddr = ptoa(vps->avail_end); 1668 dumpmem_vaddr = PMAP_DIRECT_MAP(dumpmem_paddr); 1669 dumpmem_sz = sz; 1670 1671 /* Remove the last segment if it now has no pages. */ 1672 if (vps->start == vps->end) { 1673 for (vm_nphysseg--; x < vm_nphysseg; x++) 1674 vm_physmem[x] = vm_physmem[x + 1]; 1675 } 1676 } 1677 1678 pmap_growkernel(VM_MIN_KERNEL_ADDRESS + 32 * 1024 * 1024); 1679 1680 pmap_kenter_pa(idt_vaddr, idt_paddr, PROT_READ | PROT_WRITE); 1681 1682 idt = (struct gate_descriptor *)idt_vaddr; 1683 cpu_info_primary.ci_tss = &cpu_info_full_primary.cif_tss; 1684 cpu_info_primary.ci_gdt = &cpu_info_full_primary.cif_gdt; 1685 1686 /* make gdt gates and memory segments */ 1687 set_mem_segment(GDT_ADDR_MEM(cpu_info_primary.ci_gdt, GCODE_SEL), 0, 1688 0xfffff, SDT_MEMERA, SEL_KPL, 1, 0, 1); 1689 1690 set_mem_segment(GDT_ADDR_MEM(cpu_info_primary.ci_gdt, GDATA_SEL), 0, 1691 0xfffff, SDT_MEMRWA, SEL_KPL, 1, 0, 1); 1692 1693 set_mem_segment(GDT_ADDR_MEM(cpu_info_primary.ci_gdt, GUCODE32_SEL), 0, 1694 atop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMERA, SEL_UPL, 1, 1, 0); 1695 1696 set_mem_segment(GDT_ADDR_MEM(cpu_info_primary.ci_gdt, GUDATA_SEL), 0, 1697 atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 0, 1); 1698 1699 set_mem_segment(GDT_ADDR_MEM(cpu_info_primary.ci_gdt, GUCODE_SEL), 0, 1700 atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 0, 1); 1701 1702 set_sys_segment(GDT_ADDR_SYS(cpu_info_primary.ci_gdt, GPROC0_SEL), 1703 cpu_info_primary.ci_tss, sizeof (struct x86_64_tss)-1, 1704 SDT_SYS386TSS, SEL_KPL, 0); 1705 1706 /* exceptions */ 1707 for (x = 0; x < 32; x++) { 1708 /* trap2 == NMI, trap8 == double fault */ 1709 ist = (x == 2) ? 2 : (x == 8) ? 1 : 0; 1710 setgate(&idt[x], IDTVEC(exceptions)[x], ist, SDT_SYS386IGT, 1711 (x == 3) ? SEL_UPL : SEL_KPL, 1712 GSEL(GCODE_SEL, SEL_KPL)); 1713 idt_allocmap[x] = 1; 1714 } 1715 1716 setregion(®ion, cpu_info_primary.ci_gdt, GDT_SIZE - 1); 1717 lgdt(®ion); 1718 1719 cpu_init_idt(); 1720 1721 intr_default_setup(); 1722 1723 fpuinit(&cpu_info_primary); 1724 1725 softintr_init(); 1726 splraise(IPL_IPI); 1727 intr_enable(); 1728 1729 #ifdef DDB 1730 db_machine_init(); 1731 ddb_init(); 1732 if (boothowto & RB_KDB) 1733 db_enter(); 1734 #endif 1735 } 1736 1737 void 1738 cpu_reset(void) 1739 { 1740 intr_disable(); 1741 1742 if (cpuresetfn) 1743 (*cpuresetfn)(); 1744 1745 /* 1746 * The keyboard controller has 4 random output pins, one of which is 1747 * connected to the RESET pin on the CPU in many PCs. We tell the 1748 * keyboard controller to pulse this line a couple of times. 1749 */ 1750 outb(IO_KBD + KBCMDP, KBC_PULSE0); 1751 delay(100000); 1752 outb(IO_KBD + KBCMDP, KBC_PULSE0); 1753 delay(100000); 1754 1755 /* 1756 * Try to cause a triple fault and watchdog reset by making the IDT 1757 * invalid and causing a fault. 1758 */ 1759 memset((caddr_t)idt, 0, NIDT * sizeof(idt[0])); 1760 __asm volatile("divl %0,%1" : : "q" (0), "a" (0)); 1761 1762 for (;;) 1763 continue; 1764 /* NOTREACHED */ 1765 } 1766 1767 /* 1768 * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers. 1769 */ 1770 int 1771 cpu_dumpsize(void) 1772 { 1773 int size; 1774 1775 size = ALIGN(sizeof(kcore_seg_t)) + 1776 ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t)); 1777 if (roundup(size, dbtob(1)) != dbtob(1)) 1778 return (-1); 1779 1780 return (1); 1781 } 1782 1783 /* 1784 * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped. 1785 */ 1786 u_long 1787 cpu_dump_mempagecnt(void) 1788 { 1789 u_long i, n; 1790 1791 n = 0; 1792 for (i = 0; i < mem_cluster_cnt; i++) 1793 n += atop(mem_clusters[i].size); 1794 return (n); 1795 } 1796 1797 /* 1798 * Figure out which portions of memory are used by the kernel/system. 1799 */ 1800 int 1801 amd64_pa_used(paddr_t addr) 1802 { 1803 struct vm_page *pg; 1804 1805 /* Kernel manages these */ 1806 if ((pg = PHYS_TO_VM_PAGE(addr)) && (pg->pg_flags & PG_DEV) == 0) 1807 return 1; 1808 1809 /* Kernel is loaded here */ 1810 if (addr > IOM_END && addr < (kern_end - KERNBASE)) 1811 return 1; 1812 1813 /* Low memory used for various bootstrap things */ 1814 if (addr < avail_start) 1815 return 1; 1816 1817 /* 1818 * The only regions I can think of that are left are the things 1819 * we steal away from UVM. The message buffer? 1820 * XXX - ignore these for now. 1821 */ 1822 1823 return 0; 1824 } 1825 1826 void 1827 cpu_initclocks(void) 1828 { 1829 (*initclock_func)(); 1830 } 1831 1832 void 1833 need_resched(struct cpu_info *ci) 1834 { 1835 ci->ci_want_resched = 1; 1836 1837 /* There's a risk we'll be called before the idle threads start */ 1838 if (ci->ci_curproc) { 1839 aston(ci->ci_curproc); 1840 cpu_kick(ci); 1841 } 1842 } 1843 1844 /* 1845 * Allocate an IDT vector slot within the given range. 1846 * XXX needs locking to avoid MP allocation races. 1847 */ 1848 1849 int 1850 idt_vec_alloc(int low, int high) 1851 { 1852 int vec; 1853 1854 for (vec = low; vec <= high; vec++) { 1855 if (idt_allocmap[vec] == 0) { 1856 idt_allocmap[vec] = 1; 1857 return vec; 1858 } 1859 } 1860 return 0; 1861 } 1862 1863 void 1864 idt_vec_set(int vec, void (*function)(void)) 1865 { 1866 /* 1867 * Vector should be allocated, so no locking needed. 1868 */ 1869 KASSERT(idt_allocmap[vec] == 1); 1870 setgate(&idt[vec], function, 0, SDT_SYS386IGT, SEL_KPL, 1871 GSEL(GCODE_SEL, SEL_KPL)); 1872 } 1873 1874 void 1875 idt_vec_free(int vec) 1876 { 1877 unsetgate(&idt[vec]); 1878 idt_allocmap[vec] = 0; 1879 } 1880 1881 #ifdef DIAGNOSTIC 1882 void 1883 splassert_check(int wantipl, const char *func) 1884 { 1885 int cpl = curcpu()->ci_ilevel; 1886 int floor = curcpu()->ci_handled_intr_level; 1887 1888 if (cpl < wantipl) { 1889 splassert_fail(wantipl, cpl, func); 1890 } 1891 if (floor > wantipl) { 1892 splassert_fail(wantipl, floor, func); 1893 } 1894 1895 } 1896 #endif 1897 1898 int 1899 copyin32(const uint32_t *uaddr, uint32_t *kaddr) 1900 { 1901 if ((vaddr_t)uaddr & 0x3) 1902 return EFAULT; 1903 1904 /* copyin(9) is atomic */ 1905 return copyin(uaddr, kaddr, sizeof(uint32_t)); 1906 } 1907 1908 void 1909 getbootinfo(char *bootinfo, int bootinfo_size) 1910 { 1911 bootarg32_t *q; 1912 bios_ddb_t *bios_ddb; 1913 bios_bootduid_t *bios_bootduid; 1914 bios_bootsr_t *bios_bootsr; 1915 #undef BOOTINFO_DEBUG 1916 #ifdef BOOTINFO_DEBUG 1917 printf("bootargv:"); 1918 #endif 1919 1920 for (q = (bootarg32_t *)bootinfo; 1921 (q->ba_type != BOOTARG_END) && 1922 ((((char *)q) - bootinfo) < bootinfo_size); 1923 q = (bootarg32_t *)(((char *)q) + q->ba_size)) { 1924 1925 switch (q->ba_type) { 1926 case BOOTARG_MEMMAP: 1927 bios_memmap = (bios_memmap_t *)q->ba_arg; 1928 #ifdef BOOTINFO_DEBUG 1929 printf(" memmap %p", bios_memmap); 1930 #endif 1931 break; 1932 case BOOTARG_DISKINFO: 1933 bios_diskinfo = (bios_diskinfo_t *)q->ba_arg; 1934 #ifdef BOOTINFO_DEBUG 1935 printf(" diskinfo %p", bios_diskinfo); 1936 #endif 1937 break; 1938 case BOOTARG_APMINFO: 1939 /* generated by i386 boot loader */ 1940 break; 1941 case BOOTARG_CKSUMLEN: 1942 bios_cksumlen = *(u_int32_t *)q->ba_arg; 1943 #ifdef BOOTINFO_DEBUG 1944 printf(" cksumlen %d", bios_cksumlen); 1945 #endif 1946 break; 1947 case BOOTARG_PCIINFO: 1948 /* generated by i386 boot loader */ 1949 break; 1950 case BOOTARG_CONSDEV: 1951 if (q->ba_size >= sizeof(bios_consdev_t) + 1952 offsetof(struct _boot_args32, ba_arg)) { 1953 #if NCOM > 0 1954 bios_consdev_t *cdp = 1955 (bios_consdev_t*)q->ba_arg; 1956 static const int ports[] = 1957 { 0x3f8, 0x2f8, 0x3e8, 0x2e8 }; 1958 int unit = minor(cdp->consdev); 1959 int consaddr = cdp->consaddr; 1960 if (consaddr == -1 && unit >= 0 && 1961 unit < nitems(ports)) 1962 consaddr = ports[unit]; 1963 if (major(cdp->consdev) == 8 && 1964 consaddr != -1) { 1965 comconsunit = unit; 1966 comconsaddr = consaddr; 1967 comconsrate = cdp->conspeed; 1968 comconsiot = X86_BUS_SPACE_IO; 1969 } 1970 #endif 1971 #ifdef BOOTINFO_DEBUG 1972 printf(" console 0x%x:%d", 1973 cdp->consdev, cdp->conspeed); 1974 #endif 1975 } 1976 break; 1977 case BOOTARG_BOOTMAC: 1978 bios_bootmac = (bios_bootmac_t *)q->ba_arg; 1979 break; 1980 1981 case BOOTARG_DDB: 1982 bios_ddb = (bios_ddb_t *)q->ba_arg; 1983 #ifdef DDB 1984 db_console = bios_ddb->db_console; 1985 #endif 1986 break; 1987 1988 case BOOTARG_BOOTDUID: 1989 bios_bootduid = (bios_bootduid_t *)q->ba_arg; 1990 memcpy(bootduid, bios_bootduid, sizeof(bootduid)); 1991 break; 1992 1993 case BOOTARG_BOOTSR: 1994 bios_bootsr = (bios_bootsr_t *)q->ba_arg; 1995 #if NSOFTRAID > 0 1996 memcpy(&sr_bootuuid, &bios_bootsr->uuid, 1997 sizeof(sr_bootuuid)); 1998 memcpy(&sr_bootkey, &bios_bootsr->maskkey, 1999 sizeof(sr_bootkey)); 2000 #endif 2001 explicit_bzero(bios_bootsr, sizeof(bios_bootsr_t)); 2002 break; 2003 2004 case BOOTARG_EFIINFO: 2005 bios_efiinfo = (bios_efiinfo_t *)q->ba_arg; 2006 break; 2007 2008 case BOOTARG_UCODE: 2009 bios_ucode = (bios_ucode_t *)q->ba_arg; 2010 break; 2011 2012 default: 2013 #ifdef BOOTINFO_DEBUG 2014 printf(" unsupported arg (%d) %p", q->ba_type, 2015 q->ba_arg); 2016 #endif 2017 break; 2018 } 2019 } 2020 #ifdef BOOTINFO_DEBUG 2021 printf("\n"); 2022 #endif 2023 } 2024 2025 int 2026 check_context(const struct reg *regs, struct trapframe *tf) 2027 { 2028 uint16_t sel; 2029 2030 if (((regs->r_rflags ^ tf->tf_rflags) & PSL_USERSTATIC) != 0) 2031 return EINVAL; 2032 2033 sel = regs->r_ss & 0xffff; 2034 if (!VALID_USER_DSEL(sel)) 2035 return EINVAL; 2036 2037 sel = regs->r_cs & 0xffff; 2038 if (!VALID_USER_CSEL(sel)) 2039 return EINVAL; 2040 2041 if (regs->r_rip >= VM_MAXUSER_ADDRESS) 2042 return EINVAL; 2043 2044 return 0; 2045 } 2046