1 /*- 2 * Copyright (c) 1992 Terrence R. Lambert. 3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 38 */ 39 40 #include <sys/cdefs.h> 41 __FBSDID("$FreeBSD$"); 42 43 #include "opt_apic.h" 44 #include "opt_atpic.h" 45 #include "opt_compat.h" 46 #include "opt_cpu.h" 47 #include "opt_ddb.h" 48 #include "opt_inet.h" 49 #include "opt_isa.h" 50 #include "opt_kstack_pages.h" 51 #include "opt_maxmem.h" 52 #include "opt_mp_watchdog.h" 53 #include "opt_npx.h" 54 #include "opt_perfmon.h" 55 #include "opt_platform.h" 56 #include "opt_xbox.h" 57 58 #include <sys/param.h> 59 #include <sys/proc.h> 60 #include <sys/systm.h> 61 #include <sys/bio.h> 62 #include <sys/buf.h> 63 #include <sys/bus.h> 64 #include <sys/callout.h> 65 #include <sys/cons.h> 66 #include <sys/cpu.h> 67 #include <sys/eventhandler.h> 68 #include <sys/exec.h> 69 #include <sys/imgact.h> 70 #include <sys/kdb.h> 71 #include <sys/kernel.h> 72 #include <sys/ktr.h> 73 #include <sys/linker.h> 74 #include <sys/lock.h> 75 #include <sys/malloc.h> 76 #include <sys/memrange.h> 77 #include <sys/msgbuf.h> 78 #include <sys/mutex.h> 79 #include <sys/pcpu.h> 80 #include <sys/ptrace.h> 81 #include <sys/reboot.h> 82 #include <sys/rwlock.h> 83 #include <sys/sched.h> 84 #include <sys/signalvar.h> 85 #ifdef SMP 86 #include <sys/smp.h> 87 #endif 88 #include <sys/syscallsubr.h> 89 #include <sys/sysctl.h> 90 #include <sys/sysent.h> 91 #include <sys/sysproto.h> 92 #include <sys/ucontext.h> 93 #include <sys/vmmeter.h> 94 95 #include <vm/vm.h> 96 #include 
<vm/vm_extern.h> 97 #include <vm/vm_kern.h> 98 #include <vm/vm_page.h> 99 #include <vm/vm_map.h> 100 #include <vm/vm_object.h> 101 #include <vm/vm_pager.h> 102 #include <vm/vm_param.h> 103 104 #ifdef DDB 105 #ifndef KDB 106 #error KDB must be enabled in order for DDB to work! 107 #endif 108 #include <ddb/ddb.h> 109 #include <ddb/db_sym.h> 110 #endif 111 112 #ifdef PC98 113 #include <pc98/pc98/pc98_machdep.h> 114 #else 115 #include <isa/rtc.h> 116 #endif 117 118 #include <net/netisr.h> 119 120 #include <machine/bootinfo.h> 121 #include <machine/clock.h> 122 #include <machine/cpu.h> 123 #include <machine/cputypes.h> 124 #include <machine/intr_machdep.h> 125 #include <x86/mca.h> 126 #include <machine/md_var.h> 127 #include <machine/metadata.h> 128 #include <machine/mp_watchdog.h> 129 #include <machine/pc/bios.h> 130 #include <machine/pcb.h> 131 #include <machine/pcb_ext.h> 132 #include <machine/proc.h> 133 #include <machine/reg.h> 134 #include <machine/sigframe.h> 135 #include <machine/specialreg.h> 136 #include <machine/vm86.h> 137 #include <x86/init.h> 138 #ifdef PERFMON 139 #include <machine/perfmon.h> 140 #endif 141 #ifdef SMP 142 #include <machine/smp.h> 143 #endif 144 #ifdef FDT 145 #include <x86/fdt.h> 146 #endif 147 148 #ifdef DEV_APIC 149 #include <x86/apicvar.h> 150 #endif 151 152 #ifdef DEV_ISA 153 #include <x86/isa/icu.h> 154 #endif 155 156 #ifdef XBOX 157 #include <machine/xbox.h> 158 159 int arch_i386_is_xbox = 0; 160 uint32_t arch_i386_xbox_memsize = 0; 161 #endif 162 163 #ifdef XEN 164 /* XEN includes */ 165 #include <xen/xen-os.h> 166 #include <xen/hypervisor.h> 167 #include <machine/xen/xenvar.h> 168 #include <machine/xen/xenfunc.h> 169 #include <xen/xen_intr.h> 170 171 void Xhypervisor_callback(void); 172 void failsafe_callback(void); 173 174 extern trap_info_t trap_table[]; 175 struct proc_ldt default_proc_ldt; 176 extern int init_first; 177 int running_xen = 1; 178 extern unsigned long physfree; 179 #endif /* XEN */ 180 181 /* Sanity check for 
__curthread() */ 182 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); 183 184 extern register_t init386(int first); 185 extern void dblfault_handler(void); 186 187 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 188 #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 189 190 #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) 191 #define CPU_ENABLE_SSE 192 #endif 193 194 static void cpu_startup(void *); 195 static void fpstate_drop(struct thread *td); 196 static void get_fpcontext(struct thread *td, mcontext_t *mcp, 197 char *xfpusave, size_t xfpusave_len); 198 static int set_fpcontext(struct thread *td, mcontext_t *mcp, 199 char *xfpustate, size_t xfpustate_len); 200 #ifdef CPU_ENABLE_SSE 201 static void set_fpregs_xmm(struct save87 *, struct savexmm *); 202 static void fill_fpregs_xmm(struct savexmm *, struct save87 *); 203 #endif /* CPU_ENABLE_SSE */ 204 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); 205 206 /* Intel ICH registers */ 207 #define ICH_PMBASE 0x400 208 #define ICH_SMI_EN ICH_PMBASE + 0x30 209 210 int _udatasel, _ucodesel; 211 u_int basemem; 212 213 #ifdef PC98 214 int need_pre_dma_flush; /* If 1, use wbinvd befor DMA transfer. */ 215 int need_post_dma_flush; /* If 1, use invd after DMA transfer. */ 216 217 static int ispc98 = 1; 218 SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, ""); 219 #endif 220 221 int cold = 1; 222 223 #ifdef COMPAT_43 224 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); 225 #endif 226 #ifdef COMPAT_FREEBSD4 227 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); 228 #endif 229 230 long Maxmem = 0; 231 long realmem = 0; 232 233 #ifdef PAE 234 FEATURE(pae, "Physical Address Extensions"); 235 #endif 236 237 /* 238 * The number of PHYSMAP entries must be one less than the number of 239 * PHYSSEG entries because the PHYSMAP entry that spans the largest 240 * physical address that is accessible by ISA DMA is split into two 241 * PHYSSEG entries. 
242 */ 243 #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) 244 245 vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; 246 vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; 247 248 /* must be 2 less so 0 0 can signal end of chunks */ 249 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) 250 #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) 251 252 struct kva_md_info kmi; 253 254 static struct trapframe proc0_tf; 255 struct pcpu __pcpu[MAXCPU]; 256 257 struct mtx icu_lock; 258 259 struct mem_range_softc mem_range_softc; 260 261 /* Default init_ops implementation. */ 262 struct init_ops init_ops = { 263 .early_clock_source_init = i8254_init, 264 .early_delay = i8254_delay, 265 #ifdef DEV_APIC 266 .msi_init = msi_init, 267 #endif 268 }; 269 270 static void 271 cpu_startup(dummy) 272 void *dummy; 273 { 274 uintmax_t memsize; 275 char *sysenv; 276 277 #ifndef PC98 278 /* 279 * On MacBooks, we need to disallow the legacy USB circuit to 280 * generate an SMI# because this can cause several problems, 281 * namely: incorrect CPU frequency detection and failure to 282 * start the APs. 283 * We do this by disabling a bit in the SMI_EN (SMI Control and 284 * Enable register) of the Intel ICH LPC Interface Bridge. 285 */ 286 sysenv = kern_getenv("smbios.system.product"); 287 if (sysenv != NULL) { 288 if (strncmp(sysenv, "MacBook1,1", 10) == 0 || 289 strncmp(sysenv, "MacBook3,1", 10) == 0 || 290 strncmp(sysenv, "MacBook4,1", 10) == 0 || 291 strncmp(sysenv, "MacBookPro1,1", 13) == 0 || 292 strncmp(sysenv, "MacBookPro1,2", 13) == 0 || 293 strncmp(sysenv, "MacBookPro3,1", 13) == 0 || 294 strncmp(sysenv, "MacBookPro4,1", 13) == 0 || 295 strncmp(sysenv, "Macmini1,1", 10) == 0) { 296 if (bootverbose) 297 printf("Disabling LEGACY_USB_EN bit on " 298 "Intel ICH.\n"); 299 outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); 300 } 301 freeenv(sysenv); 302 } 303 #endif /* !PC98 */ 304 305 /* 306 * Good {morning,afternoon,evening,night}. 
307 */ 308 startrtclock(); 309 printcpuinfo(); 310 panicifcpuunsupported(); 311 #ifdef PERFMON 312 perfmon_init(); 313 #endif 314 315 /* 316 * Display physical memory if SMBIOS reports reasonable amount. 317 */ 318 memsize = 0; 319 sysenv = kern_getenv("smbios.memory.enabled"); 320 if (sysenv != NULL) { 321 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; 322 freeenv(sysenv); 323 } 324 if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count)) 325 memsize = ptoa((uintmax_t)Maxmem); 326 printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); 327 realmem = atop(memsize); 328 329 /* 330 * Display any holes after the first chunk of extended memory. 331 */ 332 if (bootverbose) { 333 int indx; 334 335 printf("Physical memory chunk(s):\n"); 336 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { 337 vm_paddr_t size; 338 339 size = phys_avail[indx + 1] - phys_avail[indx]; 340 printf( 341 "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", 342 (uintmax_t)phys_avail[indx], 343 (uintmax_t)phys_avail[indx + 1] - 1, 344 (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); 345 } 346 } 347 348 vm_ksubmap_init(&kmi); 349 350 printf("avail memory = %ju (%ju MB)\n", 351 ptoa((uintmax_t)vm_cnt.v_free_count), 352 ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); 353 354 /* 355 * Set up buffers, so they can be used to read disk labels. 356 */ 357 bufinit(); 358 vm_pager_bufferinit(); 359 #ifndef XEN 360 cpu_setregs(); 361 #endif 362 } 363 364 /* 365 * Send an interrupt to process. 366 * 367 * Stack is set up to allow sigcode stored 368 * at top to call routine, followed by call 369 * to sigreturn routine below. After sigreturn 370 * resets the signal mask, the stack, and the 371 * frame pointer, it returns to the user 372 * specified pc, psl. 
373 */ 374 #ifdef COMPAT_43 375 static void 376 osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 377 { 378 struct osigframe sf, *fp; 379 struct proc *p; 380 struct thread *td; 381 struct sigacts *psp; 382 struct trapframe *regs; 383 int sig; 384 int oonstack; 385 386 td = curthread; 387 p = td->td_proc; 388 PROC_LOCK_ASSERT(p, MA_OWNED); 389 sig = ksi->ksi_signo; 390 psp = p->p_sigacts; 391 mtx_assert(&psp->ps_mtx, MA_OWNED); 392 regs = td->td_frame; 393 oonstack = sigonstack(regs->tf_esp); 394 395 /* Allocate space for the signal handler context. */ 396 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 397 SIGISMEMBER(psp->ps_sigonstack, sig)) { 398 fp = (struct osigframe *)(td->td_sigstk.ss_sp + 399 td->td_sigstk.ss_size - sizeof(struct osigframe)); 400 #if defined(COMPAT_43) 401 td->td_sigstk.ss_flags |= SS_ONSTACK; 402 #endif 403 } else 404 fp = (struct osigframe *)regs->tf_esp - 1; 405 406 /* Translate the signal if appropriate. */ 407 if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) 408 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 409 410 /* Build the argument list for the signal handler. */ 411 sf.sf_signum = sig; 412 sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; 413 bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo)); 414 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 415 /* Signal handler installed with SA_SIGINFO. */ 416 sf.sf_arg2 = (register_t)&fp->sf_siginfo; 417 sf.sf_siginfo.si_signo = sig; 418 sf.sf_siginfo.si_code = ksi->ksi_code; 419 sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; 420 sf.sf_addr = 0; 421 } else { 422 /* Old FreeBSD-style arguments. */ 423 sf.sf_arg2 = ksi->ksi_code; 424 sf.sf_addr = (register_t)ksi->ksi_addr; 425 sf.sf_ahu.sf_handler = catcher; 426 } 427 mtx_unlock(&psp->ps_mtx); 428 PROC_UNLOCK(p); 429 430 /* Save most if not all of trap frame. 
*/ 431 sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; 432 sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; 433 sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; 434 sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; 435 sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; 436 sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; 437 sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; 438 sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; 439 sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; 440 sf.sf_siginfo.si_sc.sc_es = regs->tf_es; 441 sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; 442 sf.sf_siginfo.si_sc.sc_gs = rgs(); 443 sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; 444 445 /* Build the signal context to be used by osigreturn(). */ 446 sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; 447 SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); 448 sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; 449 sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; 450 sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; 451 sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; 452 sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; 453 sf.sf_siginfo.si_sc.sc_err = regs->tf_err; 454 455 /* 456 * If we're a vm86 process, we want to save the segment registers. 457 * We also change eflags to be our emulated eflags, not the actual 458 * eflags. 459 */ 460 if (regs->tf_eflags & PSL_VM) { 461 /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ 462 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; 463 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; 464 465 sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; 466 sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; 467 sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; 468 sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; 469 470 if (vm86->vm86_has_vme == 0) 471 sf.sf_siginfo.si_sc.sc_ps = 472 (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | 473 (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); 474 475 /* See sendsig() for comments. */ 476 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); 477 } 478 479 /* 480 * Copy the sigframe out to the user's stack. 
481 */ 482 if (copyout(&sf, fp, sizeof(*fp)) != 0) { 483 #ifdef DEBUG 484 printf("process %ld has trashed its stack\n", (long)p->p_pid); 485 #endif 486 PROC_LOCK(p); 487 sigexit(td, SIGILL); 488 } 489 490 regs->tf_esp = (int)fp; 491 if (p->p_sysent->sv_sigcode_base != 0) { 492 regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - 493 szosigcode; 494 } else { 495 /* a.out sysentvec does not use shared page */ 496 regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode; 497 } 498 regs->tf_eflags &= ~(PSL_T | PSL_D); 499 regs->tf_cs = _ucodesel; 500 regs->tf_ds = _udatasel; 501 regs->tf_es = _udatasel; 502 regs->tf_fs = _udatasel; 503 load_gs(_udatasel); 504 regs->tf_ss = _udatasel; 505 PROC_LOCK(p); 506 mtx_lock(&psp->ps_mtx); 507 } 508 #endif /* COMPAT_43 */ 509 510 #ifdef COMPAT_FREEBSD4 511 static void 512 freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 513 { 514 struct sigframe4 sf, *sfp; 515 struct proc *p; 516 struct thread *td; 517 struct sigacts *psp; 518 struct trapframe *regs; 519 int sig; 520 int oonstack; 521 522 td = curthread; 523 p = td->td_proc; 524 PROC_LOCK_ASSERT(p, MA_OWNED); 525 sig = ksi->ksi_signo; 526 psp = p->p_sigacts; 527 mtx_assert(&psp->ps_mtx, MA_OWNED); 528 regs = td->td_frame; 529 oonstack = sigonstack(regs->tf_esp); 530 531 /* Save user context. */ 532 bzero(&sf, sizeof(sf)); 533 sf.sf_uc.uc_sigmask = *mask; 534 sf.sf_uc.uc_stack = td->td_sigstk; 535 sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 536 ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; 537 sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; 538 sf.sf_uc.uc_mcontext.mc_gs = rgs(); 539 bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); 540 bzero(sf.sf_uc.uc_mcontext.mc_fpregs, 541 sizeof(sf.sf_uc.uc_mcontext.mc_fpregs)); 542 bzero(sf.sf_uc.uc_mcontext.__spare__, 543 sizeof(sf.sf_uc.uc_mcontext.__spare__)); 544 bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); 545 546 /* Allocate space for the signal handler context. 
*/ 547 if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && 548 SIGISMEMBER(psp->ps_sigonstack, sig)) { 549 sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp + 550 td->td_sigstk.ss_size - sizeof(struct sigframe4)); 551 #if defined(COMPAT_43) 552 td->td_sigstk.ss_flags |= SS_ONSTACK; 553 #endif 554 } else 555 sfp = (struct sigframe4 *)regs->tf_esp - 1; 556 557 /* Translate the signal if appropriate. */ 558 if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) 559 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 560 561 /* Build the argument list for the signal handler. */ 562 sf.sf_signum = sig; 563 sf.sf_ucontext = (register_t)&sfp->sf_uc; 564 bzero(&sf.sf_si, sizeof(sf.sf_si)); 565 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 566 /* Signal handler installed with SA_SIGINFO. */ 567 sf.sf_siginfo = (register_t)&sfp->sf_si; 568 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; 569 570 /* Fill in POSIX parts */ 571 sf.sf_si.si_signo = sig; 572 sf.sf_si.si_code = ksi->ksi_code; 573 sf.sf_si.si_addr = ksi->ksi_addr; 574 } else { 575 /* Old FreeBSD-style arguments. */ 576 sf.sf_siginfo = ksi->ksi_code; 577 sf.sf_addr = (register_t)ksi->ksi_addr; 578 sf.sf_ahu.sf_handler = catcher; 579 } 580 mtx_unlock(&psp->ps_mtx); 581 PROC_UNLOCK(p); 582 583 /* 584 * If we're a vm86 process, we want to save the segment registers. 585 * We also change eflags to be our emulated eflags, not the actual 586 * eflags. 
587 */ 588 if (regs->tf_eflags & PSL_VM) { 589 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; 590 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; 591 592 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; 593 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; 594 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; 595 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; 596 597 if (vm86->vm86_has_vme == 0) 598 sf.sf_uc.uc_mcontext.mc_eflags = 599 (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | 600 (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); 601 602 /* 603 * Clear PSL_NT to inhibit T_TSSFLT faults on return from 604 * syscalls made by the signal handler. This just avoids 605 * wasting time for our lazy fixup of such faults. PSL_NT 606 * does nothing in vm86 mode, but vm86 programs can set it 607 * almost legitimately in probes for old cpu types. 608 */ 609 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); 610 } 611 612 /* 613 * Copy the sigframe out to the user's stack. 614 */ 615 if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { 616 #ifdef DEBUG 617 printf("process %ld has trashed its stack\n", (long)p->p_pid); 618 #endif 619 PROC_LOCK(p); 620 sigexit(td, SIGILL); 621 } 622 623 regs->tf_esp = (int)sfp; 624 regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - 625 szfreebsd4_sigcode; 626 regs->tf_eflags &= ~(PSL_T | PSL_D); 627 regs->tf_cs = _ucodesel; 628 regs->tf_ds = _udatasel; 629 regs->tf_es = _udatasel; 630 regs->tf_fs = _udatasel; 631 regs->tf_ss = _udatasel; 632 PROC_LOCK(p); 633 mtx_lock(&psp->ps_mtx); 634 } 635 #endif /* COMPAT_FREEBSD4 */ 636 637 void 638 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 639 { 640 struct sigframe sf, *sfp; 641 struct proc *p; 642 struct thread *td; 643 struct sigacts *psp; 644 char *sp; 645 struct trapframe *regs; 646 struct segment_descriptor *sdp; 647 char *xfpusave; 648 size_t xfpusave_len; 649 int sig; 650 int oonstack; 651 652 td = curthread; 653 p = td->td_proc; 654 PROC_LOCK_ASSERT(p, MA_OWNED); 655 sig = 
ksi->ksi_signo; 656 psp = p->p_sigacts; 657 mtx_assert(&psp->ps_mtx, MA_OWNED); 658 #ifdef COMPAT_FREEBSD4 659 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { 660 freebsd4_sendsig(catcher, ksi, mask); 661 return; 662 } 663 #endif 664 #ifdef COMPAT_43 665 if (SIGISMEMBER(psp->ps_osigset, sig)) { 666 osendsig(catcher, ksi, mask); 667 return; 668 } 669 #endif 670 regs = td->td_frame; 671 oonstack = sigonstack(regs->tf_esp); 672 673 #ifdef CPU_ENABLE_SSE 674 if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) { 675 xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu); 676 xfpusave = __builtin_alloca(xfpusave_len); 677 } else { 678 #else 679 { 680 #endif 681 xfpusave_len = 0; 682 xfpusave = NULL; 683 } 684 685 /* Save user context. */ 686 bzero(&sf, sizeof(sf)); 687 sf.sf_uc.uc_sigmask = *mask; 688 sf.sf_uc.uc_stack = td->td_sigstk; 689 sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 690 ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; 691 sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; 692 sf.sf_uc.uc_mcontext.mc_gs = rgs(); 693 bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); 694 sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ 695 get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); 696 fpstate_drop(td); 697 /* 698 * Unconditionally fill the fsbase and gsbase into the mcontext. 699 */ 700 sdp = &td->td_pcb->pcb_fsd; 701 sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 | 702 sdp->sd_lobase; 703 sdp = &td->td_pcb->pcb_gsd; 704 sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | 705 sdp->sd_lobase; 706 bzero(sf.sf_uc.uc_mcontext.mc_spare2, 707 sizeof(sf.sf_uc.uc_mcontext.mc_spare2)); 708 bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); 709 710 /* Allocate space for the signal handler context. 
*/ 711 if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && 712 SIGISMEMBER(psp->ps_sigonstack, sig)) { 713 sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size; 714 #if defined(COMPAT_43) 715 td->td_sigstk.ss_flags |= SS_ONSTACK; 716 #endif 717 } else 718 sp = (char *)regs->tf_esp - 128; 719 if (xfpusave != NULL) { 720 sp -= xfpusave_len; 721 sp = (char *)((unsigned int)sp & ~0x3F); 722 sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; 723 } 724 sp -= sizeof(struct sigframe); 725 726 /* Align to 16 bytes. */ 727 sfp = (struct sigframe *)((unsigned int)sp & ~0xF); 728 729 /* Translate the signal if appropriate. */ 730 if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) 731 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 732 733 /* Build the argument list for the signal handler. */ 734 sf.sf_signum = sig; 735 sf.sf_ucontext = (register_t)&sfp->sf_uc; 736 bzero(&sf.sf_si, sizeof(sf.sf_si)); 737 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 738 /* Signal handler installed with SA_SIGINFO. */ 739 sf.sf_siginfo = (register_t)&sfp->sf_si; 740 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; 741 742 /* Fill in POSIX parts */ 743 sf.sf_si = ksi->ksi_info; 744 sf.sf_si.si_signo = sig; /* maybe a translated signal */ 745 } else { 746 /* Old FreeBSD-style arguments. */ 747 sf.sf_siginfo = ksi->ksi_code; 748 sf.sf_addr = (register_t)ksi->ksi_addr; 749 sf.sf_ahu.sf_handler = catcher; 750 } 751 mtx_unlock(&psp->ps_mtx); 752 PROC_UNLOCK(p); 753 754 /* 755 * If we're a vm86 process, we want to save the segment registers. 756 * We also change eflags to be our emulated eflags, not the actual 757 * eflags. 
758 */ 759 if (regs->tf_eflags & PSL_VM) { 760 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; 761 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; 762 763 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; 764 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; 765 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; 766 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; 767 768 if (vm86->vm86_has_vme == 0) 769 sf.sf_uc.uc_mcontext.mc_eflags = 770 (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | 771 (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); 772 773 /* 774 * Clear PSL_NT to inhibit T_TSSFLT faults on return from 775 * syscalls made by the signal handler. This just avoids 776 * wasting time for our lazy fixup of such faults. PSL_NT 777 * does nothing in vm86 mode, but vm86 programs can set it 778 * almost legitimately in probes for old cpu types. 779 */ 780 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); 781 } 782 783 /* 784 * Copy the sigframe out to the user's stack. 785 */ 786 if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || 787 (xfpusave != NULL && copyout(xfpusave, 788 (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) 789 != 0)) { 790 #ifdef DEBUG 791 printf("process %ld has trashed its stack\n", (long)p->p_pid); 792 #endif 793 PROC_LOCK(p); 794 sigexit(td, SIGILL); 795 } 796 797 regs->tf_esp = (int)sfp; 798 regs->tf_eip = p->p_sysent->sv_sigcode_base; 799 if (regs->tf_eip == 0) 800 regs->tf_eip = p->p_sysent->sv_psstrings - szsigcode; 801 regs->tf_eflags &= ~(PSL_T | PSL_D); 802 regs->tf_cs = _ucodesel; 803 regs->tf_ds = _udatasel; 804 regs->tf_es = _udatasel; 805 regs->tf_fs = _udatasel; 806 regs->tf_ss = _udatasel; 807 PROC_LOCK(p); 808 mtx_lock(&psp->ps_mtx); 809 } 810 811 /* 812 * System call to cleanup state after a signal 813 * has been taken. Reset signal mask and 814 * stack state from context left by sendsig (above). 815 * Return to previous pc and psl as specified by 816 * context left by sendsig. 
Check carefully to 817 * make sure that the user has not modified the 818 * state to gain improper privileges. 819 * 820 * MPSAFE 821 */ 822 #ifdef COMPAT_43 823 int 824 osigreturn(td, uap) 825 struct thread *td; 826 struct osigreturn_args /* { 827 struct osigcontext *sigcntxp; 828 } */ *uap; 829 { 830 struct osigcontext sc; 831 struct trapframe *regs; 832 struct osigcontext *scp; 833 int eflags, error; 834 ksiginfo_t ksi; 835 836 regs = td->td_frame; 837 error = copyin(uap->sigcntxp, &sc, sizeof(sc)); 838 if (error != 0) 839 return (error); 840 scp = ≻ 841 eflags = scp->sc_ps; 842 if (eflags & PSL_VM) { 843 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; 844 struct vm86_kernel *vm86; 845 846 /* 847 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't 848 * set up the vm86 area, and we can't enter vm86 mode. 849 */ 850 if (td->td_pcb->pcb_ext == 0) 851 return (EINVAL); 852 vm86 = &td->td_pcb->pcb_ext->ext_vm86; 853 if (vm86->vm86_inited == 0) 854 return (EINVAL); 855 856 /* Go back to user mode if both flags are set. */ 857 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { 858 ksiginfo_init_trap(&ksi); 859 ksi.ksi_signo = SIGBUS; 860 ksi.ksi_code = BUS_OBJERR; 861 ksi.ksi_addr = (void *)regs->tf_eip; 862 trapsignal(td, &ksi); 863 } 864 865 if (vm86->vm86_has_vme) { 866 eflags = (tf->tf_eflags & ~VME_USERCHANGE) | 867 (eflags & VME_USERCHANGE) | PSL_VM; 868 } else { 869 vm86->vm86_eflags = eflags; /* save VIF, VIP */ 870 eflags = (tf->tf_eflags & ~VM_USERCHANGE) | 871 (eflags & VM_USERCHANGE) | PSL_VM; 872 } 873 tf->tf_vm86_ds = scp->sc_ds; 874 tf->tf_vm86_es = scp->sc_es; 875 tf->tf_vm86_fs = scp->sc_fs; 876 tf->tf_vm86_gs = scp->sc_gs; 877 tf->tf_ds = _udatasel; 878 tf->tf_es = _udatasel; 879 tf->tf_fs = _udatasel; 880 } else { 881 /* 882 * Don't allow users to change privileged or reserved flags. 883 */ 884 if (!EFL_SECURE(eflags, regs->tf_eflags)) { 885 return (EINVAL); 886 } 887 888 /* 889 * Don't allow users to load a valid privileged %cs. 
Let the 890 * hardware check for invalid selectors, excess privilege in 891 * other selectors, invalid %eip's and invalid %esp's. 892 */ 893 if (!CS_SECURE(scp->sc_cs)) { 894 ksiginfo_init_trap(&ksi); 895 ksi.ksi_signo = SIGBUS; 896 ksi.ksi_code = BUS_OBJERR; 897 ksi.ksi_trapno = T_PROTFLT; 898 ksi.ksi_addr = (void *)regs->tf_eip; 899 trapsignal(td, &ksi); 900 return (EINVAL); 901 } 902 regs->tf_ds = scp->sc_ds; 903 regs->tf_es = scp->sc_es; 904 regs->tf_fs = scp->sc_fs; 905 } 906 907 /* Restore remaining registers. */ 908 regs->tf_eax = scp->sc_eax; 909 regs->tf_ebx = scp->sc_ebx; 910 regs->tf_ecx = scp->sc_ecx; 911 regs->tf_edx = scp->sc_edx; 912 regs->tf_esi = scp->sc_esi; 913 regs->tf_edi = scp->sc_edi; 914 regs->tf_cs = scp->sc_cs; 915 regs->tf_ss = scp->sc_ss; 916 regs->tf_isp = scp->sc_isp; 917 regs->tf_ebp = scp->sc_fp; 918 regs->tf_esp = scp->sc_sp; 919 regs->tf_eip = scp->sc_pc; 920 regs->tf_eflags = eflags; 921 922 #if defined(COMPAT_43) 923 if (scp->sc_onstack & 1) 924 td->td_sigstk.ss_flags |= SS_ONSTACK; 925 else 926 td->td_sigstk.ss_flags &= ~SS_ONSTACK; 927 #endif 928 kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL, 929 SIGPROCMASK_OLD); 930 return (EJUSTRETURN); 931 } 932 #endif /* COMPAT_43 */ 933 934 #ifdef COMPAT_FREEBSD4 935 /* 936 * MPSAFE 937 */ 938 int 939 freebsd4_sigreturn(td, uap) 940 struct thread *td; 941 struct freebsd4_sigreturn_args /* { 942 const ucontext4 *sigcntxp; 943 } */ *uap; 944 { 945 struct ucontext4 uc; 946 struct trapframe *regs; 947 struct ucontext4 *ucp; 948 int cs, eflags, error; 949 ksiginfo_t ksi; 950 951 error = copyin(uap->sigcntxp, &uc, sizeof(uc)); 952 if (error != 0) 953 return (error); 954 ucp = &uc; 955 regs = td->td_frame; 956 eflags = ucp->uc_mcontext.mc_eflags; 957 if (eflags & PSL_VM) { 958 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; 959 struct vm86_kernel *vm86; 960 961 /* 962 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't 963 * set up the vm86 area, and we can't 
 * enter vm86 mode.
 */
		if (td->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/* Go back to user mode if both flags are set. */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			ksi.ksi_addr = (void *)regs->tf_eip;
			trapsignal(td, &ksi);
		}
		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
		tf->tf_eflags = eflags;
		/* Stash the flat segment registers into the vm86 slots. */
		tf->tf_vm86_ds = tf->tf_ds;
		tf->tf_vm86_es = tf->tf_es;
		tf->tf_vm86_fs = tf->tf_fs;
		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 */
		if (!EFL_SECURE(eflags, regs->tf_eflags)) {
			uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n",
			    td->td_proc->p_pid, td->td_name, eflags);
			return (EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		cs = ucp->uc_mcontext.mc_cs;
		if (!CS_SECURE(cs)) {
			uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n",
			    td->td_proc->p_pid, td->td_name, cs);
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			ksi.ksi_trapno = T_PROTFLT;
			ksi.ksi_addr = (void *)regs->tf_eip;
			trapsignal(td, &ksi);
			return (EINVAL);
		}

		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
	}

#if defined(COMPAT_43)
	/* Restore the historical on-signal-stack flag. */
	if (ucp->uc_mcontext.mc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif
	kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
	return (EJUSTRETURN);
}
#endif	/* COMPAT_FREEBSD4 */

/*
 * System call to cleanup state after a signal handler returns: restore
 * the user context saved in *uap->sigcntxp, validating any fields that
 * could elevate privilege (eflags, %cs), and re-install the saved signal
 * mask.  Returns EJUSTRETURN on success so the restored register state
 * is not clobbered by the normal syscall return path.
 *
 * MPSAFE
 */
int
sys_sigreturn(td, uap)
	struct thread *td;
	struct sigreturn_args /* {
		const struct __ucontext *sigcntxp;
	} */ *uap;
{
	ucontext_t uc;
	struct proc *p;
	struct trapframe *regs;
	ucontext_t *ucp;
	char *xfpustate;
	size_t xfpustate_len;
	int cs, eflags, error, ret;
	ksiginfo_t ksi;

	p = td->td_proc;

	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
	if (error != 0)
		return (error);
	ucp = &uc;
	/* Reject contexts carrying flag bits we do not understand. */
	if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
		uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
		    td->td_name, ucp->uc_mcontext.mc_flags);
		return (EINVAL);
	}
	regs = td->td_frame;
	eflags = ucp->uc_mcontext.mc_eflags;
	if (eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;

		/*
		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
		 * set up the vm86 area, and we can't enter vm86 mode.
		 */
		if (td->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/* Go back to user mode if both flags are set. */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			ksi.ksi_addr = (void *)regs->tf_eip;
			trapsignal(td, &ksi);
		}

		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
		tf->tf_eflags = eflags;
		tf->tf_vm86_ds = tf->tf_ds;
		tf->tf_vm86_es = tf->tf_es;
		tf->tf_vm86_fs = tf->tf_fs;
		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 */
		if (!EFL_SECURE(eflags, regs->tf_eflags)) {
			uprintf("pid %d (%s): sigreturn eflags = 0x%x\n",
			    td->td_proc->p_pid, td->td_name, eflags);
			return (EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		cs = ucp->uc_mcontext.mc_cs;
		if (!CS_SECURE(cs)) {
			uprintf("pid %d (%s): sigreturn cs = 0x%x\n",
			    td->td_proc->p_pid, td->td_name, cs);
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			ksi.ksi_trapno = T_PROTFLT;
			ksi.ksi_addr = (void *)regs->tf_eip;
			trapsignal(td, &ksi);
			return (EINVAL);
		}

		/*
		 * Optionally restore extended FPU state supplied by the
		 * context.  The length is bounded against the CPU's maximum
		 * extended-state size before the alloca.
		 */
		if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
			xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
			if (xfpustate_len > cpu_max_ext_state_size -
			    sizeof(union savefpu)) {
				uprintf(
			    "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
				    p->p_pid, td->td_name, xfpustate_len);
				return (EINVAL);
			}
			xfpustate = __builtin_alloca(xfpustate_len);
			error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
			    xfpustate, xfpustate_len);
			if (error != 0) {
				uprintf(
			"pid %d (%s): sigreturn copying xfpustate failed\n",
				    p->p_pid, td->td_name);
				return (error);
			}
		} else {
			xfpustate = NULL;
			xfpustate_len = 0;
		}
		ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate,
		    xfpustate_len);
		if (ret != 0)
			return (ret);
		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
	}

#if defined(COMPAT_43)
	/* Restore the historical on-signal-stack flag. */
	if (ucp->uc_mcontext.mc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif

	kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
	return (EJUSTRETURN);
}

/*
 * Machine dependent boot() routine
 *
 * I haven't seen anything to put here yet
 * Possibly some stuff might be grafted back here from boot()
 */
void
cpu_boot(int howto)
{
}

/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{
	/* Not applicable: x86 D-caches are coherent with instruction fetch. */
}

/* Get current clock frequency for the given cpu id. */
int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{
	uint64_t tsc1, tsc2;
	uint64_t acnt, mcnt, perf;
	register_t reg;

	if (pcpu_find(cpu_id) == NULL || rate == NULL)
		return (EINVAL);
	if ((cpu_feature & CPUID_TSC) == 0)
		return (EOPNOTSUPP);

	/*
	 * If TSC is P-state invariant and APERF/MPERF MSRs do not exist,
	 * DELAY(9) based logic fails.
	 */
	if (tsc_is_invariant && !tsc_perf_stat)
		return (EOPNOTSUPP);

#ifdef SMP
	if (smp_cpus > 1) {
		/* Schedule ourselves on the indicated cpu. */
		thread_lock(curthread);
		sched_bind(curthread, cpu_id);
		thread_unlock(curthread);
	}
#endif

	/* Calibrate by measuring a short delay. */
	reg = intr_disable();
	if (tsc_is_invariant) {
		/*
		 * Invariant TSC ticks at a fixed rate regardless of the
		 * current P-state; scale the measured delta by the
		 * APERF/MPERF ratio to estimate the effective frequency.
		 * NOTE(review): assumes MPERF advances during DELAY() so
		 * mcnt is nonzero when tsc_perf_stat is set — confirm.
		 */
		wrmsr(MSR_MPERF, 0);
		wrmsr(MSR_APERF, 0);
		tsc1 = rdtsc();
		DELAY(1000);
		mcnt = rdmsr(MSR_MPERF);
		acnt = rdmsr(MSR_APERF);
		tsc2 = rdtsc();
		intr_restore(reg);
		perf = 1000 * acnt / mcnt;
		*rate = (tsc2 - tsc1) * perf;
	} else {
		/* 1000us delay, so delta * 1000 gives ticks per second. */
		tsc1 = rdtsc();
		DELAY(1000);
		tsc2 = rdtsc();
		intr_restore(reg);
		*rate = (tsc2 - tsc1) * 1000;
	}

#ifdef SMP
	if (smp_cpus > 1) {
		/* Release the CPU binding taken above. */
		thread_lock(curthread);
		sched_unbind(curthread);
		thread_unlock(curthread);
	}
#endif

	return (0);
}

#ifdef XEN

/* Yield the (virtual) CPU back to the hypervisor until an event arrives. */
static void
idle_block(void)
{

	HYPERVISOR_sched_op(SCHEDOP_block, 0);
}

void
cpu_halt(void)
{
	HYPERVISOR_shutdown(SHUTDOWN_poweroff);
}

int scheduler_running;

static void
cpu_idle_hlt(sbintime_t sbt)
{

	scheduler_running = 1;
	enable_intr();
	idle_block();
}

#else
/*
 *
 * Shutdown the CPU as much as possible
 */
void
cpu_halt(void)
{
	for (;;)
		halt();
}

#endif

void (*cpu_idle_hook)(sbintime_t) = NULL;	/* ACPI idle hook. */
static int	cpu_ident_amdc1e = 0;	/* AMD C1E supported. */
static int	idle_mwait = 1;		/* Use MONITOR/MWAIT for short idle. */
SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RWTUN, &idle_mwait,
    0, "Use MONITOR/MWAIT for short idle");

/* Per-CPU idle-state markers stored in the pcpu monitor buffer. */
#define	STATE_RUNNING	0x0
#define	STATE_MWAIT	0x1
#define	STATE_SLEEPING	0x2

#ifndef PC98
/* Idle via the ACPI-provided hook if present, falling back to hlt. */
static void
cpu_idle_acpi(sbintime_t sbt)
{
	int *state;

	state = (int *)PCPU_PTR(monitorbuf);
	*state = STATE_SLEEPING;

	/* See comments in cpu_idle_hlt(). */
	disable_intr();
	if (sched_runnable())
		enable_intr();
	else if (cpu_idle_hook)
		cpu_idle_hook(sbt);
	else
		__asm __volatile("sti; hlt");
	*state = STATE_RUNNING;
}
#endif /* !PC98 */

#ifndef XEN
/* Idle by halting the CPU until the next interrupt. */
static void
cpu_idle_hlt(sbintime_t sbt)
{
	int *state;

	state = (int *)PCPU_PTR(monitorbuf);
	*state = STATE_SLEEPING;

	/*
	 * Since we may be in a critical section from cpu_idle(), if
	 * an interrupt fires during that critical section we may have
	 * a pending preemption.  If the CPU halts, then that thread
	 * may not execute until a later interrupt awakens the CPU.
	 * To handle this race, check for a runnable thread after
	 * disabling interrupts and immediately return if one is
	 * found.  Also, we must absolutely guarantee that hlt is
	 * the next instruction after sti.  This ensures that any
	 * interrupt that fires after the call to disable_intr() will
	 * immediately awaken the CPU from hlt.  Finally, please note
	 * that on x86 this works fine because interrupts are enabled
	 * only after the instruction following sti takes place, while
	 * IF is set to 1 immediately, allowing the hlt instruction to
	 * acknowledge the interrupt.
	 */
	disable_intr();
	if (sched_runnable())
		enable_intr();
	else
		__asm __volatile("sti; hlt");
	*state = STATE_RUNNING;
}
#endif

/* Idle with MONITOR/MWAIT; a write to *state by another CPU wakes us. */
static void
cpu_idle_mwait(sbintime_t sbt)
{
	int *state;

	state = (int *)PCPU_PTR(monitorbuf);
	*state = STATE_MWAIT;

	/* See comments in cpu_idle_hlt(). */
	disable_intr();
	if (sched_runnable()) {
		enable_intr();
		*state = STATE_RUNNING;
		return;
	}
	cpu_monitor(state, 0, 0);
	/* Only mwait if nobody changed the state since cpu_monitor(). */
	if (*state == STATE_MWAIT)
		__asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
	else
		enable_intr();
	*state = STATE_RUNNING;
}

/* Busy-wait idle: spin polling the run queue; lowest latency, most power. */
static void
cpu_idle_spin(sbintime_t sbt)
{
	int *state;
	int i;

	state = (int *)PCPU_PTR(monitorbuf);
	*state = STATE_RUNNING;

	/*
	 * The sched_runnable() call is racy, but as long as we are in
	 * a loop, missing it one time will have just a little impact,
	 * if any (and it is much better than missing the check at all).
	 */
	for (i = 0; i < 1000; i++) {
		if (sched_runnable())
			return;
		cpu_spinwait();
	}
}

/*
 * C1E renders the local APIC timer dead, so we disable it by
 * reading the Interrupt Pending Message register and clearing
 * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
1412 * 1413 * Reference: 1414 * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors" 1415 * #32559 revision 3.00+ 1416 */ 1417 #define MSR_AMDK8_IPM 0xc0010055 1418 #define AMDK8_SMIONCMPHALT (1ULL << 27) 1419 #define AMDK8_C1EONCMPHALT (1ULL << 28) 1420 #define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT) 1421 1422 static void 1423 cpu_probe_amdc1e(void) 1424 { 1425 1426 /* 1427 * Detect the presence of C1E capability mostly on latest 1428 * dual-cores (or future) k8 family. 1429 */ 1430 if (cpu_vendor_id == CPU_VENDOR_AMD && 1431 (cpu_id & 0x00000f00) == 0x00000f00 && 1432 (cpu_id & 0x0fff0000) >= 0x00040000) { 1433 cpu_ident_amdc1e = 1; 1434 } 1435 } 1436 1437 #if defined(PC98) || defined(XEN) 1438 void (*cpu_idle_fn)(sbintime_t) = cpu_idle_hlt; 1439 #else 1440 void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi; 1441 #endif 1442 1443 void 1444 cpu_idle(int busy) 1445 { 1446 #ifndef XEN 1447 uint64_t msr; 1448 #endif 1449 sbintime_t sbt = -1; 1450 1451 CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", 1452 busy, curcpu); 1453 #if defined(MP_WATCHDOG) && !defined(XEN) 1454 ap_watchdog(PCPU_GET(cpuid)); 1455 #endif 1456 #ifndef XEN 1457 /* If we are busy - try to use fast methods. */ 1458 if (busy) { 1459 if ((cpu_feature2 & CPUID2_MON) && idle_mwait) { 1460 cpu_idle_mwait(busy); 1461 goto out; 1462 } 1463 } 1464 #endif 1465 1466 /* If we have time - switch timers into idle mode. */ 1467 if (!busy) { 1468 critical_enter(); 1469 sbt = cpu_idleclock(); 1470 } 1471 1472 #ifndef XEN 1473 /* Apply AMD APIC timer C1E workaround. */ 1474 if (cpu_ident_amdc1e && cpu_disable_c3_sleep) { 1475 msr = rdmsr(MSR_AMDK8_IPM); 1476 if (msr & AMDK8_CMPHALT) 1477 wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); 1478 } 1479 #endif 1480 1481 /* Call main idle method. */ 1482 cpu_idle_fn(sbt); 1483 1484 /* Switch timers back into active mode. 
*/ 1485 if (!busy) { 1486 cpu_activeclock(); 1487 critical_exit(); 1488 } 1489 #ifndef XEN 1490 out: 1491 #endif 1492 CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", 1493 busy, curcpu); 1494 } 1495 1496 int 1497 cpu_idle_wakeup(int cpu) 1498 { 1499 struct pcpu *pcpu; 1500 int *state; 1501 1502 pcpu = pcpu_find(cpu); 1503 state = (int *)pcpu->pc_monitorbuf; 1504 /* 1505 * This doesn't need to be atomic since missing the race will 1506 * simply result in unnecessary IPIs. 1507 */ 1508 if (*state == STATE_SLEEPING) 1509 return (0); 1510 if (*state == STATE_MWAIT) 1511 *state = STATE_RUNNING; 1512 return (1); 1513 } 1514 1515 /* 1516 * Ordered by speed/power consumption. 1517 */ 1518 struct { 1519 void *id_fn; 1520 char *id_name; 1521 } idle_tbl[] = { 1522 { cpu_idle_spin, "spin" }, 1523 { cpu_idle_mwait, "mwait" }, 1524 { cpu_idle_hlt, "hlt" }, 1525 #ifndef PC98 1526 { cpu_idle_acpi, "acpi" }, 1527 #endif 1528 { NULL, NULL } 1529 }; 1530 1531 static int 1532 idle_sysctl_available(SYSCTL_HANDLER_ARGS) 1533 { 1534 char *avail, *p; 1535 int error; 1536 int i; 1537 1538 avail = malloc(256, M_TEMP, M_WAITOK); 1539 p = avail; 1540 for (i = 0; idle_tbl[i].id_name != NULL; i++) { 1541 if (strstr(idle_tbl[i].id_name, "mwait") && 1542 (cpu_feature2 & CPUID2_MON) == 0) 1543 continue; 1544 #ifndef PC98 1545 if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && 1546 cpu_idle_hook == NULL) 1547 continue; 1548 #endif 1549 p += sprintf(p, "%s%s", p != avail ? 
", " : "", 1550 idle_tbl[i].id_name); 1551 } 1552 error = sysctl_handle_string(oidp, avail, 0, req); 1553 free(avail, M_TEMP); 1554 return (error); 1555 } 1556 1557 SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, 1558 0, 0, idle_sysctl_available, "A", "list of available idle functions"); 1559 1560 static int 1561 idle_sysctl(SYSCTL_HANDLER_ARGS) 1562 { 1563 char buf[16]; 1564 int error; 1565 char *p; 1566 int i; 1567 1568 p = "unknown"; 1569 for (i = 0; idle_tbl[i].id_name != NULL; i++) { 1570 if (idle_tbl[i].id_fn == cpu_idle_fn) { 1571 p = idle_tbl[i].id_name; 1572 break; 1573 } 1574 } 1575 strncpy(buf, p, sizeof(buf)); 1576 error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 1577 if (error != 0 || req->newptr == NULL) 1578 return (error); 1579 for (i = 0; idle_tbl[i].id_name != NULL; i++) { 1580 if (strstr(idle_tbl[i].id_name, "mwait") && 1581 (cpu_feature2 & CPUID2_MON) == 0) 1582 continue; 1583 #ifndef PC98 1584 if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && 1585 cpu_idle_hook == NULL) 1586 continue; 1587 #endif 1588 if (strcmp(idle_tbl[i].id_name, buf)) 1589 continue; 1590 cpu_idle_fn = idle_tbl[i].id_fn; 1591 return (0); 1592 } 1593 return (EINVAL); 1594 } 1595 1596 SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, 1597 idle_sysctl, "A", "currently selected idle function"); 1598 1599 /* 1600 * Reset registers to default values on exec. 1601 */ 1602 void 1603 exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) 1604 { 1605 struct trapframe *regs = td->td_frame; 1606 struct pcb *pcb = td->td_pcb; 1607 1608 /* Reset pc->pcb_gs and %gs before possibly invalidating it. 
*/ 1609 pcb->pcb_gs = _udatasel; 1610 load_gs(_udatasel); 1611 1612 mtx_lock_spin(&dt_lock); 1613 if (td->td_proc->p_md.md_ldt) 1614 user_ldt_free(td); 1615 else 1616 mtx_unlock_spin(&dt_lock); 1617 1618 bzero((char *)regs, sizeof(struct trapframe)); 1619 regs->tf_eip = imgp->entry_addr; 1620 regs->tf_esp = stack; 1621 regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); 1622 regs->tf_ss = _udatasel; 1623 regs->tf_ds = _udatasel; 1624 regs->tf_es = _udatasel; 1625 regs->tf_fs = _udatasel; 1626 regs->tf_cs = _ucodesel; 1627 1628 /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ 1629 regs->tf_ebx = imgp->ps_strings; 1630 1631 /* 1632 * Reset the hardware debug registers if they were in use. 1633 * They won't have any meaning for the newly exec'd process. 1634 */ 1635 if (pcb->pcb_flags & PCB_DBREGS) { 1636 pcb->pcb_dr0 = 0; 1637 pcb->pcb_dr1 = 0; 1638 pcb->pcb_dr2 = 0; 1639 pcb->pcb_dr3 = 0; 1640 pcb->pcb_dr6 = 0; 1641 pcb->pcb_dr7 = 0; 1642 if (pcb == curpcb) { 1643 /* 1644 * Clear the debug registers on the running 1645 * CPU, otherwise they will end up affecting 1646 * the next process we switch to. 1647 */ 1648 reset_dbregs(); 1649 } 1650 pcb->pcb_flags &= ~PCB_DBREGS; 1651 } 1652 1653 pcb->pcb_initial_npxcw = __INITIAL_NPXCW__; 1654 1655 /* 1656 * Drop the FP state if we hold it, so that the process gets a 1657 * clean FP state if it uses the FPU again. 1658 */ 1659 fpstate_drop(td); 1660 1661 /* 1662 * XXX - Linux emulator 1663 * Make sure sure edx is 0x0 on entry. Linux binaries depend 1664 * on it. 1665 */ 1666 td->td_retval[1] = 0; 1667 } 1668 1669 void 1670 cpu_setregs(void) 1671 { 1672 unsigned int cr0; 1673 1674 cr0 = rcr0(); 1675 1676 /* 1677 * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support: 1678 * 1679 * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT 1680 * instructions. 
We must set the CR0_MP bit and use the CR0_TS 1681 * bit to control the trap, because setting the CR0_EM bit does 1682 * not cause WAIT instructions to trap. It's important to trap 1683 * WAIT instructions - otherwise the "wait" variants of no-wait 1684 * control instructions would degenerate to the "no-wait" variants 1685 * after FP context switches but work correctly otherwise. It's 1686 * particularly important to trap WAITs when there is no NPX - 1687 * otherwise the "wait" variants would always degenerate. 1688 * 1689 * Try setting CR0_NE to get correct error reporting on 486DX's. 1690 * Setting it should fail or do nothing on lesser processors. 1691 */ 1692 cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; 1693 load_cr0(cr0); 1694 load_gs(_udatasel); 1695 } 1696 1697 u_long bootdev; /* not a struct cdev *- encoding is different */ 1698 SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, 1699 CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); 1700 1701 static char bootmethod[16] = "BIOS"; 1702 SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0, 1703 "System firmware boot method"); 1704 1705 /* 1706 * Initialize 386 and configure to run kernel 1707 */ 1708 1709 /* 1710 * Initialize segments & interrupt table 1711 */ 1712 1713 int _default_ldt; 1714 1715 #ifdef XEN 1716 union descriptor *gdt; 1717 union descriptor *ldt; 1718 #else 1719 union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ 1720 union descriptor ldt[NLDT]; /* local descriptor table */ 1721 #endif 1722 static struct gate_descriptor idt0[NIDT]; 1723 struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ 1724 struct region_descriptor r_gdt, r_idt; /* table descriptors */ 1725 struct mtx dt_lock; /* lock for GDT and LDT */ 1726 1727 static struct i386tss dblfault_tss; 1728 static char dblfault_stack[PAGE_SIZE]; 1729 1730 extern vm_offset_t proc0kstack; 1731 1732 1733 /* 1734 * software prototypes -- in more palatable form. 
1735 * 1736 * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret 1737 * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it) 1738 */ 1739 struct soft_segment_descriptor gdt_segs[] = { 1740 /* GNULL_SEL 0 Null Descriptor */ 1741 { .ssd_base = 0x0, 1742 .ssd_limit = 0x0, 1743 .ssd_type = 0, 1744 .ssd_dpl = SEL_KPL, 1745 .ssd_p = 0, 1746 .ssd_xx = 0, .ssd_xx1 = 0, 1747 .ssd_def32 = 0, 1748 .ssd_gran = 0 }, 1749 /* GPRIV_SEL 1 SMP Per-Processor Private Data Descriptor */ 1750 { .ssd_base = 0x0, 1751 .ssd_limit = 0xfffff, 1752 .ssd_type = SDT_MEMRWA, 1753 .ssd_dpl = SEL_KPL, 1754 .ssd_p = 1, 1755 .ssd_xx = 0, .ssd_xx1 = 0, 1756 .ssd_def32 = 1, 1757 .ssd_gran = 1 }, 1758 /* GUFS_SEL 2 %fs Descriptor for user */ 1759 { .ssd_base = 0x0, 1760 .ssd_limit = 0xfffff, 1761 .ssd_type = SDT_MEMRWA, 1762 .ssd_dpl = SEL_UPL, 1763 .ssd_p = 1, 1764 .ssd_xx = 0, .ssd_xx1 = 0, 1765 .ssd_def32 = 1, 1766 .ssd_gran = 1 }, 1767 /* GUGS_SEL 3 %gs Descriptor for user */ 1768 { .ssd_base = 0x0, 1769 .ssd_limit = 0xfffff, 1770 .ssd_type = SDT_MEMRWA, 1771 .ssd_dpl = SEL_UPL, 1772 .ssd_p = 1, 1773 .ssd_xx = 0, .ssd_xx1 = 0, 1774 .ssd_def32 = 1, 1775 .ssd_gran = 1 }, 1776 /* GCODE_SEL 4 Code Descriptor for kernel */ 1777 { .ssd_base = 0x0, 1778 .ssd_limit = 0xfffff, 1779 .ssd_type = SDT_MEMERA, 1780 .ssd_dpl = SEL_KPL, 1781 .ssd_p = 1, 1782 .ssd_xx = 0, .ssd_xx1 = 0, 1783 .ssd_def32 = 1, 1784 .ssd_gran = 1 }, 1785 /* GDATA_SEL 5 Data Descriptor for kernel */ 1786 { .ssd_base = 0x0, 1787 .ssd_limit = 0xfffff, 1788 .ssd_type = SDT_MEMRWA, 1789 .ssd_dpl = SEL_KPL, 1790 .ssd_p = 1, 1791 .ssd_xx = 0, .ssd_xx1 = 0, 1792 .ssd_def32 = 1, 1793 .ssd_gran = 1 }, 1794 /* GUCODE_SEL 6 Code Descriptor for user */ 1795 { .ssd_base = 0x0, 1796 .ssd_limit = 0xfffff, 1797 .ssd_type = SDT_MEMERA, 1798 .ssd_dpl = SEL_UPL, 1799 .ssd_p = 1, 1800 .ssd_xx = 0, .ssd_xx1 = 0, 1801 .ssd_def32 = 1, 1802 .ssd_gran = 1 }, 1803 /* GUDATA_SEL 7 Data Descriptor for user */ 1804 { .ssd_base = 0x0, 
1805 .ssd_limit = 0xfffff, 1806 .ssd_type = SDT_MEMRWA, 1807 .ssd_dpl = SEL_UPL, 1808 .ssd_p = 1, 1809 .ssd_xx = 0, .ssd_xx1 = 0, 1810 .ssd_def32 = 1, 1811 .ssd_gran = 1 }, 1812 /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ 1813 { .ssd_base = 0x400, 1814 .ssd_limit = 0xfffff, 1815 .ssd_type = SDT_MEMRWA, 1816 .ssd_dpl = SEL_KPL, 1817 .ssd_p = 1, 1818 .ssd_xx = 0, .ssd_xx1 = 0, 1819 .ssd_def32 = 1, 1820 .ssd_gran = 1 }, 1821 #ifndef XEN 1822 /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ 1823 { 1824 .ssd_base = 0x0, 1825 .ssd_limit = sizeof(struct i386tss)-1, 1826 .ssd_type = SDT_SYS386TSS, 1827 .ssd_dpl = 0, 1828 .ssd_p = 1, 1829 .ssd_xx = 0, .ssd_xx1 = 0, 1830 .ssd_def32 = 0, 1831 .ssd_gran = 0 }, 1832 /* GLDT_SEL 10 LDT Descriptor */ 1833 { .ssd_base = (int) ldt, 1834 .ssd_limit = sizeof(ldt)-1, 1835 .ssd_type = SDT_SYSLDT, 1836 .ssd_dpl = SEL_UPL, 1837 .ssd_p = 1, 1838 .ssd_xx = 0, .ssd_xx1 = 0, 1839 .ssd_def32 = 0, 1840 .ssd_gran = 0 }, 1841 /* GUSERLDT_SEL 11 User LDT Descriptor per process */ 1842 { .ssd_base = (int) ldt, 1843 .ssd_limit = (512 * sizeof(union descriptor)-1), 1844 .ssd_type = SDT_SYSLDT, 1845 .ssd_dpl = 0, 1846 .ssd_p = 1, 1847 .ssd_xx = 0, .ssd_xx1 = 0, 1848 .ssd_def32 = 0, 1849 .ssd_gran = 0 }, 1850 /* GPANIC_SEL 12 Panic Tss Descriptor */ 1851 { .ssd_base = (int) &dblfault_tss, 1852 .ssd_limit = sizeof(struct i386tss)-1, 1853 .ssd_type = SDT_SYS386TSS, 1854 .ssd_dpl = 0, 1855 .ssd_p = 1, 1856 .ssd_xx = 0, .ssd_xx1 = 0, 1857 .ssd_def32 = 0, 1858 .ssd_gran = 0 }, 1859 /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */ 1860 { .ssd_base = 0, 1861 .ssd_limit = 0xfffff, 1862 .ssd_type = SDT_MEMERA, 1863 .ssd_dpl = 0, 1864 .ssd_p = 1, 1865 .ssd_xx = 0, .ssd_xx1 = 0, 1866 .ssd_def32 = 0, 1867 .ssd_gran = 1 }, 1868 /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */ 1869 { .ssd_base = 0, 1870 .ssd_limit = 0xfffff, 1871 .ssd_type = SDT_MEMERA, 1872 .ssd_dpl = 0, 1873 .ssd_p = 1, 1874 .ssd_xx = 0, .ssd_xx1 
= 0, 1875 .ssd_def32 = 0, 1876 .ssd_gran = 1 }, 1877 /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */ 1878 { .ssd_base = 0, 1879 .ssd_limit = 0xfffff, 1880 .ssd_type = SDT_MEMRWA, 1881 .ssd_dpl = 0, 1882 .ssd_p = 1, 1883 .ssd_xx = 0, .ssd_xx1 = 0, 1884 .ssd_def32 = 1, 1885 .ssd_gran = 1 }, 1886 /* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */ 1887 { .ssd_base = 0, 1888 .ssd_limit = 0xfffff, 1889 .ssd_type = SDT_MEMRWA, 1890 .ssd_dpl = 0, 1891 .ssd_p = 1, 1892 .ssd_xx = 0, .ssd_xx1 = 0, 1893 .ssd_def32 = 0, 1894 .ssd_gran = 1 }, 1895 /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */ 1896 { .ssd_base = 0, 1897 .ssd_limit = 0xfffff, 1898 .ssd_type = SDT_MEMRWA, 1899 .ssd_dpl = 0, 1900 .ssd_p = 1, 1901 .ssd_xx = 0, .ssd_xx1 = 0, 1902 .ssd_def32 = 0, 1903 .ssd_gran = 1 }, 1904 /* GNDIS_SEL 18 NDIS Descriptor */ 1905 { .ssd_base = 0x0, 1906 .ssd_limit = 0x0, 1907 .ssd_type = 0, 1908 .ssd_dpl = 0, 1909 .ssd_p = 0, 1910 .ssd_xx = 0, .ssd_xx1 = 0, 1911 .ssd_def32 = 0, 1912 .ssd_gran = 0 }, 1913 #endif /* !XEN */ 1914 }; 1915 1916 static struct soft_segment_descriptor ldt_segs[] = { 1917 /* Null Descriptor - overwritten by call gate */ 1918 { .ssd_base = 0x0, 1919 .ssd_limit = 0x0, 1920 .ssd_type = 0, 1921 .ssd_dpl = 0, 1922 .ssd_p = 0, 1923 .ssd_xx = 0, .ssd_xx1 = 0, 1924 .ssd_def32 = 0, 1925 .ssd_gran = 0 }, 1926 /* Null Descriptor - overwritten by call gate */ 1927 { .ssd_base = 0x0, 1928 .ssd_limit = 0x0, 1929 .ssd_type = 0, 1930 .ssd_dpl = 0, 1931 .ssd_p = 0, 1932 .ssd_xx = 0, .ssd_xx1 = 0, 1933 .ssd_def32 = 0, 1934 .ssd_gran = 0 }, 1935 /* Null Descriptor - overwritten by call gate */ 1936 { .ssd_base = 0x0, 1937 .ssd_limit = 0x0, 1938 .ssd_type = 0, 1939 .ssd_dpl = 0, 1940 .ssd_p = 0, 1941 .ssd_xx = 0, .ssd_xx1 = 0, 1942 .ssd_def32 = 0, 1943 .ssd_gran = 0 }, 1944 /* Code Descriptor for user */ 1945 { .ssd_base = 0x0, 1946 .ssd_limit = 0xfffff, 1947 .ssd_type = SDT_MEMERA, 1948 .ssd_dpl = SEL_UPL, 1949 .ssd_p = 1, 1950 .ssd_xx = 0, .ssd_xx1 = 0, 1951 
.ssd_def32 = 1, 1952 .ssd_gran = 1 }, 1953 /* Null Descriptor - overwritten by call gate */ 1954 { .ssd_base = 0x0, 1955 .ssd_limit = 0x0, 1956 .ssd_type = 0, 1957 .ssd_dpl = 0, 1958 .ssd_p = 0, 1959 .ssd_xx = 0, .ssd_xx1 = 0, 1960 .ssd_def32 = 0, 1961 .ssd_gran = 0 }, 1962 /* Data Descriptor for user */ 1963 { .ssd_base = 0x0, 1964 .ssd_limit = 0xfffff, 1965 .ssd_type = SDT_MEMRWA, 1966 .ssd_dpl = SEL_UPL, 1967 .ssd_p = 1, 1968 .ssd_xx = 0, .ssd_xx1 = 0, 1969 .ssd_def32 = 1, 1970 .ssd_gran = 1 }, 1971 }; 1972 1973 void 1974 setidt(idx, func, typ, dpl, selec) 1975 int idx; 1976 inthand_t *func; 1977 int typ; 1978 int dpl; 1979 int selec; 1980 { 1981 struct gate_descriptor *ip; 1982 1983 ip = idt + idx; 1984 ip->gd_looffset = (int)func; 1985 ip->gd_selector = selec; 1986 ip->gd_stkcpy = 0; 1987 ip->gd_xx = 0; 1988 ip->gd_type = typ; 1989 ip->gd_dpl = dpl; 1990 ip->gd_p = 1; 1991 ip->gd_hioffset = ((int)func)>>16 ; 1992 } 1993 1994 extern inthand_t 1995 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), 1996 IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), 1997 IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), 1998 IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), 1999 IDTVEC(xmm), 2000 #ifdef KDTRACE_HOOKS 2001 IDTVEC(dtrace_ret), 2002 #endif 2003 #ifdef XENHVM 2004 IDTVEC(xen_intr_upcall), 2005 #endif 2006 IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); 2007 2008 #ifdef DDB 2009 /* 2010 * Display the index and function name of any IDT entries that don't use 2011 * the default 'rsvd' entry point. 
 */
DB_SHOW_COMMAND(idt, db_show_idt)
{
	struct gate_descriptor *ip;
	int idx;
	uintptr_t func;

	ip = idt;
	for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
		/* Reassemble the split 16-bit halves of the handler address. */
		func = (ip->gd_hioffset << 16 | ip->gd_looffset);
		if (func != (uintptr_t)&IDTVEC(rsvd)) {
			db_printf("%3d\t", idx);
			db_printsym(func, DB_STGY_PROC);
			db_printf("\n");
		}
		ip++;
	}
}

/* Show privileged registers. */
DB_SHOW_COMMAND(sysregs, db_show_sysregs)
{
	uint64_t idtr, gdtr;

	idtr = ridt();
	db_printf("idtr\t0x%08x/%04x\n",
	    (u_int)(idtr >> 16), (u_int)idtr & 0xffff);
	gdtr = rgdt();
	db_printf("gdtr\t0x%08x/%04x\n",
	    (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff);
	db_printf("ldtr\t0x%04x\n", rldt());
	db_printf("tr\t0x%04x\n", rtr());
	db_printf("cr0\t0x%08x\n", rcr0());
	db_printf("cr2\t0x%08x\n", rcr2());
	db_printf("cr3\t0x%08x\n", rcr3());
	db_printf("cr4\t0x%08x\n", rcr4());
}
#endif

/* Unpack a hardware segment descriptor into its software prototype form. */
void
sdtossd(sd, ssd)
	struct segment_descriptor *sd;
	struct soft_segment_descriptor *ssd;
{
	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
	ssd->ssd_type  = sd->sd_type;
	ssd->ssd_dpl   = sd->sd_dpl;
	ssd->ssd_p     = sd->sd_p;
	ssd->ssd_def32 = sd->sd_def32;
	ssd->ssd_gran  = sd->sd_gran;
}

#if !defined(PC98) && !defined(XEN)
/*
 * Insert [base, base+length) into the sorted physmap base/bound-pair
 * array, coalescing with adjacent entries where possible.  Returns 1 on
 * success (or harmless rejection of a zero/overlapping/out-of-range
 * region), 0 when the physmap table is full.
 */
static int
add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
    int *physmap_idxp)
{
	int i, insert_idx, physmap_idx;

	physmap_idx = *physmap_idxp;

	if (length == 0)
		return (1);

#ifndef PAE
	/* Without PAE only a 32-bit physical address space is usable. */
	if (base > 0xffffffff) {
		printf("%uK of memory above 4GB ignored\n",
		    (u_int)(length / 1024));
		return (1);
	}
#endif

	/*
	 * Find insertion point while checking for overlap.  Start off by
	 * assuming the new entry will be added to the end.
	 */
	insert_idx = physmap_idx + 2;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (base < physmap[i + 1]) {
			if (base + length <= physmap[i]) {
				insert_idx = i;
				break;
			}
			if (boothowto & RB_VERBOSE)
				printf(
		    "Overlapping memory regions, ignoring second region\n");
			return (1);
		}
	}

	/* See if we can prepend to the next entry. */
	if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) {
		physmap[insert_idx] = base;
		return (1);
	}

	/* See if we can append to the previous entry. */
	if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
		physmap[insert_idx - 1] += length;
		return (1);
	}

	physmap_idx += 2;
	*physmap_idxp = physmap_idx;
	if (physmap_idx == PHYSMAP_SIZE) {
		printf(
		"Too many segments in the physical address map, giving up\n");
		return (0);
	}

	/*
	 * Move the last 'N' entries down to make room for the new
	 * entry if needed.
	 */
	for (i = physmap_idx; i > insert_idx; i -= 2) {
		physmap[i] = physmap[i - 2];
		physmap[i + 1] = physmap[i - 1];
	}

	/* Insert the new entry. */
	physmap[insert_idx] = base;
	physmap[insert_idx + 1] = base + length;
	return (1);
}

/* Add one BIOS E820 map entry to physmap, skipping non-RAM regions. */
static int
add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp)
{
	if (boothowto & RB_VERBOSE)
		printf("SMAP type=%02x base=%016llx len=%016llx\n",
		    smap->type, smap->base, smap->length);

	if (smap->type != SMAP_TYPE_MEMORY)
		return (1);

	return (add_physmap_entry(smap->base, smap->length, physmap,
	    physmap_idxp));
}

/* Walk the loader-supplied E820 map and feed each entry into physmap. */
static void
add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap,
    int *physmap_idxp)
{
	struct bios_smap *smap, *smapend;
	u_int32_t smapsize;
	/*
	 * Memory map from INT 15:E820.
	 *
	 * subr_module.c says:
	 * "Consumer may safely assume that size value precedes data."
	 * ie: an int32_t immediately precedes SMAP.
	 */
	smapsize = *((u_int32_t *)smapbase - 1);
	smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);

	for (smap = smapbase; smap < smapend; smap++)
		if (!add_smap_entry(smap, physmap, physmap_idxp))
			break;
}
#endif /* !PC98 && !XEN */

#ifndef XEN
/*
 * Sanity-check the BIOS-reported base memory size and make the region
 * between the end of base memory and the ISA hole writable for both the
 * kernel and the vm86 emulation.
 */
static void
basemem_setup(void)
{
	vm_paddr_t pa;
	pt_entry_t *pte;
	int i;

	if (basemem > 640) {
		printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
			basemem);
		basemem = 640;
	}

	/*
	 * XXX if biosbasemem is now < 640, there is a `hole'
	 * between the end of base memory and the start of
	 * ISA memory.  The hole may be empty or it may
	 * contain BIOS code or data.  Map it read/write so
	 * that the BIOS can write to it.  (Memory from 0 to
	 * the physical end of the kernel is mapped read-only
	 * to begin with and then parts of it are remapped.
	 * The parts that aren't remapped form holes that
	 * remain read-only and are unused by the kernel.
	 * The base memory area is below the physical end of
	 * the kernel and right now forms a read-only hole.
	 * The part of it from PAGE_SIZE to
	 * (trunc_page(biosbasemem * 1024) - 1) will be
	 * remapped and used by the kernel later.)
	 *
	 * This code is similar to the code used in
	 * pmap_mapdev, but since no memory needs to be
	 * allocated we simply change the mapping.
	 */
	for (pa = trunc_page(basemem * 1024);
	     pa < ISA_HOLE_START; pa += PAGE_SIZE)
		pmap_kenter(KERNBASE + pa, pa);

	/*
	 * Map pages between basemem and ISA_HOLE_START, if any, r/w into
	 * the vm86 page table so that vm86 can scribble on them using
	 * the vm86 map too.  XXX: why 2 ways for this and only 1 way for
	 * page 0, at least as initialized here?
	 */
	pte = (pt_entry_t *)vm86paddr;
	for (i = basemem / 4; i < 160; i++)
		pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
}
#endif /* !XEN */

/*
 * Populate the (physmap) array with base/bound pairs describing the
 * available physical memory in the system, then test this memory and
 * build the phys_avail array describing the actually-available memory.
 *
 * If we cannot accurately determine the physical memory map, then use
 * value from the 0xE801 call, and failing that, the RTC.
 *
 * Total memory size may be set by the kernel environment variable
 * hw.physmem or the compile-time define MAXMEM.
 *
 * XXX first should be vm_paddr_t.
 */
#ifdef PC98
/*
 * PC98 flavour: memory sizes come from pc98_getmemsize() (base,
 * extended, and the amount present below 16MB) rather than from a
 * BIOS SMAP.
 */
static void
getmemsize(int first)
{
	int off, physmap_idx, pa_indx, da_indx;
	u_long physmem_tunable, memtest;
	vm_paddr_t physmap[PHYSMAP_SIZE];
	pt_entry_t *pte;
	quad_t dcons_addr, dcons_size;
	int i;
	int pg_n;
	u_int extmem;
	u_int under16;
	vm_paddr_t pa;

	bzero(physmap, sizeof(physmap));

	/* XXX - some of EPSON machines can't use PG_N */
	pg_n = PG_N;
	if (pc98_machine_type & M_EPSON_PC98) {
		switch (epson_machine_id) {
#ifdef WB_CACHE
		default:
#endif
		case EPSON_PC486_HX:
		case EPSON_PC486_HG:
		case EPSON_PC486_HA:
			pg_n = 0;
			break;
		}
	}

	under16 = pc98_getmemsize(&basemem, &extmem);
	basemem_setup();

	/* Two chunks: [0, basemem) and [1MB, 1MB + extmem). */
	physmap[0] = 0;
	physmap[1] = basemem * 1024;
	physmap_idx = 2;
	physmap[physmap_idx] = 0x100000;
	physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;

	/*
	 * Now, physmap contains a map of physical memory.
	 */

#ifdef SMP
	/* make hole for AP bootstrap code */
	physmap[1] = mp_bootaddress(physmap[1]);
#endif

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".  We may adjust this
	 * based on ``hw.physmem'' and the results of the memory test.
	 */
	Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
	/* MAXMEM is in KB; / 4 converts to 4K pages. */
	Maxmem = MAXMEM / 4;
#endif

	if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
		Maxmem = atop(physmem_tunable);

	/*
	 * By default keep the memtest enabled.  Use a general name so that
	 * one could eventually do more with the code than just disable it.
	 */
	memtest = 1;
	TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);

	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
	    (boothowto & RB_VERBOSE))
		printf("Physical memory use set to %ldK\n", Maxmem * 4);

	/*
	 * If Maxmem has been increased beyond what the system has detected,
	 * extend the last memory segment to the new limit.
	 */
	if (atop(physmap[physmap_idx + 1]) < Maxmem)
		physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);

	/*
	 * We need to divide chunk if Maxmem is larger than 16MB and
	 * under 16MB area is not full of memory.
	 * (1) system area (15-16MB region) is cut off
	 * (2) extended memory is only over 16MB area (ex. Melco "HYPERMEMORY")
	 */
	if ((under16 != 16 * 1024) && (extmem > 15 * 1024)) {
		/* 15M - 16M region is cut off, so need to divide chunk */
		physmap[physmap_idx + 1] = under16 * 1024;
		physmap_idx += 2;
		physmap[physmap_idx] = 0x1000000;
		/* End of chunk: 1MB (physmap[2]) + extmem in bytes. */
		physmap[physmap_idx + 1] = physmap[2] + extmem * 1024;
	}

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap(first);

	/*
	 * Size up each available chunk of physical memory.
	 */
	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
	pa_indx = 0;
	da_indx = 1;
	phys_avail[pa_indx++] = physmap[0];
	phys_avail[pa_indx] = physmap[0];
	dump_avail[da_indx] = physmap[0];
	pte = CMAP3;

	/*
	 * Get dcons buffer address
	 */
	if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
	    getenv_quad("dcons.size", &dcons_size) == 0)
		dcons_addr = 0;

	/*
	 * physmap is in bytes, so when converting to page boundaries,
	 * round up the start address and round down the end address.
	 */
	for (i = 0; i <= physmap_idx; i += 2) {
		vm_paddr_t end;

		end = ptoa((vm_paddr_t)Maxmem);
		if (physmap[i + 1] < end)
			end = trunc_page(physmap[i + 1]);
		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
			int tmp, page_bad, full;
			/* CADDR3 is the VA mapped by the CMAP3 pte above. */
			int *ptr = (int *)CADDR3;

			full = FALSE;
			/*
			 * block out kernel memory as not available.
			 */
			if (pa >= KERNLOAD && pa < first)
				goto do_dump_avail;

			/*
			 * block out dcons buffer
			 */
			if (dcons_addr > 0
			    && pa >= trunc_page(dcons_addr)
			    && pa < dcons_addr + dcons_size)
				goto do_dump_avail;

			page_bad = FALSE;
			if (memtest == 0)
				goto skip_memtest;

			/*
			 * map page into kernel: valid, read/write,non-cacheable
			 */
			*pte = pa | PG_V | PG_RW | pg_n;
			invltlb();

			tmp = *(int *)ptr;
			/*
			 * Test for alternating 1's and 0's
			 */
			*(volatile int *)ptr = 0xaaaaaaaa;
			if (*(volatile int *)ptr != 0xaaaaaaaa)
				page_bad = TRUE;
			/*
			 * Test for alternating 0's and 1's
			 */
			*(volatile int *)ptr = 0x55555555;
			if (*(volatile int *)ptr != 0x55555555)
				page_bad = TRUE;
			/*
			 * Test for all 1's
			 */
			*(volatile int *)ptr = 0xffffffff;
			if (*(volatile int *)ptr != 0xffffffff)
				page_bad = TRUE;
			/*
			 * Test for all 0's
			 */
			*(volatile int *)ptr = 0x0;
			if (*(volatile int *)ptr != 0x0)
				page_bad = TRUE;
			/*
			 * Restore original value.
			 */
			*(int *)ptr = tmp;

skip_memtest:
			/*
			 * Adjust array of valid/good pages.
			 */
			if (page_bad == TRUE)
				continue;
			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer. Otherwise start a new chunk.
			 * Note that "end" points one higher than end,
			 * making the range >= start and < end.
			 * If we're also doing a speculative memory
			 * test and we are at or past the end, bump up Maxmem
			 * so that we keep going. The first bad page
			 * will terminate the loop.
			 * NOTE(review): the visible loop never bumps Maxmem;
			 * the "speculative test" sentence appears to describe
			 * removed behavior -- confirm against history.
			 */
			if (phys_avail[pa_indx] == pa) {
				phys_avail[pa_indx] += PAGE_SIZE;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
					printf(
		"Too many holes in the physical address space, giving up\n");
					pa_indx--;
					full = TRUE;
					goto do_dump_avail;
				}
				phys_avail[pa_indx++] = pa;	/* start */
				phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
			}
			physmem++;
do_dump_avail:
			if (dump_avail[da_indx] == pa) {
				dump_avail[da_indx] += PAGE_SIZE;
			} else {
				da_indx++;
				if (da_indx == DUMP_AVAIL_ARRAY_END) {
					da_indx--;
					goto do_next;
				}
				dump_avail[da_indx++] = pa;	/* start */
				dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
			}
do_next:
			if (full)
				break;
		}
	}
	*pte = 0;
	invltlb();

	/*
	 * XXX
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(msgbufsize) >= phys_avail[pa_indx]) {
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	Maxmem = atop(phys_avail[pa_indx]);

	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(msgbufsize);

	/* Map the message buffer.
 */
	for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
		pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
		    off);

	PT_UPDATES_FLUSH();
}
#else /* PC98 */
/*
 * Non-PC98 flavour: prefer a loader-supplied SMAP, then a live INT
 * 15:E820 scan via vm86, then INT 15:E801, and finally the RTC as a
 * last resort.  Under XEN the map comes straight from xen_start_info.
 */
static void
getmemsize(int first)
{
	int has_smap, off, physmap_idx, pa_indx, da_indx;
	u_long memtest;
	vm_paddr_t physmap[PHYSMAP_SIZE];
	pt_entry_t *pte;
	quad_t dcons_addr, dcons_size, physmem_tunable;
#ifndef XEN
	int hasbrokenint12, i, res;
	u_int extmem;
	struct vm86frame vmf;
	struct vm86context vmc;
	vm_paddr_t pa;
	struct bios_smap *smap, *smapbase;
	caddr_t kmdp;
#endif

	has_smap = 0;
#if defined(XEN)
	/* The hypervisor tells us how many pages we have. */
	Maxmem = xen_start_info->nr_pages - init_first;
	physmem = Maxmem;
	basemem = 0;
	physmap[0] = init_first << PAGE_SHIFT;
	physmap[1] = ptoa(Maxmem) - round_page(msgbufsize);
	physmap_idx = 0;
#else
#ifdef XBOX
	if (arch_i386_is_xbox) {
		/*
		 * We queried the memory size before, so chop off 4MB for
		 * the framebuffer and inform the OS of this.
		 */
		physmap[0] = 0;
		physmap[1] = (arch_i386_xbox_memsize * 1024 * 1024) - XBOX_FB_SIZE;
		physmap_idx = 0;
		goto physmap_done;
	}
#endif
	bzero(&vmf, sizeof(vmf));
	bzero(physmap, sizeof(physmap));
	basemem = 0;

	/*
	 * Check if the loader supplied an SMAP memory map.  If so,
	 * use that and do not make any VM86 calls.
	 */
	physmap_idx = 0;
	smapbase = NULL;
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf32 kernel");
	if (kmdp != NULL)
		smapbase = (struct bios_smap *)preload_search_info(kmdp,
		    MODINFO_METADATA | MODINFOMD_SMAP);
	if (smapbase != NULL) {
		add_smap_entries(smapbase, physmap, &physmap_idx);
		has_smap = 1;
		goto have_smap;
	}

	/*
	 * Some newer BIOSes have a broken INT 12H implementation
	 * which causes a kernel panic immediately.  In this case, we
	 * need to use the SMAP to determine the base memory size.
	 */
	hasbrokenint12 = 0;
	TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12);
	if (hasbrokenint12 == 0) {
		/* Use INT12 to determine base memory size. */
		vm86_intcall(0x12, &vmf);
		basemem = vmf.vmf_ax;
		basemem_setup();
	}

	/*
	 * Fetch the memory map with INT 15:E820.  Map page 1 R/W into
	 * the kernel page table so we can use it as a buffer.  The
	 * kernel will unmap this page later.
	 */
	pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT);
	vmc.npages = 0;
	smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
	res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
	KASSERT(res != 0, ("vm86_getptr() failed: address not found"));

	/* E820 iteration: EBX is the continuation cookie, 0 when done. */
	vmf.vmf_ebx = 0;
	do {
		vmf.vmf_eax = 0xE820;
		vmf.vmf_edx = SMAP_SIG;
		vmf.vmf_ecx = sizeof(struct bios_smap);
		i = vm86_datacall(0x15, &vmf, &vmc);
		if (i || vmf.vmf_eax != SMAP_SIG)
			break;
		has_smap = 1;
		if (!add_smap_entry(smap, physmap, &physmap_idx))
			break;
	} while (vmf.vmf_ebx != 0);

have_smap:
	/*
	 * If we didn't fetch the "base memory" size from INT12,
	 * figure it out from the SMAP (or just guess).
	 */
	if (basemem == 0) {
		for (i = 0; i <= physmap_idx; i += 2) {
			if (physmap[i] == 0x00000000) {
				basemem = physmap[i + 1] / 1024;
				break;
			}
		}

		/* XXX: If we couldn't find basemem from SMAP, just guess. */
		if (basemem == 0)
			basemem = 640;
		basemem_setup();
	}

	/* A non-empty first chunk means the SMAP gave us a usable map. */
	if (physmap[1] != 0)
		goto physmap_done;

	/*
	 * If we failed to find an SMAP, figure out the extended
	 * memory size.  We will then build a simple memory map with
	 * two segments, one for "base memory" and the second for
	 * "extended memory".  Note that "extended memory" starts at a
	 * physical address of 1MB and that both basemem and extmem
	 * are in units of 1KB.
	 *
	 * First, try to fetch the extended memory size via INT 15:E801.
	 */
	vmf.vmf_ax = 0xE801;
	if (vm86_intcall(0x15, &vmf) == 0) {
		/* CX = KB between 1M and 16M, DX = 64K blocks above 16M. */
		extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
	} else {
		/*
		 * If INT15:E801 fails, this is our last ditch effort
		 * to determine the extended memory size.  Currently
		 * we prefer the RTC value over INT15:88.
		 */
#if 0
		vmf.vmf_ah = 0x88;
		vm86_intcall(0x15, &vmf);
		extmem = vmf.vmf_ax;
#else
		extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
#endif
	}

	/*
	 * Special hack for chipsets that still remap the 384k hole when
	 * there's 16MB of memory - this really confuses people that
	 * are trying to use bus mastering ISA controllers with the
	 * "16MB limit"; they only have 16MB, but the remapping puts
	 * them beyond the limit.
	 *
	 * If extended memory is between 15-16MB (16-17MB phys address range),
	 * chop it to 15MB.
	 */
	if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
		extmem = 15 * 1024;

	/* Two chunks: [0, basemem) and [1MB, 1MB + extmem). */
	physmap[0] = 0;
	physmap[1] = basemem * 1024;
	physmap_idx = 2;
	physmap[physmap_idx] = 0x100000;
	physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;

physmap_done:
#endif
	/*
	 * Now, physmap contains a map of physical memory.
	 */

#ifdef SMP
	/* make hole for AP bootstrap code */
	physmap[1] = mp_bootaddress(physmap[1]);
#endif

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".  We may adjust this
	 * based on ``hw.physmem'' and the results of the memory test.
	 */
	Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
	/* MAXMEM is in KB; / 4 converts to 4K pages. */
	Maxmem = MAXMEM / 4;
#endif

	if (TUNABLE_QUAD_FETCH("hw.physmem", &physmem_tunable))
		Maxmem = atop(physmem_tunable);

	/*
	 * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend
	 * the amount of memory in the system.
	 */
	if (has_smap && Maxmem > atop(physmap[physmap_idx + 1]))
		Maxmem = atop(physmap[physmap_idx + 1]);

	/*
	 * By default enable the memory test on real hardware, and disable
	 * it if we appear to be running in a VM.  This avoids touching all
	 * pages unnecessarily, which doesn't matter on real hardware but is
	 * bad for shared VM hosts.  Use a general name so that
	 * one could eventually do more with the code than just disable it.
	 */
	memtest = (vm_guest > VM_GUEST_NO) ? 0 : 1;
	TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);

	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
	    (boothowto & RB_VERBOSE))
		printf("Physical memory use set to %ldK\n", Maxmem * 4);

	/*
	 * If Maxmem has been increased beyond what the system has detected,
	 * extend the last memory segment to the new limit.
	 */
	if (atop(physmap[physmap_idx + 1]) < Maxmem)
		physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap(first);

	/*
	 * Size up each available chunk of physical memory.
	 */
	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
	pa_indx = 0;
	da_indx = 1;
	phys_avail[pa_indx++] = physmap[0];
	phys_avail[pa_indx] = physmap[0];
	dump_avail[da_indx] = physmap[0];
	pte = CMAP3;

	/*
	 * Get dcons buffer address
	 */
	if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
	    getenv_quad("dcons.size", &dcons_size) == 0)
		dcons_addr = 0;

#ifndef XEN
	/*
	 * physmap is in bytes, so when converting to page boundaries,
	 * round up the start address and round down the end address.
	 */
	for (i = 0; i <= physmap_idx; i += 2) {
		vm_paddr_t end;

		end = ptoa((vm_paddr_t)Maxmem);
		if (physmap[i + 1] < end)
			end = trunc_page(physmap[i + 1]);
		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
			int tmp, page_bad, full;
			/* CADDR3 is the VA mapped by the CMAP3 pte above. */
			int *ptr = (int *)CADDR3;

			full = FALSE;
			/*
			 * block out kernel memory as not available.
			 */
			if (pa >= KERNLOAD && pa < first)
				goto do_dump_avail;

			/*
			 * block out dcons buffer
			 */
			if (dcons_addr > 0
			    && pa >= trunc_page(dcons_addr)
			    && pa < dcons_addr + dcons_size)
				goto do_dump_avail;

			page_bad = FALSE;
			if (memtest == 0)
				goto skip_memtest;

			/*
			 * map page into kernel: valid, read/write,non-cacheable
			 */
			*pte = pa | PG_V | PG_RW | PG_N;
			invltlb();

			tmp = *(int *)ptr;
			/*
			 * Test for alternating 1's and 0's
			 */
			*(volatile int *)ptr = 0xaaaaaaaa;
			if (*(volatile int *)ptr != 0xaaaaaaaa)
				page_bad = TRUE;
			/*
			 * Test for alternating 0's and 1's
			 */
			*(volatile int *)ptr = 0x55555555;
			if (*(volatile int *)ptr != 0x55555555)
				page_bad = TRUE;
			/*
			 * Test for all 1's
			 */
			*(volatile int *)ptr = 0xffffffff;
			if (*(volatile int *)ptr != 0xffffffff)
				page_bad = TRUE;
			/*
			 * Test for all 0's
			 */
			*(volatile int *)ptr = 0x0;
			if (*(volatile int *)ptr != 0x0)
				page_bad = TRUE;
			/*
			 * Restore original value.
			 */
			*(int *)ptr = tmp;

skip_memtest:
			/*
			 * Adjust array of valid/good pages.
			 */
			if (page_bad == TRUE)
				continue;
			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer. Otherwise start a new chunk.
			 * Note that "end" points one higher than end,
			 * making the range >= start and < end.
			 * If we're also doing a speculative memory
			 * test and we are at or past the end, bump up Maxmem
			 * so that we keep going. The first bad page
			 * will terminate the loop.
			 * NOTE(review): the visible loop never bumps Maxmem;
			 * the "speculative test" sentence appears to describe
			 * removed behavior -- confirm against history.
			 */
			if (phys_avail[pa_indx] == pa) {
				phys_avail[pa_indx] += PAGE_SIZE;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
					printf(
		"Too many holes in the physical address space, giving up\n");
					pa_indx--;
					full = TRUE;
					goto do_dump_avail;
				}
				phys_avail[pa_indx++] = pa;	/* start */
				phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
			}
			physmem++;
do_dump_avail:
			if (dump_avail[da_indx] == pa) {
				dump_avail[da_indx] += PAGE_SIZE;
			} else {
				da_indx++;
				if (da_indx == DUMP_AVAIL_ARRAY_END) {
					da_indx--;
					goto do_next;
				}
				dump_avail[da_indx++] = pa;	/* start */
				dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
			}
do_next:
			if (full)
				break;
		}
	}
	*pte = 0;
	invltlb();
#else
	/* XEN: no memory test; the hypervisor-provided range is trusted. */
	phys_avail[0] = physfree;
	phys_avail[1] = xen_start_info->nr_pages*PAGE_SIZE;
	dump_avail[0] = 0;
	dump_avail[1] = xen_start_info->nr_pages*PAGE_SIZE;

#endif

	/*
	 * XXX
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(msgbufsize) >= phys_avail[pa_indx]) {
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	Maxmem = atop(phys_avail[pa_indx]);

	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(msgbufsize);

	/* Map the message buffer.
 */
	for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
		pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
		    off);

	PT_UPDATES_FLUSH();
}
#endif /* PC98 */

#ifdef XEN
#define MTOPSIZE (1<<(14 + PAGE_SHIFT))

/*
 * XEN flavour of the machine-dependent bootstrap entry point: set up
 * thread0, the GDT/LDT (registered with the hypervisor rather than
 * loaded directly), event/failsafe callbacks, per-CPU data, consoles,
 * the IDT fragments still owned by the kernel, and the initial TSS.
 * Returns the address of thread0's pcb for locore to use as the
 * kernel stack location.
 */
register_t
init386(first)
	int first;
{
	unsigned long gdtmachpfn;
	int error, gsel_tss, metadata_missing, x, pa;
	struct pcpu *pc;
#ifdef CPU_ENABLE_SSE
	struct xstate_hdr *xhdr;
#endif
	struct callback_register event = {
		.type = CALLBACKTYPE_event,
		.address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback },
	};
	struct callback_register failsafe = {
		.type = CALLBACKTYPE_failsafe,
		.address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback },
	};

	thread0.td_kstack = proc0kstack;
	thread0.td_kstack_pages = KSTACK_PAGES;

	/*
	 * This may be done better later if it gets more high level
	 * components in it. If so just link td->td_proc here.
	 */
	proc_linkup0(&proc0, &thread0);

	metadata_missing = 0;
	if (xen_start_info->mod_start) {
		preload_metadata = (caddr_t)xen_start_info->mod_start;
		preload_bootstrap_relocate(KERNBASE);
	} else {
		metadata_missing = 1;
	}
	if (envmode == 1)
		kern_envp = static_env;
	else if ((caddr_t)xen_start_info->cmd_line)
		kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line);

	boothowto |= xen_boothowto(kern_envp);

	/* Init basic tunables, hz etc */
	init_param1();

	/*
	 * XEN occupies a portion of the upper virtual address space
	 * At its base it manages an array mapping machine page frames
	 * to physical page frames - hence we need to be able to
	 * access 4GB - (64MB  - 4MB + 64k)
	 */
	gdt_segs[GPRIV_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
	gdt_segs[GUFS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
	gdt_segs[GUGS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
	gdt_segs[GCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
	gdt_segs[GDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
	gdt_segs[GUCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
	gdt_segs[GUDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
	gdt_segs[GBIOSLOWMEM_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);

	pc = &__pcpu[0];
	gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
	gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;

	/* The GDT page must be writable while we fill it in. */
	PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V | PG_RW);
	bzero(gdt, PAGE_SIZE);
	for (x = 0; x < NGDT; x++)
		ssdtosd(&gdt_segs[x], &gdt[x].sd);

	mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN);

	/*
	 * Hand the GDT frame to the hypervisor; it must be mapped
	 * read-only (PG_V without PG_RW) before HYPERVISOR_set_gdt().
	 */
	gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
	PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V);
	PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0);
	lgdt(&r_gdt);
	gdtset = 1;

	if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) {
		panic("set_trap_table failed - error %d\n", error);
	}

	error = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
	if (error == 0)
		error = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
#if	CONFIG_XEN_COMPAT <= 0x030002
	/* Fall back to the legacy callback interface on old hypervisors. */
	if (error == -ENOXENSYS)
		HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL),
		    (unsigned long)Xhypervisor_callback,
		    GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback);
#endif
	pcpu_init(pc, 0, sizeof(struct pcpu));
	for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE)
		pmap_kenter(pa + KERNBASE, pa);
	dpcpu_init((void *)(first + KERNBASE), 0);
	/* Account for the dynamic per-CPU area we just consumed. */
	first += DPCPU_SIZE;
	physfree += DPCPU_SIZE;
	init_first += DPCPU_SIZE / PAGE_SIZE;

	PCPU_SET(prvspace, pc);
	PCPU_SET(curthread, &thread0);

	/*
	 * Initialize mutexes.
	 *
	 * icu_lock: in order to allow an interrupt to occur in a critical
	 * 	     section, to set pcpu->ipending (etc...) properly, we
	 *	     must be able to get the icu lock, so it can't be
	 *	     under witness.
	 */
	mutex_init();
	mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE);

	/* make ldt memory segments */
	PT_SET_MA(ldt, xpmap_ptom(VTOP(ldt)) | PG_V | PG_RW);
	bzero(ldt, PAGE_SIZE);
	ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
	ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
	for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
		ssdtosd(&ldt_segs[x], &ldt[x].sd);

	default_proc_ldt.ldt_base = (caddr_t)ldt;
	default_proc_ldt.ldt_len = 6;
	_default_ldt = (int)&default_proc_ldt;
	PCPU_SET(currentldt, _default_ldt);
	/* LDT page must be read-only before handing it to the hypervisor. */
	PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW);
	xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0]));

#if defined(XEN_PRIVILEGED)
	/*
	 * Initialize the i8254 before the console so that console
	 * initialization can use DELAY().
	 */
	i8254_init();
#endif

	/*
	 * Initialize the console before we print anything out.
	 */
	cninit();

	if (metadata_missing)
		printf("WARNING: loader(8) metadata is missing!\n");

#ifdef DEV_ISA
#ifdef DEV_ATPIC
	elcr_probe();
	atpic_startup();
#else
	/* Reset and mask the atpics and leave them shut down. */
	atpic_reset();

	/*
	 * Point the ICU spurious interrupt vectors at the APIC spurious
	 * interrupt handler.
	 */
	setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
#endif
#endif

#ifdef DDB
	db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab);
#endif

	kdb_init();

#ifdef KDB
	if (boothowto & RB_KDB)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif

	finishidentcpu();	/* Final stage of CPU initialization */
	setidt(IDT_UD, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_GP, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	initializecpu();	/* Initialize CPU registers */
	initializecpucache();

	/* pointer to selector slot for %fs/%gs */
	PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);

	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
	    dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
#if defined(PAE) || defined(PAE_TABLES)
	dblfault_tss.tss_cr3 = (int)IdlePDPT;
#else
	dblfault_tss.tss_cr3 = (int)IdlePTD;
#endif
	dblfault_tss.tss_eip = (int)dblfault_handler;
	dblfault_tss.tss_eflags = PSL_KERNEL;
	dblfault_tss.tss_ds = dblfault_tss.tss_es =
	    dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
	dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);

	vm86_initialize();
	getmemsize(first);
	init_param2(physmem);

	/* now running on new page tables, configured,and u/iom is accessible */

	msgbufinit(msgbufp, msgbufsize);
#ifdef DEV_NPX
	npxinit(true);
#endif
	/*
	 * Set up thread0 pcb after npxinit calculated pcb + fpu save
	 * area size.  Zero out the extended state header in fpu save
	 * area.
	 */
	thread0.td_pcb = get_pcb_td(&thread0);
	bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
#ifdef CPU_ENABLE_SSE
	if (use_xsave) {
		xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
		    1);
		xhdr->xstate_bv = xsave_mask;
	}
#endif
	PCPU_SET(curpcb, thread0.td_pcb);
	/* make an initial tss so cpu can get interrupt stack on syscall! */
	/* Note: -16 is so we can grow the trapframe if we came from vm86 */
	PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16);
	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	/* Under XEN the ring-0 stack is registered via a hypercall. */
	HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL),
	    PCPU_GET(common_tss.tss_esp0));

	/* transfer to user mode */

	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
	_udatasel = GSEL(GUDATA_SEL, SEL_UPL);

	/* setup proc 0's pcb */
	thread0.td_pcb->pcb_flags = 0;
#if defined(PAE) || defined(PAE_TABLES)
	thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
#else
	thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
#endif
	thread0.td_pcb->pcb_ext = 0;
	thread0.td_frame = &proc0_tf;
	thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0];
	thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];

	cpu_probe_amdc1e();

	/* Location of kernel stack for locore */
	return ((register_t)thread0.td_pcb);
}

#else
register_t
init386(first)
	int first;
{
	struct gate_descriptor *gdp;
	int gsel_tss, metadata_missing, x, pa;
	struct pcpu *pc;
#ifdef CPU_ENABLE_SSE
	struct xstate_hdr *xhdr;
#endif

	thread0.td_kstack = proc0kstack;
	thread0.td_kstack_pages = KSTACK_PAGES;

	/*
	 * This may be done better later if it gets more high level
	 * components in it.
If so just link td->td_proc here. 3179 */ 3180 proc_linkup0(&proc0, &thread0); 3181 3182 #ifdef PC98 3183 /* 3184 * Initialize DMAC 3185 */ 3186 pc98_init_dmac(); 3187 #endif 3188 3189 metadata_missing = 0; 3190 if (bootinfo.bi_modulep) { 3191 preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; 3192 preload_bootstrap_relocate(KERNBASE); 3193 } else { 3194 metadata_missing = 1; 3195 } 3196 if (envmode == 1) 3197 kern_envp = static_env; 3198 else if (bootinfo.bi_envp) 3199 kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; 3200 3201 /* Init basic tunables, hz etc */ 3202 init_param1(); 3203 3204 /* 3205 * Make gdt memory segments. All segments cover the full 4GB 3206 * of address space and permissions are enforced at page level. 3207 */ 3208 gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); 3209 gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); 3210 gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); 3211 gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); 3212 gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); 3213 gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); 3214 3215 pc = &__pcpu[0]; 3216 gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); 3217 gdt_segs[GPRIV_SEL].ssd_base = (int) pc; 3218 gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; 3219 3220 for (x = 0; x < NGDT; x++) 3221 ssdtosd(&gdt_segs[x], &gdt[x].sd); 3222 3223 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; 3224 r_gdt.rd_base = (int) gdt; 3225 mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); 3226 lgdt(&r_gdt); 3227 3228 pcpu_init(pc, 0, sizeof(struct pcpu)); 3229 for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) 3230 pmap_kenter(pa + KERNBASE, pa); 3231 dpcpu_init((void *)(first + KERNBASE), 0); 3232 first += DPCPU_SIZE; 3233 PCPU_SET(prvspace, pc); 3234 PCPU_SET(curthread, &thread0); 3235 3236 /* 3237 * Initialize mutexes. 3238 * 3239 * icu_lock: in order to allow an interrupt to occur in a critical 3240 * section, to set pcpu->ipending (etc...) 
properly, we 3241 * must be able to get the icu lock, so it can't be 3242 * under witness. 3243 */ 3244 mutex_init(); 3245 mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); 3246 3247 /* make ldt memory segments */ 3248 ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); 3249 ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); 3250 for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) 3251 ssdtosd(&ldt_segs[x], &ldt[x].sd); 3252 3253 _default_ldt = GSEL(GLDT_SEL, SEL_KPL); 3254 lldt(_default_ldt); 3255 PCPU_SET(currentldt, _default_ldt); 3256 3257 /* exceptions */ 3258 for (x = 0; x < NIDT; x++) 3259 setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, 3260 GSEL(GCODE_SEL, SEL_KPL)); 3261 setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, 3262 GSEL(GCODE_SEL, SEL_KPL)); 3263 setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, 3264 GSEL(GCODE_SEL, SEL_KPL)); 3265 setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL, 3266 GSEL(GCODE_SEL, SEL_KPL)); 3267 setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, 3268 GSEL(GCODE_SEL, SEL_KPL)); 3269 setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, 3270 GSEL(GCODE_SEL, SEL_KPL)); 3271 setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, 3272 GSEL(GCODE_SEL, SEL_KPL)); 3273 setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, 3274 GSEL(GCODE_SEL, SEL_KPL)); 3275 setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL 3276 , GSEL(GCODE_SEL, SEL_KPL)); 3277 setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); 3278 setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, 3279 GSEL(GCODE_SEL, SEL_KPL)); 3280 setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, 3281 GSEL(GCODE_SEL, SEL_KPL)); 3282 setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, 3283 GSEL(GCODE_SEL, SEL_KPL)); 3284 setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, 3285 GSEL(GCODE_SEL, SEL_KPL)); 3286 setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, 3287 GSEL(GCODE_SEL, SEL_KPL)); 3288 setidt(IDT_PF, 
&IDTVEC(page), SDT_SYS386IGT, SEL_KPL, 3289 GSEL(GCODE_SEL, SEL_KPL)); 3290 setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, 3291 GSEL(GCODE_SEL, SEL_KPL)); 3292 setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, 3293 GSEL(GCODE_SEL, SEL_KPL)); 3294 setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, 3295 GSEL(GCODE_SEL, SEL_KPL)); 3296 setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, 3297 GSEL(GCODE_SEL, SEL_KPL)); 3298 setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, 3299 GSEL(GCODE_SEL, SEL_KPL)); 3300 #ifdef KDTRACE_HOOKS 3301 setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL, 3302 GSEL(GCODE_SEL, SEL_KPL)); 3303 #endif 3304 #ifdef XENHVM 3305 setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386IGT, SEL_UPL, 3306 GSEL(GCODE_SEL, SEL_KPL)); 3307 #endif 3308 3309 r_idt.rd_limit = sizeof(idt0) - 1; 3310 r_idt.rd_base = (int) idt; 3311 lidt(&r_idt); 3312 3313 #ifdef XBOX 3314 /* 3315 * The following code queries the PCI ID of 0:0:0. For the XBOX, 3316 * This should be 0x10de / 0x02a5. 3317 * 3318 * This is exactly what Linux does. 3319 */ 3320 outl(0xcf8, 0x80000000); 3321 if (inl(0xcfc) == 0x02a510de) { 3322 arch_i386_is_xbox = 1; 3323 pic16l_setled(XBOX_LED_GREEN); 3324 3325 /* 3326 * We are an XBOX, but we may have either 64MB or 128MB of 3327 * memory. The PCI host bridge should be programmed for this, 3328 * so we just query it. 3329 */ 3330 outl(0xcf8, 0x80000084); 3331 arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64; 3332 } 3333 #endif /* XBOX */ 3334 3335 /* 3336 * Initialize the clock before the console so that console 3337 * initialization can use DELAY(). 3338 */ 3339 clock_init(); 3340 3341 /* 3342 * Initialize the console before we print anything out. 
3343 */ 3344 cninit(); 3345 3346 if (metadata_missing) 3347 printf("WARNING: loader(8) metadata is missing!\n"); 3348 3349 #ifdef DEV_ISA 3350 #ifdef DEV_ATPIC 3351 #ifndef PC98 3352 elcr_probe(); 3353 #endif 3354 atpic_startup(); 3355 #else 3356 /* Reset and mask the atpics and leave them shut down. */ 3357 atpic_reset(); 3358 3359 /* 3360 * Point the ICU spurious interrupt vectors at the APIC spurious 3361 * interrupt handler. 3362 */ 3363 setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, 3364 GSEL(GCODE_SEL, SEL_KPL)); 3365 setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, 3366 GSEL(GCODE_SEL, SEL_KPL)); 3367 #endif 3368 #endif 3369 3370 #ifdef DDB 3371 db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab); 3372 #endif 3373 3374 kdb_init(); 3375 3376 #ifdef KDB 3377 if (boothowto & RB_KDB) 3378 kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); 3379 #endif 3380 3381 finishidentcpu(); /* Final stage of CPU initialization */ 3382 setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, 3383 GSEL(GCODE_SEL, SEL_KPL)); 3384 setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, 3385 GSEL(GCODE_SEL, SEL_KPL)); 3386 initializecpu(); /* Initialize CPU registers */ 3387 initializecpucache(); 3388 3389 /* pointer to selector slot for %fs/%gs */ 3390 PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); 3391 3392 dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = 3393 dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; 3394 dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = 3395 dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); 3396 #if defined(PAE) || defined(PAE_TABLES) 3397 dblfault_tss.tss_cr3 = (int)IdlePDPT; 3398 #else 3399 dblfault_tss.tss_cr3 = (int)IdlePTD; 3400 #endif 3401 dblfault_tss.tss_eip = (int)dblfault_handler; 3402 dblfault_tss.tss_eflags = PSL_KERNEL; 3403 dblfault_tss.tss_ds = dblfault_tss.tss_es = 3404 dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); 3405 
dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); 3406 dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); 3407 dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); 3408 3409 vm86_initialize(); 3410 getmemsize(first); 3411 init_param2(physmem); 3412 3413 /* now running on new page tables, configured,and u/iom is accessible */ 3414 3415 msgbufinit(msgbufp, msgbufsize); 3416 #ifdef DEV_NPX 3417 npxinit(true); 3418 #endif 3419 /* 3420 * Set up thread0 pcb after npxinit calculated pcb + fpu save 3421 * area size. Zero out the extended state header in fpu save 3422 * area. 3423 */ 3424 thread0.td_pcb = get_pcb_td(&thread0); 3425 bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); 3426 #ifdef CPU_ENABLE_SSE 3427 if (use_xsave) { 3428 xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 3429 1); 3430 xhdr->xstate_bv = xsave_mask; 3431 } 3432 #endif 3433 PCPU_SET(curpcb, thread0.td_pcb); 3434 /* make an initial tss so cpu can get interrupt stack on syscall! */ 3435 /* Note: -16 is so we can grow the trapframe if we came from vm86 */ 3436 PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16); 3437 PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); 3438 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 3439 PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); 3440 PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 3441 PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); 3442 ltr(gsel_tss); 3443 3444 /* make a call gate to reenter kernel with */ 3445 gdp = &ldt[LSYS5CALLS_SEL].gd; 3446 3447 x = (int) &IDTVEC(lcall_syscall); 3448 gdp->gd_looffset = x; 3449 gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); 3450 gdp->gd_stkcpy = 1; 3451 gdp->gd_type = SDT_SYS386CGT; 3452 gdp->gd_dpl = SEL_UPL; 3453 gdp->gd_p = 1; 3454 gdp->gd_hioffset = x >> 16; 3455 3456 /* XXX does this work? */ 3457 /* XXX yes! 
*/ 3458 ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; 3459 ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; 3460 3461 /* transfer to user mode */ 3462 3463 _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); 3464 _udatasel = GSEL(GUDATA_SEL, SEL_UPL); 3465 3466 /* setup proc 0's pcb */ 3467 thread0.td_pcb->pcb_flags = 0; 3468 #if defined(PAE) || defined(PAE_TABLES) 3469 thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; 3470 #else 3471 thread0.td_pcb->pcb_cr3 = (int)IdlePTD; 3472 #endif 3473 thread0.td_pcb->pcb_ext = 0; 3474 thread0.td_frame = &proc0_tf; 3475 3476 cpu_probe_amdc1e(); 3477 3478 #ifdef FDT 3479 x86_init_fdt(); 3480 #endif 3481 3482 /* Location of kernel stack for locore */ 3483 return ((register_t)thread0.td_pcb); 3484 } 3485 #endif 3486 3487 void 3488 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) 3489 { 3490 3491 pcpu->pc_acpi_id = 0xffffffff; 3492 } 3493 3494 #ifndef PC98 3495 static int 3496 smap_sysctl_handler(SYSCTL_HANDLER_ARGS) 3497 { 3498 struct bios_smap *smapbase; 3499 struct bios_smap_xattr smap; 3500 caddr_t kmdp; 3501 uint32_t *smapattr; 3502 int count, error, i; 3503 3504 /* Retrieve the system memory map from the loader. 
*/ 3505 kmdp = preload_search_by_type("elf kernel"); 3506 if (kmdp == NULL) 3507 kmdp = preload_search_by_type("elf32 kernel"); 3508 if (kmdp == NULL) 3509 return (0); 3510 smapbase = (struct bios_smap *)preload_search_info(kmdp, 3511 MODINFO_METADATA | MODINFOMD_SMAP); 3512 if (smapbase == NULL) 3513 return (0); 3514 smapattr = (uint32_t *)preload_search_info(kmdp, 3515 MODINFO_METADATA | MODINFOMD_SMAP_XATTR); 3516 count = *((u_int32_t *)smapbase - 1) / sizeof(*smapbase); 3517 error = 0; 3518 for (i = 0; i < count; i++) { 3519 smap.base = smapbase[i].base; 3520 smap.length = smapbase[i].length; 3521 smap.type = smapbase[i].type; 3522 if (smapattr != NULL) 3523 smap.xattr = smapattr[i]; 3524 else 3525 smap.xattr = 0; 3526 error = SYSCTL_OUT(req, &smap, sizeof(smap)); 3527 } 3528 return (error); 3529 } 3530 SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, 3531 smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data"); 3532 #endif /* !PC98 */ 3533 3534 void 3535 spinlock_enter(void) 3536 { 3537 struct thread *td; 3538 register_t flags; 3539 3540 td = curthread; 3541 if (td->td_md.md_spinlock_count == 0) { 3542 flags = intr_disable(); 3543 td->td_md.md_spinlock_count = 1; 3544 td->td_md.md_saved_flags = flags; 3545 } else 3546 td->td_md.md_spinlock_count++; 3547 critical_enter(); 3548 } 3549 3550 void 3551 spinlock_exit(void) 3552 { 3553 struct thread *td; 3554 register_t flags; 3555 3556 td = curthread; 3557 critical_exit(); 3558 flags = td->td_md.md_saved_flags; 3559 td->td_md.md_spinlock_count--; 3560 if (td->td_md.md_spinlock_count == 0) 3561 intr_restore(flags); 3562 } 3563 3564 #if defined(I586_CPU) && !defined(NO_F00F_HACK) 3565 static void f00f_hack(void *unused); 3566 SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); 3567 3568 static void 3569 f00f_hack(void *unused) 3570 { 3571 struct gate_descriptor *new_idt; 3572 vm_offset_t tmp; 3573 3574 if (!has_f00f_bug) 3575 return; 3576 3577 GIANT_REQUIRED; 3578 
3579 printf("Intel Pentium detected, installing workaround for F00F bug\n"); 3580 3581 tmp = kmem_malloc(kernel_arena, PAGE_SIZE * 2, M_WAITOK | M_ZERO); 3582 if (tmp == 0) 3583 panic("kmem_malloc returned 0"); 3584 3585 /* Put the problematic entry (#6) at the end of the lower page. */ 3586 new_idt = (struct gate_descriptor*) 3587 (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); 3588 bcopy(idt, new_idt, sizeof(idt0)); 3589 r_idt.rd_base = (u_int)new_idt; 3590 lidt(&r_idt); 3591 idt = new_idt; 3592 pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ); 3593 } 3594 #endif /* defined(I586_CPU) && !NO_F00F_HACK */ 3595 3596 /* 3597 * Construct a PCB from a trapframe. This is called from kdb_trap() where 3598 * we want to start a backtrace from the function that caused us to enter 3599 * the debugger. We have the context in the trapframe, but base the trace 3600 * on the PCB. The PCB doesn't have to be perfect, as long as it contains 3601 * enough for a backtrace. 3602 */ 3603 void 3604 makectx(struct trapframe *tf, struct pcb *pcb) 3605 { 3606 3607 pcb->pcb_edi = tf->tf_edi; 3608 pcb->pcb_esi = tf->tf_esi; 3609 pcb->pcb_ebp = tf->tf_ebp; 3610 pcb->pcb_ebx = tf->tf_ebx; 3611 pcb->pcb_eip = tf->tf_eip; 3612 pcb->pcb_esp = (ISPL(tf->tf_cs)) ? 
tf->tf_esp : (int)(tf + 1) - 8; 3613 } 3614 3615 int 3616 ptrace_set_pc(struct thread *td, u_long addr) 3617 { 3618 3619 td->td_frame->tf_eip = addr; 3620 return (0); 3621 } 3622 3623 int 3624 ptrace_single_step(struct thread *td) 3625 { 3626 td->td_frame->tf_eflags |= PSL_T; 3627 return (0); 3628 } 3629 3630 int 3631 ptrace_clear_single_step(struct thread *td) 3632 { 3633 td->td_frame->tf_eflags &= ~PSL_T; 3634 return (0); 3635 } 3636 3637 int 3638 fill_regs(struct thread *td, struct reg *regs) 3639 { 3640 struct pcb *pcb; 3641 struct trapframe *tp; 3642 3643 tp = td->td_frame; 3644 pcb = td->td_pcb; 3645 regs->r_gs = pcb->pcb_gs; 3646 return (fill_frame_regs(tp, regs)); 3647 } 3648 3649 int 3650 fill_frame_regs(struct trapframe *tp, struct reg *regs) 3651 { 3652 regs->r_fs = tp->tf_fs; 3653 regs->r_es = tp->tf_es; 3654 regs->r_ds = tp->tf_ds; 3655 regs->r_edi = tp->tf_edi; 3656 regs->r_esi = tp->tf_esi; 3657 regs->r_ebp = tp->tf_ebp; 3658 regs->r_ebx = tp->tf_ebx; 3659 regs->r_edx = tp->tf_edx; 3660 regs->r_ecx = tp->tf_ecx; 3661 regs->r_eax = tp->tf_eax; 3662 regs->r_eip = tp->tf_eip; 3663 regs->r_cs = tp->tf_cs; 3664 regs->r_eflags = tp->tf_eflags; 3665 regs->r_esp = tp->tf_esp; 3666 regs->r_ss = tp->tf_ss; 3667 return (0); 3668 } 3669 3670 int 3671 set_regs(struct thread *td, struct reg *regs) 3672 { 3673 struct pcb *pcb; 3674 struct trapframe *tp; 3675 3676 tp = td->td_frame; 3677 if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || 3678 !CS_SECURE(regs->r_cs)) 3679 return (EINVAL); 3680 pcb = td->td_pcb; 3681 tp->tf_fs = regs->r_fs; 3682 tp->tf_es = regs->r_es; 3683 tp->tf_ds = regs->r_ds; 3684 tp->tf_edi = regs->r_edi; 3685 tp->tf_esi = regs->r_esi; 3686 tp->tf_ebp = regs->r_ebp; 3687 tp->tf_ebx = regs->r_ebx; 3688 tp->tf_edx = regs->r_edx; 3689 tp->tf_ecx = regs->r_ecx; 3690 tp->tf_eax = regs->r_eax; 3691 tp->tf_eip = regs->r_eip; 3692 tp->tf_cs = regs->r_cs; 3693 tp->tf_eflags = regs->r_eflags; 3694 tp->tf_esp = regs->r_esp; 3695 tp->tf_ss = regs->r_ss; 3696 
pcb->pcb_gs = regs->r_gs; 3697 return (0); 3698 } 3699 3700 #ifdef CPU_ENABLE_SSE 3701 static void 3702 fill_fpregs_xmm(sv_xmm, sv_87) 3703 struct savexmm *sv_xmm; 3704 struct save87 *sv_87; 3705 { 3706 register struct env87 *penv_87 = &sv_87->sv_env; 3707 register struct envxmm *penv_xmm = &sv_xmm->sv_env; 3708 int i; 3709 3710 bzero(sv_87, sizeof(*sv_87)); 3711 3712 /* FPU control/status */ 3713 penv_87->en_cw = penv_xmm->en_cw; 3714 penv_87->en_sw = penv_xmm->en_sw; 3715 penv_87->en_tw = penv_xmm->en_tw; 3716 penv_87->en_fip = penv_xmm->en_fip; 3717 penv_87->en_fcs = penv_xmm->en_fcs; 3718 penv_87->en_opcode = penv_xmm->en_opcode; 3719 penv_87->en_foo = penv_xmm->en_foo; 3720 penv_87->en_fos = penv_xmm->en_fos; 3721 3722 /* FPU registers */ 3723 for (i = 0; i < 8; ++i) 3724 sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; 3725 } 3726 3727 static void 3728 set_fpregs_xmm(sv_87, sv_xmm) 3729 struct save87 *sv_87; 3730 struct savexmm *sv_xmm; 3731 { 3732 register struct env87 *penv_87 = &sv_87->sv_env; 3733 register struct envxmm *penv_xmm = &sv_xmm->sv_env; 3734 int i; 3735 3736 /* FPU control/status */ 3737 penv_xmm->en_cw = penv_87->en_cw; 3738 penv_xmm->en_sw = penv_87->en_sw; 3739 penv_xmm->en_tw = penv_87->en_tw; 3740 penv_xmm->en_fip = penv_87->en_fip; 3741 penv_xmm->en_fcs = penv_87->en_fcs; 3742 penv_xmm->en_opcode = penv_87->en_opcode; 3743 penv_xmm->en_foo = penv_87->en_foo; 3744 penv_xmm->en_fos = penv_87->en_fos; 3745 3746 /* FPU registers */ 3747 for (i = 0; i < 8; ++i) 3748 sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; 3749 } 3750 #endif /* CPU_ENABLE_SSE */ 3751 3752 int 3753 fill_fpregs(struct thread *td, struct fpreg *fpregs) 3754 { 3755 3756 KASSERT(td == curthread || TD_IS_SUSPENDED(td) || 3757 P_SHOULDSTOP(td->td_proc), 3758 ("not suspended thread %p", td)); 3759 #ifdef DEV_NPX 3760 npxgetregs(td); 3761 #else 3762 bzero(fpregs, sizeof(*fpregs)); 3763 #endif 3764 #ifdef CPU_ENABLE_SSE 3765 if (cpu_fxsr) 3766 
		fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm,
		    (struct save87 *)fpregs);
	else
#endif /* CPU_ENABLE_SSE */
		bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs,
		    sizeof(*fpregs));
	return (0);
}

/*
 * Load a new FPU state for a thread from the fpreg (save87) layout.
 * On FXSR-capable CPUs the image is converted into the XMM save area.
 */
int
set_fpregs(struct thread *td, struct fpreg *fpregs)
{

#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr)
		set_fpregs_xmm((struct save87 *)fpregs,
		    &get_pcb_user_save_td(td)->sv_xmm);
	else
#endif /* CPU_ENABLE_SSE */
		bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87,
		    sizeof(*fpregs));
#ifdef DEV_NPX
	npxuserinited(td);
#endif
	return (0);
}

/*
 * Get machine context.
 *
 * Fills *mcp from the thread's trapframe and PCB.  With
 * GET_MC_CLEAR_RET, the syscall return registers (%eax/%edx) and the
 * carry flag are cleared in the saved copy.
 */
int
get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
{
	struct trapframe *tp;
	struct segment_descriptor *sdp;

	tp = td->td_frame;

	/* sigonstack() is called with the proc lock held. */
	PROC_LOCK(curthread->td_proc);
	mcp->mc_onstack = sigonstack(tp->tf_esp);
	PROC_UNLOCK(curthread->td_proc);
	/* %gs is kept in the PCB, not the trapframe. */
	mcp->mc_gs = td->td_pcb->pcb_gs;
	mcp->mc_fs = tp->tf_fs;
	mcp->mc_es = tp->tf_es;
	mcp->mc_ds = tp->tf_ds;
	mcp->mc_edi = tp->tf_edi;
	mcp->mc_esi = tp->tf_esi;
	mcp->mc_ebp = tp->tf_ebp;
	mcp->mc_isp = tp->tf_isp;
	mcp->mc_eflags = tp->tf_eflags;
	if (flags & GET_MC_CLEAR_RET) {
		/* Present the context as if the interrupted call returned 0. */
		mcp->mc_eax = 0;
		mcp->mc_edx = 0;
		mcp->mc_eflags &= ~PSL_C;
	} else {
		mcp->mc_eax = tp->tf_eax;
		mcp->mc_edx = tp->tf_edx;
	}
	mcp->mc_ebx = tp->tf_ebx;
	mcp->mc_ecx = tp->tf_ecx;
	mcp->mc_eip = tp->tf_eip;
	mcp->mc_cs = tp->tf_cs;
	mcp->mc_esp = tp->tf_esp;
	mcp->mc_ss = tp->tf_ss;
	/* mc_len lets set_mcontext() validate the structure layout. */
	mcp->mc_len = sizeof(*mcp);
	/* NULL buffer/zero length: no extended FPU state is copied here. */
	get_fpcontext(td, mcp, NULL, 0);
	/* Reconstruct the %fs/%gs segment bases from their GDT slots. */
	sdp = &td->td_pcb->pcb_fsd;
	mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
	sdp = &td->td_pcb->pcb_gsd;
	mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
	mcp->mc_flags = 0;
	mcp->mc_xfpustate = 0;
	mcp->mc_xfpustate_len = 0;
	bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2));
	return (0);
}

/*
 * Set machine context.
 *
 * However, we don't set any but the user modifiable flags, and we won't
 * touch the cs selector.
 */
int
set_mcontext(struct thread *td, mcontext_t *mcp)
{
	struct trapframe *tp;
	char *xfpustate;
	int eflags, ret;

	tp = td->td_frame;
	/* Reject contexts with a mismatched size or unknown flag bits. */
	if (mcp->mc_len != sizeof(*mcp) ||
	    (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
		return (EINVAL);
	/* Only let the caller change the PSL_USERCHANGE bits of eflags. */
	eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
	    (tp->tf_eflags & ~PSL_USERCHANGE);
	if (mcp->mc_flags & _MC_HASFPXSTATE) {
		/* Length is bounded above, so the alloca below is bounded. */
		if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
		    sizeof(union savefpu))
			return (EINVAL);
		xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
		ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
		    mcp->mc_xfpustate_len);
		if (ret != 0)
			return (ret);
	} else
		xfpustate = NULL;
	/* Install the FPU state before touching the trapframe. */
	ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
	if (ret != 0)
		return (ret);
	tp->tf_fs = mcp->mc_fs;
	tp->tf_es = mcp->mc_es;
	tp->tf_ds = mcp->mc_ds;
	tp->tf_edi = mcp->mc_edi;
	tp->tf_esi = mcp->mc_esi;
	tp->tf_ebp = mcp->mc_ebp;
	tp->tf_ebx = mcp->mc_ebx;
	tp->tf_edx = mcp->mc_edx;
	tp->tf_ecx = mcp->mc_ecx;
	tp->tf_eax = mcp->mc_eax;
	tp->tf_eip = mcp->mc_eip;
	tp->tf_eflags = eflags;
	tp->tf_esp = mcp->mc_esp;
	tp->tf_ss = mcp->mc_ss;
	td->td_pcb->pcb_gs = mcp->mc_gs;
	return (0);
}

/*
 * Copy the thread's FPU state into *mcp; when xfpusave is non-NULL,
 * extended (xsave) state is copied out as well.
 */
static void
get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
    size_t xfpusave_len)
{
#ifdef CPU_ENABLE_SSE
	size_t max_len, len;
#endif

#ifndef DEV_NPX
	/* No FPU support compiled in: report an empty state. */
	mcp->mc_fpformat = _MC_FPFMT_NODEV;
	mcp->mc_ownedfp = _MC_FPOWNED_NONE;
	bzero(mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
#else
	mcp->mc_ownedfp = npxgetregs(td);
bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], 3909 sizeof(mcp->mc_fpstate)); 3910 mcp->mc_fpformat = npxformat(); 3911 #ifdef CPU_ENABLE_SSE 3912 if (!use_xsave || xfpusave_len == 0) 3913 return; 3914 max_len = cpu_max_ext_state_size - sizeof(union savefpu); 3915 len = xfpusave_len; 3916 if (len > max_len) { 3917 len = max_len; 3918 bzero(xfpusave + max_len, len - max_len); 3919 } 3920 mcp->mc_flags |= _MC_HASFPXSTATE; 3921 mcp->mc_xfpustate_len = len; 3922 bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); 3923 #endif 3924 #endif 3925 } 3926 3927 static int 3928 set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, 3929 size_t xfpustate_len) 3930 { 3931 union savefpu *fpstate; 3932 int error; 3933 3934 if (mcp->mc_fpformat == _MC_FPFMT_NODEV) 3935 return (0); 3936 else if (mcp->mc_fpformat != _MC_FPFMT_387 && 3937 mcp->mc_fpformat != _MC_FPFMT_XMM) 3938 return (EINVAL); 3939 else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { 3940 /* We don't care what state is left in the FPU or PCB. */ 3941 fpstate_drop(td); 3942 error = 0; 3943 } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || 3944 mcp->mc_ownedfp == _MC_FPOWNED_PCB) { 3945 #ifdef DEV_NPX 3946 fpstate = (union savefpu *)&mcp->mc_fpstate; 3947 #ifdef CPU_ENABLE_SSE 3948 if (cpu_fxsr) 3949 fpstate->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; 3950 #endif 3951 error = npxsetregs(td, fpstate, xfpustate, xfpustate_len); 3952 #else 3953 error = EINVAL; 3954 #endif 3955 } else 3956 return (EINVAL); 3957 return (error); 3958 } 3959 3960 static void 3961 fpstate_drop(struct thread *td) 3962 { 3963 3964 KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); 3965 critical_enter(); 3966 #ifdef DEV_NPX 3967 if (PCPU_GET(fpcurthread) == td) 3968 npxdrop(); 3969 #endif 3970 /* 3971 * XXX force a full drop of the npx. The above only drops it if we 3972 * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. 
3973 * 3974 * XXX I don't much like npxgetregs()'s semantics of doing a full 3975 * drop. Dropping only to the pcb matches fnsave's behaviour. 3976 * We only need to drop to !PCB_INITDONE in sendsig(). But 3977 * sendsig() is the only caller of npxgetregs()... perhaps we just 3978 * have too many layers. 3979 */ 3980 curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE | 3981 PCB_NPXUSERINITDONE); 3982 critical_exit(); 3983 } 3984 3985 int 3986 fill_dbregs(struct thread *td, struct dbreg *dbregs) 3987 { 3988 struct pcb *pcb; 3989 3990 if (td == NULL) { 3991 dbregs->dr[0] = rdr0(); 3992 dbregs->dr[1] = rdr1(); 3993 dbregs->dr[2] = rdr2(); 3994 dbregs->dr[3] = rdr3(); 3995 dbregs->dr[4] = rdr4(); 3996 dbregs->dr[5] = rdr5(); 3997 dbregs->dr[6] = rdr6(); 3998 dbregs->dr[7] = rdr7(); 3999 } else { 4000 pcb = td->td_pcb; 4001 dbregs->dr[0] = pcb->pcb_dr0; 4002 dbregs->dr[1] = pcb->pcb_dr1; 4003 dbregs->dr[2] = pcb->pcb_dr2; 4004 dbregs->dr[3] = pcb->pcb_dr3; 4005 dbregs->dr[4] = 0; 4006 dbregs->dr[5] = 0; 4007 dbregs->dr[6] = pcb->pcb_dr6; 4008 dbregs->dr[7] = pcb->pcb_dr7; 4009 } 4010 return (0); 4011 } 4012 4013 int 4014 set_dbregs(struct thread *td, struct dbreg *dbregs) 4015 { 4016 struct pcb *pcb; 4017 int i; 4018 4019 if (td == NULL) { 4020 load_dr0(dbregs->dr[0]); 4021 load_dr1(dbregs->dr[1]); 4022 load_dr2(dbregs->dr[2]); 4023 load_dr3(dbregs->dr[3]); 4024 load_dr4(dbregs->dr[4]); 4025 load_dr5(dbregs->dr[5]); 4026 load_dr6(dbregs->dr[6]); 4027 load_dr7(dbregs->dr[7]); 4028 } else { 4029 /* 4030 * Don't let an illegal value for dr7 get set. Specifically, 4031 * check for undefined settings. Setting these bit patterns 4032 * result in undefined behaviour and can lead to an unexpected 4033 * TRCTRAP. 
4034 */ 4035 for (i = 0; i < 4; i++) { 4036 if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) 4037 return (EINVAL); 4038 if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02) 4039 return (EINVAL); 4040 } 4041 4042 pcb = td->td_pcb; 4043 4044 /* 4045 * Don't let a process set a breakpoint that is not within the 4046 * process's address space. If a process could do this, it 4047 * could halt the system by setting a breakpoint in the kernel 4048 * (if ddb was enabled). Thus, we need to check to make sure 4049 * that no breakpoints are being enabled for addresses outside 4050 * process's address space. 4051 * 4052 * XXX - what about when the watched area of the user's 4053 * address space is written into from within the kernel 4054 * ... wouldn't that still cause a breakpoint to be generated 4055 * from within kernel mode? 4056 */ 4057 4058 if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { 4059 /* dr0 is enabled */ 4060 if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) 4061 return (EINVAL); 4062 } 4063 4064 if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { 4065 /* dr1 is enabled */ 4066 if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) 4067 return (EINVAL); 4068 } 4069 4070 if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { 4071 /* dr2 is enabled */ 4072 if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) 4073 return (EINVAL); 4074 } 4075 4076 if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { 4077 /* dr3 is enabled */ 4078 if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) 4079 return (EINVAL); 4080 } 4081 4082 pcb->pcb_dr0 = dbregs->dr[0]; 4083 pcb->pcb_dr1 = dbregs->dr[1]; 4084 pcb->pcb_dr2 = dbregs->dr[2]; 4085 pcb->pcb_dr3 = dbregs->dr[3]; 4086 pcb->pcb_dr6 = dbregs->dr[6]; 4087 pcb->pcb_dr7 = dbregs->dr[7]; 4088 4089 pcb->pcb_flags |= PCB_DBREGS; 4090 } 4091 4092 return (0); 4093 } 4094 4095 /* 4096 * Return > 0 if a hardware breakpoint has been hit, and the 4097 * breakpoint was in user space. Return 0, otherwise. 
4098 */ 4099 int 4100 user_dbreg_trap(void) 4101 { 4102 u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ 4103 u_int32_t bp; /* breakpoint bits extracted from dr6 */ 4104 int nbp; /* number of breakpoints that triggered */ 4105 caddr_t addr[4]; /* breakpoint addresses */ 4106 int i; 4107 4108 dr7 = rdr7(); 4109 if ((dr7 & 0x000000ff) == 0) { 4110 /* 4111 * all GE and LE bits in the dr7 register are zero, 4112 * thus the trap couldn't have been caused by the 4113 * hardware debug registers 4114 */ 4115 return 0; 4116 } 4117 4118 nbp = 0; 4119 dr6 = rdr6(); 4120 bp = dr6 & 0x0000000f; 4121 4122 if (!bp) { 4123 /* 4124 * None of the breakpoint bits are set meaning this 4125 * trap was not caused by any of the debug registers 4126 */ 4127 return 0; 4128 } 4129 4130 /* 4131 * at least one of the breakpoints were hit, check to see 4132 * which ones and if any of them are user space addresses 4133 */ 4134 4135 if (bp & 0x01) { 4136 addr[nbp++] = (caddr_t)rdr0(); 4137 } 4138 if (bp & 0x02) { 4139 addr[nbp++] = (caddr_t)rdr1(); 4140 } 4141 if (bp & 0x04) { 4142 addr[nbp++] = (caddr_t)rdr2(); 4143 } 4144 if (bp & 0x08) { 4145 addr[nbp++] = (caddr_t)rdr3(); 4146 } 4147 4148 for (i = 0; i < nbp; i++) { 4149 if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { 4150 /* 4151 * addr[i] is in user space 4152 */ 4153 return nbp; 4154 } 4155 } 4156 4157 /* 4158 * None of the breakpoints are in user space. 4159 */ 4160 return 0; 4161 } 4162 4163 #ifdef KDB 4164 4165 /* 4166 * Provide inb() and outb() as functions. They are normally only available as 4167 * inline functions, thus cannot be called from the debugger. 4168 */ 4169 4170 /* silence compiler warnings */ 4171 u_char inb_(u_short); 4172 void outb_(u_short, u_char); 4173 4174 u_char 4175 inb_(u_short port) 4176 { 4177 return inb(port); 4178 } 4179 4180 void 4181 outb_(u_short port, u_char data) 4182 { 4183 outb(port, data); 4184 } 4185 4186 #endif /* KDB */ 4187