/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>

/* Kernel-config option headers (generated at build time). */
#include "opt_atpic.h"
#include "opt_cpu.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_isa.h"
#include "opt_kstack_pages.h"
#include "opt_maxmem.h"
#include "opt_pci.h"
#include "opt_platform.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/asan.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/bus.h>
#include <sys/callout.h>
#include <sys/cons.h>
#include <sys/cpu.h>
#include <sys/csan.h>
#include <sys/efi.h>
#include <sys/eventhandler.h>
#include <sys/exec.h>
#include <sys/imgact.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/msan.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/ptrace.h>
#include <sys/reboot.h>
#include <sys/reg.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#ifdef SMP
#include <sys/smp.h>
#endif
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/ucontext.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_dumpset.h>

#ifdef DDB
#ifndef KDB
#error KDB must be enabled in order for DDB to work!
#endif
#include <ddb/ddb.h>
#include <ddb/db_sym.h>
#endif

#include <net/netisr.h>

#include <dev/smbios/smbios.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/frame.h>
#include <machine/intr_machdep.h>
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/metadata.h>
#include <machine/pc/bios.h>
#include <machine/pcb.h>
#include <machine/proc.h>
#include <machine/sigframe.h>
#include <machine/specialreg.h>
#include <machine/trap.h>
#include <machine/tss.h>
#include <x86/ucode.h>
#include <x86/ifunc.h>
#ifdef SMP
#include <machine/smp.h>
#endif
#ifdef FDT
#include <x86/fdt.h>
#endif

#ifdef DEV_ATPIC
#include <x86/isa/icu.h>
#else
#include <x86/apicvar.h>
#endif

#include <isa/isareg.h>
#include <isa/rtc.h>
#include <x86/init.h>

/*
 * Sanity check for __curthread(): the inline curthread accessor assumes
 * pc_curthread sits at offset 0 of struct pcpu.
 */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);

/*
 * The PTI trampoline stack needs enough space for a hardware trapframe and a
 * couple of scratch registers, as well as the trapframe left behind after an
 * iret fault.
 */
CTASSERT(PC_PTI_STACK_SZ * sizeof(register_t) >= 2 * sizeof(struct pti_frame) -
    offsetof(struct pti_frame, pti_rip));

/* Machine-dependent C entry point, reached from the locore assembly. */
extern u_int64_t hammer_time(u_int64_t, u_int64_t);

static void cpu_startup(void *);
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
*/ 169 static void native_clock_source_init(void); 170 171 /* Preload data parse function */ 172 static caddr_t native_parse_preload_data(u_int64_t); 173 174 /* Native function to fetch and parse the e820 map */ 175 static void native_parse_memmap(caddr_t, vm_paddr_t *, int *); 176 177 /* Default init_ops implementation. */ 178 struct init_ops init_ops = { 179 .parse_preload_data = native_parse_preload_data, 180 .early_clock_source_init = native_clock_source_init, 181 .early_delay = i8254_delay, 182 .parse_memmap = native_parse_memmap, 183 }; 184 185 /* 186 * Physical address of the EFI System Table. Stashed from the metadata hints 187 * passed into the kernel and used by the EFI code to call runtime services. 188 */ 189 vm_paddr_t efi_systbl_phys; 190 191 /* Intel ICH registers */ 192 #define ICH_PMBASE 0x400 193 #define ICH_SMI_EN ICH_PMBASE + 0x30 194 195 int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; 196 197 int cold = 1; 198 199 long Maxmem = 0; 200 long realmem = 0; 201 int late_console = 1; 202 203 struct kva_md_info kmi; 204 205 struct region_descriptor r_idt; 206 207 struct pcpu *__pcpu; 208 struct pcpu temp_bsp_pcpu; 209 210 struct mtx icu_lock; 211 212 struct mem_range_softc mem_range_softc; 213 214 struct mtx dt_lock; /* lock for GDT and LDT */ 215 216 void (*vmm_resume_p)(void); 217 218 bool efi_boot; 219 220 static void 221 cpu_startup(void *dummy) 222 { 223 uintmax_t memsize; 224 char *sysenv; 225 226 /* 227 * On MacBooks, we need to disallow the legacy USB circuit to 228 * generate an SMI# because this can cause several problems, 229 * namely: incorrect CPU frequency detection and failure to 230 * start the APs. 231 * We do this by disabling a bit in the SMI_EN (SMI Control and 232 * Enable register) of the Intel ICH LPC Interface Bridge. 
233 */ 234 sysenv = kern_getenv("smbios.system.product"); 235 if (sysenv != NULL) { 236 if (strncmp(sysenv, "MacBook1,1", 10) == 0 || 237 strncmp(sysenv, "MacBook3,1", 10) == 0 || 238 strncmp(sysenv, "MacBook4,1", 10) == 0 || 239 strncmp(sysenv, "MacBookPro1,1", 13) == 0 || 240 strncmp(sysenv, "MacBookPro1,2", 13) == 0 || 241 strncmp(sysenv, "MacBookPro3,1", 13) == 0 || 242 strncmp(sysenv, "MacBookPro4,1", 13) == 0 || 243 strncmp(sysenv, "Macmini1,1", 10) == 0) { 244 if (bootverbose) 245 printf("Disabling LEGACY_USB_EN bit on " 246 "Intel ICH.\n"); 247 outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); 248 } 249 freeenv(sysenv); 250 } 251 252 /* 253 * Good {morning,afternoon,evening,night}. 254 */ 255 startrtclock(); 256 printcpuinfo(); 257 258 /* 259 * Display physical memory if SMBIOS reports reasonable amount. 260 */ 261 memsize = 0; 262 sysenv = kern_getenv("smbios.memory.enabled"); 263 if (sysenv != NULL) { 264 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; 265 freeenv(sysenv); 266 } 267 if (memsize < ptoa((uintmax_t)vm_free_count())) 268 memsize = ptoa((uintmax_t)Maxmem); 269 printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); 270 realmem = atop(memsize); 271 272 /* 273 * Display any holes after the first chunk of extended memory. 
274 */ 275 if (bootverbose) { 276 int indx; 277 278 printf("Physical memory chunk(s):\n"); 279 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { 280 vm_paddr_t size; 281 282 size = phys_avail[indx + 1] - phys_avail[indx]; 283 printf( 284 "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", 285 (uintmax_t)phys_avail[indx], 286 (uintmax_t)phys_avail[indx + 1] - 1, 287 (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); 288 } 289 } 290 291 vm_ksubmap_init(&kmi); 292 293 printf("avail memory = %ju (%ju MB)\n", 294 ptoa((uintmax_t)vm_free_count()), 295 ptoa((uintmax_t)vm_free_count()) / 1048576); 296 #ifdef DEV_PCI 297 if (bootverbose && intel_graphics_stolen_base != 0) 298 printf("intel stolen mem: base %#jx size %ju MB\n", 299 (uintmax_t)intel_graphics_stolen_base, 300 (uintmax_t)intel_graphics_stolen_size / 1024 / 1024); 301 #endif 302 303 /* 304 * Set up buffers, so they can be used to read disk labels. 305 */ 306 bufinit(); 307 vm_pager_bufferinit(); 308 309 cpu_setregs(); 310 } 311 312 static void 313 late_ifunc_resolve(void *dummy __unused) 314 { 315 link_elf_late_ireloc(); 316 } 317 SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL); 318 319 320 void 321 cpu_setregs(void) 322 { 323 register_t cr0; 324 325 TSENTER(); 326 cr0 = rcr0(); 327 cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; 328 TSENTER2("load_cr0"); 329 load_cr0(cr0); 330 TSEXIT2("load_cr0"); 331 TSEXIT(); 332 } 333 334 /* 335 * Initialize amd64 and configure to run kernel 336 */ 337 338 /* 339 * Initialize segments & interrupt table 340 */ 341 static struct gate_descriptor idt0[NIDT]; 342 struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ 343 344 static char dblfault_stack[DBLFAULT_STACK_SIZE] __aligned(16); 345 static char mce0_stack[MCE_STACK_SIZE] __aligned(16); 346 static char nmi0_stack[NMI_STACK_SIZE] __aligned(16); 347 static char dbg0_stack[DBG_STACK_SIZE] __aligned(16); 348 CTASSERT(sizeof(struct nmi_pcpu) == 16); 349 350 /* 351 * Software 
/*
 * Software prototypes -- in more palatable form.
 *
 * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same
 * slots as corresponding segments for i386 kernel.
 */
struct soft_segment_descriptor gdt_segs[] = {
	[GNULL_SEL] = {		/* 0 Null Descriptor */
		.ssd_base = 0x0,
		.ssd_limit = 0x0,
		.ssd_type = 0,
		.ssd_dpl = 0,
		.ssd_p = 0,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0 },
	[GNULL2_SEL] = {	/* 1 Null Descriptor */
		.ssd_base = 0x0,
		.ssd_limit = 0x0,
		.ssd_type = 0,
		.ssd_dpl = 0,
		.ssd_p = 0,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0 },
	[GUFS32_SEL] = {	/* 2 32 bit %gs Descriptor for user */
		.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMRWA,
		.ssd_dpl = SEL_UPL,
		.ssd_p = 1,
		.ssd_long = 0,
		.ssd_def32 = 1,
		.ssd_gran = 1 },
	[GUGS32_SEL] = {	/* 3 32 bit %fs Descriptor for user */
		.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMRWA,
		.ssd_dpl = SEL_UPL,
		.ssd_p = 1,
		.ssd_long = 0,
		.ssd_def32 = 1,
		.ssd_gran = 1 },
	[GCODE_SEL] = {		/* 4 Code Descriptor for kernel */
		.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMERA,
		.ssd_dpl = SEL_KPL,
		.ssd_p = 1,
		.ssd_long = 1,
		.ssd_def32 = 0,
		.ssd_gran = 1 },
	[GDATA_SEL] = {		/* 5 Data Descriptor for kernel */
		.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMRWA,
		.ssd_dpl = SEL_KPL,
		.ssd_p = 1,
		.ssd_long = 1,
		.ssd_def32 = 0,
		.ssd_gran = 1 },
	[GUCODE32_SEL] = {	/* 6 32 bit Code Descriptor for user */
		.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMERA,
		.ssd_dpl = SEL_UPL,
		.ssd_p = 1,
		.ssd_long = 0,
		.ssd_def32 = 1,
		.ssd_gran = 1 },
	[GUDATA_SEL] = {	/* 7 32/64 bit Data Descriptor for user */
		.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMRWA,
		.ssd_dpl = SEL_UPL,
		.ssd_p = 1,
		.ssd_long = 0,
		.ssd_def32 = 1,
		.ssd_gran = 1 },
	[GUCODE_SEL] = {	/* 8 64 bit Code Descriptor for user */
		.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMERA,
		.ssd_dpl = SEL_UPL,
		.ssd_p = 1,
		.ssd_long = 1,
		.ssd_def32 = 0,
		.ssd_gran = 1 },
	[GPROC0_SEL] = {	/* 9 Proc 0 TSS Descriptor */
		.ssd_base = 0x0,
		.ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1,
		.ssd_type = SDT_SYSTSS,
		.ssd_dpl = SEL_KPL,
		.ssd_p = 1,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0 },
	[GPROC0_SEL + 1] = {	/* 10 Proc 0 TSS descriptor, double size */
		.ssd_base = 0x0,
		.ssd_limit = 0x0,
		.ssd_type = 0,
		.ssd_dpl = 0,
		.ssd_p = 0,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0 },
	[GUSERLDT_SEL] = {	/* 11 LDT Descriptor */
		.ssd_base = 0x0,
		.ssd_limit = 0x0,
		.ssd_type = 0,
		.ssd_dpl = 0,
		.ssd_p = 0,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0 },
	[GUSERLDT_SEL + 1] = {	/* 12 LDT Descriptor, double size */
		.ssd_base = 0x0,
		.ssd_limit = 0x0,
		.ssd_type = 0,
		.ssd_dpl = 0,
		.ssd_p = 0,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0 },
};
_Static_assert(nitems(gdt_segs) == NGDT, "Stale NGDT");

/*
 * Install an interrupt gate in the IDT.
 *
 * idx: vector number; func: handler entry point; typ: gate type
 * (SDT_SYS*); dpl: minimum privilege level allowed to raise the vector
 * via software; ist: interrupt stack table index (0 = no stack switch).
 */
void
setidt(int idx, inthand_t *func, int typ, int dpl, int ist)
{
	struct gate_descriptor *ip;

	ip = idt + idx;
	/* The 64-bit handler offset is split across two descriptor fields. */
	ip->gd_looffset = (uintptr_t)func;
	ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
	ip->gd_ist = ist;
	ip->gd_xx = 0;
	ip->gd_type = typ;
	ip->gd_dpl = dpl;
	ip->gd_p = 1;
	ip->gd_hioffset = ((uintptr_t)func) >> 16;
}

/* Exception/trap entry points defined in exception.S (and PTI variants). */
extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(xmm), IDTVEC(dblfault),
	IDTVEC(div_pti), IDTVEC(bpt_pti),
	IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti),
	IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti),
	IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti),
	IDTVEC(rsvd_pti), IDTVEC(fpu_pti), IDTVEC(align_pti),
	IDTVEC(xmm_pti),
#ifdef KDTRACE_HOOKS
	IDTVEC(dtrace_ret), IDTVEC(dtrace_ret_pti),
#endif
#ifdef XENHVM
	IDTVEC(xen_intr_upcall), IDTVEC(xen_intr_upcall_pti),
#endif
	IDTVEC(fast_syscall), IDTVEC(fast_syscall32),
	IDTVEC(fast_syscall_pti);

#ifdef DDB
/*
 * Display the index and function name of any IDT entries that don't use
 * the default 'rsvd' entry point.
 */
DB_SHOW_COMMAND_FLAGS(idt, db_show_idt, DB_CMD_MEMSAFE)
{
	struct gate_descriptor *ip;
	int idx;
	uintptr_t func;

	ip = idt;
	for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
		/* Reassemble the handler address from the split fields. */
		func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
		if (func != (uintptr_t)&IDTVEC(rsvd)) {
			db_printf("%3d\t", idx);
			db_printsym(func, DB_STGY_PROC);
			db_printf("\n");
		}
		ip++;
	}
}
/* Show privileged registers. */
DB_SHOW_COMMAND_FLAGS(sysregs, db_show_sysregs, DB_CMD_MEMSAFE)
{
	/* Matches the in-memory format stored by SIDT/SGDT. */
	struct {
		uint16_t limit;
		uint64_t base;
	} __packed idtr, gdtr;
	uint16_t ldt, tr;

	__asm __volatile("sidt %0" : "=m" (idtr));
	db_printf("idtr\t0x%016lx/%04x\n",
	    (u_long)idtr.base, (u_int)idtr.limit);
	__asm __volatile("sgdt %0" : "=m" (gdtr));
	db_printf("gdtr\t0x%016lx/%04x\n",
	    (u_long)gdtr.base, (u_int)gdtr.limit);
	__asm __volatile("sldt %0" : "=r" (ldt));
	db_printf("ldtr\t0x%04x\n", ldt);
	__asm __volatile("str %0" : "=r" (tr));
	db_printf("tr\t0x%04x\n", tr);
	db_printf("cr0\t0x%016lx\n", rcr0());
	db_printf("cr2\t0x%016lx\n", rcr2());
	db_printf("cr3\t0x%016lx\n", rcr3());
	db_printf("cr4\t0x%016lx\n", rcr4());
	/* xcr0 only exists when XSAVE is enabled in CR4. */
	if (rcr4() & CR4_XSAVE)
		db_printf("xcr0\t0x%016lx\n", rxcr(0));
	db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER));
	if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX))
		db_printf("FEATURES_CTL\t%016lx\n",
		    rdmsr(MSR_IA32_FEATURE_CONTROL));
	db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR));
	db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT));
	db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE));
}

/* Show the hardware debug registers (dr4/dr5 are aliases and omitted). */
DB_SHOW_COMMAND_FLAGS(dbregs, db_show_dbregs, DB_CMD_MEMSAFE)
{

	db_printf("dr0\t0x%016lx\n", rdr0());
	db_printf("dr1\t0x%016lx\n", rdr1());
	db_printf("dr2\t0x%016lx\n", rdr2());
	db_printf("dr3\t0x%016lx\n", rdr3());
	db_printf("dr6\t0x%016lx\n", rdr6());
	db_printf("dr7\t0x%016lx\n", rdr7());
}
#endif

/*
 * Convert a hardware user segment descriptor to its software
 * (struct soft_segment_descriptor) representation.
 */
void
sdtossd(struct user_segment_descriptor *sd, struct soft_segment_descriptor *ssd)
{

	ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase;
	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
	ssd->ssd_type = sd->sd_type;
	ssd->ssd_dpl = sd->sd_dpl;
	ssd->ssd_p = sd->sd_p;
	ssd->ssd_long = sd->sd_long;
	ssd->ssd_def32 = sd->sd_def32;
	ssd->ssd_gran = sd->sd_gran;
}
/*
 * Convert a software segment descriptor to a hardware user segment
 * descriptor (inverse of sdtossd()).
 */
void
ssdtosd(struct soft_segment_descriptor *ssd, struct user_segment_descriptor *sd)
{

	sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
	sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
	sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
	sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
	sd->sd_type = ssd->ssd_type;
	sd->sd_dpl = ssd->ssd_dpl;
	sd->sd_p = ssd->ssd_p;
	sd->sd_long = ssd->ssd_long;
	sd->sd_def32 = ssd->ssd_def32;
	sd->sd_gran = ssd->ssd_gran;
}

/*
 * Convert a software segment descriptor to a hardware system segment
 * descriptor (TSS/LDT); these carry a full 64-bit base, hence the wider
 * sd_hibase mask.
 */
void
ssdtosyssd(struct soft_segment_descriptor *ssd, struct system_segment_descriptor *sd)
{

	sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
	sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
	sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
	sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
	sd->sd_type = ssd->ssd_type;
	sd->sd_dpl = ssd->ssd_dpl;
	sd->sd_p = ssd->ssd_p;
	sd->sd_gran = ssd->ssd_gran;
}

/* Amount of 'base memory' below 640K, in KB; see getmemsize(). */
u_int basemem;

/*
 * Insert a [base, base+length) region into the sorted physmap array of
 * base/end pairs, merging with adjacent regions where possible.
 *
 * Returns 1 on success (including benign rejections: zero length or an
 * overlapping region, which is ignored), 0 when the array is full and
 * parsing should stop.
 */
static int
add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
    int *physmap_idxp)
{
	int i, insert_idx, physmap_idx;

	physmap_idx = *physmap_idxp;

	if (length == 0)
		return (1);

	/*
	 * Find insertion point while checking for overlap.  Start off by
	 * assuming the new entry will be added to the end.
	 *
	 * NB: physmap_idx points to the next free slot.
	 */
	insert_idx = physmap_idx;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (base < physmap[i + 1]) {
			if (base + length <= physmap[i]) {
				insert_idx = i;
				break;
			}
			if (boothowto & RB_VERBOSE)
				printf(
		    "Overlapping memory regions, ignoring second region\n");
			return (1);
		}
	}

	/* See if we can prepend to the next entry. */
	if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) {
		physmap[insert_idx] = base;
		return (1);
	}

	/* See if we can append to the previous entry. */
	if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
		physmap[insert_idx - 1] += length;
		return (1);
	}

	physmap_idx += 2;
	*physmap_idxp = physmap_idx;
	if (physmap_idx == PHYS_AVAIL_ENTRIES) {
		printf(
		"Too many segments in the physical address map, giving up\n");
		return (0);
	}

	/*
	 * Move the last 'N' entries down to make room for the new
	 * entry if needed.
	 */
	for (i = (physmap_idx - 2); i > insert_idx; i -= 2) {
		physmap[i] = physmap[i - 2];
		physmap[i + 1] = physmap[i - 1];
	}

	/* Insert the new entry. */
	physmap[insert_idx] = base;
	physmap[insert_idx + 1] = base + length;
	return (1);
}

/*
 * Walk a BIOS INT 15h E820 system memory map of 'smapsize' bytes and add
 * each SMAP_TYPE_MEMORY range to physmap.
 */
void
bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize,
    vm_paddr_t *physmap, int *physmap_idx)
{
	struct bios_smap *smap, *smapend;

	smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);

	for (smap = smapbase; smap < smapend; smap++) {
		if (boothowto & RB_VERBOSE)
			printf("SMAP type=%02x base=%016lx len=%016lx\n",
			    smap->type, smap->base, smap->length);

		if (smap->type != SMAP_TYPE_MEMORY)
			continue;

		if (!add_physmap_entry(smap->base, smap->length, physmap,
		    physmap_idx))
			break;
	}
}

/*
 * Walk the UEFI memory map passed in by the loader and add every usable
 * descriptor (loader/boot-services/conventional memory) to physmap.
 */
static void
add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap,
    int *physmap_idx)
{
	struct efi_md *map, *p;
	const char *type;
	size_t efisz;
	int ndesc, i;

	/* Printable names indexed by EFI memory descriptor type. */
	static const char *types[] = {
		"Reserved",
		"LoaderCode",
		"LoaderData",
		"BootServicesCode",
		"BootServicesData",
		"RuntimeServicesCode",
		"RuntimeServicesData",
		"ConventionalMemory",
		"UnusableMemory",
		"ACPIReclaimMemory",
		"ACPIMemoryNVS",
		"MemoryMappedIO",
		"MemoryMappedIOPortSpace",
		"PalCode",
		"PersistentMemory"
	};

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return;
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	if (boothowto & RB_VERBOSE)
		printf("%23s %12s %12s %8s %4s\n",
		    "Type", "Physical", "Virtual", "#Pages", "Attr");

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (boothowto & RB_VERBOSE) {
			if (p->md_type < nitems(types))
				type = types[p->md_type];
			else
				type = "<INVALID>";
			printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
			    p->md_virt, p->md_pages);
			if (p->md_attr & EFI_MD_ATTR_UC)
				printf("UC ");
			if (p->md_attr & EFI_MD_ATTR_WC)
				printf("WC ");
			if (p->md_attr & EFI_MD_ATTR_WT)
				printf("WT ");
			if (p->md_attr & EFI_MD_ATTR_WB)
				printf("WB ");
			if (p->md_attr & EFI_MD_ATTR_UCE)
				printf("UCE ");
			if (p->md_attr & EFI_MD_ATTR_WP)
				printf("WP ");
			if (p->md_attr & EFI_MD_ATTR_RP)
				printf("RP ");
			if (p->md_attr & EFI_MD_ATTR_XP)
				printf("XP ");
			if (p->md_attr & EFI_MD_ATTR_NV)
				printf("NV ");
			if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
				printf("MORE_RELIABLE ");
			if (p->md_attr & EFI_MD_ATTR_RO)
				printf("RO ");
			if (p->md_attr & EFI_MD_ATTR_RT)
				printf("RUNTIME");
			printf("\n");
		}

		switch (p->md_type) {
		case EFI_MD_TYPE_CODE:
		case EFI_MD_TYPE_DATA:
		case EFI_MD_TYPE_BS_CODE:
		case EFI_MD_TYPE_BS_DATA:
		case EFI_MD_TYPE_FREE:
			/*
			 * We're allowed to use any entry with these types.
			 */
			break;
		default:
			continue;
		}

		if (!add_physmap_entry(p->md_phys, p->md_pages * EFI_PAGE_SIZE,
		    physmap, physmap_idx))
			break;
	}
}

/*
 * Default parse_memmap hook: prefer the UEFI memory map when the loader
 * supplied one, otherwise fall back to the BIOS E820 SMAP; record which
 * firmware interface booted us in 'bootmethod'.
 */
static void
native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx)
{
	struct bios_smap *smap;
	struct efi_map_header *efihdr;
	u_int32_t size;

	/*
	 * Memory map from INT 15:E820.
	 *
	 * subr_module.c says:
	 * "Consumer may safely assume that size value precedes data."
	 * ie: an int32_t immediately precedes smap.
	 */

	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	smap = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (efihdr == NULL && smap == NULL)
		panic("No BIOS smap or EFI map info from loader!");

	if (efihdr != NULL) {
		add_efi_map_entries(efihdr, physmap, physmap_idx);
		strlcpy(bootmethod, "UEFI", sizeof(bootmethod));
	} else {
		size = *((u_int32_t *)smap - 1);
		bios_add_smap_entries(smap, size, physmap, physmap_idx);
		strlcpy(bootmethod, "BIOS", sizeof(bootmethod));
	}
}

#define	PAGES_PER_GB	(1024 * 1024 * 1024 / PAGE_SIZE)
/*
 * Populate the (physmap) array with base/bound pairs describing the
 * available physical memory in the system, then test this memory and
 * build the phys_avail array describing the actually-available memory.
 *
 * Total memory size may be set by the kernel environment variable
 * hw.physmem or the compile-time define MAXMEM.
 *
 * XXX first should be vm_paddr_t.
 */
static void
getmemsize(caddr_t kmdp, u_int64_t first)
{
	int i, physmap_idx, pa_indx, da_indx;
	vm_paddr_t pa, physmap[PHYS_AVAIL_ENTRIES];
	u_long physmem_start, physmem_tunable, memtest;
	pt_entry_t *pte;
	quad_t dcons_addr, dcons_size;
	int page_counter;

	TSENTER();
	/*
	 * Tell the physical memory allocator about pages used to store
	 * the kernel and preloaded data.  See kmem_bootstrap_free().
	 */
	vm_phys_early_add_seg((vm_paddr_t)kernphys, trunc_page(first));

	bzero(physmap, sizeof(physmap));
	physmap_idx = 0;

	init_ops.parse_memmap(kmdp, physmap, &physmap_idx);
	/* Point physmap_idx at the last used pair rather than the free slot. */
	physmap_idx -= 2;

	/*
	 * Find the 'base memory' segment for SMP
	 */
	basemem = 0;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (physmap[i] <= 0xA0000) {
			basemem = physmap[i + 1] / 1024;
			break;
		}
	}
	if (basemem == 0 || basemem > 640) {
		if (bootverbose)
			printf(
		"Memory map doesn't contain a basemem segment, faking it");
		basemem = 640;
	}

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".  We may adjust this
	 * based on ``hw.physmem'' and the results of the memory test.
	 */
	Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
	/* MAXMEM is specified in KB; convert to 4KB pages. */
	Maxmem = MAXMEM / 4;
#endif

	if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
		Maxmem = atop(physmem_tunable);

	/*
	 * The boot memory test is disabled by default, as it takes a
	 * significant amount of time on large-memory systems, and is
	 * unfriendly to virtual machines as it unnecessarily touches all
	 * pages.
	 *
	 * A general name is used as the code may be extended to support
	 * additional tests beyond the current "page present" test.
	 */
	memtest = 0;
	TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);

	/*
	 * Don't allow MAXMEM or hw.physmem to extend the amount of memory
	 * in the system.
	 */
	if (Maxmem > atop(physmap[physmap_idx + 1]))
		Maxmem = atop(physmap[physmap_idx + 1]);

	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
	    (boothowto & RB_VERBOSE))
		printf("Physical memory use set to %ldK\n", Maxmem * 4);

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap(&first);

	/*
	 * Size up each available chunk of physical memory.
	 *
	 * XXX Some BIOSes corrupt low 64KB between suspend and resume.
	 * By default, mask off the first 16 pages unless we appear to be
	 * running in a VM.
	 */
	physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT;
	TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start);
	if (physmap[0] < physmem_start) {
		if (physmem_start < PAGE_SIZE)
			physmap[0] = PAGE_SIZE;
		else if (physmem_start >= physmap[1])
			physmap[0] = round_page(physmap[1] - PAGE_SIZE);
		else
			physmap[0] = round_page(physmem_start);
	}
	pa_indx = 0;
	da_indx = 1;
	phys_avail[pa_indx++] = physmap[0];
	phys_avail[pa_indx] = physmap[0];
	dump_avail[da_indx] = physmap[0];
	/* CMAP1/CADDR1 provide a scratch mapping for probing each page. */
	pte = CMAP1;

	/*
	 * Get dcons buffer address
	 */
	if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
	    getenv_quad("dcons.size", &dcons_size) == 0)
		dcons_addr = 0;

	/*
	 * physmap is in bytes, so when converting to page boundaries,
	 * round up the start address and round down the end address.
	 */
	page_counter = 0;
	if (memtest != 0)
		printf("Testing system memory");
	for (i = 0; i <= physmap_idx; i += 2) {
		vm_paddr_t end;

		end = ptoa((vm_paddr_t)Maxmem);
		if (physmap[i + 1] < end)
			end = trunc_page(physmap[i + 1]);
		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
			int *ptr = (int *)CADDR1;
			int tmp;
			bool full, page_bad;

			full = false;
			/*
			 * block out kernel memory as not available.
			 */
			if (pa >= (vm_paddr_t)kernphys && pa < first)
				goto do_dump_avail;

			/*
			 * block out dcons buffer
			 */
			if (dcons_addr > 0
			    && pa >= trunc_page(dcons_addr)
			    && pa < dcons_addr + dcons_size)
				goto do_dump_avail;

			page_bad = false;
			if (memtest == 0)
				goto skip_memtest;

			/*
			 * Print a "." every GB to show we're making
			 * progress.
			 */
			page_counter++;
			if ((page_counter % PAGES_PER_GB) == 0)
				printf(".");

			/*
			 * map page into kernel: valid, read/write,non-cacheable
			 */
			*pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD;
			invltlb();

			tmp = *(int *)ptr;
			/*
			 * Test for alternating 1's and 0's
			 */
			*(volatile int *)ptr = 0xaaaaaaaa;
			if (*(volatile int *)ptr != 0xaaaaaaaa)
				page_bad = true;
			/*
			 * Test for alternating 0's and 1's
			 */
			*(volatile int *)ptr = 0x55555555;
			if (*(volatile int *)ptr != 0x55555555)
				page_bad = true;
			/*
			 * Test for all 1's
			 */
			*(volatile int *)ptr = 0xffffffff;
			if (*(volatile int *)ptr != 0xffffffff)
				page_bad = true;
			/*
			 * Test for all 0's
			 */
			*(volatile int *)ptr = 0x0;
			if (*(volatile int *)ptr != 0x0)
				page_bad = true;
			/*
			 * Restore original value.
			 */
			*(int *)ptr = tmp;

skip_memtest:
			/*
			 * Adjust array of valid/good pages.
			 */
			if (page_bad == true)
				continue;
			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer. Otherwise start a new chunk.
			 * Note that "end" points one higher than end,
			 * making the range >= start and < end.
			 * If we're also doing a speculative memory
			 * test and we at or past the end, bump up Maxmem
			 * so that we keep going. The first bad page
			 * will terminate the loop.
			 */
			if (phys_avail[pa_indx] == pa) {
				phys_avail[pa_indx] += PAGE_SIZE;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ENTRIES) {
					printf(
		"Too many holes in the physical address space, giving up\n");
					pa_indx--;
					full = true;
					goto do_dump_avail;
				}
				phys_avail[pa_indx++] = pa;	/* start */
				phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
			}
			physmem++;
do_dump_avail:
			if (dump_avail[da_indx] == pa) {
				dump_avail[da_indx] += PAGE_SIZE;
			} else {
				da_indx++;
				if (da_indx == PHYS_AVAIL_ENTRIES) {
					da_indx--;
					goto do_next;
				}
				dump_avail[da_indx++] = pa; /* start */
				dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
			}
do_next:
			if (full)
				break;
		}
	}
	/* Tear down the scratch mapping used by the probe loop. */
	*pte = 0;
	invltlb();
	if (memtest != 0)
		printf("\n");

	/*
	 * XXX
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(msgbufsize) >= phys_avail[pa_indx]) {
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	Maxmem = atop(phys_avail[pa_indx]);

	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(msgbufsize);

	/* Map the message buffer. */
	msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]);
	TSEXIT();
}

/*
 * Default parse_preload_data hook: relocate loader metadata, locate the
 * kernel module entry, initialize the static kernel environment and
 * (with DDB) the symbol table, and stash the EFI system table address.
 * Returns the kernel module metadata pointer (kmdp).
 */
static caddr_t
native_parse_preload_data(u_int64_t modulep)
{
	caddr_t kmdp;
	char *envp;
#ifdef DDB
	vm_offset_t ksym_start;
	vm_offset_t ksym_end;
#endif

	preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE);
	preload_bootstrap_relocate(KERNBASE);
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
	envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
	if (envp != NULL)
		envp += KERNBASE;
	init_static_kenv(envp, 0);
#ifdef DDB
	ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
	ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
	db_fetch_ksymtab(ksym_start, ksym_end, 0);
#endif
	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	return (kmdp);
}

/* Default early_clock_source_init hook: program the 8254 PIT. */
static void
native_clock_source_init(void)
{
	i8254_init();
}

/* Initialize the kernel debugger and enter it if the boot flags ask to. */
static void
amd64_kdb_init(void)
{
	kdb_init();
#ifdef KDB
	if (boothowto & RB_KDB)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
}
(u_int64_t)IDTVEC(fast_syscall_pti) : 1184 (u_int64_t)IDTVEC(fast_syscall)); 1185 wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); 1186 msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | 1187 ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); 1188 wrmsr(MSR_STAR, msr); 1189 wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC); 1190 } 1191 1192 void 1193 amd64_bsp_pcpu_init1(struct pcpu *pc) 1194 { 1195 struct user_segment_descriptor *gdt; 1196 1197 PCPU_SET(prvspace, pc); 1198 gdt = *PCPU_PTR(gdt); 1199 PCPU_SET(curthread, &thread0); 1200 PCPU_SET(tssp, PCPU_PTR(common_tss)); 1201 PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); 1202 PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]); 1203 PCPU_SET(fs32p, &gdt[GUFS32_SEL]); 1204 PCPU_SET(gs32p, &gdt[GUGS32_SEL]); 1205 PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK); 1206 PCPU_SET(smp_tlb_gen, 1); 1207 } 1208 1209 void 1210 amd64_bsp_pcpu_init2(uint64_t rsp0) 1211 { 1212 1213 PCPU_SET(rsp0, rsp0); 1214 PCPU_SET(pti_rsp0, ((vm_offset_t)PCPU_PTR(pti_stack) + 1215 PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful); 1216 PCPU_SET(curpcb, thread0.td_pcb); 1217 } 1218 1219 void 1220 amd64_bsp_ist_init(struct pcpu *pc) 1221 { 1222 struct nmi_pcpu *np; 1223 struct amd64tss *tssp; 1224 1225 tssp = &pc->pc_common_tss; 1226 1227 /* doublefault stack space, runs on ist1 */ 1228 np = ((struct nmi_pcpu *)&dblfault_stack[sizeof(dblfault_stack)]) - 1; 1229 np->np_pcpu = (register_t)pc; 1230 tssp->tss_ist1 = (long)np; 1231 1232 /* 1233 * NMI stack, runs on ist2. The pcpu pointer is stored just 1234 * above the start of the ist2 stack. 1235 */ 1236 np = ((struct nmi_pcpu *)&nmi0_stack[sizeof(nmi0_stack)]) - 1; 1237 np->np_pcpu = (register_t)pc; 1238 tssp->tss_ist2 = (long)np; 1239 1240 /* 1241 * MC# stack, runs on ist3. The pcpu pointer is stored just 1242 * above the start of the ist3 stack. 
1243 */ 1244 np = ((struct nmi_pcpu *)&mce0_stack[sizeof(mce0_stack)]) - 1; 1245 np->np_pcpu = (register_t)pc; 1246 tssp->tss_ist3 = (long)np; 1247 1248 /* 1249 * DB# stack, runs on ist4. 1250 */ 1251 np = ((struct nmi_pcpu *)&dbg0_stack[sizeof(dbg0_stack)]) - 1; 1252 np->np_pcpu = (register_t)pc; 1253 tssp->tss_ist4 = (long)np; 1254 } 1255 1256 /* 1257 * Calculate the kernel load address by inspecting page table created by loader. 1258 * The assumptions: 1259 * - kernel is mapped at KERNBASE, backed by contiguous phys memory 1260 * aligned at 2M, below 4G (the latter is important for AP startup) 1261 * - there is a 2M hole at KERNBASE (KERNSTART = KERNBASE + 2M) 1262 * - kernel is mapped with 2M superpages 1263 * - all participating memory, i.e. kernel, modules, metadata, 1264 * page table is accessible by pre-created 1:1 mapping 1265 * (right now loader creates 1:1 mapping for lower 4G, and all 1266 * memory is from there) 1267 * - there is a usable memory block right after the end of the 1268 * mapped kernel and all modules/metadata, pointed to by 1269 * physfree, for early allocations 1270 */ 1271 vm_paddr_t __nosanitizeaddress __nosanitizememory 1272 amd64_loadaddr(void) 1273 { 1274 pml4_entry_t *pml4e; 1275 pdp_entry_t *pdpe; 1276 pd_entry_t *pde; 1277 uint64_t cr3; 1278 1279 cr3 = rcr3(); 1280 pml4e = (pml4_entry_t *)cr3 + pmap_pml4e_index(KERNSTART); 1281 pdpe = (pdp_entry_t *)(*pml4e & PG_FRAME) + pmap_pdpe_index(KERNSTART); 1282 pde = (pd_entry_t *)(*pdpe & PG_FRAME) + pmap_pde_index(KERNSTART); 1283 return (*pde & PG_FRAME); 1284 } 1285 1286 u_int64_t 1287 hammer_time(u_int64_t modulep, u_int64_t physfree) 1288 { 1289 caddr_t kmdp; 1290 int gsel_tss, x; 1291 struct pcpu *pc; 1292 uint64_t rsp0; 1293 char *env; 1294 struct user_segment_descriptor *gdt; 1295 struct region_descriptor r_gdt; 1296 size_t kstack0_sz; 1297 1298 TSRAW(&thread0, TS_ENTER, __func__, NULL); 1299 1300 kernphys = amd64_loadaddr(); 1301 1302 physfree += kernphys; 1303 1304 kmdp = 
init_ops.parse_preload_data(modulep); 1305 1306 efi_boot = preload_search_info(kmdp, MODINFO_METADATA | 1307 MODINFOMD_EFI_MAP) != NULL; 1308 1309 if (!efi_boot) { 1310 /* Tell the bios to warmboot next time */ 1311 atomic_store_short((u_short *)0x472, 0x1234); 1312 } 1313 1314 physfree += ucode_load_bsp(physfree - kernphys + KERNSTART); 1315 physfree = roundup2(physfree, PAGE_SIZE); 1316 1317 identify_cpu1(); 1318 identify_hypervisor(); 1319 identify_hypervisor_smbios(); 1320 identify_cpu_fixup_bsp(); 1321 identify_cpu2(); 1322 initializecpucache(); 1323 1324 /* 1325 * Check for pti, pcid, and invpcid before ifuncs are 1326 * resolved, to correctly select the implementation for 1327 * pmap_activate_sw_mode(). 1328 */ 1329 pti = pti_get_default(); 1330 TUNABLE_INT_FETCH("vm.pmap.pti", &pti); 1331 TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled); 1332 if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) { 1333 invpcid_works = (cpu_stdext_feature & 1334 CPUID_STDEXT_INVPCID) != 0; 1335 } else { 1336 pmap_pcid_enabled = 0; 1337 } 1338 1339 /* 1340 * Now we can do small core initialization, after the PCID 1341 * CPU features and user knobs are evaluated. 1342 */ 1343 TUNABLE_INT_FETCH("vm.pmap.pcid_invlpg_workaround", 1344 &pmap_pcid_invlpg_workaround_uena); 1345 cpu_init_small_core(); 1346 1347 if ((cpu_feature2 & CPUID2_XSAVE) != 0) { 1348 use_xsave = 1; 1349 TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); 1350 } 1351 1352 link_elf_ireloc(kmdp); 1353 1354 /* 1355 * This may be done better later if it gets more high level 1356 * components in it. If so just link td->td_proc here. 
1357 */ 1358 proc_linkup0(&proc0, &thread0); 1359 1360 /* Init basic tunables, hz etc */ 1361 init_param1(); 1362 1363 thread0.td_kstack = physfree - kernphys + KERNSTART; 1364 thread0.td_kstack_pages = kstack_pages; 1365 kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; 1366 bzero((void *)thread0.td_kstack, kstack0_sz); 1367 physfree += kstack0_sz; 1368 1369 /* 1370 * Initialize enough of thread0 for delayed invalidation to 1371 * work very early. Rely on thread0.td_base_pri 1372 * zero-initialization, it is reset to PVM at proc0_init(). 1373 */ 1374 pmap_thread_init_invl_gen(&thread0); 1375 1376 pc = &temp_bsp_pcpu; 1377 pcpu_init(pc, 0, sizeof(struct pcpu)); 1378 gdt = &temp_bsp_pcpu.pc_gdt[0]; 1379 1380 /* 1381 * make gdt memory segments 1382 */ 1383 for (x = 0; x < NGDT; x++) { 1384 if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && 1385 x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1) 1386 ssdtosd(&gdt_segs[x], &gdt[x]); 1387 } 1388 gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&pc->pc_common_tss; 1389 ssdtosyssd(&gdt_segs[GPROC0_SEL], 1390 (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); 1391 1392 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; 1393 r_gdt.rd_base = (long)gdt; 1394 lgdt(&r_gdt); 1395 1396 wrmsr(MSR_FSBASE, 0); /* User value */ 1397 wrmsr(MSR_GSBASE, (u_int64_t)pc); 1398 wrmsr(MSR_KGSBASE, 0); /* User value while in the kernel */ 1399 1400 dpcpu_init((void *)(physfree - kernphys + KERNSTART), 0); 1401 physfree += DPCPU_SIZE; 1402 amd64_bsp_pcpu_init1(pc); 1403 /* Non-late cninit() and printf() can be moved up to here. */ 1404 1405 /* 1406 * Initialize mutexes. 1407 * 1408 * icu_lock: in order to allow an interrupt to occur in a critical 1409 * section, to set pcpu->ipending (etc...) properly, we 1410 * must be able to get the icu lock, so it can't be 1411 * under witness. 
1412 */ 1413 mutex_init(); 1414 mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); 1415 mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF); 1416 1417 /* exceptions */ 1418 for (x = 0; x < NIDT; x++) 1419 setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT, 1420 SEL_KPL, 0); 1421 setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT, 1422 SEL_KPL, 0); 1423 setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4); 1424 setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); 1425 setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT, 1426 SEL_UPL, 0); 1427 setidt(IDT_OF, pti ? &IDTVEC(ofl_pti) : &IDTVEC(ofl), SDT_SYSIGT, 1428 SEL_UPL, 0); 1429 setidt(IDT_BR, pti ? &IDTVEC(bnd_pti) : &IDTVEC(bnd), SDT_SYSIGT, 1430 SEL_KPL, 0); 1431 setidt(IDT_UD, pti ? &IDTVEC(ill_pti) : &IDTVEC(ill), SDT_SYSIGT, 1432 SEL_KPL, 0); 1433 setidt(IDT_NM, pti ? &IDTVEC(dna_pti) : &IDTVEC(dna), SDT_SYSIGT, 1434 SEL_KPL, 0); 1435 setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1); 1436 setidt(IDT_FPUGP, pti ? &IDTVEC(fpusegm_pti) : &IDTVEC(fpusegm), 1437 SDT_SYSIGT, SEL_KPL, 0); 1438 setidt(IDT_TS, pti ? &IDTVEC(tss_pti) : &IDTVEC(tss), SDT_SYSIGT, 1439 SEL_KPL, 0); 1440 setidt(IDT_NP, pti ? &IDTVEC(missing_pti) : &IDTVEC(missing), 1441 SDT_SYSIGT, SEL_KPL, 0); 1442 setidt(IDT_SS, pti ? &IDTVEC(stk_pti) : &IDTVEC(stk), SDT_SYSIGT, 1443 SEL_KPL, 0); 1444 setidt(IDT_GP, pti ? &IDTVEC(prot_pti) : &IDTVEC(prot), SDT_SYSIGT, 1445 SEL_KPL, 0); 1446 setidt(IDT_PF, pti ? &IDTVEC(page_pti) : &IDTVEC(page), SDT_SYSIGT, 1447 SEL_KPL, 0); 1448 setidt(IDT_MF, pti ? &IDTVEC(fpu_pti) : &IDTVEC(fpu), SDT_SYSIGT, 1449 SEL_KPL, 0); 1450 setidt(IDT_AC, pti ? &IDTVEC(align_pti) : &IDTVEC(align), SDT_SYSIGT, 1451 SEL_KPL, 0); 1452 setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 3); 1453 setidt(IDT_XF, pti ? &IDTVEC(xmm_pti) : &IDTVEC(xmm), SDT_SYSIGT, 1454 SEL_KPL, 0); 1455 #ifdef KDTRACE_HOOKS 1456 setidt(IDT_DTRACE_RET, pti ? 
&IDTVEC(dtrace_ret_pti) : 1457 &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0); 1458 #endif 1459 #ifdef XENHVM 1460 setidt(IDT_EVTCHN, pti ? &IDTVEC(xen_intr_upcall_pti) : 1461 &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_KPL, 0); 1462 #endif 1463 r_idt.rd_limit = sizeof(idt0) - 1; 1464 r_idt.rd_base = (long) idt; 1465 lidt(&r_idt); 1466 1467 /* 1468 * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4) 1469 * transition). 1470 * Once bootblocks have updated, we can test directly for 1471 * efi_systbl != NULL here... 1472 */ 1473 if (efi_boot) 1474 vty_set_preferred(VTY_VT); 1475 1476 TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable); 1477 TUNABLE_INT_FETCH("machdep.mitigations.ibrs.disable", &hw_ibrs_disable); 1478 1479 TUNABLE_INT_FETCH("hw.spec_store_bypass_disable", &hw_ssb_disable); 1480 TUNABLE_INT_FETCH("machdep.mitigations.ssb.disable", &hw_ssb_disable); 1481 1482 TUNABLE_INT_FETCH("machdep.syscall_ret_flush_l1d", 1483 &syscall_ret_l1d_flush_mode); 1484 1485 TUNABLE_INT_FETCH("hw.mds_disable", &hw_mds_disable); 1486 TUNABLE_INT_FETCH("machdep.mitigations.mds.disable", &hw_mds_disable); 1487 1488 TUNABLE_INT_FETCH("machdep.mitigations.taa.enable", &x86_taa_enable); 1489 1490 TUNABLE_INT_FETCH("machdep.mitigations.rngds.enable", 1491 &x86_rngds_mitg_enable); 1492 1493 TUNABLE_INT_FETCH("machdep.mitigations.zenbleed.enable", 1494 &zenbleed_enable); 1495 zenbleed_sanitize_enable(); 1496 1497 finishidentcpu(); /* Final stage of CPU initialization */ 1498 1499 /* 1500 * Initialize the clock before the console so that console 1501 * initialization can use DELAY(). 
1502 */ 1503 clock_init(); 1504 1505 initializecpu(); /* Initialize CPU registers */ 1506 1507 amd64_bsp_ist_init(pc); 1508 1509 /* Set the IO permission bitmap (empty due to tss seg limit) */ 1510 pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) + 1511 IOPERM_BITMAP_SIZE; 1512 1513 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 1514 ltr(gsel_tss); 1515 1516 amd64_conf_fast_syscall(); 1517 1518 /* 1519 * We initialize the PCB pointer early so that exception 1520 * handlers will work. Also set up td_critnest to short-cut 1521 * the page fault handler. 1522 */ 1523 cpu_max_ext_state_size = sizeof(struct savefpu); 1524 set_top_of_stack_td(&thread0); 1525 thread0.td_pcb = get_pcb_td(&thread0); 1526 thread0.td_critnest = 1; 1527 1528 /* 1529 * The console and kdb should be initialized even earlier than here, 1530 * but some console drivers don't work until after getmemsize(). 1531 * Default to late console initialization to support these drivers. 1532 * This loses mainly printf()s in getmemsize() and early debugging. 1533 */ 1534 TUNABLE_INT_FETCH("debug.late_console", &late_console); 1535 if (!late_console) { 1536 cninit(); 1537 amd64_kdb_init(); 1538 } 1539 1540 getmemsize(kmdp, physfree); 1541 init_param2(physmem); 1542 1543 /* now running on new page tables, configured,and u/iom is accessible */ 1544 1545 #ifdef DEV_PCI 1546 /* This call might adjust phys_avail[]. */ 1547 pci_early_quirks(); 1548 #endif 1549 1550 if (late_console) 1551 cninit(); 1552 1553 /* 1554 * Dump the boot metadata. We have to wait for cninit() since console 1555 * output is required. If it's grossly incorrect the kernel will never 1556 * make it this far. 1557 */ 1558 if (getenv_is_true("debug.dump_modinfo_at_boot")) 1559 preload_dump(); 1560 1561 #ifdef DEV_ISA 1562 #ifdef DEV_ATPIC 1563 elcr_probe(); 1564 atpic_startup(); 1565 #else 1566 /* Reset and mask the atpics and leave them shut down. 
*/ 1567 atpic_reset(); 1568 1569 /* 1570 * Point the ICU spurious interrupt vectors at the APIC spurious 1571 * interrupt handler. 1572 */ 1573 setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); 1574 setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); 1575 #endif 1576 #else 1577 #error "have you forgotten the isa device?" 1578 #endif 1579 1580 if (late_console) 1581 amd64_kdb_init(); 1582 1583 msgbufinit(msgbufp, msgbufsize); 1584 fpuinit(); 1585 1586 /* make an initial tss so cpu can get interrupt stack on syscall! */ 1587 rsp0 = thread0.td_md.md_stack_base; 1588 /* Ensure the stack is aligned to 16 bytes */ 1589 rsp0 &= ~0xFul; 1590 PCPU_PTR(common_tss)->tss_rsp0 = rsp0; 1591 amd64_bsp_pcpu_init2(rsp0); 1592 1593 /* transfer to user mode */ 1594 1595 _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); 1596 _udatasel = GSEL(GUDATA_SEL, SEL_UPL); 1597 _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); 1598 _ufssel = GSEL(GUFS32_SEL, SEL_UPL); 1599 _ugssel = GSEL(GUGS32_SEL, SEL_UPL); 1600 1601 load_ds(_udatasel); 1602 load_es(_udatasel); 1603 load_fs(_ufssel); 1604 1605 /* setup proc 0's pcb */ 1606 thread0.td_pcb->pcb_flags = 0; 1607 1608 env = kern_getenv("kernelname"); 1609 if (env != NULL) 1610 strlcpy(kernelname, env, sizeof(kernelname)); 1611 1612 kcsan_cpu_init(0); 1613 1614 #ifdef FDT 1615 x86_init_fdt(); 1616 #endif 1617 thread0.td_critnest = 0; 1618 1619 kasan_init(); 1620 kmsan_init(); 1621 1622 TSEXIT(); 1623 1624 /* Location of kernel stack for locore */ 1625 return (thread0.td_md.md_stack_base); 1626 } 1627 1628 void 1629 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) 1630 { 1631 1632 pcpu->pc_acpi_id = 0xffffffff; 1633 } 1634 1635 static int 1636 smap_sysctl_handler(SYSCTL_HANDLER_ARGS) 1637 { 1638 struct bios_smap *smapbase; 1639 struct bios_smap_xattr smap; 1640 caddr_t kmdp; 1641 uint32_t *smapattr; 1642 int count, error, i; 1643 1644 /* Retrieve the system memory map from the loader. 
*/ 1645 kmdp = preload_search_by_type("elf kernel"); 1646 if (kmdp == NULL) 1647 kmdp = preload_search_by_type("elf64 kernel"); 1648 smapbase = (struct bios_smap *)preload_search_info(kmdp, 1649 MODINFO_METADATA | MODINFOMD_SMAP); 1650 if (smapbase == NULL) 1651 return (0); 1652 smapattr = (uint32_t *)preload_search_info(kmdp, 1653 MODINFO_METADATA | MODINFOMD_SMAP_XATTR); 1654 count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase); 1655 error = 0; 1656 for (i = 0; i < count; i++) { 1657 smap.base = smapbase[i].base; 1658 smap.length = smapbase[i].length; 1659 smap.type = smapbase[i].type; 1660 if (smapattr != NULL) 1661 smap.xattr = smapattr[i]; 1662 else 1663 smap.xattr = 0; 1664 error = SYSCTL_OUT(req, &smap, sizeof(smap)); 1665 } 1666 return (error); 1667 } 1668 SYSCTL_PROC(_machdep, OID_AUTO, smap, 1669 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 1670 smap_sysctl_handler, "S,bios_smap_xattr", 1671 "Raw BIOS SMAP data"); 1672 1673 static int 1674 efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS) 1675 { 1676 struct efi_map_header *efihdr; 1677 caddr_t kmdp; 1678 uint32_t efisize; 1679 1680 kmdp = preload_search_by_type("elf kernel"); 1681 if (kmdp == NULL) 1682 kmdp = preload_search_by_type("elf64 kernel"); 1683 efihdr = (struct efi_map_header *)preload_search_info(kmdp, 1684 MODINFO_METADATA | MODINFOMD_EFI_MAP); 1685 if (efihdr == NULL) 1686 return (0); 1687 efisize = *((uint32_t *)efihdr - 1); 1688 return (SYSCTL_OUT(req, efihdr, efisize)); 1689 } 1690 SYSCTL_PROC(_machdep, OID_AUTO, efi_map, 1691 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 1692 efi_map_sysctl_handler, "S,efi_map_header", 1693 "Raw EFI Memory Map"); 1694 1695 void 1696 spinlock_enter(void) 1697 { 1698 struct thread *td; 1699 register_t flags; 1700 1701 td = curthread; 1702 if (td->td_md.md_spinlock_count == 0) { 1703 flags = intr_disable(); 1704 td->td_md.md_spinlock_count = 1; 1705 td->td_md.md_saved_flags = flags; 1706 critical_enter(); 1707 } else 1708 
td->td_md.md_spinlock_count++; 1709 } 1710 1711 void 1712 spinlock_exit(void) 1713 { 1714 struct thread *td; 1715 register_t flags; 1716 1717 td = curthread; 1718 flags = td->td_md.md_saved_flags; 1719 td->td_md.md_spinlock_count--; 1720 if (td->td_md.md_spinlock_count == 0) { 1721 critical_exit(); 1722 intr_restore(flags); 1723 } 1724 } 1725 1726 /* 1727 * Construct a PCB from a trapframe. This is called from kdb_trap() where 1728 * we want to start a backtrace from the function that caused us to enter 1729 * the debugger. We have the context in the trapframe, but base the trace 1730 * on the PCB. The PCB doesn't have to be perfect, as long as it contains 1731 * enough for a backtrace. 1732 */ 1733 void 1734 makectx(struct trapframe *tf, struct pcb *pcb) 1735 { 1736 1737 pcb->pcb_r12 = tf->tf_r12; 1738 pcb->pcb_r13 = tf->tf_r13; 1739 pcb->pcb_r14 = tf->tf_r14; 1740 pcb->pcb_r15 = tf->tf_r15; 1741 pcb->pcb_rbp = tf->tf_rbp; 1742 pcb->pcb_rbx = tf->tf_rbx; 1743 pcb->pcb_rip = tf->tf_rip; 1744 pcb->pcb_rsp = tf->tf_rsp; 1745 } 1746 1747 /* 1748 * The pcb_flags is only modified by current thread, or by other threads 1749 * when current thread is stopped. However, current thread may change it 1750 * from the interrupt context in cpu_switch(), or in the trap handler. 1751 * When we read-modify-write pcb_flags from C sources, compiler may generate 1752 * code that is not atomic regarding the interrupt handler. If a trap or 1753 * interrupt happens and any flag is modified from the handler, it can be 1754 * clobbered with the cached value later. Therefore, we implement setting 1755 * and clearing flags with single-instruction functions, which do not race 1756 * with possible modification of the flags from the trap or interrupt context, 1757 * because traps and interrupts are executed only on instruction boundary. 
1758 */ 1759 void 1760 set_pcb_flags_raw(struct pcb *pcb, const u_int flags) 1761 { 1762 1763 __asm __volatile("orl %1,%0" 1764 : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags) 1765 : "cc", "memory"); 1766 1767 } 1768 1769 /* 1770 * The support for RDFSBASE, WRFSBASE and similar instructions for %gs 1771 * base requires that kernel saves MSR_FSBASE and MSR_{K,}GSBASE into 1772 * pcb if user space modified the bases. We must save on the context 1773 * switch or if the return to usermode happens through the doreti. 1774 * 1775 * Tracking of both events is performed by the pcb flag PCB_FULL_IRET, 1776 * which have a consequence that the base MSRs must be saved each time 1777 * the PCB_FULL_IRET flag is set. We disable interrupts to sync with 1778 * context switches. 1779 */ 1780 static void 1781 set_pcb_flags_fsgsbase(struct pcb *pcb, const u_int flags) 1782 { 1783 register_t r; 1784 1785 if (curpcb == pcb && 1786 (flags & PCB_FULL_IRET) != 0 && 1787 (pcb->pcb_flags & PCB_FULL_IRET) == 0) { 1788 r = intr_disable(); 1789 if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) { 1790 if (rfs() == _ufssel) 1791 pcb->pcb_fsbase = rdfsbase(); 1792 if (rgs() == _ugssel) 1793 pcb->pcb_gsbase = rdmsr(MSR_KGSBASE); 1794 } 1795 set_pcb_flags_raw(pcb, flags); 1796 intr_restore(r); 1797 } else { 1798 set_pcb_flags_raw(pcb, flags); 1799 } 1800 } 1801 1802 DEFINE_IFUNC(, void, set_pcb_flags, (struct pcb *, const u_int)) 1803 { 1804 1805 return ((cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0 ? 1806 set_pcb_flags_fsgsbase : set_pcb_flags_raw); 1807 } 1808 1809 void 1810 clear_pcb_flags(struct pcb *pcb, const u_int flags) 1811 { 1812 1813 __asm __volatile("andl %1,%0" 1814 : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags) 1815 : "cc", "memory"); 1816 } 1817 1818 #ifdef KDB 1819 1820 /* 1821 * Provide inb() and outb() as functions. They are normally only available as 1822 * inline functions, thus cannot be called from the debugger. 
1823 */ 1824 1825 /* silence compiler warnings */ 1826 u_char inb_(u_short); 1827 void outb_(u_short, u_char); 1828 1829 u_char 1830 inb_(u_short port) 1831 { 1832 return inb(port); 1833 } 1834 1835 void 1836 outb_(u_short port, u_char data) 1837 { 1838 outb(port, data); 1839 } 1840 1841 #endif /* KDB */ 1842 1843 #undef memset 1844 #undef memmove 1845 #undef memcpy 1846 1847 void *memset_std(void *buf, int c, size_t len); 1848 void *memset_erms(void *buf, int c, size_t len); 1849 void *memmove_std(void * _Nonnull dst, const void * _Nonnull src, 1850 size_t len); 1851 void *memmove_erms(void * _Nonnull dst, const void * _Nonnull src, 1852 size_t len); 1853 void *memcpy_std(void * _Nonnull dst, const void * _Nonnull src, 1854 size_t len); 1855 void *memcpy_erms(void * _Nonnull dst, const void * _Nonnull src, 1856 size_t len); 1857 1858 #ifdef KCSAN 1859 /* 1860 * These fail to build as ifuncs when used with KCSAN. 1861 */ 1862 void * 1863 memset(void *buf, int c, size_t len) 1864 { 1865 1866 return (memset_std(buf, c, len)); 1867 } 1868 1869 void * 1870 memmove(void * _Nonnull dst, const void * _Nonnull src, size_t len) 1871 { 1872 1873 return (memmove_std(dst, src, len)); 1874 } 1875 1876 void * 1877 memcpy(void * _Nonnull dst, const void * _Nonnull src, size_t len) 1878 { 1879 1880 return (memcpy_std(dst, src, len)); 1881 } 1882 #else 1883 DEFINE_IFUNC(, void *, memset, (void *, int, size_t)) 1884 { 1885 1886 return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ? 1887 memset_erms : memset_std); 1888 } 1889 1890 DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull, 1891 size_t)) 1892 { 1893 1894 return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ? 1895 memmove_erms : memmove_std); 1896 } 1897 1898 DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,size_t)) 1899 { 1900 1901 return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ? 
1902 memcpy_erms : memcpy_std); 1903 } 1904 #endif 1905 1906 void pagezero_std(void *addr); 1907 void pagezero_erms(void *addr); 1908 DEFINE_IFUNC(, void , pagezero, (void *)) 1909 { 1910 1911 return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ? 1912 pagezero_erms : pagezero_std); 1913 } 1914