1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 2003 Peter Wemm. 5 * Copyright (c) 1992 Terrence R. Lambert. 6 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * William Jolitz. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 41 */ 42 43 #include <sys/cdefs.h> 44 __FBSDID("$FreeBSD$"); 45 46 #include "opt_atpic.h" 47 #include "opt_cpu.h" 48 #include "opt_ddb.h" 49 #include "opt_inet.h" 50 #include "opt_isa.h" 51 #include "opt_kstack_pages.h" 52 #include "opt_maxmem.h" 53 #include "opt_pci.h" 54 #include "opt_platform.h" 55 #include "opt_sched.h" 56 57 #include <sys/param.h> 58 #include <sys/proc.h> 59 #include <sys/systm.h> 60 #include <sys/asan.h> 61 #include <sys/bio.h> 62 #include <sys/buf.h> 63 #include <sys/bus.h> 64 #include <sys/callout.h> 65 #include <sys/cons.h> 66 #include <sys/cpu.h> 67 #include <sys/csan.h> 68 #include <sys/efi.h> 69 #include <sys/eventhandler.h> 70 #include <sys/exec.h> 71 #include <sys/imgact.h> 72 #include <sys/kdb.h> 73 #include <sys/kernel.h> 74 #include <sys/ktr.h> 75 #include <sys/linker.h> 76 #include <sys/lock.h> 77 #include <sys/malloc.h> 78 #include <sys/memrange.h> 79 #include <sys/msan.h> 80 #include <sys/msgbuf.h> 81 #include <sys/mutex.h> 82 #include <sys/pcpu.h> 83 #include <sys/ptrace.h> 84 #include <sys/reboot.h> 85 #include <sys/reg.h> 86 #include <sys/rwlock.h> 87 #include <sys/sched.h> 88 #include <sys/signalvar.h> 89 #ifdef SMP 90 #include <sys/smp.h> 91 #endif 92 #include <sys/syscallsubr.h> 93 #include <sys/sysctl.h> 94 #include <sys/sysent.h> 95 #include <sys/sysproto.h> 96 #include <sys/ucontext.h> 97 #include <sys/vmmeter.h> 98 99 #include <vm/vm.h> 
100 #include <vm/vm_param.h> 101 #include <vm/vm_extern.h> 102 #include <vm/vm_kern.h> 103 #include <vm/vm_page.h> 104 #include <vm/vm_map.h> 105 #include <vm/vm_object.h> 106 #include <vm/vm_pager.h> 107 #include <vm/vm_phys.h> 108 #include <vm/vm_dumpset.h> 109 110 #ifdef DDB 111 #ifndef KDB 112 #error KDB must be enabled in order for DDB to work! 113 #endif 114 #include <ddb/ddb.h> 115 #include <ddb/db_sym.h> 116 #endif 117 118 #include <net/netisr.h> 119 120 #include <machine/clock.h> 121 #include <machine/cpu.h> 122 #include <machine/cputypes.h> 123 #include <machine/frame.h> 124 #include <machine/intr_machdep.h> 125 #include <x86/mca.h> 126 #include <machine/md_var.h> 127 #include <machine/metadata.h> 128 #include <machine/pc/bios.h> 129 #include <machine/pcb.h> 130 #include <machine/proc.h> 131 #include <machine/sigframe.h> 132 #include <machine/specialreg.h> 133 #include <machine/trap.h> 134 #include <machine/tss.h> 135 #include <x86/ucode.h> 136 #include <x86/ifunc.h> 137 #ifdef SMP 138 #include <machine/smp.h> 139 #endif 140 #ifdef FDT 141 #include <x86/fdt.h> 142 #endif 143 144 #ifdef DEV_ATPIC 145 #include <x86/isa/icu.h> 146 #else 147 #include <x86/apicvar.h> 148 #endif 149 150 #include <isa/isareg.h> 151 #include <isa/rtc.h> 152 #include <x86/init.h> 153 154 /* Sanity check for __curthread() */ 155 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); 156 157 /* 158 * The PTI trampoline stack needs enough space for a hardware trapframe and a 159 * couple of scratch registers, as well as the trapframe left behind after an 160 * iret fault. 161 */ 162 CTASSERT(PC_PTI_STACK_SZ * sizeof(register_t) >= 2 * sizeof(struct pti_frame) - 163 offsetof(struct pti_frame, pti_rip)); 164 165 extern u_int64_t hammer_time(u_int64_t, u_int64_t); 166 167 static void cpu_startup(void *); 168 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); 169 170 /* Probe 8254 PIT and TSC. 
*/ 171 static void native_clock_source_init(void); 172 173 /* Preload data parse function */ 174 static caddr_t native_parse_preload_data(u_int64_t); 175 176 /* Native function to fetch and parse the e820 map */ 177 static void native_parse_memmap(caddr_t, vm_paddr_t *, int *); 178 179 /* Default init_ops implementation. */ 180 struct init_ops init_ops = { 181 .parse_preload_data = native_parse_preload_data, 182 .early_clock_source_init = native_clock_source_init, 183 .early_delay = i8254_delay, 184 .parse_memmap = native_parse_memmap, 185 }; 186 187 /* 188 * Physical address of the EFI System Table. Stashed from the metadata hints 189 * passed into the kernel and used by the EFI code to call runtime services. 190 */ 191 vm_paddr_t efi_systbl_phys; 192 193 /* Intel ICH registers */ 194 #define ICH_PMBASE 0x400 195 #define ICH_SMI_EN ICH_PMBASE + 0x30 196 197 int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; 198 199 int cold = 1; 200 201 long Maxmem = 0; 202 long realmem = 0; 203 int late_console = 1; 204 205 struct kva_md_info kmi; 206 207 struct region_descriptor r_idt; 208 209 struct pcpu *__pcpu; 210 struct pcpu temp_bsp_pcpu; 211 212 struct mtx icu_lock; 213 214 struct mem_range_softc mem_range_softc; 215 216 struct mtx dt_lock; /* lock for GDT and LDT */ 217 218 void (*vmm_resume_p)(void); 219 220 bool efi_boot; 221 222 static void 223 cpu_startup(dummy) 224 void *dummy; 225 { 226 uintmax_t memsize; 227 char *sysenv; 228 229 /* 230 * On MacBooks, we need to disallow the legacy USB circuit to 231 * generate an SMI# because this can cause several problems, 232 * namely: incorrect CPU frequency detection and failure to 233 * start the APs. 234 * We do this by disabling a bit in the SMI_EN (SMI Control and 235 * Enable register) of the Intel ICH LPC Interface Bridge. 
236 */ 237 sysenv = kern_getenv("smbios.system.product"); 238 if (sysenv != NULL) { 239 if (strncmp(sysenv, "MacBook1,1", 10) == 0 || 240 strncmp(sysenv, "MacBook3,1", 10) == 0 || 241 strncmp(sysenv, "MacBook4,1", 10) == 0 || 242 strncmp(sysenv, "MacBookPro1,1", 13) == 0 || 243 strncmp(sysenv, "MacBookPro1,2", 13) == 0 || 244 strncmp(sysenv, "MacBookPro3,1", 13) == 0 || 245 strncmp(sysenv, "MacBookPro4,1", 13) == 0 || 246 strncmp(sysenv, "Macmini1,1", 10) == 0) { 247 if (bootverbose) 248 printf("Disabling LEGACY_USB_EN bit on " 249 "Intel ICH.\n"); 250 outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); 251 } 252 freeenv(sysenv); 253 } 254 255 /* 256 * Good {morning,afternoon,evening,night}. 257 */ 258 startrtclock(); 259 printcpuinfo(); 260 261 /* 262 * Display physical memory if SMBIOS reports reasonable amount. 263 */ 264 memsize = 0; 265 sysenv = kern_getenv("smbios.memory.enabled"); 266 if (sysenv != NULL) { 267 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; 268 freeenv(sysenv); 269 } 270 if (memsize < ptoa((uintmax_t)vm_free_count())) 271 memsize = ptoa((uintmax_t)Maxmem); 272 printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); 273 realmem = atop(memsize); 274 275 /* 276 * Display any holes after the first chunk of extended memory. 
277 */ 278 if (bootverbose) { 279 int indx; 280 281 printf("Physical memory chunk(s):\n"); 282 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { 283 vm_paddr_t size; 284 285 size = phys_avail[indx + 1] - phys_avail[indx]; 286 printf( 287 "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", 288 (uintmax_t)phys_avail[indx], 289 (uintmax_t)phys_avail[indx + 1] - 1, 290 (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); 291 } 292 } 293 294 vm_ksubmap_init(&kmi); 295 296 printf("avail memory = %ju (%ju MB)\n", 297 ptoa((uintmax_t)vm_free_count()), 298 ptoa((uintmax_t)vm_free_count()) / 1048576); 299 #ifdef DEV_PCI 300 if (bootverbose && intel_graphics_stolen_base != 0) 301 printf("intel stolen mem: base %#jx size %ju MB\n", 302 (uintmax_t)intel_graphics_stolen_base, 303 (uintmax_t)intel_graphics_stolen_size / 1024 / 1024); 304 #endif 305 306 /* 307 * Set up buffers, so they can be used to read disk labels. 308 */ 309 bufinit(); 310 vm_pager_bufferinit(); 311 312 cpu_setregs(); 313 } 314 315 static void 316 late_ifunc_resolve(void *dummy __unused) 317 { 318 link_elf_late_ireloc(); 319 } 320 SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL); 321 322 323 void 324 cpu_setregs(void) 325 { 326 register_t cr0; 327 328 cr0 = rcr0(); 329 /* 330 * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the 331 * BSP. See the comments there about why we set them. 
*/
	cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
	load_cr0(cr0);
}

/*
 * Initialize amd64 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */

/*
 * Statically allocated stacks for the exception handlers that run on
 * dedicated IST stacks (the double fault, MC#, NMI and debug handlers).
 * A struct nmi_pcpu is stored at the top of each of these stacks; the
 * CTASSERT below pins its size, which the IST setup code depends on.
 */
static char dblfault_stack[DBLFAULT_STACK_SIZE] __aligned(16);
static char mce0_stack[MCE_STACK_SIZE] __aligned(16);
static char nmi0_stack[NMI_STACK_SIZE] __aligned(16);
static char dbg0_stack[DBG_STACK_SIZE] __aligned(16);
CTASSERT(sizeof(struct nmi_pcpu) == 16);

/*
 * Software prototypes -- in more palatable form.
 *
 * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same
 * slots as corresponding segments for i386 kernel.
 */
struct soft_segment_descriptor gdt_segs[] = {
	/* GNULL_SEL	0 Null Descriptor */
	{	.ssd_base = 0x0,
		.ssd_limit = 0x0,
		.ssd_type = 0,
		.ssd_dpl = 0,
		.ssd_p = 0,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0		},
	/* GNULL2_SEL	1 Null Descriptor */
	{	.ssd_base = 0x0,
		.ssd_limit = 0x0,
		.ssd_type = 0,
		.ssd_dpl = 0,
		.ssd_p = 0,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0		},
	/*
	 * GUFS32_SEL	2 32 bit %gs Descriptor for user
	 * NOTE(review): the %gs/%fs labels on this and the next entry look
	 * swapped relative to the GUFS32/GUGS32 selector names -- confirm
	 * against sys/amd64/include/segments.h before relying on them.
	 */
	{	.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMRWA,
		.ssd_dpl = SEL_UPL,
		.ssd_p = 1,
		.ssd_long = 0,
		.ssd_def32 = 1,
		.ssd_gran = 1		},
	/* GUGS32_SEL	3 32 bit %fs Descriptor for user */
	{	.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMRWA,
		.ssd_dpl = SEL_UPL,
		.ssd_p = 1,
		.ssd_long = 0,
		.ssd_def32 = 1,
		.ssd_gran = 1		},
	/* GCODE_SEL	4 Code Descriptor for kernel */
	{	.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMERA,
		.ssd_dpl = SEL_KPL,
		.ssd_p = 1,
		.ssd_long = 1,
		.ssd_def32 = 0,
		.ssd_gran = 1		},
	/* GDATA_SEL	5 Data Descriptor for kernel
 */
	{	.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMRWA,
		.ssd_dpl = SEL_KPL,
		.ssd_p = 1,
		.ssd_long = 1,
		.ssd_def32 = 0,
		.ssd_gran = 1		},
	/* GUCODE32_SEL	6 32 bit Code Descriptor for user */
	{	.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMERA,
		.ssd_dpl = SEL_UPL,
		.ssd_p = 1,
		.ssd_long = 0,
		.ssd_def32 = 1,
		.ssd_gran = 1		},
	/* GUDATA_SEL	7 32/64 bit Data Descriptor for user */
	{	.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMRWA,
		.ssd_dpl = SEL_UPL,
		.ssd_p = 1,
		.ssd_long = 0,
		.ssd_def32 = 1,
		.ssd_gran = 1		},
	/* GUCODE_SEL	8 64 bit Code Descriptor for user */
	{	.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMERA,
		.ssd_dpl = SEL_UPL,
		.ssd_p = 1,
		.ssd_long = 1,
		.ssd_def32 = 0,
		.ssd_gran = 1		},
	/* GPROC0_SEL	9 Proc 0 Tss Descriptor */
	{	.ssd_base = 0x0,
		.ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1,
		.ssd_type = SDT_SYSTSS,
		.ssd_dpl = SEL_KPL,
		.ssd_p = 1,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0		},
	/* Actually, the TSS is a system descriptor which is double size */
	{	.ssd_base = 0x0,
		.ssd_limit = 0x0,
		.ssd_type = 0,
		.ssd_dpl = 0,
		.ssd_p = 0,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0		},
	/* GUSERLDT_SEL	11 LDT Descriptor */
	{	.ssd_base = 0x0,
		.ssd_limit = 0x0,
		.ssd_type = 0,
		.ssd_dpl = 0,
		.ssd_p = 0,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0		},
	/* GUSERLDT_SEL	12 LDT Descriptor, double size */
	{	.ssd_base = 0x0,
		.ssd_limit = 0x0,
		.ssd_type = 0,
		.ssd_dpl = 0,
		.ssd_p = 0,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0		},
};
_Static_assert(nitems(gdt_segs) == NGDT, "Stale NGDT");

/*
 * Install an IDT gate: point slot 'idx' at handler 'func' with gate type
 * 'typ', descriptor privilege level 'dpl', and interrupt stack table index
 * 'ist' (0 means no IST stack switch).
 */
void
setidt(int idx, inthand_t *func, int typ, int dpl, int ist)
{
	struct gate_descriptor *ip;

	ip = idt + idx;
ip->gd_looffset = (uintptr_t)func; 487 ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL); 488 ip->gd_ist = ist; 489 ip->gd_xx = 0; 490 ip->gd_type = typ; 491 ip->gd_dpl = dpl; 492 ip->gd_p = 1; 493 ip->gd_hioffset = ((uintptr_t)func)>>16 ; 494 } 495 496 extern inthand_t 497 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), 498 IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), 499 IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), 500 IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), 501 IDTVEC(xmm), IDTVEC(dblfault), 502 IDTVEC(div_pti), IDTVEC(bpt_pti), 503 IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti), 504 IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti), 505 IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti), 506 IDTVEC(rsvd_pti), IDTVEC(fpu_pti), IDTVEC(align_pti), 507 IDTVEC(xmm_pti), 508 #ifdef KDTRACE_HOOKS 509 IDTVEC(dtrace_ret), IDTVEC(dtrace_ret_pti), 510 #endif 511 #ifdef XENHVM 512 IDTVEC(xen_intr_upcall), IDTVEC(xen_intr_upcall_pti), 513 #endif 514 IDTVEC(fast_syscall), IDTVEC(fast_syscall32), 515 IDTVEC(fast_syscall_pti); 516 517 #ifdef DDB 518 /* 519 * Display the index and function name of any IDT entries that don't use 520 * the default 'rsvd' entry point. 521 */ 522 DB_SHOW_COMMAND_FLAGS(idt, db_show_idt, DB_CMD_MEMSAFE) 523 { 524 struct gate_descriptor *ip; 525 int idx; 526 uintptr_t func; 527 528 ip = idt; 529 for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { 530 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); 531 if (func != (uintptr_t)&IDTVEC(rsvd)) { 532 db_printf("%3d\t", idx); 533 db_printsym(func, DB_STGY_PROC); 534 db_printf("\n"); 535 } 536 ip++; 537 } 538 } 539 540 /* Show privileged registers. 
*/ 541 DB_SHOW_COMMAND_FLAGS(sysregs, db_show_sysregs, DB_CMD_MEMSAFE) 542 { 543 struct { 544 uint16_t limit; 545 uint64_t base; 546 } __packed idtr, gdtr; 547 uint16_t ldt, tr; 548 549 __asm __volatile("sidt %0" : "=m" (idtr)); 550 db_printf("idtr\t0x%016lx/%04x\n", 551 (u_long)idtr.base, (u_int)idtr.limit); 552 __asm __volatile("sgdt %0" : "=m" (gdtr)); 553 db_printf("gdtr\t0x%016lx/%04x\n", 554 (u_long)gdtr.base, (u_int)gdtr.limit); 555 __asm __volatile("sldt %0" : "=r" (ldt)); 556 db_printf("ldtr\t0x%04x\n", ldt); 557 __asm __volatile("str %0" : "=r" (tr)); 558 db_printf("tr\t0x%04x\n", tr); 559 db_printf("cr0\t0x%016lx\n", rcr0()); 560 db_printf("cr2\t0x%016lx\n", rcr2()); 561 db_printf("cr3\t0x%016lx\n", rcr3()); 562 db_printf("cr4\t0x%016lx\n", rcr4()); 563 if (rcr4() & CR4_XSAVE) 564 db_printf("xcr0\t0x%016lx\n", rxcr(0)); 565 db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER)); 566 if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX)) 567 db_printf("FEATURES_CTL\t%016lx\n", 568 rdmsr(MSR_IA32_FEATURE_CONTROL)); 569 db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR)); 570 db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT)); 571 db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE)); 572 } 573 574 DB_SHOW_COMMAND_FLAGS(dbregs, db_show_dbregs, DB_CMD_MEMSAFE) 575 { 576 577 db_printf("dr0\t0x%016lx\n", rdr0()); 578 db_printf("dr1\t0x%016lx\n", rdr1()); 579 db_printf("dr2\t0x%016lx\n", rdr2()); 580 db_printf("dr3\t0x%016lx\n", rdr3()); 581 db_printf("dr6\t0x%016lx\n", rdr6()); 582 db_printf("dr7\t0x%016lx\n", rdr7()); 583 } 584 #endif 585 586 void 587 sdtossd(sd, ssd) 588 struct user_segment_descriptor *sd; 589 struct soft_segment_descriptor *ssd; 590 { 591 592 ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; 593 ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; 594 ssd->ssd_type = sd->sd_type; 595 ssd->ssd_dpl = sd->sd_dpl; 596 ssd->ssd_p = sd->sd_p; 597 ssd->ssd_long = sd->sd_long; 598 ssd->ssd_def32 = sd->sd_def32; 599 ssd->ssd_gran = sd->sd_gran; 600 } 601 602 
void 603 ssdtosd(ssd, sd) 604 struct soft_segment_descriptor *ssd; 605 struct user_segment_descriptor *sd; 606 { 607 608 sd->sd_lobase = (ssd->ssd_base) & 0xffffff; 609 sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff; 610 sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; 611 sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; 612 sd->sd_type = ssd->ssd_type; 613 sd->sd_dpl = ssd->ssd_dpl; 614 sd->sd_p = ssd->ssd_p; 615 sd->sd_long = ssd->ssd_long; 616 sd->sd_def32 = ssd->ssd_def32; 617 sd->sd_gran = ssd->ssd_gran; 618 } 619 620 void 621 ssdtosyssd(ssd, sd) 622 struct soft_segment_descriptor *ssd; 623 struct system_segment_descriptor *sd; 624 { 625 626 sd->sd_lobase = (ssd->ssd_base) & 0xffffff; 627 sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful; 628 sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; 629 sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; 630 sd->sd_type = ssd->ssd_type; 631 sd->sd_dpl = ssd->ssd_dpl; 632 sd->sd_p = ssd->ssd_p; 633 sd->sd_gran = ssd->ssd_gran; 634 } 635 636 u_int basemem; 637 638 static int 639 add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, 640 int *physmap_idxp) 641 { 642 int i, insert_idx, physmap_idx; 643 644 physmap_idx = *physmap_idxp; 645 646 if (length == 0) 647 return (1); 648 649 /* 650 * Find insertion point while checking for overlap. Start off by 651 * assuming the new entry will be added to the end. 652 * 653 * NB: physmap_idx points to the next free slot. 654 */ 655 insert_idx = physmap_idx; 656 for (i = 0; i <= physmap_idx; i += 2) { 657 if (base < physmap[i + 1]) { 658 if (base + length <= physmap[i]) { 659 insert_idx = i; 660 break; 661 } 662 if (boothowto & RB_VERBOSE) 663 printf( 664 "Overlapping memory regions, ignoring second region\n"); 665 return (1); 666 } 667 } 668 669 /* See if we can prepend to the next entry. */ 670 if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { 671 physmap[insert_idx] = base; 672 return (1); 673 } 674 675 /* See if we can append to the previous entry. 
*/ 676 if (insert_idx > 0 && base == physmap[insert_idx - 1]) { 677 physmap[insert_idx - 1] += length; 678 return (1); 679 } 680 681 physmap_idx += 2; 682 *physmap_idxp = physmap_idx; 683 if (physmap_idx == PHYS_AVAIL_ENTRIES) { 684 printf( 685 "Too many segments in the physical address map, giving up\n"); 686 return (0); 687 } 688 689 /* 690 * Move the last 'N' entries down to make room for the new 691 * entry if needed. 692 */ 693 for (i = (physmap_idx - 2); i > insert_idx; i -= 2) { 694 physmap[i] = physmap[i - 2]; 695 physmap[i + 1] = physmap[i - 1]; 696 } 697 698 /* Insert the new entry. */ 699 physmap[insert_idx] = base; 700 physmap[insert_idx + 1] = base + length; 701 return (1); 702 } 703 704 void 705 bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize, 706 vm_paddr_t *physmap, int *physmap_idx) 707 { 708 struct bios_smap *smap, *smapend; 709 710 smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); 711 712 for (smap = smapbase; smap < smapend; smap++) { 713 if (boothowto & RB_VERBOSE) 714 printf("SMAP type=%02x base=%016lx len=%016lx\n", 715 smap->type, smap->base, smap->length); 716 717 if (smap->type != SMAP_TYPE_MEMORY) 718 continue; 719 720 if (!add_physmap_entry(smap->base, smap->length, physmap, 721 physmap_idx)) 722 break; 723 } 724 } 725 726 static void 727 add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, 728 int *physmap_idx) 729 { 730 struct efi_md *map, *p; 731 const char *type; 732 size_t efisz; 733 int ndesc, i; 734 735 static const char *types[] = { 736 "Reserved", 737 "LoaderCode", 738 "LoaderData", 739 "BootServicesCode", 740 "BootServicesData", 741 "RuntimeServicesCode", 742 "RuntimeServicesData", 743 "ConventionalMemory", 744 "UnusableMemory", 745 "ACPIReclaimMemory", 746 "ACPIMemoryNVS", 747 "MemoryMappedIO", 748 "MemoryMappedIOPortSpace", 749 "PalCode", 750 "PersistentMemory" 751 }; 752 753 /* 754 * Memory map data provided by UEFI via the GetMemoryMap 755 * Boot Services API. 
 */
	/* Round the header size up to 16 bytes to find the first descriptor. */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	/* Guard against a malformed map before dividing. */
	if (efihdr->descriptor_size == 0)
		return;
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	if (boothowto & RB_VERBOSE)
		printf("%23s %12s %12s %8s %4s\n",
		    "Type", "Physical", "Virtual", "#Pages", "Attr");

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (boothowto & RB_VERBOSE) {
			if (p->md_type < nitems(types))
				type = types[p->md_type];
			else
				type = "<INVALID>";
			printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
			    p->md_virt, p->md_pages);
			if (p->md_attr & EFI_MD_ATTR_UC)
				printf("UC ");
			if (p->md_attr & EFI_MD_ATTR_WC)
				printf("WC ");
			if (p->md_attr & EFI_MD_ATTR_WT)
				printf("WT ");
			if (p->md_attr & EFI_MD_ATTR_WB)
				printf("WB ");
			if (p->md_attr & EFI_MD_ATTR_UCE)
				printf("UCE ");
			if (p->md_attr & EFI_MD_ATTR_WP)
				printf("WP ");
			if (p->md_attr & EFI_MD_ATTR_RP)
				printf("RP ");
			if (p->md_attr & EFI_MD_ATTR_XP)
				printf("XP ");
			if (p->md_attr & EFI_MD_ATTR_NV)
				printf("NV ");
			if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
				printf("MORE_RELIABLE ");
			if (p->md_attr & EFI_MD_ATTR_RO)
				printf("RO ");
			if (p->md_attr & EFI_MD_ATTR_RT)
				printf("RUNTIME");
			printf("\n");
		}

		switch (p->md_type) {
		case EFI_MD_TYPE_CODE:
		case EFI_MD_TYPE_DATA:
		case EFI_MD_TYPE_BS_CODE:
		case EFI_MD_TYPE_BS_DATA:
		case EFI_MD_TYPE_FREE:
			/*
			 * We're allowed to use any entry with these types.
			 */
			break;
		default:
			continue;
		}

		if (!add_physmap_entry(p->md_phys, p->md_pages * EFI_PAGE_SIZE,
		    physmap, physmap_idx))
			break;
	}
}

/*
 * Default parse_memmap implementation: prefer the UEFI memory map from the
 * loader metadata, falling back to the BIOS E820 SMAP, and record which
 * boot method was used in 'bootmethod'.  Panics if the loader provided
 * neither map.
 */
static void
native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx)
{
	struct bios_smap *smap;
	struct efi_map_header *efihdr;
	u_int32_t size;

	/*
	 * Memory map from INT 15:E820.
	 *
	 * subr_module.c says:
	 * "Consumer may safely assume that size value precedes data."
	 * ie: an int32_t immediately precedes smap.
	 */

	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	smap = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (efihdr == NULL && smap == NULL)
		panic("No BIOS smap or EFI map info from loader!");

	if (efihdr != NULL) {
		add_efi_map_entries(efihdr, physmap, physmap_idx);
		strlcpy(bootmethod, "UEFI", sizeof(bootmethod));
	} else {
		/* The loader stores the SMAP size just before the data. */
		size = *((u_int32_t *)smap - 1);
		bios_add_smap_entries(smap, size, physmap, physmap_idx);
		strlcpy(bootmethod, "BIOS", sizeof(bootmethod));
	}
}

#define	PAGES_PER_GB	(1024 * 1024 * 1024 / PAGE_SIZE)

/*
 * Populate the (physmap) array with base/bound pairs describing the
 * available physical memory in the system, then test this memory and
 * build the phys_avail array describing the actually-available memory.
 *
 * Total memory size may be set by the kernel environment variable
 * hw.physmem or the compile-time define MAXMEM.
 *
 * XXX first should be vm_paddr_t.
 */
static void
getmemsize(caddr_t kmdp, u_int64_t first)
{
	int i, physmap_idx, pa_indx, da_indx;
	vm_paddr_t pa, physmap[PHYS_AVAIL_ENTRIES];
	u_long physmem_start, physmem_tunable, memtest;
	pt_entry_t *pte;
	quad_t dcons_addr, dcons_size;
	int page_counter;

	/*
	 * Tell the physical memory allocator about pages used to store
	 * the kernel and preloaded data.  See kmem_bootstrap_free().
	 */
	vm_phys_early_add_seg((vm_paddr_t)kernphys, trunc_page(first));

	bzero(physmap, sizeof(physmap));
	physmap_idx = 0;

	init_ops.parse_memmap(kmdp, physmap, &physmap_idx);
	/* Step back so physmap_idx indexes the last valid base/end pair. */
	physmap_idx -= 2;

	/*
	 * Find the 'base memory' segment for SMP
	 */
	basemem = 0;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (physmap[i] <= 0xA0000) {
			/* basemem is kept in KB. */
			basemem = physmap[i + 1] / 1024;
			break;
		}
	}
	if (basemem == 0 || basemem > 640) {
		if (bootverbose)
			printf(
		"Memory map doesn't contain a basemem segment, faking it");
		basemem = 640;
	}

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".  We may adjust this
	 * based on ``hw.physmem'' and the results of the memory test.
	 */
	Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
	/* MAXMEM is specified in KB; /4 converts KB to 4K pages. */
	Maxmem = MAXMEM / 4;
#endif

	if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
		Maxmem = atop(physmem_tunable);

	/*
	 * The boot memory test is disabled by default, as it takes a
	 * significant amount of time on large-memory systems, and is
	 * unfriendly to virtual machines as it unnecessarily touches all
	 * pages.
	 *
	 * A general name is used as the code may be extended to support
	 * additional tests beyond the current "page present" test.
	 */
	memtest = 0;
	TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);

	/*
	 * Don't allow MAXMEM or hw.physmem to extend the amount of memory
	 * in the system.
	 */
	if (Maxmem > atop(physmap[physmap_idx + 1]))
		Maxmem = atop(physmap[physmap_idx + 1]);

	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
	    (boothowto & RB_VERBOSE))
		printf("Physical memory use set to %ldK\n", Maxmem * 4);

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap(&first);

	/*
	 * Size up each available chunk of physical memory.
	 *
	 * XXX Some BIOSes corrupt low 64KB between suspend and resume.
	 * By default, mask off the first 16 pages unless we appear to be
	 * running in a VM.
	 */
	physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT;
	TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start);
	if (physmap[0] < physmem_start) {
		if (physmem_start < PAGE_SIZE)
			physmap[0] = PAGE_SIZE;
		else if (physmem_start >= physmap[1])
			physmap[0] = round_page(physmap[1] - PAGE_SIZE);
		else
			physmap[0] = round_page(physmem_start);
	}
	pa_indx = 0;
	da_indx = 1;
	phys_avail[pa_indx++] = physmap[0];
	phys_avail[pa_indx] = physmap[0];
	dump_avail[da_indx] = physmap[0];
	/* CMAP1/CADDR1 are the scratch mapping used by the memory test. */
	pte = CMAP1;

	/*
	 * Get dcons buffer address
	 */
	if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
	    getenv_quad("dcons.size", &dcons_size) == 0)
		dcons_addr = 0;

	/*
	 * physmap is in bytes, so when converting to page boundaries,
	 * round up the start address and round down the end address.
	 */
	page_counter = 0;
	if (memtest != 0)
		printf("Testing system memory");
	for (i = 0; i <= physmap_idx; i += 2) {
		vm_paddr_t end;

		end = ptoa((vm_paddr_t)Maxmem);
		if (physmap[i + 1] < end)
			end = trunc_page(physmap[i + 1]);
		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
			int tmp, page_bad, full;
			int *ptr = (int *)CADDR1;

			full = FALSE;
			/*
			 * block out kernel memory as not available.
			 */
			if (pa >= (vm_paddr_t)kernphys && pa < first)
				goto do_dump_avail;

			/*
			 * block out dcons buffer
			 */
			if (dcons_addr > 0
			    && pa >= trunc_page(dcons_addr)
			    && pa < dcons_addr + dcons_size)
				goto do_dump_avail;

			page_bad = FALSE;
			if (memtest == 0)
				goto skip_memtest;

			/*
			 * Print a "." every GB to show we're making
			 * progress.
			 */
			page_counter++;
			if ((page_counter % PAGES_PER_GB) == 0)
				printf(".");

			/*
			 * map page into kernel: valid, read/write,non-cacheable
			 */
			*pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD;
			invltlb();

			/* Preserve the page's original contents. */
			tmp = *(int *)ptr;
			/*
			 * Test for alternating 1's and 0's
			 */
			*(volatile int *)ptr = 0xaaaaaaaa;
			if (*(volatile int *)ptr != 0xaaaaaaaa)
				page_bad = TRUE;
			/*
			 * Test for alternating 0's and 1's
			 */
			*(volatile int *)ptr = 0x55555555;
			if (*(volatile int *)ptr != 0x55555555)
				page_bad = TRUE;
			/*
			 * Test for all 1's
			 */
			*(volatile int *)ptr = 0xffffffff;
			if (*(volatile int *)ptr != 0xffffffff)
				page_bad = TRUE;
			/*
			 * Test for all 0's
			 */
			*(volatile int *)ptr = 0x0;
			if (*(volatile int *)ptr != 0x0)
				page_bad = TRUE;
			/*
			 * Restore original value.
			 */
			*(int *)ptr = tmp;

skip_memtest:
			/*
			 * Adjust array of valid/good pages.
			 */
			if (page_bad == TRUE)
				continue;
			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer. Otherwise start a new chunk.
			 * Note that "end" points one higher than end,
			 * making the range >= start and < end.
			 * If we're also doing a speculative memory
			 * test and we at or past the end, bump up Maxmem
			 * so that we keep going. The first bad page
			 * will terminate the loop.
			 */
			if (phys_avail[pa_indx] == pa) {
				phys_avail[pa_indx] += PAGE_SIZE;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ENTRIES) {
					printf(
		"Too many holes in the physical address space, giving up\n");
					pa_indx--;
					full = TRUE;
					goto do_dump_avail;
				}
				phys_avail[pa_indx++] = pa;	/* start */
				phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
			}
			physmem++;
do_dump_avail:
			/* dump_avail also covers pages excluded above. */
			if (dump_avail[da_indx] == pa) {
				dump_avail[da_indx] += PAGE_SIZE;
			} else {
				da_indx++;
				if (da_indx == PHYS_AVAIL_ENTRIES) {
					da_indx--;
					goto do_next;
				}
				dump_avail[da_indx++] = pa; /* start */
				dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
			}
do_next:
			if (full)
				break;
		}
	}
	/* Tear down the scratch mapping used by the memory test. */
	*pte = 0;
	invltlb();
	if (memtest != 0)
		printf("\n");

	/*
	 * XXX
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(msgbufsize) >= phys_avail[pa_indx]) {
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	Maxmem = atop(phys_avail[pa_indx]);

	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(msgbufsize);

	/* Map the message buffer.
 */
	msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]);
}

/*
 * Parse the metadata that the native boot loader placed in memory:
 * relocate it to KVA, pick up boothowto, the static kernel environment,
 * the (DDB) symbol table bounds and the EFI system table address.
 * Returns the module metadata pointer for the kernel itself.
 */
static caddr_t
native_parse_preload_data(u_int64_t modulep)
{
	caddr_t kmdp;
	char *envp;
#ifdef DDB
	vm_offset_t ksym_start;
	vm_offset_t ksym_end;
#endif

	preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE);
	preload_bootstrap_relocate(KERNBASE);
	/* Newer loaders use "elf kernel"; fall back to the older tag. */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
	envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
	if (envp != NULL)
		envp += KERNBASE;
	init_static_kenv(envp, 0);
#ifdef DDB
	ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
	ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
	db_fetch_ksymtab(ksym_start, ksym_end, 0);
#endif
	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	return (kmdp);
}

/* Native (non-virtualized) early clock source: program the i8254 timer. */
static void
native_clock_source_init(void)
{
	i8254_init();
}

/*
 * Initialize the kernel debugger framework and enter it immediately
 * if the boot flags (-d) requested it.
 */
static void
amd64_kdb_init(void)
{
	kdb_init();
#ifdef KDB
	if (boothowto & RB_KDB)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
}

/*
 * Set up the fast syscall stuff: enable SYSCALL/SYSRET in EFER and
 * program the entry points (PTI-aware) and segment selectors into the
 * STAR/LSTAR/CSTAR/SF_MASK MSRs.
 */
void
amd64_conf_fast_syscall(void)
{
	uint64_t msr;

	msr = rdmsr(MSR_EFER) | EFER_SCE;
	wrmsr(MSR_EFER, msr);
	wrmsr(MSR_LSTAR, pti ? (u_int64_t)IDTVEC(fast_syscall_pti) :
	    (u_int64_t)IDTVEC(fast_syscall));
	wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
	msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
	    ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
	wrmsr(MSR_STAR, msr);
	/* RFLAGS bits cleared on syscall entry. */
	wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC);
}

/*
 * First-stage BSP per-CPU initialization: hook up the pcpu area,
 * curthread, and the cached GDT-derived descriptor pointers.
 */
void
amd64_bsp_pcpu_init1(struct pcpu *pc)
{
	struct user_segment_descriptor *gdt;

	PCPU_SET(prvspace, pc);
	gdt = *PCPU_PTR(gdt);
	PCPU_SET(curthread, &thread0);
	PCPU_SET(tssp, PCPU_PTR(common_tss));
	PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
	PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
	PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
	PCPU_SET(gs32p, &gdt[GUGS32_SEL]);
	PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK);
	PCPU_SET(smp_tlb_gen, 1);
}

/*
 * Second-stage BSP per-CPU initialization: record the kernel stack
 * pointers (regular and PTI trampoline, 16-byte aligned) and curpcb.
 */
void
amd64_bsp_pcpu_init2(uint64_t rsp0)
{

	PCPU_SET(rsp0, rsp0);
	PCPU_SET(pti_rsp0, ((vm_offset_t)PCPU_PTR(pti_stack) +
	    PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
	PCPU_SET(curpcb, thread0.td_pcb);
}

/*
 * Point the TSS interrupt stack table (IST) entries at the dedicated
 * exception stacks, stashing the pcpu pointer just above the top of
 * each stack so the handlers can find it.
 */
void
amd64_bsp_ist_init(struct pcpu *pc)
{
	struct nmi_pcpu *np;
	struct amd64tss *tssp;

	tssp = &pc->pc_common_tss;

	/* doublefault stack space, runs on ist1 */
	np = ((struct nmi_pcpu *)&dblfault_stack[sizeof(dblfault_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist1 = (long)np;

	/*
	 * NMI stack, runs on ist2.  The pcpu pointer is stored just
	 * above the start of the ist2 stack.
	 */
	np = ((struct nmi_pcpu *)&nmi0_stack[sizeof(nmi0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist2 = (long)np;

	/*
	 * MC# stack, runs on ist3.  The pcpu pointer is stored just
	 * above the start of the ist3 stack.
	 */
	np = ((struct nmi_pcpu *)&mce0_stack[sizeof(mce0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist3 = (long)np;

	/*
	 * DB# stack, runs on ist4.
	 */
	np = ((struct nmi_pcpu *)&dbg0_stack[sizeof(dbg0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist4 = (long)np;
}

/*
 * Calculate the kernel load address by inspecting page table created by loader.
 * The assumptions:
 * - kernel is mapped at KERNBASE, backed by contiguous phys memory
 *   aligned at 2M, below 4G (the latter is important for AP startup)
 * - there is a 2M hole at KERNBASE (KERNSTART = KERNBASE + 2M)
 * - kernel is mapped with 2M superpages
 * - all participating memory, i.e. kernel, modules, metadata,
 *   page table is accessible by pre-created 1:1 mapping
 *   (right now loader creates 1:1 mapping for lower 4G, and all
 *   memory is from there)
 * - there is a usable memory block right after the end of the
 *   mapped kernel and all modules/metadata, pointed to by
 *   physfree, for early allocations
 */
vm_paddr_t __nosanitizeaddress __nosanitizememory
amd64_loadaddr(void)
{
	pml4_entry_t *pml4e;
	pdp_entry_t *pdpe;
	pd_entry_t *pde;
	uint64_t cr3;

	/* Walk the loader's page table (1:1 mapped) down to the 2M PDE. */
	cr3 = rcr3();
	pml4e = (pml4_entry_t *)cr3 + pmap_pml4e_index(KERNSTART);
	pdpe = (pdp_entry_t *)(*pml4e & PG_FRAME) + pmap_pdpe_index(KERNSTART);
	pde = (pd_entry_t *)(*pdpe & PG_FRAME) + pmap_pde_index(KERNSTART);
	return (*pde & PG_FRAME);
}

/*
 * Machine-dependent bootstrap for the BSP.  Ordering below is
 * significant: pti/pcid knobs are evaluated before ifuncs resolve,
 * MSRs and the GDT are loaded before pcpu/mutex setup, and console/kdb
 * bring-up may be deferred until after getmemsize() (late_console).
 * Returns the location of the kernel stack for locore.
 */
u_int64_t
hammer_time(u_int64_t modulep, u_int64_t physfree)
{
	caddr_t kmdp;
	int gsel_tss, x;
	struct pcpu *pc;
	uint64_t rsp0;
	char *env;
	struct user_segment_descriptor *gdt;
	struct region_descriptor r_gdt;
	size_t kstack0_sz;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	kernphys = amd64_loadaddr();

	physfree += kernphys;

	kmdp = init_ops.parse_preload_data(modulep);

	efi_boot = preload_search_info(kmdp, MODINFO_METADATA |
	    MODINFOMD_EFI_MAP) != NULL;

	if (!efi_boot) {
		/* Tell the bios to warmboot next time */
		atomic_store_short((u_short *)0x472, 0x1234);
	}

	physfree += ucode_load_bsp(physfree - kernphys + KERNSTART);
	physfree = roundup2(physfree, PAGE_SIZE);

	identify_cpu1();
	identify_hypervisor();
	identify_cpu_fixup_bsp();
	identify_cpu2();
	initializecpucache();

	/*
	 * Check for pti, pcid, and invpcid before ifuncs are
	 * resolved, to correctly select the implementation for
	 * pmap_activate_sw_mode().
	 */
	pti = pti_get_default();
	TUNABLE_INT_FETCH("vm.pmap.pti", &pti);
	TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
	if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
		invpcid_works = (cpu_stdext_feature &
		    CPUID_STDEXT_INVPCID) != 0;
	} else {
		pmap_pcid_enabled = 0;
	}

	/*
	 * Now we can do small core initialization, after the PCID
	 * CPU features and user knobs are evaluated.
	 */
	TUNABLE_INT_FETCH("vm.pmap.pcid_invlpg_workaround",
	    &pmap_pcid_invlpg_workaround_uena);
	cpu_init_small_core();

	link_elf_ireloc(kmdp);

	/*
	 * This may be done better later if it gets more high level
	 * components in it. If so just link td->td_proc here.
	 */
	proc_linkup0(&proc0, &thread0);

	/* Init basic tunables, hz etc */
	init_param1();

	/* Carve thread0's kernel stack out of the early-alloc region. */
	thread0.td_kstack = physfree - kernphys + KERNSTART;
	thread0.td_kstack_pages = kstack_pages;
	kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
	bzero((void *)thread0.td_kstack, kstack0_sz);
	physfree += kstack0_sz;

	/*
	 * Initialize enough of thread0 for delayed invalidation to
	 * work very early.  Rely on thread0.td_base_pri
	 * zero-initialization, it is reset to PVM at proc0_init().
	 */
	pmap_thread_init_invl_gen(&thread0);

	pc = &temp_bsp_pcpu;
	pcpu_init(pc, 0, sizeof(struct pcpu));
	gdt = &temp_bsp_pcpu.pc_gdt[0];

	/*
	 * make gdt memory segments.  The system descriptors (TSS, LDT)
	 * occupy two slots each and are filled in separately below.
	 */
	for (x = 0; x < NGDT; x++) {
		if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
		    x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1)
			ssdtosd(&gdt_segs[x], &gdt[x]);
	}
	gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&pc->pc_common_tss;
	ssdtosyssd(&gdt_segs[GPROC0_SEL],
	    (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (long)gdt;
	lgdt(&r_gdt);

	wrmsr(MSR_FSBASE, 0);		/* User value */
	wrmsr(MSR_GSBASE, (u_int64_t)pc);
	wrmsr(MSR_KGSBASE, 0);		/* User value while in the kernel */

	dpcpu_init((void *)(physfree - kernphys + KERNSTART), 0);
	physfree += DPCPU_SIZE;
	amd64_bsp_pcpu_init1(pc);
	/* Non-late cninit() and printf() can be moved up to here. */

	/*
	 * Initialize mutexes.
	 *
	 * icu_lock: in order to allow an interrupt to occur in a critical
	 *	     section, to set pcpu->ipending (etc...) properly, we
	 *	     must be able to get the icu lock, so it can't be
	 *	     under witness.
	 */
	mutex_init();
	mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
	mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);

	/* exceptions: start with every vector reserved, then fill in. */
	for (x = 0; x < NIDT; x++)
		setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT,
		    SEL_KPL, 0);
	setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4);
	setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2);
	setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_OF, pti ? &IDTVEC(ofl_pti) : &IDTVEC(ofl), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_BR, pti ? &IDTVEC(bnd_pti) : &IDTVEC(bnd), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_UD, pti ? &IDTVEC(ill_pti) : &IDTVEC(ill), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NM, pti ? &IDTVEC(dna_pti) : &IDTVEC(dna), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
	setidt(IDT_FPUGP, pti ? &IDTVEC(fpusegm_pti) : &IDTVEC(fpusegm),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_TS, pti ? &IDTVEC(tss_pti) : &IDTVEC(tss), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NP, pti ? &IDTVEC(missing_pti) : &IDTVEC(missing),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_SS, pti ? &IDTVEC(stk_pti) : &IDTVEC(stk), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_GP, pti ? &IDTVEC(prot_pti) : &IDTVEC(prot), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_PF, pti ? &IDTVEC(page_pti) : &IDTVEC(page), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MF, pti ? &IDTVEC(fpu_pti) : &IDTVEC(fpu), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_AC, pti ? &IDTVEC(align_pti) : &IDTVEC(align), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 3);
	setidt(IDT_XF, pti ? &IDTVEC(xmm_pti) : &IDTVEC(xmm), SDT_SYSIGT,
	    SEL_KPL, 0);
#ifdef KDTRACE_HOOKS
	setidt(IDT_DTRACE_RET, pti ? &IDTVEC(dtrace_ret_pti) :
	    &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0);
#endif
#ifdef XENHVM
	setidt(IDT_EVTCHN, pti ? &IDTVEC(xen_intr_upcall_pti) :
	    &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_KPL, 0);
#endif
	r_idt.rd_limit = sizeof(idt0) - 1;
	r_idt.rd_base = (long) idt;
	lidt(&r_idt);

	/*
	 * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4)
	 * transition).
	 * Once bootblocks have updated, we can test directly for
	 * efi_systbl != NULL here...
	 */
	if (efi_boot)
		vty_set_preferred(VTY_VT);

	/* Speculative-execution mitigation tunables (old and new names). */
	TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.ibrs.disable", &hw_ibrs_disable);

	TUNABLE_INT_FETCH("hw.spec_store_bypass_disable", &hw_ssb_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.ssb.disable", &hw_ssb_disable);

	TUNABLE_INT_FETCH("machdep.syscall_ret_l1d_flush",
	    &syscall_ret_l1d_flush_mode);

	TUNABLE_INT_FETCH("hw.mds_disable", &hw_mds_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.mds.disable", &hw_mds_disable);

	TUNABLE_INT_FETCH("machdep.mitigations.taa.enable", &x86_taa_enable);

	TUNABLE_INT_FETCH("machdep.mitigations.rndgs.enable",
	    &x86_rngds_mitg_enable);

	finishidentcpu();	/* Final stage of CPU initialization */

	/*
	 * Initialize the clock before the console so that console
	 * initialization can use DELAY().
	 */
	clock_init();

	initializecpu();	/* Initialize CPU registers */

	amd64_bsp_ist_init(pc);

	/* Set the IO permission bitmap (empty due to tss seg limit) */
	pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) +
	    IOPERM_BITMAP_SIZE;

	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	ltr(gsel_tss);

	amd64_conf_fast_syscall();

	/*
	 * We initialize the PCB pointer early so that exception
	 * handlers will work.  Also set up td_critnest to short-cut
	 * the page fault handler.
	 */
	cpu_max_ext_state_size = sizeof(struct savefpu);
	set_top_of_stack_td(&thread0);
	thread0.td_pcb = get_pcb_td(&thread0);
	thread0.td_critnest = 1;

	/*
	 * The console and kdb should be initialized even earlier than here,
	 * but some console drivers don't work until after getmemsize().
	 * Default to late console initialization to support these drivers.
	 * This loses mainly printf()s in getmemsize() and early debugging.
	 */
	TUNABLE_INT_FETCH("debug.late_console", &late_console);
	if (!late_console) {
		cninit();
		amd64_kdb_init();
	}

	getmemsize(kmdp, physfree);
	init_param2(physmem);

	/* now running on new page tables, configured, and u/iom is accessible */

#ifdef DEV_PCI
	/* This call might adjust phys_avail[]. */
	pci_early_quirks();
#endif

	if (late_console)
		cninit();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

#ifdef DEV_ISA
#ifdef DEV_ATPIC
	elcr_probe();
	atpic_startup();
#else
	/* Reset and mask the atpics and leave them shut down. */
	atpic_reset();

	/*
	 * Point the ICU spurious interrupt vectors at the APIC spurious
	 * interrupt handler.
	 */
	setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
#endif
#else
#error "have you forgotten the isa device?"
#endif

	if (late_console)
		amd64_kdb_init();

	msgbufinit(msgbufp, msgbufsize);
	fpuinit();

	/* make an initial tss so cpu can get interrupt stack on syscall! */
	rsp0 = thread0.td_md.md_stack_base;
	/* Ensure the stack is aligned to 16 bytes */
	rsp0 &= ~0xFul;
	PCPU_PTR(common_tss)->tss_rsp0 = rsp0;
	amd64_bsp_pcpu_init2(rsp0);

	/* transfer to user mode */

	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
	_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
	_ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
	_ufssel = GSEL(GUFS32_SEL, SEL_UPL);
	_ugssel = GSEL(GUGS32_SEL, SEL_UPL);

	load_ds(_udatasel);
	load_es(_udatasel);
	load_fs(_ufssel);

	/* setup proc 0's pcb */
	thread0.td_pcb->pcb_flags = 0;

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

	kcsan_cpu_init(0);

#ifdef FDT
	x86_init_fdt();
#endif
	thread0.td_critnest = 0;

	kasan_init();
	kmsan_init();

	TSEXIT();

	/* Location of kernel stack for locore */
	return (thread0.td_md.md_stack_base);
}

/*
 * MD per-CPU area initialization; the ACPI id is unknown at this point
 * and is marked as such (it is filled in later by platform code).
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
}

/*
 * Export the raw BIOS SMAP (with optional extended attributes) from the
 * loader metadata via the machdep.smap sysctl.
 */
static int
smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct bios_smap *smapbase;
	struct bios_smap_xattr smap;
	caddr_t kmdp;
	uint32_t *smapattr;
	int count, error, i;

	/* Retrieve the system memory map from the loader. */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	smapbase = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (smapbase == NULL)
		return (0);
	smapattr = (uint32_t *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP_XATTR);
	/* The metadata size word is stored just before the payload. */
	count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase);
	error = 0;
	for (i = 0; i < count; i++) {
		smap.base = smapbase[i].base;
		smap.length = smapbase[i].length;
		smap.type = smapbase[i].type;
		if (smapattr != NULL)
			smap.xattr = smapattr[i];
		else
			smap.xattr = 0;
		error = SYSCTL_OUT(req, &smap, sizeof(smap));
	}
	return (error);
}
SYSCTL_PROC(_machdep, OID_AUTO, smap,
    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    smap_sysctl_handler, "S,bios_smap_xattr",
    "Raw BIOS SMAP data");

/*
 * Export the raw EFI memory map from the loader metadata via the
 * machdep.efi_map sysctl.
 */
static int
efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct efi_map_header *efihdr;
	caddr_t kmdp;
	uint32_t efisize;

	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr == NULL)
		return (0);
	/* The metadata size word is stored just before the payload. */
	efisize = *((uint32_t *)efihdr - 1);
	return (SYSCTL_OUT(req, efihdr, efisize));
}
SYSCTL_PROC(_machdep, OID_AUTO, efi_map,
    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    efi_map_sysctl_handler, "S,efi_map_header",
    "Raw EFI Memory Map");

/*
 * Enter a spinlock section: on the first (outermost) acquisition,
 * disable interrupts, remember the previous flags, and enter a
 * critical section; nested acquisitions just bump the count.
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t flags;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		flags = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_flags = flags;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}

/*
 * Leave a spinlock section; the outermost exit restores the saved
 * interrupt flags after leaving the critical section.
 */
void
spinlock_exit(void)
{
	struct thread *td;
	register_t flags;

	td = curthread;
	flags = td->td_md.md_saved_flags;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(flags);
	}
}

/*
 * Construct a PCB from a trapframe. This is called from kdb_trap() where
 * we want to start a backtrace from the function that caused us to enter
 * the debugger. We have the context in the trapframe, but base the trace
 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 * enough for a backtrace.
 */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{

	pcb->pcb_r12 = tf->tf_r12;
	pcb->pcb_r13 = tf->tf_r13;
	pcb->pcb_r14 = tf->tf_r14;
	pcb->pcb_r15 = tf->tf_r15;
	pcb->pcb_rbp = tf->tf_rbp;
	pcb->pcb_rbx = tf->tf_rbx;
	pcb->pcb_rip = tf->tf_rip;
	pcb->pcb_rsp = tf->tf_rsp;
}

/*
 * The pcb_flags is only modified by current thread, or by other threads
 * when current thread is stopped.  However, current thread may change it
 * from the interrupt context in cpu_switch(), or in the trap handler.
 * When we read-modify-write pcb_flags from C sources, compiler may generate
 * code that is not atomic regarding the interrupt handler.  If a trap or
 * interrupt happens and any flag is modified from the handler, it can be
 * clobbered with the cached value later.  Therefore, we implement setting
 * and clearing flags with single-instruction functions, which do not race
 * with possible modification of the flags from the trap or interrupt context,
 * because traps and interrupts are executed only on instruction boundary.
 */
void
set_pcb_flags_raw(struct pcb *pcb, const u_int flags)
{

	/* Single-instruction OR; see the interrupt-atomicity note above. */
	__asm __volatile("orl %1,%0"
	    : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags)
	    : "cc", "memory");

}

/*
 * The support for RDFSBASE, WRFSBASE and similar instructions for %gs
 * base requires that kernel saves MSR_FSBASE and MSR_{K,}GSBASE into
 * pcb if user space modified the bases.  We must save on the context
 * switch or if the return to usermode happens through the doreti.
 *
 * Tracking of both events is performed by the pcb flag PCB_FULL_IRET,
 * which have a consequence that the base MSRs must be saved each time
 * the PCB_FULL_IRET flag is set.  We disable interrupts to sync with
 * context switches.
 */
static void
set_pcb_flags_fsgsbase(struct pcb *pcb, const u_int flags)
{
	register_t r;

	if (curpcb == pcb &&
	    (flags & PCB_FULL_IRET) != 0 &&
	    (pcb->pcb_flags & PCB_FULL_IRET) == 0) {
		r = intr_disable();
		/* Re-check under disabled interrupts. */
		if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) {
			if (rfs() == _ufssel)
				pcb->pcb_fsbase = rdfsbase();
			if (rgs() == _ugssel)
				pcb->pcb_gsbase = rdmsr(MSR_KGSBASE);
		}
		set_pcb_flags_raw(pcb, flags);
		intr_restore(r);
	} else {
		set_pcb_flags_raw(pcb, flags);
	}
}

/* Resolve set_pcb_flags to the FSGSBASE-aware variant when supported. */
DEFINE_IFUNC(, void, set_pcb_flags, (struct pcb *, const u_int))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0 ?
	    set_pcb_flags_fsgsbase : set_pcb_flags_raw);
}

/*
 * Clear pcb flags with a single AND instruction; same interrupt-atomicity
 * rationale as set_pcb_flags_raw().
 */
void
clear_pcb_flags(struct pcb *pcb, const u_int flags)
{

	__asm __volatile("andl %1,%0"
	    : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags)
	    : "cc", "memory");
}

#ifdef KDB

/*
 * Provide inb() and outb() as functions.  They are normally only available as
 * inline functions, thus cannot be called from the debugger.
 */

/* silence compiler warnings */
u_char inb_(u_short);
void outb_(u_short, u_char);

u_char
inb_(u_short port)
{
	return inb(port);
}

void
outb_(u_short port, u_char data)
{
	outb(port, data);
}

#endif /* KDB */

#undef memset
#undef memmove
#undef memcpy

void	*memset_std(void *buf, int c, size_t len);
void	*memset_erms(void *buf, int c, size_t len);
void	*memmove_std(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);
void	*memmove_erms(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);
void	*memcpy_std(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);
void	*memcpy_erms(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);

#ifdef KCSAN
/*
 * These fail to build as ifuncs when used with KCSAN.
 */
void *
memset(void *buf, int c, size_t len)
{

	return (memset_std(buf, c, len));
}

void *
memmove(void * _Nonnull dst, const void * _Nonnull src, size_t len)
{

	return (memmove_std(dst, src, len));
}

void *
memcpy(void * _Nonnull dst, const void * _Nonnull src, size_t len)
{

	return (memcpy_std(dst, src, len));
}
#else
/* Pick the ERMS ("rep movsb/stosb") variants when the CPU advertises it. */
DEFINE_IFUNC(, void *, memset, (void *, int, size_t))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
	    memset_erms : memset_std);
}

DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull,
    size_t))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
	    memmove_erms : memmove_std);
}

DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,
    size_t))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
	    memcpy_erms : memcpy_std);
}
#endif

void	pagezero_std(void *addr);
void	pagezero_erms(void *addr);
/* Same ERMS selection for the page-zeroing primitive. */
DEFINE_IFUNC(, void, pagezero, (void *))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
	    pagezero_erms : pagezero_std);
}