1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 2003 Peter Wemm. 5 * Copyright (c) 1992 Terrence R. Lambert. 6 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * William Jolitz. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 41 */ 42 43 #include <sys/cdefs.h> 44 __FBSDID("$FreeBSD$"); 45 46 #include "opt_atpic.h" 47 #include "opt_cpu.h" 48 #include "opt_ddb.h" 49 #include "opt_inet.h" 50 #include "opt_isa.h" 51 #include "opt_kstack_pages.h" 52 #include "opt_maxmem.h" 53 #include "opt_pci.h" 54 #include "opt_platform.h" 55 #include "opt_sched.h" 56 57 #include <sys/param.h> 58 #include <sys/proc.h> 59 #include <sys/systm.h> 60 #include <sys/asan.h> 61 #include <sys/bio.h> 62 #include <sys/buf.h> 63 #include <sys/bus.h> 64 #include <sys/callout.h> 65 #include <sys/cons.h> 66 #include <sys/cpu.h> 67 #include <sys/csan.h> 68 #include <sys/efi.h> 69 #include <sys/eventhandler.h> 70 #include <sys/exec.h> 71 #include <sys/imgact.h> 72 #include <sys/kdb.h> 73 #include <sys/kernel.h> 74 #include <sys/ktr.h> 75 #include <sys/linker.h> 76 #include <sys/lock.h> 77 #include <sys/malloc.h> 78 #include <sys/memrange.h> 79 #include <sys/msan.h> 80 #include <sys/msgbuf.h> 81 #include <sys/mutex.h> 82 #include <sys/pcpu.h> 83 #include <sys/ptrace.h> 84 #include <sys/reboot.h> 85 #include <sys/reg.h> 86 #include <sys/rwlock.h> 87 #include <sys/sched.h> 88 #include <sys/signalvar.h> 89 #ifdef SMP 90 #include <sys/smp.h> 91 #endif 92 #include <sys/syscallsubr.h> 93 #include <sys/sysctl.h> 94 #include <sys/sysent.h> 95 #include <sys/sysproto.h> 96 #include <sys/ucontext.h> 97 #include <sys/vmmeter.h> 98 99 #include <vm/vm.h> 
100 #include <vm/vm_param.h> 101 #include <vm/vm_extern.h> 102 #include <vm/vm_kern.h> 103 #include <vm/vm_page.h> 104 #include <vm/vm_map.h> 105 #include <vm/vm_object.h> 106 #include <vm/vm_pager.h> 107 #include <vm/vm_phys.h> 108 #include <vm/vm_dumpset.h> 109 110 #ifdef DDB 111 #ifndef KDB 112 #error KDB must be enabled in order for DDB to work! 113 #endif 114 #include <ddb/ddb.h> 115 #include <ddb/db_sym.h> 116 #endif 117 118 #include <net/netisr.h> 119 120 #include <dev/smbios/smbios.h> 121 122 #include <machine/clock.h> 123 #include <machine/cpu.h> 124 #include <machine/cputypes.h> 125 #include <machine/frame.h> 126 #include <machine/intr_machdep.h> 127 #include <x86/mca.h> 128 #include <machine/md_var.h> 129 #include <machine/metadata.h> 130 #include <machine/pc/bios.h> 131 #include <machine/pcb.h> 132 #include <machine/proc.h> 133 #include <machine/sigframe.h> 134 #include <machine/specialreg.h> 135 #include <machine/trap.h> 136 #include <machine/tss.h> 137 #include <x86/ucode.h> 138 #include <x86/ifunc.h> 139 #ifdef SMP 140 #include <machine/smp.h> 141 #endif 142 #ifdef FDT 143 #include <x86/fdt.h> 144 #endif 145 146 #ifdef DEV_ATPIC 147 #include <x86/isa/icu.h> 148 #else 149 #include <x86/apicvar.h> 150 #endif 151 152 #include <isa/isareg.h> 153 #include <isa/rtc.h> 154 #include <x86/init.h> 155 156 /* Sanity check for __curthread() */ 157 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); 158 159 /* 160 * The PTI trampoline stack needs enough space for a hardware trapframe and a 161 * couple of scratch registers, as well as the trapframe left behind after an 162 * iret fault. 163 */ 164 CTASSERT(PC_PTI_STACK_SZ * sizeof(register_t) >= 2 * sizeof(struct pti_frame) - 165 offsetof(struct pti_frame, pti_rip)); 166 167 extern u_int64_t hammer_time(u_int64_t, u_int64_t); 168 169 static void cpu_startup(void *); 170 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); 171 172 /* Probe 8254 PIT and TSC. 
*/ 173 static void native_clock_source_init(void); 174 175 /* Preload data parse function */ 176 static caddr_t native_parse_preload_data(u_int64_t); 177 178 /* Native function to fetch and parse the e820 map */ 179 static void native_parse_memmap(caddr_t, vm_paddr_t *, int *); 180 181 /* Default init_ops implementation. */ 182 struct init_ops init_ops = { 183 .parse_preload_data = native_parse_preload_data, 184 .early_clock_source_init = native_clock_source_init, 185 .early_delay = i8254_delay, 186 .parse_memmap = native_parse_memmap, 187 }; 188 189 /* 190 * Physical address of the EFI System Table. Stashed from the metadata hints 191 * passed into the kernel and used by the EFI code to call runtime services. 192 */ 193 vm_paddr_t efi_systbl_phys; 194 195 /* Intel ICH registers */ 196 #define ICH_PMBASE 0x400 197 #define ICH_SMI_EN ICH_PMBASE + 0x30 198 199 int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; 200 201 int cold = 1; 202 203 long Maxmem = 0; 204 long realmem = 0; 205 int late_console = 1; 206 207 struct kva_md_info kmi; 208 209 struct region_descriptor r_idt; 210 211 struct pcpu *__pcpu; 212 struct pcpu temp_bsp_pcpu; 213 214 struct mtx icu_lock; 215 216 struct mem_range_softc mem_range_softc; 217 218 struct mtx dt_lock; /* lock for GDT and LDT */ 219 220 void (*vmm_resume_p)(void); 221 222 bool efi_boot; 223 224 static void 225 cpu_startup(void *dummy) 226 { 227 uintmax_t memsize; 228 char *sysenv; 229 230 /* 231 * On MacBooks, we need to disallow the legacy USB circuit to 232 * generate an SMI# because this can cause several problems, 233 * namely: incorrect CPU frequency detection and failure to 234 * start the APs. 235 * We do this by disabling a bit in the SMI_EN (SMI Control and 236 * Enable register) of the Intel ICH LPC Interface Bridge. 
237 */ 238 sysenv = kern_getenv("smbios.system.product"); 239 if (sysenv != NULL) { 240 if (strncmp(sysenv, "MacBook1,1", 10) == 0 || 241 strncmp(sysenv, "MacBook3,1", 10) == 0 || 242 strncmp(sysenv, "MacBook4,1", 10) == 0 || 243 strncmp(sysenv, "MacBookPro1,1", 13) == 0 || 244 strncmp(sysenv, "MacBookPro1,2", 13) == 0 || 245 strncmp(sysenv, "MacBookPro3,1", 13) == 0 || 246 strncmp(sysenv, "MacBookPro4,1", 13) == 0 || 247 strncmp(sysenv, "Macmini1,1", 10) == 0) { 248 if (bootverbose) 249 printf("Disabling LEGACY_USB_EN bit on " 250 "Intel ICH.\n"); 251 outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); 252 } 253 freeenv(sysenv); 254 } 255 256 /* 257 * Good {morning,afternoon,evening,night}. 258 */ 259 startrtclock(); 260 printcpuinfo(); 261 262 /* 263 * Display physical memory if SMBIOS reports reasonable amount. 264 */ 265 memsize = 0; 266 sysenv = kern_getenv("smbios.memory.enabled"); 267 if (sysenv != NULL) { 268 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; 269 freeenv(sysenv); 270 } 271 if (memsize < ptoa((uintmax_t)vm_free_count())) 272 memsize = ptoa((uintmax_t)Maxmem); 273 printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); 274 realmem = atop(memsize); 275 276 /* 277 * Display any holes after the first chunk of extended memory. 
278 */ 279 if (bootverbose) { 280 int indx; 281 282 printf("Physical memory chunk(s):\n"); 283 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { 284 vm_paddr_t size; 285 286 size = phys_avail[indx + 1] - phys_avail[indx]; 287 printf( 288 "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", 289 (uintmax_t)phys_avail[indx], 290 (uintmax_t)phys_avail[indx + 1] - 1, 291 (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); 292 } 293 } 294 295 vm_ksubmap_init(&kmi); 296 297 printf("avail memory = %ju (%ju MB)\n", 298 ptoa((uintmax_t)vm_free_count()), 299 ptoa((uintmax_t)vm_free_count()) / 1048576); 300 #ifdef DEV_PCI 301 if (bootverbose && intel_graphics_stolen_base != 0) 302 printf("intel stolen mem: base %#jx size %ju MB\n", 303 (uintmax_t)intel_graphics_stolen_base, 304 (uintmax_t)intel_graphics_stolen_size / 1024 / 1024); 305 #endif 306 307 /* 308 * Set up buffers, so they can be used to read disk labels. 309 */ 310 bufinit(); 311 vm_pager_bufferinit(); 312 313 cpu_setregs(); 314 } 315 316 static void 317 late_ifunc_resolve(void *dummy __unused) 318 { 319 link_elf_late_ireloc(); 320 } 321 SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL); 322 323 324 void 325 cpu_setregs(void) 326 { 327 register_t cr0; 328 329 cr0 = rcr0(); 330 /* 331 * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the 332 * BSP. See the comments there about why we set them. 
333 */ 334 cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; 335 load_cr0(cr0); 336 } 337 338 /* 339 * Initialize amd64 and configure to run kernel 340 */ 341 342 /* 343 * Initialize segments & interrupt table 344 */ 345 static struct gate_descriptor idt0[NIDT]; 346 struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ 347 348 static char dblfault_stack[DBLFAULT_STACK_SIZE] __aligned(16); 349 static char mce0_stack[MCE_STACK_SIZE] __aligned(16); 350 static char nmi0_stack[NMI_STACK_SIZE] __aligned(16); 351 static char dbg0_stack[DBG_STACK_SIZE] __aligned(16); 352 CTASSERT(sizeof(struct nmi_pcpu) == 16); 353 354 /* 355 * Software prototypes -- in more palatable form. 356 * 357 * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same 358 * slots as corresponding segments for i386 kernel. 359 */ 360 struct soft_segment_descriptor gdt_segs[] = { 361 /* GNULL_SEL 0 Null Descriptor */ 362 { .ssd_base = 0x0, 363 .ssd_limit = 0x0, 364 .ssd_type = 0, 365 .ssd_dpl = 0, 366 .ssd_p = 0, 367 .ssd_long = 0, 368 .ssd_def32 = 0, 369 .ssd_gran = 0 }, 370 /* GNULL2_SEL 1 Null Descriptor */ 371 { .ssd_base = 0x0, 372 .ssd_limit = 0x0, 373 .ssd_type = 0, 374 .ssd_dpl = 0, 375 .ssd_p = 0, 376 .ssd_long = 0, 377 .ssd_def32 = 0, 378 .ssd_gran = 0 }, 379 /* GUFS32_SEL 2 32 bit %gs Descriptor for user */ 380 { .ssd_base = 0x0, 381 .ssd_limit = 0xfffff, 382 .ssd_type = SDT_MEMRWA, 383 .ssd_dpl = SEL_UPL, 384 .ssd_p = 1, 385 .ssd_long = 0, 386 .ssd_def32 = 1, 387 .ssd_gran = 1 }, 388 /* GUGS32_SEL 3 32 bit %fs Descriptor for user */ 389 { .ssd_base = 0x0, 390 .ssd_limit = 0xfffff, 391 .ssd_type = SDT_MEMRWA, 392 .ssd_dpl = SEL_UPL, 393 .ssd_p = 1, 394 .ssd_long = 0, 395 .ssd_def32 = 1, 396 .ssd_gran = 1 }, 397 /* GCODE_SEL 4 Code Descriptor for kernel */ 398 { .ssd_base = 0x0, 399 .ssd_limit = 0xfffff, 400 .ssd_type = SDT_MEMERA, 401 .ssd_dpl = SEL_KPL, 402 .ssd_p = 1, 403 .ssd_long = 1, 404 .ssd_def32 = 0, 405 .ssd_gran = 1 }, 406 /* GDATA_SEL 5 Data Descriptor for kernel 
*/ 407 { .ssd_base = 0x0, 408 .ssd_limit = 0xfffff, 409 .ssd_type = SDT_MEMRWA, 410 .ssd_dpl = SEL_KPL, 411 .ssd_p = 1, 412 .ssd_long = 1, 413 .ssd_def32 = 0, 414 .ssd_gran = 1 }, 415 /* GUCODE32_SEL 6 32 bit Code Descriptor for user */ 416 { .ssd_base = 0x0, 417 .ssd_limit = 0xfffff, 418 .ssd_type = SDT_MEMERA, 419 .ssd_dpl = SEL_UPL, 420 .ssd_p = 1, 421 .ssd_long = 0, 422 .ssd_def32 = 1, 423 .ssd_gran = 1 }, 424 /* GUDATA_SEL 7 32/64 bit Data Descriptor for user */ 425 { .ssd_base = 0x0, 426 .ssd_limit = 0xfffff, 427 .ssd_type = SDT_MEMRWA, 428 .ssd_dpl = SEL_UPL, 429 .ssd_p = 1, 430 .ssd_long = 0, 431 .ssd_def32 = 1, 432 .ssd_gran = 1 }, 433 /* GUCODE_SEL 8 64 bit Code Descriptor for user */ 434 { .ssd_base = 0x0, 435 .ssd_limit = 0xfffff, 436 .ssd_type = SDT_MEMERA, 437 .ssd_dpl = SEL_UPL, 438 .ssd_p = 1, 439 .ssd_long = 1, 440 .ssd_def32 = 0, 441 .ssd_gran = 1 }, 442 /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ 443 { .ssd_base = 0x0, 444 .ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1, 445 .ssd_type = SDT_SYSTSS, 446 .ssd_dpl = SEL_KPL, 447 .ssd_p = 1, 448 .ssd_long = 0, 449 .ssd_def32 = 0, 450 .ssd_gran = 0 }, 451 /* Actually, the TSS is a system descriptor which is double size */ 452 { .ssd_base = 0x0, 453 .ssd_limit = 0x0, 454 .ssd_type = 0, 455 .ssd_dpl = 0, 456 .ssd_p = 0, 457 .ssd_long = 0, 458 .ssd_def32 = 0, 459 .ssd_gran = 0 }, 460 /* GUSERLDT_SEL 11 LDT Descriptor */ 461 { .ssd_base = 0x0, 462 .ssd_limit = 0x0, 463 .ssd_type = 0, 464 .ssd_dpl = 0, 465 .ssd_p = 0, 466 .ssd_long = 0, 467 .ssd_def32 = 0, 468 .ssd_gran = 0 }, 469 /* GUSERLDT_SEL 12 LDT Descriptor, double size */ 470 { .ssd_base = 0x0, 471 .ssd_limit = 0x0, 472 .ssd_type = 0, 473 .ssd_dpl = 0, 474 .ssd_p = 0, 475 .ssd_long = 0, 476 .ssd_def32 = 0, 477 .ssd_gran = 0 }, 478 }; 479 _Static_assert(nitems(gdt_segs) == NGDT, "Stale NGDT"); 480 481 void 482 setidt(int idx, inthand_t *func, int typ, int dpl, int ist) 483 { 484 struct gate_descriptor *ip; 485 486 ip = idt + idx; 487 
ip->gd_looffset = (uintptr_t)func; 488 ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL); 489 ip->gd_ist = ist; 490 ip->gd_xx = 0; 491 ip->gd_type = typ; 492 ip->gd_dpl = dpl; 493 ip->gd_p = 1; 494 ip->gd_hioffset = ((uintptr_t)func)>>16 ; 495 } 496 497 extern inthand_t 498 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), 499 IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), 500 IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), 501 IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), 502 IDTVEC(xmm), IDTVEC(dblfault), 503 IDTVEC(div_pti), IDTVEC(bpt_pti), 504 IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti), 505 IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti), 506 IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti), 507 IDTVEC(rsvd_pti), IDTVEC(fpu_pti), IDTVEC(align_pti), 508 IDTVEC(xmm_pti), 509 #ifdef KDTRACE_HOOKS 510 IDTVEC(dtrace_ret), IDTVEC(dtrace_ret_pti), 511 #endif 512 #ifdef XENHVM 513 IDTVEC(xen_intr_upcall), IDTVEC(xen_intr_upcall_pti), 514 #endif 515 IDTVEC(fast_syscall), IDTVEC(fast_syscall32), 516 IDTVEC(fast_syscall_pti); 517 518 #ifdef DDB 519 /* 520 * Display the index and function name of any IDT entries that don't use 521 * the default 'rsvd' entry point. 522 */ 523 DB_SHOW_COMMAND_FLAGS(idt, db_show_idt, DB_CMD_MEMSAFE) 524 { 525 struct gate_descriptor *ip; 526 int idx; 527 uintptr_t func; 528 529 ip = idt; 530 for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { 531 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); 532 if (func != (uintptr_t)&IDTVEC(rsvd)) { 533 db_printf("%3d\t", idx); 534 db_printsym(func, DB_STGY_PROC); 535 db_printf("\n"); 536 } 537 ip++; 538 } 539 } 540 541 /* Show privileged registers. 
*/ 542 DB_SHOW_COMMAND_FLAGS(sysregs, db_show_sysregs, DB_CMD_MEMSAFE) 543 { 544 struct { 545 uint16_t limit; 546 uint64_t base; 547 } __packed idtr, gdtr; 548 uint16_t ldt, tr; 549 550 __asm __volatile("sidt %0" : "=m" (idtr)); 551 db_printf("idtr\t0x%016lx/%04x\n", 552 (u_long)idtr.base, (u_int)idtr.limit); 553 __asm __volatile("sgdt %0" : "=m" (gdtr)); 554 db_printf("gdtr\t0x%016lx/%04x\n", 555 (u_long)gdtr.base, (u_int)gdtr.limit); 556 __asm __volatile("sldt %0" : "=r" (ldt)); 557 db_printf("ldtr\t0x%04x\n", ldt); 558 __asm __volatile("str %0" : "=r" (tr)); 559 db_printf("tr\t0x%04x\n", tr); 560 db_printf("cr0\t0x%016lx\n", rcr0()); 561 db_printf("cr2\t0x%016lx\n", rcr2()); 562 db_printf("cr3\t0x%016lx\n", rcr3()); 563 db_printf("cr4\t0x%016lx\n", rcr4()); 564 if (rcr4() & CR4_XSAVE) 565 db_printf("xcr0\t0x%016lx\n", rxcr(0)); 566 db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER)); 567 if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX)) 568 db_printf("FEATURES_CTL\t%016lx\n", 569 rdmsr(MSR_IA32_FEATURE_CONTROL)); 570 db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR)); 571 db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT)); 572 db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE)); 573 } 574 575 DB_SHOW_COMMAND_FLAGS(dbregs, db_show_dbregs, DB_CMD_MEMSAFE) 576 { 577 578 db_printf("dr0\t0x%016lx\n", rdr0()); 579 db_printf("dr1\t0x%016lx\n", rdr1()); 580 db_printf("dr2\t0x%016lx\n", rdr2()); 581 db_printf("dr3\t0x%016lx\n", rdr3()); 582 db_printf("dr6\t0x%016lx\n", rdr6()); 583 db_printf("dr7\t0x%016lx\n", rdr7()); 584 } 585 #endif 586 587 void 588 sdtossd(struct user_segment_descriptor *sd, struct soft_segment_descriptor *ssd) 589 { 590 591 ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; 592 ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; 593 ssd->ssd_type = sd->sd_type; 594 ssd->ssd_dpl = sd->sd_dpl; 595 ssd->ssd_p = sd->sd_p; 596 ssd->ssd_long = sd->sd_long; 597 ssd->ssd_def32 = sd->sd_def32; 598 ssd->ssd_gran = sd->sd_gran; 599 } 600 601 void 602 
ssdtosd(struct soft_segment_descriptor *ssd, struct user_segment_descriptor *sd) 603 { 604 605 sd->sd_lobase = (ssd->ssd_base) & 0xffffff; 606 sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff; 607 sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; 608 sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; 609 sd->sd_type = ssd->ssd_type; 610 sd->sd_dpl = ssd->ssd_dpl; 611 sd->sd_p = ssd->ssd_p; 612 sd->sd_long = ssd->ssd_long; 613 sd->sd_def32 = ssd->ssd_def32; 614 sd->sd_gran = ssd->ssd_gran; 615 } 616 617 void 618 ssdtosyssd(struct soft_segment_descriptor *ssd, struct system_segment_descriptor *sd) 619 { 620 621 sd->sd_lobase = (ssd->ssd_base) & 0xffffff; 622 sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful; 623 sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; 624 sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; 625 sd->sd_type = ssd->ssd_type; 626 sd->sd_dpl = ssd->ssd_dpl; 627 sd->sd_p = ssd->ssd_p; 628 sd->sd_gran = ssd->ssd_gran; 629 } 630 631 u_int basemem; 632 633 static int 634 add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, 635 int *physmap_idxp) 636 { 637 int i, insert_idx, physmap_idx; 638 639 physmap_idx = *physmap_idxp; 640 641 if (length == 0) 642 return (1); 643 644 /* 645 * Find insertion point while checking for overlap. Start off by 646 * assuming the new entry will be added to the end. 647 * 648 * NB: physmap_idx points to the next free slot. 649 */ 650 insert_idx = physmap_idx; 651 for (i = 0; i <= physmap_idx; i += 2) { 652 if (base < physmap[i + 1]) { 653 if (base + length <= physmap[i]) { 654 insert_idx = i; 655 break; 656 } 657 if (boothowto & RB_VERBOSE) 658 printf( 659 "Overlapping memory regions, ignoring second region\n"); 660 return (1); 661 } 662 } 663 664 /* See if we can prepend to the next entry. */ 665 if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { 666 physmap[insert_idx] = base; 667 return (1); 668 } 669 670 /* See if we can append to the previous entry. 
*/ 671 if (insert_idx > 0 && base == physmap[insert_idx - 1]) { 672 physmap[insert_idx - 1] += length; 673 return (1); 674 } 675 676 physmap_idx += 2; 677 *physmap_idxp = physmap_idx; 678 if (physmap_idx == PHYS_AVAIL_ENTRIES) { 679 printf( 680 "Too many segments in the physical address map, giving up\n"); 681 return (0); 682 } 683 684 /* 685 * Move the last 'N' entries down to make room for the new 686 * entry if needed. 687 */ 688 for (i = (physmap_idx - 2); i > insert_idx; i -= 2) { 689 physmap[i] = physmap[i - 2]; 690 physmap[i + 1] = physmap[i - 1]; 691 } 692 693 /* Insert the new entry. */ 694 physmap[insert_idx] = base; 695 physmap[insert_idx + 1] = base + length; 696 return (1); 697 } 698 699 void 700 bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize, 701 vm_paddr_t *physmap, int *physmap_idx) 702 { 703 struct bios_smap *smap, *smapend; 704 705 smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); 706 707 for (smap = smapbase; smap < smapend; smap++) { 708 if (boothowto & RB_VERBOSE) 709 printf("SMAP type=%02x base=%016lx len=%016lx\n", 710 smap->type, smap->base, smap->length); 711 712 if (smap->type != SMAP_TYPE_MEMORY) 713 continue; 714 715 if (!add_physmap_entry(smap->base, smap->length, physmap, 716 physmap_idx)) 717 break; 718 } 719 } 720 721 static void 722 add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, 723 int *physmap_idx) 724 { 725 struct efi_md *map, *p; 726 const char *type; 727 size_t efisz; 728 int ndesc, i; 729 730 static const char *types[] = { 731 "Reserved", 732 "LoaderCode", 733 "LoaderData", 734 "BootServicesCode", 735 "BootServicesData", 736 "RuntimeServicesCode", 737 "RuntimeServicesData", 738 "ConventionalMemory", 739 "UnusableMemory", 740 "ACPIReclaimMemory", 741 "ACPIMemoryNVS", 742 "MemoryMappedIO", 743 "MemoryMappedIOPortSpace", 744 "PalCode", 745 "PersistentMemory" 746 }; 747 748 /* 749 * Memory map data provided by UEFI via the GetMemoryMap 750 * Boot Services API. 
751 */ 752 efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; 753 map = (struct efi_md *)((uint8_t *)efihdr + efisz); 754 755 if (efihdr->descriptor_size == 0) 756 return; 757 ndesc = efihdr->memory_size / efihdr->descriptor_size; 758 759 if (boothowto & RB_VERBOSE) 760 printf("%23s %12s %12s %8s %4s\n", 761 "Type", "Physical", "Virtual", "#Pages", "Attr"); 762 763 for (i = 0, p = map; i < ndesc; i++, 764 p = efi_next_descriptor(p, efihdr->descriptor_size)) { 765 if (boothowto & RB_VERBOSE) { 766 if (p->md_type < nitems(types)) 767 type = types[p->md_type]; 768 else 769 type = "<INVALID>"; 770 printf("%23s %012lx %012lx %08lx ", type, p->md_phys, 771 p->md_virt, p->md_pages); 772 if (p->md_attr & EFI_MD_ATTR_UC) 773 printf("UC "); 774 if (p->md_attr & EFI_MD_ATTR_WC) 775 printf("WC "); 776 if (p->md_attr & EFI_MD_ATTR_WT) 777 printf("WT "); 778 if (p->md_attr & EFI_MD_ATTR_WB) 779 printf("WB "); 780 if (p->md_attr & EFI_MD_ATTR_UCE) 781 printf("UCE "); 782 if (p->md_attr & EFI_MD_ATTR_WP) 783 printf("WP "); 784 if (p->md_attr & EFI_MD_ATTR_RP) 785 printf("RP "); 786 if (p->md_attr & EFI_MD_ATTR_XP) 787 printf("XP "); 788 if (p->md_attr & EFI_MD_ATTR_NV) 789 printf("NV "); 790 if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE) 791 printf("MORE_RELIABLE "); 792 if (p->md_attr & EFI_MD_ATTR_RO) 793 printf("RO "); 794 if (p->md_attr & EFI_MD_ATTR_RT) 795 printf("RUNTIME"); 796 printf("\n"); 797 } 798 799 switch (p->md_type) { 800 case EFI_MD_TYPE_CODE: 801 case EFI_MD_TYPE_DATA: 802 case EFI_MD_TYPE_BS_CODE: 803 case EFI_MD_TYPE_BS_DATA: 804 case EFI_MD_TYPE_FREE: 805 /* 806 * We're allowed to use any entry with these types. 
807 */ 808 break; 809 default: 810 continue; 811 } 812 813 if (!add_physmap_entry(p->md_phys, p->md_pages * EFI_PAGE_SIZE, 814 physmap, physmap_idx)) 815 break; 816 } 817 } 818 819 static void 820 native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx) 821 { 822 struct bios_smap *smap; 823 struct efi_map_header *efihdr; 824 u_int32_t size; 825 826 /* 827 * Memory map from INT 15:E820. 828 * 829 * subr_module.c says: 830 * "Consumer may safely assume that size value precedes data." 831 * ie: an int32_t immediately precedes smap. 832 */ 833 834 efihdr = (struct efi_map_header *)preload_search_info(kmdp, 835 MODINFO_METADATA | MODINFOMD_EFI_MAP); 836 smap = (struct bios_smap *)preload_search_info(kmdp, 837 MODINFO_METADATA | MODINFOMD_SMAP); 838 if (efihdr == NULL && smap == NULL) 839 panic("No BIOS smap or EFI map info from loader!"); 840 841 if (efihdr != NULL) { 842 add_efi_map_entries(efihdr, physmap, physmap_idx); 843 strlcpy(bootmethod, "UEFI", sizeof(bootmethod)); 844 } else { 845 size = *((u_int32_t *)smap - 1); 846 bios_add_smap_entries(smap, size, physmap, physmap_idx); 847 strlcpy(bootmethod, "BIOS", sizeof(bootmethod)); 848 } 849 } 850 851 #define PAGES_PER_GB (1024 * 1024 * 1024 / PAGE_SIZE) 852 853 /* 854 * Populate the (physmap) array with base/bound pairs describing the 855 * available physical memory in the system, then test this memory and 856 * build the phys_avail array describing the actually-available memory. 857 * 858 * Total memory size may be set by the kernel environment variable 859 * hw.physmem or the compile-time define MAXMEM. 860 * 861 * XXX first should be vm_paddr_t. 
862 */ 863 static void 864 getmemsize(caddr_t kmdp, u_int64_t first) 865 { 866 int i, physmap_idx, pa_indx, da_indx; 867 vm_paddr_t pa, physmap[PHYS_AVAIL_ENTRIES]; 868 u_long physmem_start, physmem_tunable, memtest; 869 pt_entry_t *pte; 870 quad_t dcons_addr, dcons_size; 871 int page_counter; 872 873 /* 874 * Tell the physical memory allocator about pages used to store 875 * the kernel and preloaded data. See kmem_bootstrap_free(). 876 */ 877 vm_phys_early_add_seg((vm_paddr_t)kernphys, trunc_page(first)); 878 879 bzero(physmap, sizeof(physmap)); 880 physmap_idx = 0; 881 882 init_ops.parse_memmap(kmdp, physmap, &physmap_idx); 883 physmap_idx -= 2; 884 885 /* 886 * Find the 'base memory' segment for SMP 887 */ 888 basemem = 0; 889 for (i = 0; i <= physmap_idx; i += 2) { 890 if (physmap[i] <= 0xA0000) { 891 basemem = physmap[i + 1] / 1024; 892 break; 893 } 894 } 895 if (basemem == 0 || basemem > 640) { 896 if (bootverbose) 897 printf( 898 "Memory map doesn't contain a basemem segment, faking it"); 899 basemem = 640; 900 } 901 902 /* 903 * Maxmem isn't the "maximum memory", it's one larger than the 904 * highest page of the physical address space. It should be 905 * called something like "Maxphyspage". We may adjust this 906 * based on ``hw.physmem'' and the results of the memory test. 907 */ 908 Maxmem = atop(physmap[physmap_idx + 1]); 909 910 #ifdef MAXMEM 911 Maxmem = MAXMEM / 4; 912 #endif 913 914 if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) 915 Maxmem = atop(physmem_tunable); 916 917 /* 918 * The boot memory test is disabled by default, as it takes a 919 * significant amount of time on large-memory systems, and is 920 * unfriendly to virtual machines as it unnecessarily touches all 921 * pages. 922 * 923 * A general name is used as the code may be extended to support 924 * additional tests beyond the current "page present" test. 
925 */ 926 memtest = 0; 927 TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); 928 929 /* 930 * Don't allow MAXMEM or hw.physmem to extend the amount of memory 931 * in the system. 932 */ 933 if (Maxmem > atop(physmap[physmap_idx + 1])) 934 Maxmem = atop(physmap[physmap_idx + 1]); 935 936 if (atop(physmap[physmap_idx + 1]) != Maxmem && 937 (boothowto & RB_VERBOSE)) 938 printf("Physical memory use set to %ldK\n", Maxmem * 4); 939 940 /* call pmap initialization to make new kernel address space */ 941 pmap_bootstrap(&first); 942 943 /* 944 * Size up each available chunk of physical memory. 945 * 946 * XXX Some BIOSes corrupt low 64KB between suspend and resume. 947 * By default, mask off the first 16 pages unless we appear to be 948 * running in a VM. 949 */ 950 physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT; 951 TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start); 952 if (physmap[0] < physmem_start) { 953 if (physmem_start < PAGE_SIZE) 954 physmap[0] = PAGE_SIZE; 955 else if (physmem_start >= physmap[1]) 956 physmap[0] = round_page(physmap[1] - PAGE_SIZE); 957 else 958 physmap[0] = round_page(physmem_start); 959 } 960 pa_indx = 0; 961 da_indx = 1; 962 phys_avail[pa_indx++] = physmap[0]; 963 phys_avail[pa_indx] = physmap[0]; 964 dump_avail[da_indx] = physmap[0]; 965 pte = CMAP1; 966 967 /* 968 * Get dcons buffer address 969 */ 970 if (getenv_quad("dcons.addr", &dcons_addr) == 0 || 971 getenv_quad("dcons.size", &dcons_size) == 0) 972 dcons_addr = 0; 973 974 /* 975 * physmap is in bytes, so when converting to page boundaries, 976 * round up the start address and round down the end address. 
977 */ 978 page_counter = 0; 979 if (memtest != 0) 980 printf("Testing system memory"); 981 for (i = 0; i <= physmap_idx; i += 2) { 982 vm_paddr_t end; 983 984 end = ptoa((vm_paddr_t)Maxmem); 985 if (physmap[i + 1] < end) 986 end = trunc_page(physmap[i + 1]); 987 for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { 988 int tmp, page_bad, full; 989 int *ptr = (int *)CADDR1; 990 991 full = FALSE; 992 /* 993 * block out kernel memory as not available. 994 */ 995 if (pa >= (vm_paddr_t)kernphys && pa < first) 996 goto do_dump_avail; 997 998 /* 999 * block out dcons buffer 1000 */ 1001 if (dcons_addr > 0 1002 && pa >= trunc_page(dcons_addr) 1003 && pa < dcons_addr + dcons_size) 1004 goto do_dump_avail; 1005 1006 page_bad = FALSE; 1007 if (memtest == 0) 1008 goto skip_memtest; 1009 1010 /* 1011 * Print a "." every GB to show we're making 1012 * progress. 1013 */ 1014 page_counter++; 1015 if ((page_counter % PAGES_PER_GB) == 0) 1016 printf("."); 1017 1018 /* 1019 * map page into kernel: valid, read/write,non-cacheable 1020 */ 1021 *pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD; 1022 invltlb(); 1023 1024 tmp = *(int *)ptr; 1025 /* 1026 * Test for alternating 1's and 0's 1027 */ 1028 *(volatile int *)ptr = 0xaaaaaaaa; 1029 if (*(volatile int *)ptr != 0xaaaaaaaa) 1030 page_bad = TRUE; 1031 /* 1032 * Test for alternating 0's and 1's 1033 */ 1034 *(volatile int *)ptr = 0x55555555; 1035 if (*(volatile int *)ptr != 0x55555555) 1036 page_bad = TRUE; 1037 /* 1038 * Test for all 1's 1039 */ 1040 *(volatile int *)ptr = 0xffffffff; 1041 if (*(volatile int *)ptr != 0xffffffff) 1042 page_bad = TRUE; 1043 /* 1044 * Test for all 0's 1045 */ 1046 *(volatile int *)ptr = 0x0; 1047 if (*(volatile int *)ptr != 0x0) 1048 page_bad = TRUE; 1049 /* 1050 * Restore original value. 1051 */ 1052 *(int *)ptr = tmp; 1053 1054 skip_memtest: 1055 /* 1056 * Adjust array of valid/good pages. 
1057 */ 1058 if (page_bad == TRUE) 1059 continue; 1060 /* 1061 * If this good page is a continuation of the 1062 * previous set of good pages, then just increase 1063 * the end pointer. Otherwise start a new chunk. 1064 * Note that "end" points one higher than end, 1065 * making the range >= start and < end. 1066 * If we're also doing a speculative memory 1067 * test and we at or past the end, bump up Maxmem 1068 * so that we keep going. The first bad page 1069 * will terminate the loop. 1070 */ 1071 if (phys_avail[pa_indx] == pa) { 1072 phys_avail[pa_indx] += PAGE_SIZE; 1073 } else { 1074 pa_indx++; 1075 if (pa_indx == PHYS_AVAIL_ENTRIES) { 1076 printf( 1077 "Too many holes in the physical address space, giving up\n"); 1078 pa_indx--; 1079 full = TRUE; 1080 goto do_dump_avail; 1081 } 1082 phys_avail[pa_indx++] = pa; /* start */ 1083 phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ 1084 } 1085 physmem++; 1086 do_dump_avail: 1087 if (dump_avail[da_indx] == pa) { 1088 dump_avail[da_indx] += PAGE_SIZE; 1089 } else { 1090 da_indx++; 1091 if (da_indx == PHYS_AVAIL_ENTRIES) { 1092 da_indx--; 1093 goto do_next; 1094 } 1095 dump_avail[da_indx++] = pa; /* start */ 1096 dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ 1097 } 1098 do_next: 1099 if (full) 1100 break; 1101 } 1102 } 1103 *pte = 0; 1104 invltlb(); 1105 if (memtest != 0) 1106 printf("\n"); 1107 1108 /* 1109 * XXX 1110 * The last chunk must contain at least one page plus the message 1111 * buffer to avoid complicating other code (message buffer address 1112 * calculation, etc.). 1113 */ 1114 while (phys_avail[pa_indx - 1] + PAGE_SIZE + 1115 round_page(msgbufsize) >= phys_avail[pa_indx]) { 1116 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); 1117 phys_avail[pa_indx--] = 0; 1118 phys_avail[pa_indx--] = 0; 1119 } 1120 1121 Maxmem = atop(phys_avail[pa_indx]); 1122 1123 /* Trim off space for the message buffer. */ 1124 phys_avail[pa_indx] -= round_page(msgbufsize); 1125 1126 /* Map the message buffer. 
 */
	msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]);
}

/*
 * Parse the metadata left for us by the boot loader: locate the kernel
 * module record, fetch boothowto and the static environment, register
 * the debugger symbol table (DDB), and remember the EFI system table
 * address.  Returns the kernel's preload metadata pointer (kmdp).
 */
static caddr_t
native_parse_preload_data(u_int64_t modulep)
{
	caddr_t kmdp;
	char *envp;
#ifdef DDB
	vm_offset_t ksym_start;
	vm_offset_t ksym_end;
#endif

	preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE);
	preload_bootstrap_relocate(KERNBASE);
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	/*
	 * NOTE(review): kmdp is not NULL-checked before the MD_FETCHes
	 * below -- presumably the loader always supplies one of the two
	 * records searched for above; confirm against the loader.
	 */
	boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
	envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
	if (envp != NULL)
		envp += KERNBASE;	/* rebase env pointer into the kernel map */
	init_static_kenv(envp, 0);
#ifdef DDB
	ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
	ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
	db_fetch_ksymtab(ksym_start, ksym_end, 0);
#endif
	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	return (kmdp);
}

/* Early clock source bring-up: program the i8254 timer. */
static void
native_clock_source_init(void)
{
	i8254_init();
}

/* Initialize kdb and enter the debugger now if boot flags requested it. */
static void
amd64_kdb_init(void)
{
	kdb_init();
#ifdef KDB
	if (boothowto & RB_KDB)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
}

/* Set up the fast syscall stuff */
void
amd64_conf_fast_syscall(void)
{
	uint64_t msr;

	/* EFER.SCE enables the SYSCALL/SYSRET instructions. */
	msr = rdmsr(MSR_EFER) | EFER_SCE;
	wrmsr(MSR_EFER, msr);
	/* 64-bit SYSCALL entry point; PTI trampoline variant if enabled. */
	wrmsr(MSR_LSTAR, pti ?
	    (u_int64_t)IDTVEC(fast_syscall_pti) :
	    (u_int64_t)IDTVEC(fast_syscall));
	/* Entry point for SYSCALL issued from 32-bit compat mode. */
	wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
	/* Kernel/user segment selector bases loaded by SYSCALL/SYSRET. */
	msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
	    ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
	wrmsr(MSR_STAR, msr);
	/* RFLAGS bits cleared on kernel entry via SYSCALL. */
	wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC);
}

/*
 * First-stage BSP per-CPU setup: publish the pcpu area and wire up
 * pointers into the BSP's GDT (TSS, LDT, 32-bit %fs/%gs descriptors).
 */
void
amd64_bsp_pcpu_init1(struct pcpu *pc)
{
	struct user_segment_descriptor *gdt;

	PCPU_SET(prvspace, pc);
	gdt = *PCPU_PTR(gdt);
	PCPU_SET(curthread, &thread0);
	PCPU_SET(tssp, PCPU_PTR(common_tss));
	PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
	PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
	PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
	PCPU_SET(gs32p, &gdt[GUGS32_SEL]);
	PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK);
	PCPU_SET(smp_tlb_gen, 1);
}

/*
 * Second-stage BSP per-CPU setup: record the kernel stack top (rsp0)
 * and the PTI trampoline stack top, and hook up thread0's pcb.
 */
void
amd64_bsp_pcpu_init2(uint64_t rsp0)
{

	PCPU_SET(rsp0, rsp0);
	/* Top of the PTI stack, kept 16-byte aligned. */
	PCPU_SET(pti_rsp0, ((vm_offset_t)PCPU_PTR(pti_stack) +
	    PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
	PCPU_SET(curpcb, thread0.td_pcb);
}

/*
 * Program the interrupt-stack-table (IST) slots in the TSS for the
 * exceptions that must run on their own known-good stacks.  For each
 * stack a struct nmi_pcpu holding the pcpu pointer is placed at the
 * very top, and the TSS IST entry points just below it.
 */
void
amd64_bsp_ist_init(struct pcpu *pc)
{
	struct nmi_pcpu *np;
	struct amd64tss *tssp;

	tssp = &pc->pc_common_tss;

	/* doublefault stack space, runs on ist1 */
	np = ((struct nmi_pcpu *)&dblfault_stack[sizeof(dblfault_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist1 = (long)np;

	/*
	 * NMI stack, runs on ist2.  The pcpu pointer is stored just
	 * above the start of the ist2 stack.
	 */
	np = ((struct nmi_pcpu *)&nmi0_stack[sizeof(nmi0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist2 = (long)np;

	/*
	 * MC# stack, runs on ist3.  The pcpu pointer is stored just
	 * above the start of the ist3 stack.
	 */
	np = ((struct nmi_pcpu *)&mce0_stack[sizeof(mce0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist3 = (long)np;

	/*
	 * DB# stack, runs on ist4.
	 */
	np = ((struct nmi_pcpu *)&dbg0_stack[sizeof(dbg0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist4 = (long)np;
}

/*
 * Calculate the kernel load address by inspecting page table created by loader.
 * The assumptions:
 * - kernel is mapped at KERNBASE, backed by contiguous phys memory
 *   aligned at 2M, below 4G (the latter is important for AP startup)
 * - there is a 2M hole at KERNBASE (KERNSTART = KERNBASE + 2M)
 * - kernel is mapped with 2M superpages
 * - all participating memory, i.e. kernel, modules, metadata,
 *   page table is accessible by pre-created 1:1 mapping
 *   (right now loader creates 1:1 mapping for lower 4G, and all
 *   memory is from there)
 * - there is a usable memory block right after the end of the
 *   mapped kernel and all modules/metadata, pointed to by
 *   physfree, for early allocations
 */
vm_paddr_t __nosanitizeaddress __nosanitizememory
amd64_loadaddr(void)
{
	pml4_entry_t *pml4e;
	pdp_entry_t *pdpe;
	pd_entry_t *pde;
	uint64_t cr3;

	/*
	 * Walk the loader-built page table (physical addresses are
	 * directly dereferenceable through its 1:1 mapping) down to
	 * the 2M PDE covering KERNSTART; its frame is the load address.
	 */
	cr3 = rcr3();
	pml4e = (pml4_entry_t *)cr3 + pmap_pml4e_index(KERNSTART);
	pdpe = (pdp_entry_t *)(*pml4e & PG_FRAME) + pmap_pdpe_index(KERNSTART);
	pde = (pd_entry_t *)(*pdpe & PG_FRAME) + pmap_pde_index(KERNSTART);
	return (*pde & PG_FRAME);
}

/*
 * Machine-dependent startup for the BSP.  Takes the loader's module
 * pointer and first-free-physical-address, brings up pcpu/GDT/IDT/TSS,
 * console and the physical memory map, and returns the location of
 * thread0's kernel stack for locore to switch onto.
 */
u_int64_t
hammer_time(u_int64_t modulep, u_int64_t physfree)
{
	caddr_t kmdp;
	int gsel_tss, x;
	struct pcpu *pc;
	uint64_t rsp0;
	char *env;
	struct user_segment_descriptor *gdt;
	struct region_descriptor r_gdt;
	size_t kstack0_sz;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	kernphys = amd64_loadaddr();

	/*
	 * NOTE(review): physfree appears to arrive relative to the
	 * kernel load address -- it is rebased by kernphys here.
	 */
	physfree += kernphys;

	kmdp =
	    init_ops.parse_preload_data(modulep);

	/* True when the loader passed an EFI memory map (UEFI boot). */
	efi_boot = preload_search_info(kmdp, MODINFO_METADATA |
	    MODINFOMD_EFI_MAP) != NULL;

	if (!efi_boot) {
		/* Tell the bios to warmboot next time */
		atomic_store_short((u_short *)0x472, 0x1234);
	}

	/* Apply BSP microcode update, then page-align the free pointer. */
	physfree += ucode_load_bsp(physfree - kernphys + KERNSTART);
	physfree = roundup2(physfree, PAGE_SIZE);

	identify_cpu1();
	identify_hypervisor();
	identify_hypervisor_smbios();
	identify_cpu_fixup_bsp();
	identify_cpu2();
	initializecpucache();

	/*
	 * Check for pti, pcid, and invpcid before ifuncs are
	 * resolved, to correctly select the implementation for
	 * pmap_activate_sw_mode().
	 */
	pti = pti_get_default();
	TUNABLE_INT_FETCH("vm.pmap.pti", &pti);
	TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
	if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
		invpcid_works = (cpu_stdext_feature &
		    CPUID_STDEXT_INVPCID) != 0;
	} else {
		pmap_pcid_enabled = 0;
	}

	/*
	 * Now we can do small core initialization, after the PCID
	 * CPU features and user knobs are evaluated.
	 */
	TUNABLE_INT_FETCH("vm.pmap.pcid_invlpg_workaround",
	    &pmap_pcid_invlpg_workaround_uena);
	cpu_init_small_core();

	link_elf_ireloc(kmdp);

	/*
	 * This may be done better later if it gets more high level
	 * components in it. If so just link td->td_proc here.
	 */
	proc_linkup0(&proc0, &thread0);

	/* Init basic tunables, hz etc */
	init_param1();

	/* Carve thread0's kernel stack out of the early free memory. */
	thread0.td_kstack = physfree - kernphys + KERNSTART;
	thread0.td_kstack_pages = kstack_pages;
	kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
	bzero((void *)thread0.td_kstack, kstack0_sz);
	physfree += kstack0_sz;

	/*
	 * Initialize enough of thread0 for delayed invalidation to
	 * work very early.  Rely on thread0.td_base_pri
	 * zero-initialization, it is reset to PVM at proc0_init().
	 */
	pmap_thread_init_invl_gen(&thread0);

	pc = &temp_bsp_pcpu;
	pcpu_init(pc, 0, sizeof(struct pcpu));
	gdt = &temp_bsp_pcpu.pc_gdt[0];

	/*
	 * make gdt memory segments
	 */
	for (x = 0; x < NGDT; x++) {
		/*
		 * Skip the slot pairs reserved for the TSS and user LDT
		 * system descriptors; those are filled via ssdtosyssd().
		 */
		if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
		    x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1)
			ssdtosd(&gdt_segs[x], &gdt[x]);
	}
	gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&pc->pc_common_tss;
	ssdtosyssd(&gdt_segs[GPROC0_SEL],
	    (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (long)gdt;
	lgdt(&r_gdt);

	wrmsr(MSR_FSBASE, 0);		/* User value */
	wrmsr(MSR_GSBASE, (u_int64_t)pc);
	wrmsr(MSR_KGSBASE, 0);		/* User value while in the kernel */

	dpcpu_init((void *)(physfree - kernphys + KERNSTART), 0);
	physfree += DPCPU_SIZE;
	amd64_bsp_pcpu_init1(pc);
	/* Non-late cninit() and printf() can be moved up to here. */

	/*
	 * Initialize mutexes.
	 *
	 * icu_lock: in order to allow an interrupt to occur in a critical
	 *	section, to set pcpu->ipending (etc...) properly, we
	 *	must be able to get the icu lock, so it can't be
	 *	under witness.
	 */
	mutex_init();
	mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
	mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);

	/* exceptions */
	for (x = 0; x < NIDT; x++)
		setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT,
		    SEL_KPL, 0);
	setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4);
	setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2);
	setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_OF, pti ?
	    &IDTVEC(ofl_pti) : &IDTVEC(ofl), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_BR, pti ? &IDTVEC(bnd_pti) : &IDTVEC(bnd), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_UD, pti ? &IDTVEC(ill_pti) : &IDTVEC(ill), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NM, pti ? &IDTVEC(dna_pti) : &IDTVEC(dna), SDT_SYSIGT,
	    SEL_KPL, 0);
	/* #DF runs on its own IST stack (ist1); cf. amd64_bsp_ist_init(). */
	setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
	setidt(IDT_FPUGP, pti ? &IDTVEC(fpusegm_pti) : &IDTVEC(fpusegm),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_TS, pti ? &IDTVEC(tss_pti) : &IDTVEC(tss), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NP, pti ? &IDTVEC(missing_pti) : &IDTVEC(missing),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_SS, pti ? &IDTVEC(stk_pti) : &IDTVEC(stk), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_GP, pti ? &IDTVEC(prot_pti) : &IDTVEC(prot), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_PF, pti ? &IDTVEC(page_pti) : &IDTVEC(page), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MF, pti ? &IDTVEC(fpu_pti) : &IDTVEC(fpu), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_AC, pti ? &IDTVEC(align_pti) : &IDTVEC(align), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 3);
	setidt(IDT_XF, pti ? &IDTVEC(xmm_pti) : &IDTVEC(xmm), SDT_SYSIGT,
	    SEL_KPL, 0);
#ifdef KDTRACE_HOOKS
	setidt(IDT_DTRACE_RET, pti ? &IDTVEC(dtrace_ret_pti) :
	    &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0);
#endif
#ifdef XENHVM
	setidt(IDT_EVTCHN, pti ? &IDTVEC(xen_intr_upcall_pti) :
	    &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_KPL, 0);
#endif
	r_idt.rd_limit = sizeof(idt0) - 1;
	r_idt.rd_base = (long) idt;
	lidt(&r_idt);

	/*
	 * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4)
	 * transition).
	 * Once bootblocks have updated, we can test directly for
	 * efi_systbl != NULL here...
	 */
	if (efi_boot)
		vty_set_preferred(VTY_VT);

	/*
	 * Speculative-execution mitigation knobs; each is fetched
	 * under both its legacy and its machdep.mitigations.* name.
	 */
	TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.ibrs.disable", &hw_ibrs_disable);

	TUNABLE_INT_FETCH("hw.spec_store_bypass_disable", &hw_ssb_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.ssb.disable", &hw_ssb_disable);

	TUNABLE_INT_FETCH("machdep.syscall_ret_l1d_flush",
	    &syscall_ret_l1d_flush_mode);

	TUNABLE_INT_FETCH("hw.mds_disable", &hw_mds_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.mds.disable", &hw_mds_disable);

	TUNABLE_INT_FETCH("machdep.mitigations.taa.enable", &x86_taa_enable);

	TUNABLE_INT_FETCH("machdep.mitigations.rndgs.enable",
	    &x86_rngds_mitg_enable);

	finishidentcpu();	/* Final stage of CPU initialization */

	/*
	 * Initialize the clock before the console so that console
	 * initialization can use DELAY().
	 */
	clock_init();

	initializecpu();	/* Initialize CPU registers */

	amd64_bsp_ist_init(pc);

	/* Set the IO permission bitmap (empty due to tss seg limit) */
	pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) +
	    IOPERM_BITMAP_SIZE;

	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	ltr(gsel_tss);

	amd64_conf_fast_syscall();

	/*
	 * We initialize the PCB pointer early so that exception
	 * handlers will work.  Also set up td_critnest to short-cut
	 * the page fault handler.
	 */
	cpu_max_ext_state_size = sizeof(struct savefpu);
	set_top_of_stack_td(&thread0);
	thread0.td_pcb = get_pcb_td(&thread0);
	thread0.td_critnest = 1;

	/*
	 * The console and kdb should be initialized even earlier than here,
	 * but some console drivers don't work until after getmemsize().
	 * Default to late console initialization to support these drivers.
	 * This loses mainly printf()s in getmemsize() and early debugging.
	 */
	TUNABLE_INT_FETCH("debug.late_console", &late_console);
	if (!late_console) {
		cninit();
		amd64_kdb_init();
	}

	getmemsize(kmdp, physfree);
	init_param2(physmem);

	/* now running on new page tables, configured,and u/iom is accessible */

#ifdef DEV_PCI
	/* This call might adjust phys_avail[]. */
	pci_early_quirks();
#endif

	if (late_console)
		cninit();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

#ifdef DEV_ISA
#ifdef DEV_ATPIC
	elcr_probe();
	atpic_startup();
#else
	/* Reset and mask the atpics and leave them shut down. */
	atpic_reset();

	/*
	 * Point the ICU spurious interrupt vectors at the APIC spurious
	 * interrupt handler.
	 */
	setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
#endif
#else
#error "have you forgotten the isa device?"
#endif

	if (late_console)
		amd64_kdb_init();

	msgbufinit(msgbufp, msgbufsize);
	fpuinit();

	/* make an initial tss so cpu can get interrupt stack on syscall! */
	rsp0 = thread0.td_md.md_stack_base;
	/* Ensure the stack is aligned to 16 bytes */
	rsp0 &= ~0xFul;
	PCPU_PTR(common_tss)->tss_rsp0 = rsp0;
	amd64_bsp_pcpu_init2(rsp0);

	/* transfer to user mode */

	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
	_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
	_ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
	_ufssel = GSEL(GUFS32_SEL, SEL_UPL);
	_ugssel = GSEL(GUGS32_SEL, SEL_UPL);

	load_ds(_udatasel);
	load_es(_udatasel);
	load_fs(_ufssel);

	/* setup proc 0's pcb */
	thread0.td_pcb->pcb_flags = 0;

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

	kcsan_cpu_init(0);

#ifdef FDT
	x86_init_fdt();
#endif
	thread0.td_critnest = 0;

	kasan_init();
	kmsan_init();

	TSEXIT();

	/* Location of kernel stack for locore */
	return (thread0.td_md.md_stack_base);
}

/*
 * MD pcpu initialization: mark the ACPI id as unknown.  Presumably a
 * real id is assigned later by platform code -- not visible here.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
}

/*
 * sysctl machdep.smap handler: export the raw BIOS SMAP entries (plus
 * optional extended attributes) as an array of struct bios_smap_xattr.
 */
static int
smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct bios_smap *smapbase;
	struct bios_smap_xattr smap;
	caddr_t kmdp;
	uint32_t *smapattr;
	int count, error, i;

	/* Retrieve the system memory map from the loader.
 */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	smapbase = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (smapbase == NULL)
		return (0);
	smapattr = (uint32_t *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP_XATTR);
	/* The metadata record is preceded by its byte length. */
	count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase);
	error = 0;
	for (i = 0; i < count; i++) {
		smap.base = smapbase[i].base;
		smap.length = smapbase[i].length;
		smap.type = smapbase[i].type;
		if (smapattr != NULL)
			smap.xattr = smapattr[i];
		else
			smap.xattr = 0;
		error = SYSCTL_OUT(req, &smap, sizeof(smap));
	}
	return (error);
}
SYSCTL_PROC(_machdep, OID_AUTO, smap,
    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    smap_sysctl_handler, "S,bios_smap_xattr",
    "Raw BIOS SMAP data");

/*
 * sysctl machdep.efi_map handler: export the raw EFI memory map passed
 * in by the loader, header included.
 */
static int
efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct efi_map_header *efihdr;
	caddr_t kmdp;
	uint32_t efisize;

	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr == NULL)
		return (0);
	/* The metadata record is preceded by its byte length. */
	efisize = *((uint32_t *)efihdr - 1);
	return (SYSCTL_OUT(req, efihdr, efisize));
}
SYSCTL_PROC(_machdep, OID_AUTO, efi_map,
    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    efi_map_sysctl_handler, "S,efi_map_header",
    "Raw EFI Memory Map");

/*
 * Disable interrupts on the outermost entry and enter a critical
 * section, remembering the prior interrupt state; nested calls only
 * bump the per-thread count.
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t flags;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		flags = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_flags = flags;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}

/*
 * Undo spinlock_enter(): on the outermost exit leave the critical
 * section and restore the interrupt state saved at entry.
 */
void
spinlock_exit(void)
{
	struct thread *td;
	register_t flags;

	td = curthread;
	flags = td->td_md.md_saved_flags;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(flags);
	}
}

/*
 * Construct a PCB from a trapframe. This is called from kdb_trap() where
 * we want to start a backtrace from the function that caused us to enter
 * the debugger. We have the context in the trapframe, but base the trace
 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 * enough for a backtrace.
 */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{

	/* Callee-saved registers plus %rip/%rsp are all a backtrace needs. */
	pcb->pcb_r12 = tf->tf_r12;
	pcb->pcb_r13 = tf->tf_r13;
	pcb->pcb_r14 = tf->tf_r14;
	pcb->pcb_r15 = tf->tf_r15;
	pcb->pcb_rbp = tf->tf_rbp;
	pcb->pcb_rbx = tf->tf_rbx;
	pcb->pcb_rip = tf->tf_rip;
	pcb->pcb_rsp = tf->tf_rsp;
}

/*
 * The pcb_flags is only modified by current thread, or by other threads
 * when current thread is stopped. However, current thread may change it
 * from the interrupt context in cpu_switch(), or in the trap handler.
 * When we read-modify-write pcb_flags from C sources, compiler may generate
 * code that is not atomic regarding the interrupt handler. If a trap or
 * interrupt happens and any flag is modified from the handler, it can be
 * clobbered with the cached value later. Therefore, we implement setting
 * and clearing flags with single-instruction functions, which do not race
 * with possible modification of the flags from the trap or interrupt context,
 * because traps and interrupts are executed only on instruction boundary.
 */
void
set_pcb_flags_raw(struct pcb *pcb, const u_int flags)
{

	/* Single-instruction OR; cannot be torn by a trap or interrupt. */
	__asm __volatile("orl %1,%0"
	    : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags)
	    : "cc", "memory");

}

/*
 * The support for RDFSBASE, WRFSBASE and similar instructions for %gs
 * base requires that kernel saves MSR_FSBASE and MSR_{K,}GSBASE into
 * pcb if user space modified the bases.  We must save on the context
 * switch or if the return to usermode happens through the doreti.
 *
 * Tracking of both events is performed by the pcb flag PCB_FULL_IRET,
 * which have a consequence that the base MSRs must be saved each time
 * the PCB_FULL_IRET flag is set.  We disable interrupts to sync with
 * context switches.
 */
static void
set_pcb_flags_fsgsbase(struct pcb *pcb, const u_int flags)
{
	register_t r;

	if (curpcb == pcb &&
	    (flags & PCB_FULL_IRET) != 0 &&
	    (pcb->pcb_flags & PCB_FULL_IRET) == 0) {
		r = intr_disable();
		/* Re-check with interrupts off: a trap may have set it. */
		if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) {
			if (rfs() == _ufssel)
				pcb->pcb_fsbase = rdfsbase();
			if (rgs() == _ugssel)
				pcb->pcb_gsbase = rdmsr(MSR_KGSBASE);
		}
		set_pcb_flags_raw(pcb, flags);
		intr_restore(r);
	} else {
		set_pcb_flags_raw(pcb, flags);
	}
}

/* Resolve set_pcb_flags at boot: FSGSBASE-aware variant when supported. */
DEFINE_IFUNC(, void, set_pcb_flags, (struct pcb *, const u_int))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0 ?
	    set_pcb_flags_fsgsbase : set_pcb_flags_raw);
}

void
clear_pcb_flags(struct pcb *pcb, const u_int flags)
{

	/* Single-instruction AND with the complement; see comment above. */
	__asm __volatile("andl %1,%0"
	    : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags)
	    : "cc", "memory");
}

#ifdef KDB

/*
 * Provide inb() and outb() as functions.  They are normally only available as
 * inline functions, thus cannot be called from the debugger.
 */

/* silence compiler warnings */
u_char inb_(u_short);
void outb_(u_short, u_char);

u_char
inb_(u_short port)
{
	return inb(port);
}

void
outb_(u_short port, u_char data)
{
	outb(port, data);
}

#endif /* KDB */

#undef memset
#undef memmove
#undef memcpy

void	*memset_std(void *buf, int c, size_t len);
void	*memset_erms(void *buf, int c, size_t len);
void	*memmove_std(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);
void	*memmove_erms(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);
void	*memcpy_std(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);
void	*memcpy_erms(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);

#ifdef KCSAN
/*
 * These fail to build as ifuncs when used with KCSAN.
 */
void *
memset(void *buf, int c, size_t len)
{

	return (memset_std(buf, c, len));
}

void *
memmove(void * _Nonnull dst, const void * _Nonnull src, size_t len)
{

	return (memmove_std(dst, src, len));
}

void *
memcpy(void * _Nonnull dst, const void * _Nonnull src, size_t len)
{

	return (memcpy_std(dst, src, len));
}
#else
/*
 * Resolve the bulk-memory primitives at boot: use the ERMS (enhanced
 * "rep movsb/stosb") variants when the CPU advertises the feature.
 */
DEFINE_IFUNC(, void *, memset, (void *, int, size_t))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
	    memset_erms : memset_std);
}

DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull,
    size_t))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
	    memmove_erms : memmove_std);
}

DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,size_t))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
	    memcpy_erms : memcpy_std);
}
#endif

void	pagezero_std(void *addr);
void	pagezero_erms(void *addr);
DEFINE_IFUNC(, void , pagezero, (void *))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
	    pagezero_erms : pagezero_std);
}