1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 2003 Peter Wemm. 5 * Copyright (c) 1992 Terrence R. Lambert. 6 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * William Jolitz. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 */ 40 41 #include <sys/cdefs.h> 42 #include "opt_atpic.h" 43 #include "opt_cpu.h" 44 #include "opt_ddb.h" 45 #include "opt_inet.h" 46 #include "opt_isa.h" 47 #include "opt_kstack_pages.h" 48 #include "opt_maxmem.h" 49 #include "opt_pci.h" 50 #include "opt_platform.h" 51 #include "opt_sched.h" 52 53 #include <sys/param.h> 54 #include <sys/proc.h> 55 #include <sys/systm.h> 56 #include <sys/asan.h> 57 #include <sys/bio.h> 58 #include <sys/buf.h> 59 #include <sys/bus.h> 60 #include <sys/callout.h> 61 #include <sys/cons.h> 62 #include <sys/cpu.h> 63 #include <sys/csan.h> 64 #include <sys/efi.h> 65 #include <sys/eventhandler.h> 66 #include <sys/exec.h> 67 #include <sys/imgact.h> 68 #include <sys/kdb.h> 69 #include <sys/kernel.h> 70 #include <sys/ktr.h> 71 #include <sys/linker.h> 72 #include <sys/lock.h> 73 #include <sys/malloc.h> 74 #include <sys/memrange.h> 75 #include <sys/msan.h> 76 #include <sys/msgbuf.h> 77 #include <sys/mutex.h> 78 #include <sys/pcpu.h> 79 #include <sys/ptrace.h> 80 #include <sys/reboot.h> 81 #include <sys/reg.h> 82 #include <sys/rwlock.h> 83 #include <sys/sched.h> 84 #include <sys/signalvar.h> 85 #ifdef SMP 86 #include <sys/smp.h> 87 #endif 88 #include <sys/syscallsubr.h> 89 #include <sys/sysctl.h> 90 #include <sys/sysent.h> 91 #include <sys/sysproto.h> 92 #include <sys/ucontext.h> 93 #include <sys/vmmeter.h> 94 95 #include <vm/vm.h> 96 #include <vm/vm_param.h> 97 #include <vm/vm_extern.h> 98 #include 
<vm/vm_kern.h> 99 #include <vm/vm_page.h> 100 #include <vm/vm_map.h> 101 #include <vm/vm_object.h> 102 #include <vm/vm_pager.h> 103 #include <vm/vm_phys.h> 104 #include <vm/vm_dumpset.h> 105 106 #ifdef DDB 107 #ifndef KDB 108 #error KDB must be enabled in order for DDB to work! 109 #endif 110 #include <ddb/ddb.h> 111 #include <ddb/db_sym.h> 112 #endif 113 114 #include <net/netisr.h> 115 116 #include <dev/smbios/smbios.h> 117 118 #include <machine/clock.h> 119 #include <machine/cpu.h> 120 #include <machine/cputypes.h> 121 #include <machine/frame.h> 122 #include <machine/intr_machdep.h> 123 #include <x86/mca.h> 124 #include <machine/md_var.h> 125 #include <machine/metadata.h> 126 #include <machine/pc/bios.h> 127 #include <machine/pcb.h> 128 #include <machine/proc.h> 129 #include <machine/sigframe.h> 130 #include <machine/specialreg.h> 131 #include <machine/trap.h> 132 #include <machine/tss.h> 133 #include <x86/ucode.h> 134 #include <x86/ifunc.h> 135 #ifdef SMP 136 #include <machine/smp.h> 137 #endif 138 #ifdef FDT 139 #include <x86/fdt.h> 140 #endif 141 142 #ifdef DEV_ATPIC 143 #include <x86/isa/icu.h> 144 #else 145 #include <x86/apicvar.h> 146 #endif 147 148 #include <isa/isareg.h> 149 #include <isa/rtc.h> 150 #include <x86/init.h> 151 152 /* Sanity check for __curthread() */ 153 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); 154 155 /* 156 * The PTI trampoline stack needs enough space for a hardware trapframe and a 157 * couple of scratch registers, as well as the trapframe left behind after an 158 * iret fault. 159 */ 160 CTASSERT(PC_PTI_STACK_SZ * sizeof(register_t) >= 2 * sizeof(struct pti_frame) - 161 offsetof(struct pti_frame, pti_rip)); 162 163 extern u_int64_t hammer_time(u_int64_t, u_int64_t); 164 165 static void cpu_startup(void *); 166 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); 167 168 /* Probe 8254 PIT and TSC. 
*/
static void native_clock_source_init(void);

/* Preload data parse function */
static caddr_t native_parse_preload_data(u_int64_t);

/* Native function to fetch and parse the e820 map */
static void native_parse_memmap(caddr_t, vm_paddr_t *, int *);

/*
 * Default init_ops implementation.  Platform code (e.g. Xen) may override
 * individual hooks; the natives here parse loader metadata, probe the
 * 8254/TSC clocks, and read the BIOS/EFI memory map.
 */
struct init_ops init_ops = {
	.parse_preload_data = native_parse_preload_data,
	.early_clock_source_init = native_clock_source_init,
	.early_delay = i8254_delay,
	.parse_memmap = native_parse_memmap,
};

/*
 * Physical address of the EFI System Table. Stashed from the metadata hints
 * passed into the kernel and used by the EFI code to call runtime services.
 */
vm_paddr_t efi_systbl_phys;

/* Intel ICH registers */
#define ICH_PMBASE	0x400
/* NOTE(review): expansion is unparenthesized; safe at current use sites
 * (passed whole as an argument) but fragile if used in an expression. */
#define ICH_SMI_EN	ICH_PMBASE + 0x30

int	_udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel;

int cold = 1;		/* cleared once the machine is fully up */

long Maxmem = 0;	/* highest physical page number + 1 (see getmemsize) */
long realmem = 0;	/* real memory in pages, as reported at boot */
int late_console = 1;	/* set up console as late as possible */

struct kva_md_info kmi;

struct region_descriptor r_idt;

struct pcpu *__pcpu;
struct pcpu temp_bsp_pcpu;

struct mtx icu_lock;

struct mem_range_softc mem_range_softc;

struct mtx dt_lock;	/* lock for GDT and LDT */

void (*vmm_resume_p)(void);

bool efi_boot;

/*
 * Late (SI_SUB_CPU) startup: apply the MacBook legacy-USB SMI quirk,
 * start the RTC, print CPU and memory information, initialize kernel
 * submaps and the buffer cache, and set the final CR0 bits.
 */
static void
cpu_startup(void *dummy)
{
	uintmax_t memsize;
	char *sysenv;

	/*
	 * On MacBooks, we need to disallow the legacy USB circuit to
	 * generate an SMI# because this can cause several problems,
	 * namely: incorrect CPU frequency detection and failure to
	 * start the APs.
	 * We do this by disabling a bit in the SMI_EN (SMI Control and
	 * Enable register) of the Intel ICH LPC Interface Bridge.
	 */
	sysenv = kern_getenv("smbios.system.product");
	if (sysenv != NULL) {
		if (strncmp(sysenv, "MacBook1,1", 10) == 0 ||
		    strncmp(sysenv, "MacBook3,1", 10) == 0 ||
		    strncmp(sysenv, "MacBook4,1", 10) == 0 ||
		    strncmp(sysenv, "MacBookPro1,1", 13) == 0 ||
		    strncmp(sysenv, "MacBookPro1,2", 13) == 0 ||
		    strncmp(sysenv, "MacBookPro3,1", 13) == 0 ||
		    strncmp(sysenv, "MacBookPro4,1", 13) == 0 ||
		    strncmp(sysenv, "Macmini1,1", 10) == 0) {
			if (bootverbose)
				printf("Disabling LEGACY_USB_EN bit on "
				    "Intel ICH.\n");
			/* Clear bit 3 (LEGACY_USB_EN) of SMI_EN. */
			outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8);
		}
		freeenv(sysenv);
	}

	/*
	 * Good {morning,afternoon,evening,night}.
	 */
	startrtclock();
	printcpuinfo();

	/*
	 * Display physical memory if SMBIOS reports reasonable amount.
	 */
	memsize = 0;
	sysenv = kern_getenv("smbios.memory.enabled");
	if (sysenv != NULL) {
		/* SMBIOS reports the value in KB; shift to bytes. */
		memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
		freeenv(sysenv);
	}
	/* Fall back to the kernel's own count if SMBIOS looks too small. */
	if (memsize < ptoa((uintmax_t)vm_free_count()))
		memsize = ptoa((uintmax_t)Maxmem);
	printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20);
	realmem = atop(memsize);

	/*
	 * Display any holes after the first chunk of extended memory.
	 */
	if (bootverbose) {
		int indx;

		printf("Physical memory chunk(s):\n");
		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
			vm_paddr_t size;

			size = phys_avail[indx + 1] - phys_avail[indx];
			printf(
			    "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
			    (uintmax_t)phys_avail[indx],
			    (uintmax_t)phys_avail[indx + 1] - 1,
			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
		}
	}

	vm_ksubmap_init(&kmi);

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)vm_free_count()),
	    ptoa((uintmax_t)vm_free_count()) / 1048576);
#ifdef DEV_PCI
	if (bootverbose && intel_graphics_stolen_base != 0)
		printf("intel stolen mem: base %#jx size %ju MB\n",
		    (uintmax_t)intel_graphics_stolen_base,
		    (uintmax_t)intel_graphics_stolen_size / 1024 / 1024);
#endif

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
	vm_pager_bufferinit();

	cpu_setregs();
}

/*
 * Resolve ifunc relocations in the kernel linker's late pass; runs after
 * all earlier SI_SUB_CPU initializers (SI_ORDER_ANY).
 */
static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);

/*
 * Set the baseline CR0 bits: monitor coprocessor, native FPU error
 * reporting, lazy-FPU trap (TS), kernel write protection and alignment
 * checking.
 */
void
cpu_setregs(void)
{
	register_t cr0;

	TSENTER();
	cr0 = rcr0();
	cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
	TSENTER2("load_cr0");
	load_cr0(cr0);
	TSEXIT2("load_cr0");
	TSEXIT();
}

/*
 * Initialize amd64 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */

/* Dedicated IST stacks for faults that must not reuse the current stack. */
static char dblfault_stack[DBLFAULT_STACK_SIZE] __aligned(16);
static char mce0_stack[MCE_STACK_SIZE] __aligned(16);
static char nmi0_stack[NMI_STACK_SIZE] __aligned(16);
static char dbg0_stack[DBG_STACK_SIZE] __aligned(16);
CTASSERT(sizeof(struct nmi_pcpu) == 16);

/*
 * Software
prototypes -- in more palatable form.
 *
 * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same
 * slots as corresponding segments for i386 kernel.
 */
struct soft_segment_descriptor gdt_segs[] = {
	/* GNULL_SEL	0 Null Descriptor */
	{	.ssd_base = 0x0,
		.ssd_limit = 0x0,
		.ssd_type = 0,
		.ssd_dpl = 0,
		.ssd_p = 0,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0		},
	/* GNULL2_SEL	1 Null Descriptor */
	{	.ssd_base = 0x0,
		.ssd_limit = 0x0,
		.ssd_type = 0,
		.ssd_dpl = 0,
		.ssd_p = 0,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0		},
	/*
	 * GUFS32_SEL	2 32 bit %gs Descriptor for user
	 * NOTE(review): the %gs/%fs labels on this and the next entry look
	 * swapped relative to the selector names (GUFS32 is installed into
	 * pc_fs32p by amd64_bsp_pcpu_init1) — confirm before relying on them.
	 */
	{	.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMRWA,
		.ssd_dpl = SEL_UPL,
		.ssd_p = 1,
		.ssd_long = 0,
		.ssd_def32 = 1,
		.ssd_gran = 1		},
	/* GUGS32_SEL	3 32 bit %fs Descriptor for user */
	{	.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMRWA,
		.ssd_dpl = SEL_UPL,
		.ssd_p = 1,
		.ssd_long = 0,
		.ssd_def32 = 1,
		.ssd_gran = 1		},
	/* GCODE_SEL	4 Code Descriptor for kernel */
	{	.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMERA,
		.ssd_dpl = SEL_KPL,
		.ssd_p = 1,
		.ssd_long = 1,
		.ssd_def32 = 0,
		.ssd_gran = 1		},
	/* GDATA_SEL	5 Data Descriptor for kernel */
	{	.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMRWA,
		.ssd_dpl = SEL_KPL,
		.ssd_p = 1,
		.ssd_long = 1,
		.ssd_def32 = 0,
		.ssd_gran = 1		},
	/* GUCODE32_SEL	6 32 bit Code Descriptor for user */
	{	.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMERA,
		.ssd_dpl = SEL_UPL,
		.ssd_p = 1,
		.ssd_long = 0,
		.ssd_def32 = 1,
		.ssd_gran = 1		},
	/* GUDATA_SEL	7 32/64 bit Data Descriptor for user */
	{	.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMRWA,
		.ssd_dpl = SEL_UPL,
		.ssd_p = 1,
		.ssd_long = 0,
		.ssd_def32 = 1,
		.ssd_gran = 1		},
	/* GUCODE_SEL	8 64 bit Code Descriptor for user */
	{	.ssd_base = 0x0,
		.ssd_limit = 0xfffff,
		.ssd_type = SDT_MEMERA,
		.ssd_dpl = SEL_UPL,
		.ssd_p = 1,
		.ssd_long = 1,
		.ssd_def32 = 0,
		.ssd_gran = 1		},
	/* GPROC0_SEL	9 Proc 0 Tss Descriptor */
	{	.ssd_base = 0x0,
		.ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1,
		.ssd_type = SDT_SYSTSS,
		.ssd_dpl = SEL_KPL,
		.ssd_p = 1,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0		},
	/* Actually, the TSS is a system descriptor which is double size */
	{	.ssd_base = 0x0,
		.ssd_limit = 0x0,
		.ssd_type = 0,
		.ssd_dpl = 0,
		.ssd_p = 0,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0		},
	/* GUSERLDT_SEL	11 LDT Descriptor */
	{	.ssd_base = 0x0,
		.ssd_limit = 0x0,
		.ssd_type = 0,
		.ssd_dpl = 0,
		.ssd_p = 0,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0		},
	/* GUSERLDT_SEL	12 LDT Descriptor, double size */
	{	.ssd_base = 0x0,
		.ssd_limit = 0x0,
		.ssd_type = 0,
		.ssd_dpl = 0,
		.ssd_p = 0,
		.ssd_long = 0,
		.ssd_def32 = 0,
		.ssd_gran = 0		},
};
_Static_assert(nitems(gdt_segs) == NGDT, "Stale NGDT");

/*
 * Install an interrupt gate in the IDT.  The handler runs on the kernel
 * code segment; 'ist' selects an Interrupt Stack Table entry (0 = none),
 * 'typ' the gate type and 'dpl' the privilege level required to invoke it
 * from software.
 */
void
setidt(int idx, inthand_t *func, int typ, int dpl, int ist)
{
	struct gate_descriptor *ip;

	ip = idt + idx;
	ip->gd_looffset = (uintptr_t)func;
	ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
	ip->gd_ist = ist;
	ip->gd_xx = 0;
	ip->gd_type = typ;
	ip->gd_dpl = dpl;
	ip->gd_p = 1;
	ip->gd_hioffset = ((uintptr_t)func) >> 16;
}

/* Assembly entry points for the exception/interrupt handlers (and their
 * PTI variants) installed into the IDT. */
extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(xmm), IDTVEC(dblfault),
	IDTVEC(div_pti), IDTVEC(bpt_pti),
	IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti),
IDTVEC(dna_pti),
	IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti),
	IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti),
	IDTVEC(rsvd_pti), IDTVEC(fpu_pti), IDTVEC(align_pti),
	IDTVEC(xmm_pti),
#ifdef KDTRACE_HOOKS
	IDTVEC(dtrace_ret), IDTVEC(dtrace_ret_pti),
#endif
#ifdef XENHVM
	IDTVEC(xen_intr_upcall), IDTVEC(xen_intr_upcall_pti),
#endif
	IDTVEC(fast_syscall), IDTVEC(fast_syscall32),
	IDTVEC(fast_syscall_pti);

#ifdef DDB
/*
 * Display the index and function name of any IDT entries that don't use
 * the default 'rsvd' entry point.
 */
DB_SHOW_COMMAND_FLAGS(idt, db_show_idt, DB_CMD_MEMSAFE)
{
	struct gate_descriptor *ip;
	int idx;
	uintptr_t func;

	ip = idt;
	for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
		/* Reassemble the 64-bit handler address from the gate. */
		func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
		if (func != (uintptr_t)&IDTVEC(rsvd)) {
			db_printf("%3d\t", idx);
			db_printsym(func, DB_STGY_PROC);
			db_printf("\n");
		}
		ip++;
	}
}

/* Show privileged registers.
*/
DB_SHOW_COMMAND_FLAGS(sysregs, db_show_sysregs, DB_CMD_MEMSAFE)
{
	/* Buffers for the SIDT/SGDT stores: 16-bit limit + 64-bit base. */
	struct {
		uint16_t limit;
		uint64_t base;
	} __packed idtr, gdtr;
	uint16_t ldt, tr;

	__asm __volatile("sidt %0" : "=m" (idtr));
	db_printf("idtr\t0x%016lx/%04x\n",
	    (u_long)idtr.base, (u_int)idtr.limit);
	__asm __volatile("sgdt %0" : "=m" (gdtr));
	db_printf("gdtr\t0x%016lx/%04x\n",
	    (u_long)gdtr.base, (u_int)gdtr.limit);
	__asm __volatile("sldt %0" : "=r" (ldt));
	db_printf("ldtr\t0x%04x\n", ldt);
	__asm __volatile("str %0" : "=r" (tr));
	db_printf("tr\t0x%04x\n", tr);
	db_printf("cr0\t0x%016lx\n", rcr0());
	db_printf("cr2\t0x%016lx\n", rcr2());
	db_printf("cr3\t0x%016lx\n", rcr3());
	db_printf("cr4\t0x%016lx\n", rcr4());
	/* XCR0 is only architecturally present when CR4.OSXSAVE is set. */
	if (rcr4() & CR4_XSAVE)
		db_printf("xcr0\t0x%016lx\n", rxcr(0));
	db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER));
	if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX))
		db_printf("FEATURES_CTL\t%016lx\n",
		    rdmsr(MSR_IA32_FEATURE_CONTROL));
	db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR));
	db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT));
	db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE));
}

/* Dump the hardware debug registers (breakpoint addresses and status). */
DB_SHOW_COMMAND_FLAGS(dbregs, db_show_dbregs, DB_CMD_MEMSAFE)
{

	db_printf("dr0\t0x%016lx\n", rdr0());
	db_printf("dr1\t0x%016lx\n", rdr1());
	db_printf("dr2\t0x%016lx\n", rdr2());
	db_printf("dr3\t0x%016lx\n", rdr3());
	db_printf("dr6\t0x%016lx\n", rdr6());
	db_printf("dr7\t0x%016lx\n", rdr7());
}
#endif

/*
 * Unpack a hardware user segment descriptor into the software
 * (struct soft_segment_descriptor) form.
 */
void
sdtossd(struct user_segment_descriptor *sd, struct soft_segment_descriptor *ssd)
{

	ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase;
	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
	ssd->ssd_type = sd->sd_type;
	ssd->ssd_dpl = sd->sd_dpl;
	ssd->ssd_p = sd->sd_p;
	ssd->ssd_long = sd->sd_long;
	ssd->ssd_def32 = sd->sd_def32;
	ssd->ssd_gran = sd->sd_gran;
}

void
ssdtosd(struct soft_segment_descriptor *ssd, struct user_segment_descriptor *sd)
{
	/* Pack the software descriptor back into hardware user-segment form. */

	sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
	sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
	sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
	sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
	sd->sd_type = ssd->ssd_type;
	sd->sd_dpl = ssd->ssd_dpl;
	sd->sd_p = ssd->ssd_p;
	sd->sd_long = ssd->ssd_long;
	sd->sd_def32 = ssd->ssd_def32;
	sd->sd_gran = ssd->ssd_gran;
}

/*
 * As above, but for a system segment descriptor (TSS/LDT), which carries a
 * 40-bit high base and has no long/def32 fields.
 */
void
ssdtosyssd(struct soft_segment_descriptor *ssd, struct system_segment_descriptor *sd)
{

	sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
	sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
	sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
	sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
	sd->sd_type = ssd->ssd_type;
	sd->sd_dpl = ssd->ssd_dpl;
	sd->sd_p = ssd->ssd_p;
	sd->sd_gran = ssd->ssd_gran;
}

u_int basemem;

/*
 * Insert the range [base, base + length) into the physmap base/bound array,
 * keeping it sorted and merging with an adjacent entry where possible.
 * Returns 1 on success (including benign rejects such as zero length or
 * overlap), 0 when the array is full and scanning should stop.
 */
static int
add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
    int *physmap_idxp)
{
	int i, insert_idx, physmap_idx;

	physmap_idx = *physmap_idxp;

	if (length == 0)
		return (1);

	/*
	 * Find insertion point while checking for overlap.  Start off by
	 * assuming the new entry will be added to the end.
	 *
	 * NB: physmap_idx points to the next free slot.
	 */
	insert_idx = physmap_idx;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (base < physmap[i + 1]) {
			if (base + length <= physmap[i]) {
				insert_idx = i;
				break;
			}
			if (boothowto & RB_VERBOSE)
				printf(
		    "Overlapping memory regions, ignoring second region\n");
			return (1);
		}
	}

	/* See if we can prepend to the next entry. */
	if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) {
		physmap[insert_idx] = base;
		return (1);
	}

	/* See if we can append to the previous entry. */
	if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
		physmap[insert_idx - 1] += length;
		return (1);
	}

	physmap_idx += 2;
	*physmap_idxp = physmap_idx;
	if (physmap_idx == PHYS_AVAIL_ENTRIES) {
		printf(
		"Too many segments in the physical address map, giving up\n");
		return (0);
	}

	/*
	 * Move the last 'N' entries down to make room for the new
	 * entry if needed.
	 */
	for (i = (physmap_idx - 2); i > insert_idx; i -= 2) {
		physmap[i] = physmap[i - 2];
		physmap[i + 1] = physmap[i - 1];
	}

	/* Insert the new entry. */
	physmap[insert_idx] = base;
	physmap[insert_idx + 1] = base + length;
	return (1);
}

/*
 * Walk a BIOS INT 15h/E820 system memory map and feed each usable RAM
 * entry (SMAP_TYPE_MEMORY) into the physmap array.
 */
void
bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize,
    vm_paddr_t *physmap, int *physmap_idx)
{
	struct bios_smap *smap, *smapend;

	smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);

	for (smap = smapbase; smap < smapend; smap++) {
		if (boothowto & RB_VERBOSE)
			printf("SMAP type=%02x base=%016lx len=%016lx\n",
			    smap->type, smap->base, smap->length);

		if (smap->type != SMAP_TYPE_MEMORY)
			continue;

		if (!add_physmap_entry(smap->base, smap->length, physmap,
		    physmap_idx))
			break;
	}
}

/*
 * Walk a UEFI memory map and feed the usable descriptor types into the
 * physmap array, optionally dumping every descriptor when booting verbose.
 */
static void
add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap,
    int *physmap_idx)
{
	struct efi_md *map, *p;
	const char *type;
	size_t efisz;
	int ndesc, i;

	/* Printable names indexed by EFI_MD_TYPE_* descriptor type. */
	static const char *types[] = {
		"Reserved",
		"LoaderCode",
		"LoaderData",
		"BootServicesCode",
		"BootServicesData",
		"RuntimeServicesCode",
		"RuntimeServicesData",
		"ConventionalMemory",
		"UnusableMemory",
		"ACPIReclaimMemory",
		"ACPIMemoryNVS",
		"MemoryMappedIO",
		"MemoryMappedIOPortSpace",
		"PalCode",
		"PersistentMemory"
	};

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
*/
	/* The descriptors follow the header, rounded up to 16 bytes. */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return;
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	if (boothowto & RB_VERBOSE)
		printf("%23s %12s %12s %8s %4s\n",
		    "Type", "Physical", "Virtual", "#Pages", "Attr");

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (boothowto & RB_VERBOSE) {
			if (p->md_type < nitems(types))
				type = types[p->md_type];
			else
				type = "<INVALID>";
			printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
			    p->md_virt, p->md_pages);
			if (p->md_attr & EFI_MD_ATTR_UC)
				printf("UC ");
			if (p->md_attr & EFI_MD_ATTR_WC)
				printf("WC ");
			if (p->md_attr & EFI_MD_ATTR_WT)
				printf("WT ");
			if (p->md_attr & EFI_MD_ATTR_WB)
				printf("WB ");
			if (p->md_attr & EFI_MD_ATTR_UCE)
				printf("UCE ");
			if (p->md_attr & EFI_MD_ATTR_WP)
				printf("WP ");
			if (p->md_attr & EFI_MD_ATTR_RP)
				printf("RP ");
			if (p->md_attr & EFI_MD_ATTR_XP)
				printf("XP ");
			if (p->md_attr & EFI_MD_ATTR_NV)
				printf("NV ");
			if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
				printf("MORE_RELIABLE ");
			if (p->md_attr & EFI_MD_ATTR_RO)
				printf("RO ");
			if (p->md_attr & EFI_MD_ATTR_RT)
				printf("RUNTIME");
			printf("\n");
		}

		switch (p->md_type) {
		case EFI_MD_TYPE_CODE:
		case EFI_MD_TYPE_DATA:
		case EFI_MD_TYPE_BS_CODE:
		case EFI_MD_TYPE_BS_DATA:
		case EFI_MD_TYPE_FREE:
			/*
			 * We're allowed to use any entry with these types.
			 */
			break;
		default:
			continue;
		}

		if (!add_physmap_entry(p->md_phys, p->md_pages * EFI_PAGE_SIZE,
		    physmap, physmap_idx))
			break;
	}
}

/*
 * Default parse_memmap hook: locate either the EFI memory map or the BIOS
 * E820 map in the loader metadata and populate physmap from it.  Panics if
 * the loader supplied neither.
 */
static void
native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx)
{
	struct bios_smap *smap;
	struct efi_map_header *efihdr;
	u_int32_t size;

	/*
	 * Memory map from INT 15:E820.
	 *
	 * subr_module.c says:
	 * "Consumer may safely assume that size value precedes data."
	 * ie: an int32_t immediately precedes smap.
	 */

	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	smap = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (efihdr == NULL && smap == NULL)
		panic("No BIOS smap or EFI map info from loader!");

	/* Prefer the EFI map when both are present. */
	if (efihdr != NULL) {
		add_efi_map_entries(efihdr, physmap, physmap_idx);
		strlcpy(bootmethod, "UEFI", sizeof(bootmethod));
	} else {
		size = *((u_int32_t *)smap - 1);
		bios_add_smap_entries(smap, size, physmap, physmap_idx);
		strlcpy(bootmethod, "BIOS", sizeof(bootmethod));
	}
}

#define	PAGES_PER_GB	(1024 * 1024 * 1024 / PAGE_SIZE)

/*
 * Populate the (physmap) array with base/bound pairs describing the
 * available physical memory in the system, then test this memory and
 * build the phys_avail array describing the actually-available memory.
 *
 * Total memory size may be set by the kernel environment variable
 * hw.physmem or the compile-time define MAXMEM.
 *
 * XXX first should be vm_paddr_t.
*/
static void
getmemsize(caddr_t kmdp, u_int64_t first)
{
	int i, physmap_idx, pa_indx, da_indx;
	vm_paddr_t pa, physmap[PHYS_AVAIL_ENTRIES];
	u_long physmem_start, physmem_tunable, memtest;
	pt_entry_t *pte;
	quad_t dcons_addr, dcons_size;
	int page_counter;

	TSENTER();
	/*
	 * Tell the physical memory allocator about pages used to store
	 * the kernel and preloaded data.  See kmem_bootstrap_free().
	 */
	vm_phys_early_add_seg((vm_paddr_t)kernphys, trunc_page(first));

	bzero(physmap, sizeof(physmap));
	physmap_idx = 0;

	init_ops.parse_memmap(kmdp, physmap, &physmap_idx);
	/* parse_memmap leaves physmap_idx at the next free slot; step back
	 * so it indexes the last valid base/bound pair. */
	physmap_idx -= 2;

	/*
	 * Find the 'base memory' segment for SMP
	 */
	basemem = 0;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (physmap[i] <= 0xA0000) {
			basemem = physmap[i + 1] / 1024;
			break;
		}
	}
	if (basemem == 0 || basemem > 640) {
		if (bootverbose)
			printf(
		"Memory map doesn't contain a basemem segment, faking it");
		basemem = 640;
	}

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".  We may adjust this
	 * based on ``hw.physmem'' and the results of the memory test.
	 */
	Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
	/* MAXMEM is given in KB; /4 converts it to 4 KiB pages. */
	Maxmem = MAXMEM / 4;
#endif

	if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
		Maxmem = atop(physmem_tunable);

	/*
	 * The boot memory test is disabled by default, as it takes a
	 * significant amount of time on large-memory systems, and is
	 * unfriendly to virtual machines as it unnecessarily touches all
	 * pages.
	 *
	 * A general name is used as the code may be extended to support
	 * additional tests beyond the current "page present" test.
	 */
	memtest = 0;
	TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);

	/*
	 * Don't allow MAXMEM or hw.physmem to extend the amount of memory
	 * in the system.
	 */
	if (Maxmem > atop(physmap[physmap_idx + 1]))
		Maxmem = atop(physmap[physmap_idx + 1]);

	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
	    (boothowto & RB_VERBOSE))
		printf("Physical memory use set to %ldK\n", Maxmem * 4);

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap(&first);

	/*
	 * Size up each available chunk of physical memory.
	 *
	 * XXX Some BIOSes corrupt low 64KB between suspend and resume.
	 * By default, mask off the first 16 pages unless we appear to be
	 * running in a VM.
	 */
	physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT;
	TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start);
	if (physmap[0] < physmem_start) {
		if (physmem_start < PAGE_SIZE)
			physmap[0] = PAGE_SIZE;
		else if (physmem_start >= physmap[1])
			physmap[0] = round_page(physmap[1] - PAGE_SIZE);
		else
			physmap[0] = round_page(physmem_start);
	}
	pa_indx = 0;
	da_indx = 1;
	phys_avail[pa_indx++] = physmap[0];
	phys_avail[pa_indx] = physmap[0];
	dump_avail[da_indx] = physmap[0];
	/* CMAP1/CADDR1: scratch PTE and VA used to probe each page below. */
	pte = CMAP1;

	/*
	 * Get dcons buffer address
	 */
	if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
	    getenv_quad("dcons.size", &dcons_size) == 0)
		dcons_addr = 0;

	/*
	 * physmap is in bytes, so when converting to page boundaries,
	 * round up the start address and round down the end address.
	 */
	page_counter = 0;
	if (memtest != 0)
		printf("Testing system memory");
	for (i = 0; i <= physmap_idx; i += 2) {
		vm_paddr_t end;

		end = ptoa((vm_paddr_t)Maxmem);
		if (physmap[i + 1] < end)
			end = trunc_page(physmap[i + 1]);
		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
			int tmp, page_bad, full;
			int *ptr = (int *)CADDR1;

			full = FALSE;
			/*
			 * block out kernel memory as not available.
			 */
			if (pa >= (vm_paddr_t)kernphys && pa < first)
				goto do_dump_avail;

			/*
			 * block out dcons buffer
			 */
			if (dcons_addr > 0
			    && pa >= trunc_page(dcons_addr)
			    && pa < dcons_addr + dcons_size)
				goto do_dump_avail;

			page_bad = FALSE;
			if (memtest == 0)
				goto skip_memtest;

			/*
			 * Print a "." every GB to show we're making
			 * progress.
			 */
			page_counter++;
			if ((page_counter % PAGES_PER_GB) == 0)
				printf(".");

			/*
			 * map page into kernel: valid, read/write,non-cacheable
			 */
			*pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD;
			invltlb();

			tmp = *(int *)ptr;
			/*
			 * Test for alternating 1's and 0's
			 */
			*(volatile int *)ptr = 0xaaaaaaaa;
			if (*(volatile int *)ptr != 0xaaaaaaaa)
				page_bad = TRUE;
			/*
			 * Test for alternating 0's and 1's
			 */
			*(volatile int *)ptr = 0x55555555;
			if (*(volatile int *)ptr != 0x55555555)
				page_bad = TRUE;
			/*
			 * Test for all 1's
			 */
			*(volatile int *)ptr = 0xffffffff;
			if (*(volatile int *)ptr != 0xffffffff)
				page_bad = TRUE;
			/*
			 * Test for all 0's
			 */
			*(volatile int *)ptr = 0x0;
			if (*(volatile int *)ptr != 0x0)
				page_bad = TRUE;
			/*
			 * Restore original value.
			 */
			*(int *)ptr = tmp;

skip_memtest:
			/*
			 * Adjust array of valid/good pages.
			 */
			if (page_bad == TRUE)
				continue;
			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer. Otherwise start a new chunk.
			 * Note that "end" points one higher than end,
			 * making the range >= start and < end.
			 * If we're also doing a speculative memory
			 * test and we at or past the end, bump up Maxmem
			 * so that we keep going. The first bad page
			 * will terminate the loop.
			 */
			if (phys_avail[pa_indx] == pa) {
				phys_avail[pa_indx] += PAGE_SIZE;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ENTRIES) {
					printf(
		"Too many holes in the physical address space, giving up\n");
					pa_indx--;
					full = TRUE;
					goto do_dump_avail;
				}
				phys_avail[pa_indx++] = pa;	/* start */
				phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
			}
			physmem++;
do_dump_avail:
			if (dump_avail[da_indx] == pa) {
				dump_avail[da_indx] += PAGE_SIZE;
			} else {
				da_indx++;
				if (da_indx == PHYS_AVAIL_ENTRIES) {
					da_indx--;
					goto do_next;
				}
				dump_avail[da_indx++] = pa; /* start */
				dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
			}
do_next:
			if (full)
				break;
		}
	}
	*pte = 0;
	invltlb();
	if (memtest != 0)
		printf("\n");

	/*
	 * XXX
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(msgbufsize) >= phys_avail[pa_indx]) {
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	Maxmem = atop(phys_avail[pa_indx]);

	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(msgbufsize);

	/* Map the message buffer. */
	msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]);
	TSEXIT();
}

/*
 * Default parse_preload_data hook: relocate the loader-supplied metadata,
 * locate the kernel's module entry, install the static kernel environment
 * and (with DDB) the symbol table, and stash the EFI system table address.
 * Returns the kernel's preloaded-module pointer.
 */
static caddr_t
native_parse_preload_data(u_int64_t modulep)
{
	caddr_t kmdp;
	char *envp;
#ifdef DDB
	vm_offset_t ksym_start;
	vm_offset_t ksym_end;
#endif

	preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE);
	preload_bootstrap_relocate(KERNBASE);
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
	envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
	if (envp != NULL)
		envp += KERNBASE;
	init_static_kenv(envp, 0);
#ifdef DDB
	ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
	ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
	db_fetch_ksymtab(ksym_start, ksym_end, 0);
#endif
	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	return (kmdp);
}

/* Default early_clock_source_init hook: probe/program the 8254 PIT. */
static void
native_clock_source_init(void)
{
	i8254_init();
}

/* Initialize the kernel debugger and drop into it if -d was given. */
static void
amd64_kdb_init(void)
{
	kdb_init();
#ifdef KDB
	if (boothowto & RB_KDB)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
}

/* Set up the fast syscall stuff */
void
amd64_conf_fast_syscall(void)
{
	uint64_t msr;

	/* Enable SYSCALL/SYSRET, then point LSTAR at the (PTI-aware)
	 * 64-bit entry. */
	msr = rdmsr(MSR_EFER) | EFER_SCE;
	wrmsr(MSR_EFER, msr);
	wrmsr(MSR_LSTAR, pti ?
	    (u_int64_t)IDTVEC(fast_syscall_pti) :
	    (u_int64_t)IDTVEC(fast_syscall));
	/* 32-bit compat syscalls enter through a separate vector. */
	wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
	/* STAR holds the kernel/user segment selectors used by SYSCALL. */
	msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
	    ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
	wrmsr(MSR_STAR, msr);
	/* RFLAGS bits masked off on syscall entry. */
	wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC);
}

/*
 * First-stage BSP per-CPU initialization: wire up the pcpu pointers
 * (curthread, TSS, LDT, 32-bit fs/gs descriptor slots) into the given
 * pcpu area.  The gdt referenced here is the one stored in the pcpu.
 */
void
amd64_bsp_pcpu_init1(struct pcpu *pc)
{
	struct user_segment_descriptor *gdt;

	PCPU_SET(prvspace, pc);
	gdt = *PCPU_PTR(gdt);
	PCPU_SET(curthread, &thread0);
	PCPU_SET(tssp, PCPU_PTR(common_tss));
	PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
	PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
	PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
	PCPU_SET(gs32p, &gdt[GUGS32_SEL]);
	PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK);
	PCPU_SET(smp_tlb_gen, 1);
}

/*
 * Second-stage BSP per-CPU initialization: record the kernel stack top
 * and the PTI trampoline stack (16-byte aligned) once thread0's pcb exists.
 */
void
amd64_bsp_pcpu_init2(uint64_t rsp0)
{

	PCPU_SET(rsp0, rsp0);
	PCPU_SET(pti_rsp0, ((vm_offset_t)PCPU_PTR(pti_stack) +
	    PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
	PCPU_SET(curpcb, thread0.td_pcb);
}

/*
 * Point the TSS interrupt-stack-table entries at the dedicated exception
 * stacks.  For each stack, a struct nmi_pcpu holding the pcpu pointer is
 * placed at the very top, just below the initial stack pointer.
 */
void
amd64_bsp_ist_init(struct pcpu *pc)
{
	struct nmi_pcpu *np;
	struct amd64tss *tssp;

	tssp = &pc->pc_common_tss;

	/* doublefault stack space, runs on ist1 */
	np = ((struct nmi_pcpu *)&dblfault_stack[sizeof(dblfault_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist1 = (long)np;

	/*
	 * NMI stack, runs on ist2.  The pcpu pointer is stored just
	 * above the start of the ist2 stack.
	 */
	np = ((struct nmi_pcpu *)&nmi0_stack[sizeof(nmi0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist2 = (long)np;

	/*
	 * MC# stack, runs on ist3.  The pcpu pointer is stored just
	 * above the start of the ist3 stack.
	 */
	np = ((struct nmi_pcpu *)&mce0_stack[sizeof(mce0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist3 = (long)np;

	/*
	 * DB# stack, runs on ist4.
	 */
	np = ((struct nmi_pcpu *)&dbg0_stack[sizeof(dbg0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist4 = (long)np;
}

/*
 * Calculate the kernel load address by inspecting page table created by loader.
 * The assumptions:
 * - kernel is mapped at KERNBASE, backed by contiguous phys memory
 *   aligned at 2M, below 4G (the latter is important for AP startup)
 * - there is a 2M hole at KERNBASE (KERNSTART = KERNBASE + 2M)
 * - kernel is mapped with 2M superpages
 * - all participating memory, i.e. kernel, modules, metadata,
 *   page table is accessible by pre-created 1:1 mapping
 *   (right now loader creates 1:1 mapping for lower 4G, and all
 *   memory is from there)
 * - there is a usable memory block right after the end of the
 *   mapped kernel and all modules/metadata, pointed to by
 *   physfree, for early allocations
 */
vm_paddr_t __nosanitizeaddress __nosanitizememory
amd64_loadaddr(void)
{
	pml4_entry_t *pml4e;
	pdp_entry_t *pdpe;
	pd_entry_t *pde;
	uint64_t cr3;

	/*
	 * Walk the loader's page table (still active, identity-mapped)
	 * down to the 2M PDE covering KERNSTART; its frame is the kernel's
	 * physical load address.
	 */
	cr3 = rcr3();
	pml4e = (pml4_entry_t *)cr3 + pmap_pml4e_index(KERNSTART);
	pdpe = (pdp_entry_t *)(*pml4e & PG_FRAME) + pmap_pdpe_index(KERNSTART);
	pde = (pd_entry_t *)(*pdpe & PG_FRAME) + pmap_pde_index(KERNSTART);
	return (*pde & PG_FRAME);
}

/*
 * amd64 kernel bootstrap entry point, called from locore with the loader
 * module pointer and the first free physical address.  Returns the kernel
 * stack top for locore to switch to.
 */
u_int64_t
hammer_time(u_int64_t modulep, u_int64_t physfree)
{
	caddr_t kmdp;
	int gsel_tss, x;
	struct pcpu *pc;
	uint64_t rsp0;
	char *env;
	struct user_segment_descriptor *gdt;
	struct region_descriptor r_gdt;
	size_t kstack0_sz;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	kernphys = amd64_loadaddr();

	/* physfree arrives kernel-relative; make it a physical address. */
	physfree += kernphys;

	kmdp =
	    init_ops.parse_preload_data(modulep);

	/* UEFI boot is detected by the presence of an EFI memory map. */
	efi_boot = preload_search_info(kmdp, MODINFO_METADATA |
	    MODINFOMD_EFI_MAP) != NULL;

	if (!efi_boot) {
		/* Tell the bios to warmboot next time */
		atomic_store_short((u_short *)0x472, 0x1234);
	}

	physfree += ucode_load_bsp(physfree - kernphys + KERNSTART);
	physfree = roundup2(physfree, PAGE_SIZE);

	identify_cpu1();
	identify_hypervisor();
	identify_hypervisor_smbios();
	identify_cpu_fixup_bsp();
	identify_cpu2();
	initializecpucache();

	/*
	 * Check for pti, pcid, and invpcid before ifuncs are
	 * resolved, to correctly select the implementation for
	 * pmap_activate_sw_mode().
	 */
	pti = pti_get_default();
	TUNABLE_INT_FETCH("vm.pmap.pti", &pti);
	TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
	if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
		invpcid_works = (cpu_stdext_feature &
		    CPUID_STDEXT_INVPCID) != 0;
	} else {
		pmap_pcid_enabled = 0;
	}

	/*
	 * Now we can do small core initialization, after the PCID
	 * CPU features and user knobs are evaluated.
	 */
	TUNABLE_INT_FETCH("vm.pmap.pcid_invlpg_workaround",
	    &pmap_pcid_invlpg_workaround_uena);
	cpu_init_small_core();

	if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
		use_xsave = 1;
		TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
	}

	link_elf_ireloc(kmdp);

	/*
	 * This may be done better later if it gets more high level
	 * components in it. If so just link td->td_proc here.
	 */
	proc_linkup0(&proc0, &thread0);

	/* Init basic tunables, hz etc */
	init_param1();

	/* Carve thread0's kernel stack out of the early free region. */
	thread0.td_kstack = physfree - kernphys + KERNSTART;
	thread0.td_kstack_pages = kstack_pages;
	kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
	bzero((void *)thread0.td_kstack, kstack0_sz);
	physfree += kstack0_sz;

	/*
	 * Initialize enough of thread0 for delayed invalidation to
	 * work very early.  Rely on thread0.td_base_pri
	 * zero-initialization, it is reset to PVM at proc0_init().
	 */
	pmap_thread_init_invl_gen(&thread0);

	pc = &temp_bsp_pcpu;
	pcpu_init(pc, 0, sizeof(struct pcpu));
	gdt = &temp_bsp_pcpu.pc_gdt[0];

	/*
	 * make gdt memory segments
	 */
	for (x = 0; x < NGDT; x++) {
		/* System (TSS/LDT) descriptors are 16 bytes, set up below. */
		if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
		    x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1)
			ssdtosd(&gdt_segs[x], &gdt[x]);
	}
	gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&pc->pc_common_tss;
	ssdtosyssd(&gdt_segs[GPROC0_SEL],
	    (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (long)gdt;
	lgdt(&r_gdt);

	wrmsr(MSR_FSBASE, 0);		/* User value */
	wrmsr(MSR_GSBASE, (u_int64_t)pc);
	wrmsr(MSR_KGSBASE, 0);		/* User value while in the kernel */

	dpcpu_init((void *)(physfree - kernphys + KERNSTART), 0);
	physfree += DPCPU_SIZE;
	amd64_bsp_pcpu_init1(pc);
	/* Non-late cninit() and printf() can be moved up to here. */

	/*
	 * Initialize mutexes.
	 *
	 * icu_lock: in order to allow an interrupt to occur in a critical
	 *	     section, to set pcpu->ipending (etc...) properly, we
	 *	     must be able to get the icu lock, so it can't be
	 *	     under witness.
	 */
	mutex_init();
	mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
	mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);

	/* exceptions */
	for (x = 0; x < NIDT; x++)
		setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT,
		    SEL_KPL, 0);
	setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT,
	    SEL_KPL, 0);
	/* DB#, NMI, DF# and MC# run on dedicated IST stacks (see
	 * amd64_bsp_ist_init()); the last setidt() argument selects the
	 * IST slot. */
	setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4);
	setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2);
	setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_OF, pti ? &IDTVEC(ofl_pti) : &IDTVEC(ofl), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_BR, pti ? &IDTVEC(bnd_pti) : &IDTVEC(bnd), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_UD, pti ? &IDTVEC(ill_pti) : &IDTVEC(ill), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NM, pti ? &IDTVEC(dna_pti) : &IDTVEC(dna), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
	setidt(IDT_FPUGP, pti ? &IDTVEC(fpusegm_pti) : &IDTVEC(fpusegm),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_TS, pti ? &IDTVEC(tss_pti) : &IDTVEC(tss), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NP, pti ? &IDTVEC(missing_pti) : &IDTVEC(missing),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_SS, pti ? &IDTVEC(stk_pti) : &IDTVEC(stk), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_GP, pti ? &IDTVEC(prot_pti) : &IDTVEC(prot), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_PF, pti ? &IDTVEC(page_pti) : &IDTVEC(page), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MF, pti ? &IDTVEC(fpu_pti) : &IDTVEC(fpu), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_AC, pti ? &IDTVEC(align_pti) : &IDTVEC(align), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 3);
	setidt(IDT_XF, pti ? &IDTVEC(xmm_pti) : &IDTVEC(xmm), SDT_SYSIGT,
	    SEL_KPL, 0);
#ifdef KDTRACE_HOOKS
	setidt(IDT_DTRACE_RET, pti ?
	    &IDTVEC(dtrace_ret_pti) :
	    &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0);
#endif
#ifdef XENHVM
	setidt(IDT_EVTCHN, pti ? &IDTVEC(xen_intr_upcall_pti) :
	    &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_KPL, 0);
#endif
	r_idt.rd_limit = sizeof(idt0) - 1;
	r_idt.rd_base = (long) idt;
	lidt(&r_idt);

	/*
	 * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4)
	 * transition).
	 * Once bootblocks have updated, we can test directly for
	 * efi_systbl != NULL here...
	 */
	if (efi_boot)
		vty_set_preferred(VTY_VT);

	/* Mitigation knobs: new machdep.mitigations.* names override the
	 * legacy hw.* ones by being fetched second. */
	TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.ibrs.disable", &hw_ibrs_disable);

	TUNABLE_INT_FETCH("hw.spec_store_bypass_disable", &hw_ssb_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.ssb.disable", &hw_ssb_disable);

	TUNABLE_INT_FETCH("machdep.syscall_ret_flush_l1d",
	    &syscall_ret_l1d_flush_mode);

	TUNABLE_INT_FETCH("hw.mds_disable", &hw_mds_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.mds.disable", &hw_mds_disable);

	TUNABLE_INT_FETCH("machdep.mitigations.taa.enable", &x86_taa_enable);

	TUNABLE_INT_FETCH("machdep.mitigations.rngds.enable",
	    &x86_rngds_mitg_enable);

	TUNABLE_INT_FETCH("machdep.mitigations.zenbleed.enable",
	    &zenbleed_enable);
	zenbleed_sanitize_enable();

	finishidentcpu();	/* Final stage of CPU initialization */

	/*
	 * Initialize the clock before the console so that console
	 * initialization can use DELAY().
	 */
	clock_init();

	initializecpu();	/* Initialize CPU registers */

	amd64_bsp_ist_init(pc);

	/* Set the IO permission bitmap (empty due to tss seg limit) */
	pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) +
	    IOPERM_BITMAP_SIZE;

	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	ltr(gsel_tss);

	amd64_conf_fast_syscall();

	/*
	 * We initialize the PCB pointer early so that exception
	 * handlers will work.  Also set up td_critnest to short-cut
	 * the page fault handler.
	 */
	cpu_max_ext_state_size = sizeof(struct savefpu);
	set_top_of_stack_td(&thread0);
	thread0.td_pcb = get_pcb_td(&thread0);
	thread0.td_critnest = 1;

	/*
	 * The console and kdb should be initialized even earlier than here,
	 * but some console drivers don't work until after getmemsize().
	 * Default to late console initialization to support these drivers.
	 * This loses mainly printf()s in getmemsize() and early debugging.
	 */
	TUNABLE_INT_FETCH("debug.late_console", &late_console);
	if (!late_console) {
		cninit();
		amd64_kdb_init();
	}

	getmemsize(kmdp, physfree);
	init_param2(physmem);

	/* now running on new page tables, configured,and u/iom is accessible */

#ifdef DEV_PCI
	/* This call might adjust phys_avail[]. */
	pci_early_quirks();
#endif

	if (late_console)
		cninit();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

#ifdef DEV_ISA
#ifdef DEV_ATPIC
	elcr_probe();
	atpic_startup();
#else
	/* Reset and mask the atpics and leave them shut down.
 */
	atpic_reset();

	/*
	 * Point the ICU spurious interrupt vectors at the APIC spurious
	 * interrupt handler.
	 */
	setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
#endif
#else
#error "have you forgotten the isa device?"
#endif

	if (late_console)
		amd64_kdb_init();

	msgbufinit(msgbufp, msgbufsize);
	fpuinit();

	/* make an initial tss so cpu can get interrupt stack on syscall! */
	rsp0 = thread0.td_md.md_stack_base;
	/* Ensure the stack is aligned to 16 bytes */
	rsp0 &= ~0xFul;
	PCPU_PTR(common_tss)->tss_rsp0 = rsp0;
	amd64_bsp_pcpu_init2(rsp0);

	/* transfer to user mode */

	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
	_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
	_ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
	_ufssel = GSEL(GUFS32_SEL, SEL_UPL);
	_ugssel = GSEL(GUGS32_SEL, SEL_UPL);

	load_ds(_udatasel);
	load_es(_udatasel);
	load_fs(_ufssel);

	/* setup proc 0's pcb */
	thread0.td_pcb->pcb_flags = 0;

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

	kcsan_cpu_init(0);

#ifdef FDT
	x86_init_fdt();
#endif
	thread0.td_critnest = 0;

	kasan_init();
	kmsan_init();

	TSEXIT();

	/* Location of kernel stack for locore */
	return (thread0.td_md.md_stack_base);
}

/*
 * MD per-CPU initialization hook called from pcpu_init().  The ACPI id
 * is filled in later (by the ACPI/MADT code); mark it invalid for now.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
}

/*
 * sysctl handler exporting the raw BIOS SMAP (with extended attributes,
 * when present) from the loader-provided metadata.
 */
static int
smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct bios_smap *smapbase;
	struct bios_smap_xattr smap;
	caddr_t kmdp;
	uint32_t *smapattr;
	int count, error, i;

	/* Retrieve the system memory map from the loader.
*/ 1644 kmdp = preload_search_by_type("elf kernel"); 1645 if (kmdp == NULL) 1646 kmdp = preload_search_by_type("elf64 kernel"); 1647 smapbase = (struct bios_smap *)preload_search_info(kmdp, 1648 MODINFO_METADATA | MODINFOMD_SMAP); 1649 if (smapbase == NULL) 1650 return (0); 1651 smapattr = (uint32_t *)preload_search_info(kmdp, 1652 MODINFO_METADATA | MODINFOMD_SMAP_XATTR); 1653 count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase); 1654 error = 0; 1655 for (i = 0; i < count; i++) { 1656 smap.base = smapbase[i].base; 1657 smap.length = smapbase[i].length; 1658 smap.type = smapbase[i].type; 1659 if (smapattr != NULL) 1660 smap.xattr = smapattr[i]; 1661 else 1662 smap.xattr = 0; 1663 error = SYSCTL_OUT(req, &smap, sizeof(smap)); 1664 } 1665 return (error); 1666 } 1667 SYSCTL_PROC(_machdep, OID_AUTO, smap, 1668 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 1669 smap_sysctl_handler, "S,bios_smap_xattr", 1670 "Raw BIOS SMAP data"); 1671 1672 static int 1673 efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS) 1674 { 1675 struct efi_map_header *efihdr; 1676 caddr_t kmdp; 1677 uint32_t efisize; 1678 1679 kmdp = preload_search_by_type("elf kernel"); 1680 if (kmdp == NULL) 1681 kmdp = preload_search_by_type("elf64 kernel"); 1682 efihdr = (struct efi_map_header *)preload_search_info(kmdp, 1683 MODINFO_METADATA | MODINFOMD_EFI_MAP); 1684 if (efihdr == NULL) 1685 return (0); 1686 efisize = *((uint32_t *)efihdr - 1); 1687 return (SYSCTL_OUT(req, efihdr, efisize)); 1688 } 1689 SYSCTL_PROC(_machdep, OID_AUTO, efi_map, 1690 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 1691 efi_map_sysctl_handler, "S,efi_map_header", 1692 "Raw EFI Memory Map"); 1693 1694 void 1695 spinlock_enter(void) 1696 { 1697 struct thread *td; 1698 register_t flags; 1699 1700 td = curthread; 1701 if (td->td_md.md_spinlock_count == 0) { 1702 flags = intr_disable(); 1703 td->td_md.md_spinlock_count = 1; 1704 td->td_md.md_saved_flags = flags; 1705 critical_enter(); 1706 } else 1707 
		td->td_md.md_spinlock_count++;
}

/*
 * Leave a spinlock section: on the final (outermost) exit, leave the
 * critical section and restore the interrupt state saved by
 * spinlock_enter().
 */
void
spinlock_exit(void)
{
	struct thread *td;
	register_t flags;

	td = curthread;
	flags = td->td_md.md_saved_flags;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(flags);
	}
}

/*
 * Construct a PCB from a trapframe. This is called from kdb_trap() where
 * we want to start a backtrace from the function that caused us to enter
 * the debugger. We have the context in the trapframe, but base the trace
 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 * enough for a backtrace.
 */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{

	/* Only the callee-saved registers plus %rip/%rsp are needed. */
	pcb->pcb_r12 = tf->tf_r12;
	pcb->pcb_r13 = tf->tf_r13;
	pcb->pcb_r14 = tf->tf_r14;
	pcb->pcb_r15 = tf->tf_r15;
	pcb->pcb_rbp = tf->tf_rbp;
	pcb->pcb_rbx = tf->tf_rbx;
	pcb->pcb_rip = tf->tf_rip;
	pcb->pcb_rsp = tf->tf_rsp;
}

/*
 * The pcb_flags is only modified by current thread, or by other threads
 * when current thread is stopped.  However, current thread may change it
 * from the interrupt context in cpu_switch(), or in the trap handler.
 * When we read-modify-write pcb_flags from C sources, compiler may generate
 * code that is not atomic regarding the interrupt handler.  If a trap or
 * interrupt happens and any flag is modified from the handler, it can be
 * clobbered with the cached value later.  Therefore, we implement setting
 * and clearing flags with single-instruction functions, which do not race
 * with possible modification of the flags from the trap or interrupt context,
 * because traps and interrupts are executed only on instruction boundary.
 */
void
set_pcb_flags_raw(struct pcb *pcb, const u_int flags)
{

	/* Single "orl" so a trap/interrupt cannot interleave the RMW. */
	__asm __volatile("orl %1,%0"
	    : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags)
	    : "cc", "memory");

}

/*
 * The support for RDFSBASE, WRFSBASE and similar instructions for %gs
 * base requires that kernel saves MSR_FSBASE and MSR_{K,}GSBASE into
 * pcb if user space modified the bases.  We must save on the context
 * switch or if the return to usermode happens through the doreti.
 *
 * Tracking of both events is performed by the pcb flag PCB_FULL_IRET,
 * which have a consequence that the base MSRs must be saved each time
 * the PCB_FULL_IRET flag is set.  We disable interrupts to sync with
 * context switches.
 */
static void
set_pcb_flags_fsgsbase(struct pcb *pcb, const u_int flags)
{
	register_t r;

	if (curpcb == pcb &&
	    (flags & PCB_FULL_IRET) != 0 &&
	    (pcb->pcb_flags & PCB_FULL_IRET) == 0) {
		r = intr_disable();
		/* Re-check under disabled interrupts; a context switch may
		 * have set PCB_FULL_IRET and saved the bases already. */
		if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) {
			if (rfs() == _ufssel)
				pcb->pcb_fsbase = rdfsbase();
			if (rgs() == _ugssel)
				pcb->pcb_gsbase = rdmsr(MSR_KGSBASE);
		}
		set_pcb_flags_raw(pcb, flags);
		intr_restore(r);
	} else {
		set_pcb_flags_raw(pcb, flags);
	}
}

/* Resolve set_pcb_flags at boot: the FSGSBASE-aware variant is only
 * needed when the CPU lets user space write the segment bases directly. */
DEFINE_IFUNC(, void, set_pcb_flags, (struct pcb *, const u_int))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0 ?
	    set_pcb_flags_fsgsbase : set_pcb_flags_raw);
}

void
clear_pcb_flags(struct pcb *pcb, const u_int flags)
{

	/* Single "andl" for the same atomicity reason as set_pcb_flags_raw. */
	__asm __volatile("andl %1,%0"
	    : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags)
	    : "cc", "memory");
}

#ifdef KDB

/*
 * Provide inb() and outb() as functions.  They are normally only available as
 * inline functions, thus cannot be called from the debugger.
1822 */ 1823 1824 /* silence compiler warnings */ 1825 u_char inb_(u_short); 1826 void outb_(u_short, u_char); 1827 1828 u_char 1829 inb_(u_short port) 1830 { 1831 return inb(port); 1832 } 1833 1834 void 1835 outb_(u_short port, u_char data) 1836 { 1837 outb(port, data); 1838 } 1839 1840 #endif /* KDB */ 1841 1842 #undef memset 1843 #undef memmove 1844 #undef memcpy 1845 1846 void *memset_std(void *buf, int c, size_t len); 1847 void *memset_erms(void *buf, int c, size_t len); 1848 void *memmove_std(void * _Nonnull dst, const void * _Nonnull src, 1849 size_t len); 1850 void *memmove_erms(void * _Nonnull dst, const void * _Nonnull src, 1851 size_t len); 1852 void *memcpy_std(void * _Nonnull dst, const void * _Nonnull src, 1853 size_t len); 1854 void *memcpy_erms(void * _Nonnull dst, const void * _Nonnull src, 1855 size_t len); 1856 1857 #ifdef KCSAN 1858 /* 1859 * These fail to build as ifuncs when used with KCSAN. 1860 */ 1861 void * 1862 memset(void *buf, int c, size_t len) 1863 { 1864 1865 return (memset_std(buf, c, len)); 1866 } 1867 1868 void * 1869 memmove(void * _Nonnull dst, const void * _Nonnull src, size_t len) 1870 { 1871 1872 return (memmove_std(dst, src, len)); 1873 } 1874 1875 void * 1876 memcpy(void * _Nonnull dst, const void * _Nonnull src, size_t len) 1877 { 1878 1879 return (memcpy_std(dst, src, len)); 1880 } 1881 #else 1882 DEFINE_IFUNC(, void *, memset, (void *, int, size_t)) 1883 { 1884 1885 return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ? 1886 memset_erms : memset_std); 1887 } 1888 1889 DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull, 1890 size_t)) 1891 { 1892 1893 return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ? 1894 memmove_erms : memmove_std); 1895 } 1896 1897 DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,size_t)) 1898 { 1899 1900 return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ? 
	    memcpy_erms : memcpy_std);
}
#endif

/* Plain and ERMS assembly page-zeroing implementations. */
void pagezero_std(void *addr);
void pagezero_erms(void *addr);
/* Resolve pagezero at boot to the ERMS variant when available. */
DEFINE_IFUNC(, void , pagezero, (void *))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
	    pagezero_erms : pagezero_std);
}