1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 2003 Peter Wemm. 5 * Copyright (c) 1992 Terrence R. Lambert. 6 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * William Jolitz. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 41 */ 42 43 #include <sys/cdefs.h> 44 __FBSDID("$FreeBSD$"); 45 46 #include "opt_atpic.h" 47 #include "opt_cpu.h" 48 #include "opt_ddb.h" 49 #include "opt_inet.h" 50 #include "opt_isa.h" 51 #include "opt_kstack_pages.h" 52 #include "opt_maxmem.h" 53 #include "opt_mp_watchdog.h" 54 #include "opt_pci.h" 55 #include "opt_platform.h" 56 #include "opt_sched.h" 57 58 #include <sys/param.h> 59 #include <sys/proc.h> 60 #include <sys/systm.h> 61 #include <sys/asan.h> 62 #include <sys/bio.h> 63 #include <sys/buf.h> 64 #include <sys/bus.h> 65 #include <sys/callout.h> 66 #include <sys/cons.h> 67 #include <sys/cpu.h> 68 #include <sys/csan.h> 69 #include <sys/efi.h> 70 #include <sys/eventhandler.h> 71 #include <sys/exec.h> 72 #include <sys/imgact.h> 73 #include <sys/kdb.h> 74 #include <sys/kernel.h> 75 #include <sys/ktr.h> 76 #include <sys/linker.h> 77 #include <sys/lock.h> 78 #include <sys/malloc.h> 79 #include <sys/memrange.h> 80 #include <sys/msan.h> 81 #include <sys/msgbuf.h> 82 #include <sys/mutex.h> 83 #include <sys/pcpu.h> 84 #include <sys/ptrace.h> 85 #include <sys/reboot.h> 86 #include <sys/reg.h> 87 #include <sys/rwlock.h> 88 #include <sys/sched.h> 89 #include <sys/signalvar.h> 90 #ifdef SMP 91 #include <sys/smp.h> 92 #endif 93 #include <sys/syscallsubr.h> 94 #include <sys/sysctl.h> 95 #include <sys/sysent.h> 96 #include <sys/sysproto.h> 97 #include <sys/ucontext.h> 98 #include 
<sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_dumpset.h>

#ifdef DDB
#ifndef KDB
#error KDB must be enabled in order for DDB to work!
#endif
#include <ddb/ddb.h>
#include <ddb/db_sym.h>
#endif

#include <net/netisr.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/frame.h>
#include <machine/intr_machdep.h>
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/metadata.h>
#include <machine/mp_watchdog.h>
#include <machine/pc/bios.h>
#include <machine/pcb.h>
#include <machine/proc.h>
#include <machine/sigframe.h>
#include <machine/specialreg.h>
#include <machine/trap.h>
#include <machine/tss.h>
#include <x86/ucode.h>
#include <x86/ifunc.h>
#ifdef SMP
#include <machine/smp.h>
#endif
#ifdef FDT
#include <x86/fdt.h>
#endif

#ifdef DEV_ATPIC
#include <x86/isa/icu.h>
#else
#include <x86/apicvar.h>
#endif

#include <isa/isareg.h>
#include <isa/rtc.h>
#include <x86/init.h>

/*
 * Sanity check for __curthread(): the fast curthread accessor reads
 * pc_curthread at offset 0 of the pcpu area, so the layout must not drift.
 */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);

/*
 * The PTI trampoline stack needs enough space for a hardware trapframe and a
 * couple of scratch registers, as well as the trapframe left behind after an
 * iret fault.
163 */ 164 CTASSERT(PC_PTI_STACK_SZ * sizeof(register_t) >= 2 * sizeof(struct pti_frame) - 165 offsetof(struct pti_frame, pti_rip)); 166 167 extern u_int64_t hammer_time(u_int64_t, u_int64_t); 168 169 static void cpu_startup(void *); 170 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); 171 172 /* Preload data parse function */ 173 static caddr_t native_parse_preload_data(u_int64_t); 174 175 /* Native function to fetch and parse the e820 map */ 176 static void native_parse_memmap(caddr_t, vm_paddr_t *, int *); 177 178 /* Default init_ops implementation. */ 179 struct init_ops init_ops = { 180 .parse_preload_data = native_parse_preload_data, 181 .early_clock_source_init = i8254_init, 182 .early_delay = i8254_delay, 183 .parse_memmap = native_parse_memmap, 184 }; 185 186 /* 187 * Physical address of the EFI System Table. Stashed from the metadata hints 188 * passed into the kernel and used by the EFI code to call runtime services. 189 */ 190 vm_paddr_t efi_systbl_phys; 191 192 /* Intel ICH registers */ 193 #define ICH_PMBASE 0x400 194 #define ICH_SMI_EN ICH_PMBASE + 0x30 195 196 int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; 197 198 int cold = 1; 199 200 long Maxmem = 0; 201 long realmem = 0; 202 203 struct kva_md_info kmi; 204 205 struct region_descriptor r_idt; 206 207 struct pcpu *__pcpu; 208 struct pcpu temp_bsp_pcpu; 209 210 struct mtx icu_lock; 211 212 struct mem_range_softc mem_range_softc; 213 214 struct mtx dt_lock; /* lock for GDT and LDT */ 215 216 void (*vmm_resume_p)(void); 217 218 bool efi_boot; 219 220 static void 221 cpu_startup(dummy) 222 void *dummy; 223 { 224 uintmax_t memsize; 225 char *sysenv; 226 227 /* 228 * On MacBooks, we need to disallow the legacy USB circuit to 229 * generate an SMI# because this can cause several problems, 230 * namely: incorrect CPU frequency detection and failure to 231 * start the APs. 
232 * We do this by disabling a bit in the SMI_EN (SMI Control and 233 * Enable register) of the Intel ICH LPC Interface Bridge. 234 */ 235 sysenv = kern_getenv("smbios.system.product"); 236 if (sysenv != NULL) { 237 if (strncmp(sysenv, "MacBook1,1", 10) == 0 || 238 strncmp(sysenv, "MacBook3,1", 10) == 0 || 239 strncmp(sysenv, "MacBook4,1", 10) == 0 || 240 strncmp(sysenv, "MacBookPro1,1", 13) == 0 || 241 strncmp(sysenv, "MacBookPro1,2", 13) == 0 || 242 strncmp(sysenv, "MacBookPro3,1", 13) == 0 || 243 strncmp(sysenv, "MacBookPro4,1", 13) == 0 || 244 strncmp(sysenv, "Macmini1,1", 10) == 0) { 245 if (bootverbose) 246 printf("Disabling LEGACY_USB_EN bit on " 247 "Intel ICH.\n"); 248 outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); 249 } 250 freeenv(sysenv); 251 } 252 253 /* 254 * Good {morning,afternoon,evening,night}. 255 */ 256 startrtclock(); 257 printcpuinfo(); 258 259 /* 260 * Display physical memory if SMBIOS reports reasonable amount. 261 */ 262 memsize = 0; 263 sysenv = kern_getenv("smbios.memory.enabled"); 264 if (sysenv != NULL) { 265 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; 266 freeenv(sysenv); 267 } 268 if (memsize < ptoa((uintmax_t)vm_free_count())) 269 memsize = ptoa((uintmax_t)Maxmem); 270 printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); 271 realmem = atop(memsize); 272 273 /* 274 * Display any holes after the first chunk of extended memory. 
275 */ 276 if (bootverbose) { 277 int indx; 278 279 printf("Physical memory chunk(s):\n"); 280 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { 281 vm_paddr_t size; 282 283 size = phys_avail[indx + 1] - phys_avail[indx]; 284 printf( 285 "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", 286 (uintmax_t)phys_avail[indx], 287 (uintmax_t)phys_avail[indx + 1] - 1, 288 (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); 289 } 290 } 291 292 vm_ksubmap_init(&kmi); 293 294 printf("avail memory = %ju (%ju MB)\n", 295 ptoa((uintmax_t)vm_free_count()), 296 ptoa((uintmax_t)vm_free_count()) / 1048576); 297 #ifdef DEV_PCI 298 if (bootverbose && intel_graphics_stolen_base != 0) 299 printf("intel stolen mem: base %#jx size %ju MB\n", 300 (uintmax_t)intel_graphics_stolen_base, 301 (uintmax_t)intel_graphics_stolen_size / 1024 / 1024); 302 #endif 303 304 /* 305 * Set up buffers, so they can be used to read disk labels. 306 */ 307 bufinit(); 308 vm_pager_bufferinit(); 309 310 cpu_setregs(); 311 } 312 313 static void 314 late_ifunc_resolve(void *dummy __unused) 315 { 316 link_elf_late_ireloc(); 317 } 318 SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL); 319 320 321 void 322 cpu_setregs(void) 323 { 324 register_t cr0; 325 326 cr0 = rcr0(); 327 /* 328 * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the 329 * BSP. See the comments there about why we set them. 
*/
        cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
        load_cr0(cr0);
}

/*
 * Initialize amd64 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */

/*
 * Dedicated per-exception stacks, referenced through the IST slots set up
 * in amd64_bsp_ist_init() and friends.
 */
static char dblfault_stack[DBLFAULT_STACK_SIZE] __aligned(16);
static char mce0_stack[MCE_STACK_SIZE] __aligned(16);
static char nmi0_stack[NMI_STACK_SIZE] __aligned(16);
static char dbg0_stack[DBG_STACK_SIZE] __aligned(16);
/* The pcpu pointer is stored in a struct nmi_pcpu atop each IST stack. */
CTASSERT(sizeof(struct nmi_pcpu) == 16);

/*
 * Software prototypes -- in more palatable form.
 *
 * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same
 * slots as corresponding segments for i386 kernel.
 */
struct soft_segment_descriptor gdt_segs[] = {
        /* GNULL_SEL    0 Null Descriptor */
        { .ssd_base = 0x0,
          .ssd_limit = 0x0,
          .ssd_type = 0,
          .ssd_dpl = 0,
          .ssd_p = 0,
          .ssd_long = 0,
          .ssd_def32 = 0,
          .ssd_gran = 0 },
        /* GNULL2_SEL   1 Null Descriptor */
        { .ssd_base = 0x0,
          .ssd_limit = 0x0,
          .ssd_type = 0,
          .ssd_dpl = 0,
          .ssd_p = 0,
          .ssd_long = 0,
          .ssd_def32 = 0,
          .ssd_gran = 0 },
        /*
         * GUFS32_SEL   2 32 bit %gs Descriptor for user
         * (NOTE(review): the %fs/%gs wording in these two entries looks
         * swapped relative to the selector names -- confirm against the
         * i386 layout this table mirrors.)
         */
        { .ssd_base = 0x0,
          .ssd_limit = 0xfffff,
          .ssd_type = SDT_MEMRWA,
          .ssd_dpl = SEL_UPL,
          .ssd_p = 1,
          .ssd_long = 0,
          .ssd_def32 = 1,
          .ssd_gran = 1 },
        /* GUGS32_SEL   3 32 bit %fs Descriptor for user */
        { .ssd_base = 0x0,
          .ssd_limit = 0xfffff,
          .ssd_type = SDT_MEMRWA,
          .ssd_dpl = SEL_UPL,
          .ssd_p = 1,
          .ssd_long = 0,
          .ssd_def32 = 1,
          .ssd_gran = 1 },
        /* GCODE_SEL    4 Code Descriptor for kernel */
        { .ssd_base = 0x0,
          .ssd_limit = 0xfffff,
          .ssd_type = SDT_MEMERA,
          .ssd_dpl = SEL_KPL,
          .ssd_p = 1,
          .ssd_long = 1,
          .ssd_def32 = 0,
          .ssd_gran = 1 },
        /* GDATA_SEL    5 Data Descriptor for kernel */
        { .ssd_base = 0x0,
          .ssd_limit = 0xfffff,
          .ssd_type = SDT_MEMRWA,
          .ssd_dpl = SEL_KPL,
          .ssd_p = 1,
          .ssd_long = 1,
          .ssd_def32 = 0,
          .ssd_gran = 1 },
        /* GUCODE32_SEL 6 32 bit Code Descriptor for user */
        { .ssd_base = 0x0,
          .ssd_limit = 0xfffff,
          .ssd_type = SDT_MEMERA,
          .ssd_dpl = SEL_UPL,
          .ssd_p = 1,
          .ssd_long = 0,
          .ssd_def32 = 1,
          .ssd_gran = 1 },
        /* GUDATA_SEL   7 32/64 bit Data Descriptor for user */
        { .ssd_base = 0x0,
          .ssd_limit = 0xfffff,
          .ssd_type = SDT_MEMRWA,
          .ssd_dpl = SEL_UPL,
          .ssd_p = 1,
          .ssd_long = 0,
          .ssd_def32 = 1,
          .ssd_gran = 1 },
        /* GUCODE_SEL   8 64 bit Code Descriptor for user */
        { .ssd_base = 0x0,
          .ssd_limit = 0xfffff,
          .ssd_type = SDT_MEMERA,
          .ssd_dpl = SEL_UPL,
          .ssd_p = 1,
          .ssd_long = 1,
          .ssd_def32 = 0,
          .ssd_gran = 1 },
        /* GPROC0_SEL   9 Proc 0 Tss Descriptor */
        { .ssd_base = 0x0,
          .ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1,
          .ssd_type = SDT_SYSTSS,
          .ssd_dpl = SEL_KPL,
          .ssd_p = 1,
          .ssd_long = 0,
          .ssd_def32 = 0,
          .ssd_gran = 0 },
        /* Actually, the TSS is a system descriptor which is double size */
        { .ssd_base = 0x0,
          .ssd_limit = 0x0,
          .ssd_type = 0,
          .ssd_dpl = 0,
          .ssd_p = 0,
          .ssd_long = 0,
          .ssd_def32 = 0,
          .ssd_gran = 0 },
        /* GUSERLDT_SEL 11 LDT Descriptor */
        { .ssd_base = 0x0,
          .ssd_limit = 0x0,
          .ssd_type = 0,
          .ssd_dpl = 0,
          .ssd_p = 0,
          .ssd_long = 0,
          .ssd_def32 = 0,
          .ssd_gran = 0 },
        /* GUSERLDT_SEL 12 LDT Descriptor, double size */
        { .ssd_base = 0x0,
          .ssd_limit = 0x0,
          .ssd_type = 0,
          .ssd_dpl = 0,
          .ssd_p = 0,
          .ssd_long = 0,
          .ssd_def32 = 0,
          .ssd_gran = 0 },
};
_Static_assert(nitems(gdt_segs) == NGDT, "Stale NGDT");

/*
 * Install interrupt handler 'func' into IDT slot 'idx' with gate type
 * 'typ', privilege level 'dpl' and interrupt stack table index 'ist'
 * (0 means no stack switch).  The code selector is always the kernel
 * code segment.
 */
void
setidt(int idx, inthand_t *func, int typ, int dpl, int ist)
{
        struct gate_descriptor *ip;

        ip = idt + idx;
ip->gd_looffset = (uintptr_t)func;
        ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
        ip->gd_ist = ist;
        ip->gd_xx = 0;
        ip->gd_type = typ;
        ip->gd_dpl = dpl;
        ip->gd_p = 1;
        /* Handler address is split across the low/high offset fields. */
        ip->gd_hioffset = ((uintptr_t)func)>>16 ;
}

/* Assembly entry points for exception/interrupt handlers (exception.S). */
extern inthand_t
        IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
        IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
        IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
        IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
        IDTVEC(xmm), IDTVEC(dblfault),
        IDTVEC(div_pti), IDTVEC(bpt_pti),
        IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti),
        IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti),
        IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti),
        IDTVEC(rsvd_pti), IDTVEC(fpu_pti), IDTVEC(align_pti),
        IDTVEC(xmm_pti),
#ifdef KDTRACE_HOOKS
        IDTVEC(dtrace_ret), IDTVEC(dtrace_ret_pti),
#endif
#ifdef XENHVM
        IDTVEC(xen_intr_upcall), IDTVEC(xen_intr_upcall_pti),
#endif
        IDTVEC(fast_syscall), IDTVEC(fast_syscall32),
        IDTVEC(fast_syscall_pti);

#ifdef DDB
/*
 * Display the index and function name of any IDT entries that don't use
 * the default 'rsvd' entry point.
 */
DB_SHOW_COMMAND(idt, db_show_idt)
{
        struct gate_descriptor *ip;
        int idx;
        uintptr_t func;

        ip = idt;
        for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
                func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
                if (func != (uintptr_t)&IDTVEC(rsvd)) {
                        db_printf("%3d\t", idx);
                        db_printsym(func, DB_STGY_PROC);
                        db_printf("\n");
                }
                ip++;
        }
}

/* Show privileged registers. */
DB_SHOW_COMMAND(sysregs, db_show_sysregs)
{
        struct {
                uint16_t limit;
                uint64_t base;
        } __packed idtr, gdtr;
        uint16_t ldt, tr;

        __asm __volatile("sidt %0" : "=m" (idtr));
        db_printf("idtr\t0x%016lx/%04x\n",
            (u_long)idtr.base, (u_int)idtr.limit);
        __asm __volatile("sgdt %0" : "=m" (gdtr));
        db_printf("gdtr\t0x%016lx/%04x\n",
            (u_long)gdtr.base, (u_int)gdtr.limit);
        __asm __volatile("sldt %0" : "=r" (ldt));
        db_printf("ldtr\t0x%04x\n", ldt);
        __asm __volatile("str %0" : "=r" (tr));
        db_printf("tr\t0x%04x\n", tr);
        db_printf("cr0\t0x%016lx\n", rcr0());
        db_printf("cr2\t0x%016lx\n", rcr2());
        db_printf("cr3\t0x%016lx\n", rcr3());
        db_printf("cr4\t0x%016lx\n", rcr4());
        if (rcr4() & CR4_XSAVE)
                db_printf("xcr0\t0x%016lx\n", rxcr(0));
        db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER));
        if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX))
                db_printf("FEATURES_CTL\t%016lx\n",
                    rdmsr(MSR_IA32_FEATURE_CONTROL));
        db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR));
        db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT));
        db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE));
}

/* Show the hardware debug registers. */
DB_SHOW_COMMAND(dbregs, db_show_dbregs)
{

        db_printf("dr0\t0x%016lx\n", rdr0());
        db_printf("dr1\t0x%016lx\n", rdr1());
        db_printf("dr2\t0x%016lx\n", rdr2());
        db_printf("dr3\t0x%016lx\n", rdr3());
        db_printf("dr6\t0x%016lx\n", rdr6());
        db_printf("dr7\t0x%016lx\n", rdr7());
}
#endif

/*
 * Unpack a hardware user segment descriptor into its software
 * (soft_segment_descriptor) form.
 */
void
sdtossd(sd, ssd)
        struct user_segment_descriptor *sd;
        struct soft_segment_descriptor *ssd;
{

        ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase;
        ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
        ssd->ssd_type = sd->sd_type;
        ssd->ssd_dpl = sd->sd_dpl;
        ssd->ssd_p = sd->sd_p;
        ssd->ssd_long = sd->sd_long;
        ssd->ssd_def32 = sd->sd_def32;
        ssd->ssd_gran = sd->sd_gran;
}

/*
 * Pack a software segment descriptor into the hardware user segment
 * descriptor format (inverse of sdtossd()).
 */
void
ssdtosd(ssd, sd)
        struct
soft_segment_descriptor *ssd;
        struct user_segment_descriptor *sd;
{

        sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
        sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
        sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
        sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
        sd->sd_type = ssd->ssd_type;
        sd->sd_dpl = ssd->ssd_dpl;
        sd->sd_p = ssd->ssd_p;
        sd->sd_long = ssd->ssd_long;
        sd->sd_def32 = ssd->ssd_def32;
        sd->sd_gran = ssd->ssd_gran;
}

/*
 * Pack a software segment descriptor into the hardware system segment
 * descriptor format (TSS/LDT); these have a wider 40-bit high base field.
 */
void
ssdtosyssd(ssd, sd)
        struct soft_segment_descriptor *ssd;
        struct system_segment_descriptor *sd;
{

        sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
        sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
        sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
        sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
        sd->sd_type = ssd->ssd_type;
        sd->sd_dpl = ssd->ssd_dpl;
        sd->sd_p = ssd->ssd_p;
        sd->sd_gran = ssd->ssd_gran;
}

u_int basemem;

/*
 * Insert the region [base, base + length) into the sorted physmap array
 * of base/bound pairs.  Returns 1 on success (including benign no-ops such
 * as zero length or an ignored overlap), 0 when the array is full.
 */
static int
add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
    int *physmap_idxp)
{
        int i, insert_idx, physmap_idx;

        physmap_idx = *physmap_idxp;

        if (length == 0)
                return (1);

        /*
         * Find insertion point while checking for overlap.  Start off by
         * assuming the new entry will be added to the end.
         *
         * NB: physmap_idx points to the next free slot.
         */
        insert_idx = physmap_idx;
        for (i = 0; i <= physmap_idx; i += 2) {
                if (base < physmap[i + 1]) {
                        if (base + length <= physmap[i]) {
                                insert_idx = i;
                                break;
                        }
                        if (boothowto & RB_VERBOSE)
                                printf(
                    "Overlapping memory regions, ignoring second region\n");
                        return (1);
                }
        }

        /* See if we can prepend to the next entry. */
        if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) {
                physmap[insert_idx] = base;
                return (1);
        }

        /* See if we can append to the previous entry. */
        if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
                physmap[insert_idx - 1] += length;
                return (1);
        }

        physmap_idx += 2;
        *physmap_idxp = physmap_idx;
        if (physmap_idx == PHYS_AVAIL_ENTRIES) {
                printf(
                "Too many segments in the physical address map, giving up\n");
                return (0);
        }

        /*
         * Move the last 'N' entries down to make room for the new
         * entry if needed.
         */
        for (i = (physmap_idx - 2); i > insert_idx; i -= 2) {
                physmap[i] = physmap[i - 2];
                physmap[i + 1] = physmap[i - 1];
        }

        /* Insert the new entry. */
        physmap[insert_idx] = base;
        physmap[insert_idx + 1] = base + length;
        return (1);
}

/*
 * Walk a BIOS INT 15h E820 system memory map and feed each usable RAM
 * range into the physmap array.  Stops early if physmap fills up.
 */
void
bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize,
    vm_paddr_t *physmap, int *physmap_idx)
{
        struct bios_smap *smap, *smapend;

        smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);

        for (smap = smapbase; smap < smapend; smap++) {
                if (boothowto & RB_VERBOSE)
                        printf("SMAP type=%02x base=%016lx len=%016lx\n",
                            smap->type, smap->base, smap->length);

                if (smap->type != SMAP_TYPE_MEMORY)
                        continue;

                if (!add_physmap_entry(smap->base, smap->length, physmap,
                    physmap_idx))
                        break;
        }
}

/*
 * Walk the UEFI memory map provided by the loader and feed each entry of
 * a usable type into the physmap array, optionally dumping the map when
 * booting verbose.
 */
static void
add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap,
    int *physmap_idx)
{
        struct efi_md *map, *p;
        const char *type;
        size_t efisz;
        int ndesc, i;

        /* Printable names indexed by EFI memory descriptor type. */
        static const char *types[] = {
                "Reserved",
                "LoaderCode",
                "LoaderData",
                "BootServicesCode",
                "BootServicesData",
                "RuntimeServicesCode",
                "RuntimeServicesData",
                "ConventionalMemory",
                "UnusableMemory",
                "ACPIReclaimMemory",
                "ACPIMemoryNVS",
                "MemoryMappedIO",
                "MemoryMappedIOPortSpace",
                "PalCode",
                "PersistentMemory"
        };

        /*
         * Memory map data provided by UEFI via the GetMemoryMap
         * Boot Services API.
*/
        /* The descriptor array follows the 16-byte-aligned header. */
        efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
        map = (struct efi_md *)((uint8_t *)efihdr + efisz);

        if (efihdr->descriptor_size == 0)
                return;
        ndesc = efihdr->memory_size / efihdr->descriptor_size;

        if (boothowto & RB_VERBOSE)
                printf("%23s %12s %12s %8s %4s\n",
                    "Type", "Physical", "Virtual", "#Pages", "Attr");

        for (i = 0, p = map; i < ndesc; i++,
            p = efi_next_descriptor(p, efihdr->descriptor_size)) {
                if (boothowto & RB_VERBOSE) {
                        if (p->md_type < nitems(types))
                                type = types[p->md_type];
                        else
                                type = "<INVALID>";
                        printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
                            p->md_virt, p->md_pages);
                        if (p->md_attr & EFI_MD_ATTR_UC)
                                printf("UC ");
                        if (p->md_attr & EFI_MD_ATTR_WC)
                                printf("WC ");
                        if (p->md_attr & EFI_MD_ATTR_WT)
                                printf("WT ");
                        if (p->md_attr & EFI_MD_ATTR_WB)
                                printf("WB ");
                        if (p->md_attr & EFI_MD_ATTR_UCE)
                                printf("UCE ");
                        if (p->md_attr & EFI_MD_ATTR_WP)
                                printf("WP ");
                        if (p->md_attr & EFI_MD_ATTR_RP)
                                printf("RP ");
                        if (p->md_attr & EFI_MD_ATTR_XP)
                                printf("XP ");
                        if (p->md_attr & EFI_MD_ATTR_NV)
                                printf("NV ");
                        if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
                                printf("MORE_RELIABLE ");
                        if (p->md_attr & EFI_MD_ATTR_RO)
                                printf("RO ");
                        if (p->md_attr & EFI_MD_ATTR_RT)
                                printf("RUNTIME");
                        printf("\n");
                }

                switch (p->md_type) {
                case EFI_MD_TYPE_CODE:
                case EFI_MD_TYPE_DATA:
                case EFI_MD_TYPE_BS_CODE:
                case EFI_MD_TYPE_BS_DATA:
                case EFI_MD_TYPE_FREE:
                        /*
                         * We're allowed to use any entry with these types.
                         */
                        break;
                default:
                        continue;
                }

                if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE),
                    physmap, physmap_idx))
                        break;
        }
}

/*
 * Default init_ops.parse_memmap: prefer the UEFI memory map when the
 * loader provided one, otherwise fall back to the BIOS E820 map.  Also
 * records the boot method name.  Panics if neither map is present.
 */
static void
native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx)
{
        struct bios_smap *smap;
        struct efi_map_header *efihdr;
        u_int32_t size;

        /*
         * Memory map from INT 15:E820.
         *
         * subr_module.c says:
         * "Consumer may safely assume that size value precedes data."
         * ie: an int32_t immediately precedes smap.
         */

        efihdr = (struct efi_map_header *)preload_search_info(kmdp,
            MODINFO_METADATA | MODINFOMD_EFI_MAP);
        smap = (struct bios_smap *)preload_search_info(kmdp,
            MODINFO_METADATA | MODINFOMD_SMAP);
        if (efihdr == NULL && smap == NULL)
                panic("No BIOS smap or EFI map info from loader!");

        if (efihdr != NULL) {
                add_efi_map_entries(efihdr, physmap, physmap_idx);
                strlcpy(bootmethod, "UEFI", sizeof(bootmethod));
        } else {
                size = *((u_int32_t *)smap - 1);
                bios_add_smap_entries(smap, size, physmap, physmap_idx);
                strlcpy(bootmethod, "BIOS", sizeof(bootmethod));
        }
}

#define PAGES_PER_GB    (1024 * 1024 * 1024 / PAGE_SIZE)

/*
 * Populate the (physmap) array with base/bound pairs describing the
 * available physical memory in the system, then test this memory and
 * build the phys_avail array describing the actually-available memory.
 *
 * Total memory size may be set by the kernel environment variable
 * hw.physmem or the compile-time define MAXMEM.
 *
 * XXX first should be vm_paddr_t.
*/
static void
getmemsize(caddr_t kmdp, u_int64_t first)
{
        int i, physmap_idx, pa_indx, da_indx;
        vm_paddr_t pa, physmap[PHYS_AVAIL_ENTRIES];
        u_long physmem_start, physmem_tunable, memtest;
        pt_entry_t *pte;
        quad_t dcons_addr, dcons_size;
        int page_counter;

        /*
         * Tell the physical memory allocator about pages used to store
         * the kernel and preloaded data.  See kmem_bootstrap_free().
         */
        vm_phys_early_add_seg((vm_paddr_t)kernphys, trunc_page(first));

        bzero(physmap, sizeof(physmap));
        physmap_idx = 0;

        init_ops.parse_memmap(kmdp, physmap, &physmap_idx);
        /* Adjust so physmap_idx indexes the last populated pair. */
        physmap_idx -= 2;

        /*
         * Find the 'base memory' segment for SMP
         */
        basemem = 0;
        for (i = 0; i <= physmap_idx; i += 2) {
                if (physmap[i] <= 0xA0000) {
                        basemem = physmap[i + 1] / 1024;
                        break;
                }
        }
        if (basemem == 0 || basemem > 640) {
                if (bootverbose)
                        printf(
                "Memory map doesn't contain a basemem segment, faking it");
                basemem = 640;
        }

        /*
         * Maxmem isn't the "maximum memory", it's one larger than the
         * highest page of the physical address space.  It should be
         * called something like "Maxphyspage".  We may adjust this
         * based on ``hw.physmem'' and the results of the memory test.
         */
        Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
        /* MAXMEM is in KB; convert to 4KB pages. */
        Maxmem = MAXMEM / 4;
#endif

        if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
                Maxmem = atop(physmem_tunable);

        /*
         * The boot memory test is disabled by default, as it takes a
         * significant amount of time on large-memory systems, and is
         * unfriendly to virtual machines as it unnecessarily touches all
         * pages.
         *
         * A general name is used as the code may be extended to support
         * additional tests beyond the current "page present" test.
         */
        memtest = 0;
        TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);

        /*
         * Don't allow MAXMEM or hw.physmem to extend the amount of memory
         * in the system.
         */
        if (Maxmem > atop(physmap[physmap_idx + 1]))
                Maxmem = atop(physmap[physmap_idx + 1]);

        if (atop(physmap[physmap_idx + 1]) != Maxmem &&
            (boothowto & RB_VERBOSE))
                printf("Physical memory use set to %ldK\n", Maxmem * 4);

        /* call pmap initialization to make new kernel address space */
        pmap_bootstrap(&first);

        /*
         * Size up each available chunk of physical memory.
         *
         * XXX Some BIOSes corrupt low 64KB between suspend and resume.
         * By default, mask off the first 16 pages unless we appear to be
         * running in a VM.
         */
        physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT;
        TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start);
        if (physmap[0] < physmem_start) {
                if (physmem_start < PAGE_SIZE)
                        physmap[0] = PAGE_SIZE;
                else if (physmem_start >= physmap[1])
                        physmap[0] = round_page(physmap[1] - PAGE_SIZE);
                else
                        physmap[0] = round_page(physmem_start);
        }
        pa_indx = 0;
        da_indx = 1;
        phys_avail[pa_indx++] = physmap[0];
        phys_avail[pa_indx] = physmap[0];
        dump_avail[da_indx] = physmap[0];
        /* CMAP1/CADDR1 provide a scratch mapping for probing each page. */
        pte = CMAP1;

        /*
         * Get dcons buffer address
         */
        if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
            getenv_quad("dcons.size", &dcons_size) == 0)
                dcons_addr = 0;

        /*
         * physmap is in bytes, so when converting to page boundaries,
         * round up the start address and round down the end address.
         */
        page_counter = 0;
        if (memtest != 0)
                printf("Testing system memory");
        for (i = 0; i <= physmap_idx; i += 2) {
                vm_paddr_t end;

                end = ptoa((vm_paddr_t)Maxmem);
                if (physmap[i + 1] < end)
                        end = trunc_page(physmap[i + 1]);
                for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
                        int tmp, page_bad, full;
                        int *ptr = (int *)CADDR1;

                        full = FALSE;
                        /*
                         * block out kernel memory as not available.
                         */
                        if (pa >= (vm_paddr_t)kernphys && pa < first)
                                goto do_dump_avail;

                        /*
                         * block out dcons buffer
                         */
                        if (dcons_addr > 0
                            && pa >= trunc_page(dcons_addr)
                            && pa < dcons_addr + dcons_size)
                                goto do_dump_avail;

                        page_bad = FALSE;
                        if (memtest == 0)
                                goto skip_memtest;

                        /*
                         * Print a "." every GB to show we're making
                         * progress.
                         */
                        page_counter++;
                        if ((page_counter % PAGES_PER_GB) == 0)
                                printf(".");

                        /*
                         * map page into kernel: valid, read/write,non-cacheable
                         */
                        *pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD;
                        invltlb();

                        tmp = *(int *)ptr;
                        /*
                         * Test for alternating 1's and 0's
                         */
                        *(volatile int *)ptr = 0xaaaaaaaa;
                        if (*(volatile int *)ptr != 0xaaaaaaaa)
                                page_bad = TRUE;
                        /*
                         * Test for alternating 0's and 1's
                         */
                        *(volatile int *)ptr = 0x55555555;
                        if (*(volatile int *)ptr != 0x55555555)
                                page_bad = TRUE;
                        /*
                         * Test for all 1's
                         */
                        *(volatile int *)ptr = 0xffffffff;
                        if (*(volatile int *)ptr != 0xffffffff)
                                page_bad = TRUE;
                        /*
                         * Test for all 0's
                         */
                        *(volatile int *)ptr = 0x0;
                        if (*(volatile int *)ptr != 0x0)
                                page_bad = TRUE;
                        /*
                         * Restore original value.
                         */
                        *(int *)ptr = tmp;

skip_memtest:
                        /*
                         * Adjust array of valid/good pages.
                         */
                        if (page_bad == TRUE)
                                continue;
                        /*
                         * If this good page is a continuation of the
                         * previous set of good pages, then just increase
                         * the end pointer. Otherwise start a new chunk.
                         * Note that "end" points one higher than end,
                         * making the range >= start and < end.
                         * If we're also doing a speculative memory
                         * test and we at or past the end, bump up Maxmem
                         * so that we keep going. The first bad page
                         * will terminate the loop.
                         */
                        if (phys_avail[pa_indx] == pa) {
                                phys_avail[pa_indx] += PAGE_SIZE;
                        } else {
                                pa_indx++;
                                if (pa_indx == PHYS_AVAIL_ENTRIES) {
                                        printf(
                "Too many holes in the physical address space, giving up\n");
                                        pa_indx--;
                                        full = TRUE;
                                        goto do_dump_avail;
                                }
                                phys_avail[pa_indx++] = pa;     /* start */
                                phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
                        }
                        physmem++;
do_dump_avail:
                        if (dump_avail[da_indx] == pa) {
                                dump_avail[da_indx] += PAGE_SIZE;
                        } else {
                                da_indx++;
                                if (da_indx == PHYS_AVAIL_ENTRIES) {
                                        da_indx--;
                                        goto do_next;
                                }
                                dump_avail[da_indx++] = pa; /* start */
                                dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
                        }
do_next:
                        if (full)
                                break;
                }
        }
        *pte = 0;
        invltlb();
        if (memtest != 0)
                printf("\n");

        /*
         * XXX
         * The last chunk must contain at least one page plus the message
         * buffer to avoid complicating other code (message buffer address
         * calculation, etc.).
         */
        while (phys_avail[pa_indx - 1] + PAGE_SIZE +
            round_page(msgbufsize) >= phys_avail[pa_indx]) {
                physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
                phys_avail[pa_indx--] = 0;
                phys_avail[pa_indx--] = 0;
        }

        Maxmem = atop(phys_avail[pa_indx]);

        /* Trim off space for the message buffer. */
        phys_avail[pa_indx] -= round_page(msgbufsize);

        /* Map the message buffer.
	 */
	msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]);
}

/*
 * Parse the preload metadata handed over by the native loader path:
 * relocate the preload area to KERNBASE, locate the kernel module
 * ("elf kernel" or "elf64 kernel"), and pick up boothowto, the static
 * environment, debugger symbol ranges (DDB) and the EFI system table
 * address from its metadata.  Returns the kernel's kmdp handle.
 */
static caddr_t
native_parse_preload_data(u_int64_t modulep)
{
	caddr_t kmdp;
	char *envp;
#ifdef DDB
	vm_offset_t ksym_start;
	vm_offset_t ksym_end;
#endif

	preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE);
	preload_bootstrap_relocate(KERNBASE);
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
	envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
	if (envp != NULL)
		envp += KERNBASE;	/* loader stores a physical address */
	init_static_kenv(envp, 0);
#ifdef DDB
	ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
	ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
	db_fetch_ksymtab(ksym_start, ksym_end, 0);
#endif
	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	return (kmdp);
}

/*
 * Initialize the kernel debugger framework and enter it immediately
 * if the RB_KDB boot flag was supplied.
 */
static void
amd64_kdb_init(void)
{
	kdb_init();
#ifdef KDB
	if (boothowto & RB_KDB)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
}

/* Set up the fast syscall stuff */
void
amd64_conf_fast_syscall(void)
{
	uint64_t msr;

	/* Enable the SYSCALL/SYSRET instructions. */
	msr = rdmsr(MSR_EFER) | EFER_SCE;
	wrmsr(MSR_EFER, msr);
	/* 64-bit syscall entry point; PTI variant uses the trampoline. */
	wrmsr(MSR_LSTAR, pti ?
	    (u_int64_t)IDTVEC(fast_syscall_pti) :
	    (u_int64_t)IDTVEC(fast_syscall));
	/* Entry point for 32-bit compat SYSCALL. */
	wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
	/* Kernel and user segment selector bases for SYSCALL/SYSRET. */
	msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
	    ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
	wrmsr(MSR_STAR, msr);
	/* RFLAGS bits cleared on syscall entry. */
	wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC);
}

/*
 * First-stage BSP per-CPU setup: publish the pcpu pointer and seed
 * the fields derived from the per-CPU GDT (TSS, LDT and 32-bit
 * segment descriptor pointers), plus initial pmap/TLB state.
 */
void
amd64_bsp_pcpu_init1(struct pcpu *pc)
{
	struct user_segment_descriptor *gdt;

	PCPU_SET(prvspace, pc);
	gdt = *PCPU_PTR(gdt);
	PCPU_SET(curthread, &thread0);
	PCPU_SET(tssp, PCPU_PTR(common_tss));
	PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
	PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
	PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
	PCPU_SET(gs32p, &gdt[GUGS32_SEL]);
	PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK);
	PCPU_SET(smp_tlb_gen, 1);
}

/*
 * Second-stage BSP per-CPU setup, run once thread0's kernel stack
 * base (rsp0) is known.  The PTI trampoline stack top is aligned
 * down to 16 bytes.
 */
void
amd64_bsp_pcpu_init2(uint64_t rsp0)
{

	PCPU_SET(rsp0, rsp0);
	PCPU_SET(pti_rsp0, ((vm_offset_t)PCPU_PTR(pti_stack) +
	    PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
	PCPU_SET(curpcb, thread0.td_pcb);
}

/*
 * Program the IST (interrupt stack table) entries in the given CPU's
 * TSS.  For each special stack the pcpu pointer is stored in a
 * struct nmi_pcpu placed just below the top of the stack, so the
 * handler can recover its per-CPU data.
 */
void
amd64_bsp_ist_init(struct pcpu *pc)
{
	struct nmi_pcpu *np;
	struct amd64tss *tssp;

	tssp = &pc->pc_common_tss;

	/* doublefault stack space, runs on ist1 */
	np = ((struct nmi_pcpu *)&dblfault_stack[sizeof(dblfault_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist1 = (long)np;

	/*
	 * NMI stack, runs on ist2.  The pcpu pointer is stored just
	 * above the start of the ist2 stack.
	 */
	np = ((struct nmi_pcpu *)&nmi0_stack[sizeof(nmi0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist2 = (long)np;

	/*
	 * MC# stack, runs on ist3.  The pcpu pointer is stored just
	 * above the start of the ist3 stack.
	 */
	np = ((struct nmi_pcpu *)&mce0_stack[sizeof(mce0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist3 = (long)np;

	/*
	 * DB# stack, runs on ist4.
	 */
	np = ((struct nmi_pcpu *)&dbg0_stack[sizeof(dbg0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist4 = (long)np;
}

/*
 * amd64 boot-time machine-dependent initialization, called with the
 * loader-provided module pointer and the first free physical address.
 * Brings the BSP from the loader's environment to the point where
 * mi_startup() can run; returns the kernel stack location for locore.
 */
u_int64_t
hammer_time(u_int64_t modulep, u_int64_t physfree)
{
	caddr_t kmdp;
	int gsel_tss, x;
	struct pcpu *pc;
	uint64_t cr3, rsp0;
	pml4_entry_t *pml4e;
	pdp_entry_t *pdpe;
	pd_entry_t *pde;
	char *env;
	struct user_segment_descriptor *gdt;
	struct region_descriptor r_gdt;
	size_t kstack0_sz;
	int late_console;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	/*
	 * Calculate kernphys by inspecting page table created by loader.
	 * The assumptions:
	 * - kernel is mapped at KERNBASE, backed by contiguous phys memory
	 *   aligned at 2M, below 4G (the latter is important for AP startup)
	 * - there is a 2M hole at KERNBASE
	 * - kernel is mapped with 2M superpages
	 * - all participating memory, i.e.
	 *   kernel, modules, metadata,
	 *   page table is accessible by pre-created 1:1 mapping
	 *   (right now loader creates 1:1 mapping for lower 4G, and all
	 *   memory is from there)
	 * - there is a usable memory block right after the end of the
	 *   mapped kernel and all modules/metadata, pointed to by
	 *   physfree, for early allocations
	 */
	cr3 = rcr3();
	/* Walk PML4 -> PDP -> PD for hammer_time's own address. */
	pml4e = (pml4_entry_t *)(cr3 & ~PAGE_MASK) + pmap_pml4e_index(
	    (vm_offset_t)hammer_time);
	pdpe = (pdp_entry_t *)(*pml4e & ~PAGE_MASK) + pmap_pdpe_index(
	    (vm_offset_t)hammer_time);
	pde = (pd_entry_t *)(*pdpe & ~PAGE_MASK) + pmap_pde_index(
	    (vm_offset_t)hammer_time);
	kernphys = (vm_paddr_t)(*pde & ~PDRMASK) -
	    (vm_paddr_t)(((vm_offset_t)hammer_time - KERNBASE) & ~PDRMASK);

	/* Fix-up for 2M hole */
	physfree += kernphys;
	kernphys += NBPDR;

	kmdp = init_ops.parse_preload_data(modulep);

	/* EFI boot is detected by the presence of an EFI memory map. */
	efi_boot = preload_search_info(kmdp, MODINFO_METADATA |
	    MODINFOMD_EFI_MAP) != NULL;

	if (!efi_boot) {
		/* Tell the bios to warmboot next time */
		atomic_store_short((u_short *)0x472, 0x1234);
	}

	/* Load microcode update for the BSP, if one was preloaded. */
	physfree += ucode_load_bsp(physfree - kernphys + KERNSTART);
	physfree = roundup2(physfree, PAGE_SIZE);

	identify_cpu1();
	identify_hypervisor();
	identify_cpu_fixup_bsp();
	identify_cpu2();
	initializecpucache();

	/*
	 * Check for pti, pcid, and invpcid before ifuncs are
	 * resolved, to correctly select the implementation for
	 * pmap_activate_sw_mode().
	 */
	pti = pti_get_default();
	TUNABLE_INT_FETCH("vm.pmap.pti", &pti);
	TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
	if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
		invpcid_works = (cpu_stdext_feature &
		    CPUID_STDEXT_INVPCID) != 0;
	} else {
		pmap_pcid_enabled = 0;
	}

	link_elf_ireloc(kmdp);

	/*
	 * This may be done better later if it gets more high level
	 * components in it. If so just link td->td_proc here.
	 */
	proc_linkup0(&proc0, &thread0);

	/* Init basic tunables, hz etc */
	init_param1();

	/* Carve thread0's kernel stack out of the early-allocation area. */
	thread0.td_kstack = physfree - kernphys + KERNSTART;
	thread0.td_kstack_pages = kstack_pages;
	kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
	bzero((void *)thread0.td_kstack, kstack0_sz);
	physfree += kstack0_sz;

	/*
	 * Initialize enough of thread0 for delayed invalidation to
	 * work very early.  Rely on thread0.td_base_pri
	 * zero-initialization, it is reset to PVM at proc0_init().
	 */
	pmap_thread_init_invl_gen(&thread0);

	pc = &temp_bsp_pcpu;
	pcpu_init(pc, 0, sizeof(struct pcpu));
	gdt = &temp_bsp_pcpu.pc_gdt[0];

	/*
	 * make gdt memory segments
	 */
	for (x = 0; x < NGDT; x++) {
		/* System descriptors (TSS, LDT) occupy two slots each. */
		if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
		    x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1)
			ssdtosd(&gdt_segs[x], &gdt[x]);
	}
	gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&pc->pc_common_tss;
	ssdtosyssd(&gdt_segs[GPROC0_SEL],
	    (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (long)gdt;
	lgdt(&r_gdt);

	wrmsr(MSR_FSBASE, 0);		/* User value */
	wrmsr(MSR_GSBASE, (u_int64_t)pc);
	wrmsr(MSR_KGSBASE, 0);		/* User value while in the kernel */

	dpcpu_init((void *)(physfree - kernphys + KERNSTART), 0);
	physfree += DPCPU_SIZE;
	amd64_bsp_pcpu_init1(pc);
	/* Non-late cninit() and printf() can be moved up to here. */

	/*
	 * Initialize mutexes.
	 *
	 * icu_lock: in order to allow an interrupt to occur in a critical
	 *	     section, to set pcpu->ipending (etc...) properly, we
	 *	     must be able to get the icu lock, so it can't be
	 *	     under witness.
	 */
	mutex_init();
	mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
	mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);

	/* exceptions */
	for (x = 0; x < NIDT; x++)
		setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT,
		    SEL_KPL, 0);
	setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4);
	setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2);
	setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_OF, pti ? &IDTVEC(ofl_pti) : &IDTVEC(ofl), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_BR, pti ? &IDTVEC(bnd_pti) : &IDTVEC(bnd), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_UD, pti ? &IDTVEC(ill_pti) : &IDTVEC(ill), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NM, pti ? &IDTVEC(dna_pti) : &IDTVEC(dna), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
	setidt(IDT_FPUGP, pti ? &IDTVEC(fpusegm_pti) : &IDTVEC(fpusegm),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_TS, pti ? &IDTVEC(tss_pti) : &IDTVEC(tss), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NP, pti ? &IDTVEC(missing_pti) : &IDTVEC(missing),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_SS, pti ? &IDTVEC(stk_pti) : &IDTVEC(stk), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_GP, pti ? &IDTVEC(prot_pti) : &IDTVEC(prot), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_PF, pti ? &IDTVEC(page_pti) : &IDTVEC(page), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MF, pti ? &IDTVEC(fpu_pti) : &IDTVEC(fpu), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_AC, pti ? &IDTVEC(align_pti) : &IDTVEC(align), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 3);
	setidt(IDT_XF, pti ? &IDTVEC(xmm_pti) : &IDTVEC(xmm), SDT_SYSIGT,
	    SEL_KPL, 0);
#ifdef KDTRACE_HOOKS
	setidt(IDT_DTRACE_RET, pti ? &IDTVEC(dtrace_ret_pti) :
	    &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0);
#endif
#ifdef XENHVM
	setidt(IDT_EVTCHN, pti ? &IDTVEC(xen_intr_upcall_pti) :
	    &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_KPL, 0);
#endif
	r_idt.rd_limit = sizeof(idt0) - 1;
	r_idt.rd_base = (long) idt;
	lidt(&r_idt);

	/*
	 * Initialize the clock before the console so that console
	 * initialization can use DELAY().
	 */
	clock_init();

	/*
	 * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4)
	 * transition).
	 * Once bootblocks have updated, we can test directly for
	 * efi_systbl != NULL here...
	 */
	if (efi_boot)
		vty_set_preferred(VTY_VT);

	/* Fetch mitigation tunables (legacy and machdep.mitigations names). */
	TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.ibrs.disable", &hw_ibrs_disable);

	TUNABLE_INT_FETCH("hw.spec_store_bypass_disable", &hw_ssb_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.ssb.disable", &hw_ssb_disable);

	TUNABLE_INT_FETCH("machdep.syscall_ret_l1d_flush",
	    &syscall_ret_l1d_flush_mode);

	TUNABLE_INT_FETCH("hw.mds_disable", &hw_mds_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.mds.disable", &hw_mds_disable);

	TUNABLE_INT_FETCH("machdep.mitigations.taa.enable", &x86_taa_enable);

	/*
	 * NOTE(review): tunable name spells "rndgs" while the variable is
	 * x86_rngds_mitg_enable ("rngds") -- confirm the intended spelling.
	 */
	TUNABLE_INT_FETCH("machdep.mitigations.rndgs.enable",
	    &x86_rngds_mitg_enable);

	finishidentcpu();	/* Final stage of CPU initialization */
	initializecpu();	/* Initialize CPU registers */

	amd64_bsp_ist_init(pc);

	/* Set the IO permission bitmap (empty due to tss seg limit) */
	pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) +
	    IOPERM_BITMAP_SIZE;

	/* Load the task register with the BSP's TSS selector. */
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	ltr(gsel_tss);

	amd64_conf_fast_syscall();

	/*
	 * We initialize the PCB pointer early so that exception
	 * handlers will work.  Also set up td_critnest to short-cut
	 * the page fault handler.
	 */
	cpu_max_ext_state_size = sizeof(struct savefpu);
	set_top_of_stack_td(&thread0);
	thread0.td_pcb = get_pcb_td(&thread0);
	thread0.td_critnest = 1;

	/*
	 * The console and kdb should be initialized even earlier than here,
	 * but some console drivers don't work until after getmemsize().
	 * Default to late console initialization to support these drivers.
	 * This loses mainly printf()s in getmemsize() and early debugging.
	 */
	late_console = 1;
	TUNABLE_INT_FETCH("debug.late_console", &late_console);
	if (!late_console) {
		cninit();
		amd64_kdb_init();
	}

	getmemsize(kmdp, physfree);
	init_param2(physmem);

	/* now running on new page tables, configured, and u/iom is accessible */

#ifdef DEV_PCI
	/* This call might adjust phys_avail[]. */
	pci_early_quirks();
#endif

	if (late_console)
		cninit();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

#ifdef DEV_ISA
#ifdef DEV_ATPIC
	elcr_probe();
	atpic_startup();
#else
	/* Reset and mask the atpics and leave them shut down. */
	atpic_reset();

	/*
	 * Point the ICU spurious interrupt vectors at the APIC spurious
	 * interrupt handler.
	 */
	setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
#endif
#else
#error "have you forgotten the isa device?"
#endif

	if (late_console)
		amd64_kdb_init();

	msgbufinit(msgbufp, msgbufsize);
	fpuinit();

	/* make an initial tss so cpu can get interrupt stack on syscall!
	 */
	rsp0 = thread0.td_md.md_stack_base;
	/* Ensure the stack is aligned to 16 bytes */
	rsp0 &= ~0xFul;
	PCPU_PTR(common_tss)->tss_rsp0 = rsp0;
	amd64_bsp_pcpu_init2(rsp0);

	/* transfer to user mode */

	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
	_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
	_ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
	_ufssel = GSEL(GUFS32_SEL, SEL_UPL);
	_ugssel = GSEL(GUGS32_SEL, SEL_UPL);

	load_ds(_udatasel);
	load_es(_udatasel);
	load_fs(_ufssel);

	/* setup proc 0's pcb */
	thread0.td_pcb->pcb_flags = 0;

	/* Honor a loader-supplied kernel name, if any. */
	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

	kcsan_cpu_init(0);

#ifdef FDT
	x86_init_fdt();
#endif
	thread0.td_critnest = 0;

	kasan_init();
	kmsan_init();

	TSEXIT();

	/* Location of kernel stack for locore */
	return (thread0.td_md.md_stack_base);
}

/*
 * MD per-CPU initialization hook.  The ACPI id starts as an
 * all-ones sentinel; presumably it is filled in later by ACPI
 * enumeration -- confirm against the MADT parsing code.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
}

/*
 * Sysctl handler exporting the raw BIOS SMAP entries (plus optional
 * extended attributes) from loader metadata as machdep.smap.
 */
static int
smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct bios_smap *smapbase;
	struct bios_smap_xattr smap;
	caddr_t kmdp;
	uint32_t *smapattr;
	int count, error, i;

	/* Retrieve the system memory map from the loader.
	 */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	smapbase = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (smapbase == NULL)
		return (0);
	smapattr = (uint32_t *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP_XATTR);
	/* The metadata size word precedes the table itself. */
	count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase);
	error = 0;
	for (i = 0; i < count; i++) {
		smap.base = smapbase[i].base;
		smap.length = smapbase[i].length;
		smap.type = smapbase[i].type;
		if (smapattr != NULL)
			smap.xattr = smapattr[i];
		else
			smap.xattr = 0;
		error = SYSCTL_OUT(req, &smap, sizeof(smap));
	}
	return (error);
}
SYSCTL_PROC(_machdep, OID_AUTO, smap,
    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    smap_sysctl_handler, "S,bios_smap_xattr",
    "Raw BIOS SMAP data");

/*
 * Sysctl handler exporting the raw EFI memory map from loader
 * metadata as machdep.efi_map.
 */
static int
efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct efi_map_header *efihdr;
	caddr_t kmdp;
	uint32_t efisize;

	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr == NULL)
		return (0);
	/* The metadata size word precedes the header. */
	efisize = *((uint32_t *)efihdr - 1);
	return (SYSCTL_OUT(req, efihdr, efisize));
}
SYSCTL_PROC(_machdep, OID_AUTO, efi_map,
    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    efi_map_sysctl_handler, "S,efi_map_header",
    "Raw EFI Memory Map");

/*
 * On the first (outermost) acquisition, disable interrupts, remember
 * the previous flags in the thread, and enter a critical section;
 * nested calls only bump the per-thread nesting count.
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t flags;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		flags = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_flags = flags;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}

/*
 * Undo spinlock_enter(): on the outermost release, leave the
 * critical section and restore the interrupt flags saved at entry.
 */
void
spinlock_exit(void)
{
	struct thread *td;
	register_t flags;

	td = curthread;
	flags = td->td_md.md_saved_flags;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(flags);
	}
}

/*
 * Construct a PCB from a trapframe. This is called from kdb_trap() where
 * we want to start a backtrace from the function that caused us to enter
 * the debugger. We have the context in the trapframe, but base the trace
 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 * enough for a backtrace.
 */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{

	/* Only the callee-saved registers, %rip and %rsp are needed. */
	pcb->pcb_r12 = tf->tf_r12;
	pcb->pcb_r13 = tf->tf_r13;
	pcb->pcb_r14 = tf->tf_r14;
	pcb->pcb_r15 = tf->tf_r15;
	pcb->pcb_rbp = tf->tf_rbp;
	pcb->pcb_rbx = tf->tf_rbx;
	pcb->pcb_rip = tf->tf_rip;
	pcb->pcb_rsp = tf->tf_rsp;
}

/*
 * The pcb_flags is only modified by current thread, or by other threads
 * when current thread is stopped.  However, current thread may change it
 * from the interrupt context in cpu_switch(), or in the trap handler.
 * When we read-modify-write pcb_flags from C sources, compiler may generate
 * code that is not atomic regarding the interrupt handler.  If a trap or
 * interrupt happens and any flag is modified from the handler, it can be
 * clobbered with the cached value later.  Therefore, we implement setting
 * and clearing flags with single-instruction functions, which do not race
 * with possible modification of the flags from the trap or interrupt context,
 * because traps and interrupts are executed only on instruction boundary.
 */
void
set_pcb_flags_raw(struct pcb *pcb, const u_int flags)
{

	/* Single-instruction RMW; see the comment above for why. */
	__asm __volatile("orl %1,%0"
	    : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags)
	    : "cc", "memory");

}

/*
 * The support for RDFSBASE, WRFSBASE and similar instructions for %gs
 * base requires that kernel saves MSR_FSBASE and MSR_{K,}GSBASE into
 * pcb if user space modified the bases.  We must save on the context
 * switch or if the return to usermode happens through the doreti.
 *
 * Tracking of both events is performed by the pcb flag PCB_FULL_IRET,
 * which have a consequence that the base MSRs must be saved each time
 * the PCB_FULL_IRET flag is set.  We disable interrupts to sync with
 * context switches.
 */
static void
set_pcb_flags_fsgsbase(struct pcb *pcb, const u_int flags)
{
	register_t r;

	if (curpcb == pcb &&
	    (flags & PCB_FULL_IRET) != 0 &&
	    (pcb->pcb_flags & PCB_FULL_IRET) == 0) {
		r = intr_disable();
		/* Re-check with interrupts off to close the race window. */
		if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) {
			if (rfs() == _ufssel)
				pcb->pcb_fsbase = rdfsbase();
			if (rgs() == _ugssel)
				pcb->pcb_gsbase = rdmsr(MSR_KGSBASE);
		}
		set_pcb_flags_raw(pcb, flags);
		intr_restore(r);
	} else {
		set_pcb_flags_raw(pcb, flags);
	}
}

/*
 * Resolve set_pcb_flags at boot: use the FSGSBASE-aware variant when
 * the CPU supports the RDFSBASE/WRFSBASE family of instructions.
 */
DEFINE_IFUNC(, void, set_pcb_flags, (struct pcb *, const u_int))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0 ?
	    set_pcb_flags_fsgsbase : set_pcb_flags_raw);
}

void
clear_pcb_flags(struct pcb *pcb, const u_int flags)
{

	/* Single-instruction RMW counterpart of set_pcb_flags_raw(). */
	__asm __volatile("andl %1,%0"
	    : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags)
	    : "cc", "memory");
}

#ifdef KDB

/*
 * Provide inb() and outb() as functions.  They are normally only available as
 * inline functions, thus cannot be called from the debugger.
1802 */ 1803 1804 /* silence compiler warnings */ 1805 u_char inb_(u_short); 1806 void outb_(u_short, u_char); 1807 1808 u_char 1809 inb_(u_short port) 1810 { 1811 return inb(port); 1812 } 1813 1814 void 1815 outb_(u_short port, u_char data) 1816 { 1817 outb(port, data); 1818 } 1819 1820 #endif /* KDB */ 1821 1822 #undef memset 1823 #undef memmove 1824 #undef memcpy 1825 1826 void *memset_std(void *buf, int c, size_t len); 1827 void *memset_erms(void *buf, int c, size_t len); 1828 void *memmove_std(void * _Nonnull dst, const void * _Nonnull src, 1829 size_t len); 1830 void *memmove_erms(void * _Nonnull dst, const void * _Nonnull src, 1831 size_t len); 1832 void *memcpy_std(void * _Nonnull dst, const void * _Nonnull src, 1833 size_t len); 1834 void *memcpy_erms(void * _Nonnull dst, const void * _Nonnull src, 1835 size_t len); 1836 1837 #ifdef KCSAN 1838 /* 1839 * These fail to build as ifuncs when used with KCSAN. 1840 */ 1841 void * 1842 memset(void *buf, int c, size_t len) 1843 { 1844 1845 return (memset_std(buf, c, len)); 1846 } 1847 1848 void * 1849 memmove(void * _Nonnull dst, const void * _Nonnull src, size_t len) 1850 { 1851 1852 return (memmove_std(dst, src, len)); 1853 } 1854 1855 void * 1856 memcpy(void * _Nonnull dst, const void * _Nonnull src, size_t len) 1857 { 1858 1859 return (memcpy_std(dst, src, len)); 1860 } 1861 #else 1862 DEFINE_IFUNC(, void *, memset, (void *, int, size_t)) 1863 { 1864 1865 return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ? 1866 memset_erms : memset_std); 1867 } 1868 1869 DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull, 1870 size_t)) 1871 { 1872 1873 return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ? 1874 memmove_erms : memmove_std); 1875 } 1876 1877 DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,size_t)) 1878 { 1879 1880 return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ? 
	    memcpy_erms : memcpy_std);
}
#endif

/* pagezero: likewise resolved at boot based on ERMS support. */
void pagezero_std(void *addr);
void pagezero_erms(void *addr);
DEFINE_IFUNC(, void , pagezero, (void *))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
	    pagezero_erms : pagezero_std);
}