1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 2003 Peter Wemm. 5 * Copyright (c) 1992 Terrence R. Lambert. 6 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * William Jolitz. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 41 */ 42 43 #include <sys/cdefs.h> 44 __FBSDID("$FreeBSD$"); 45 46 #include "opt_atpic.h" 47 #include "opt_cpu.h" 48 #include "opt_ddb.h" 49 #include "opt_inet.h" 50 #include "opt_isa.h" 51 #include "opt_kstack_pages.h" 52 #include "opt_maxmem.h" 53 #include "opt_pci.h" 54 #include "opt_platform.h" 55 #include "opt_sched.h" 56 57 #include <sys/param.h> 58 #include <sys/proc.h> 59 #include <sys/systm.h> 60 #include <sys/asan.h> 61 #include <sys/bio.h> 62 #include <sys/buf.h> 63 #include <sys/bus.h> 64 #include <sys/callout.h> 65 #include <sys/cons.h> 66 #include <sys/cpu.h> 67 #include <sys/csan.h> 68 #include <sys/efi.h> 69 #include <sys/eventhandler.h> 70 #include <sys/exec.h> 71 #include <sys/imgact.h> 72 #include <sys/kdb.h> 73 #include <sys/kernel.h> 74 #include <sys/ktr.h> 75 #include <sys/linker.h> 76 #include <sys/lock.h> 77 #include <sys/malloc.h> 78 #include <sys/memrange.h> 79 #include <sys/msan.h> 80 #include <sys/msgbuf.h> 81 #include <sys/mutex.h> 82 #include <sys/pcpu.h> 83 #include <sys/ptrace.h> 84 #include <sys/reboot.h> 85 #include <sys/reg.h> 86 #include <sys/rwlock.h> 87 #include <sys/sched.h> 88 #include <sys/signalvar.h> 89 #ifdef SMP 90 #include <sys/smp.h> 91 #endif 92 #include <sys/syscallsubr.h> 93 #include <sys/sysctl.h> 94 #include <sys/sysent.h> 95 #include <sys/sysproto.h> 96 #include <sys/ucontext.h> 97 #include <sys/vmmeter.h> 98 99 #include <vm/vm.h> 
100 #include <vm/vm_param.h> 101 #include <vm/vm_extern.h> 102 #include <vm/vm_kern.h> 103 #include <vm/vm_page.h> 104 #include <vm/vm_map.h> 105 #include <vm/vm_object.h> 106 #include <vm/vm_pager.h> 107 #include <vm/vm_phys.h> 108 #include <vm/vm_dumpset.h> 109 110 #ifdef DDB 111 #ifndef KDB 112 #error KDB must be enabled in order for DDB to work! 113 #endif 114 #include <ddb/ddb.h> 115 #include <ddb/db_sym.h> 116 #endif 117 118 #include <net/netisr.h> 119 120 #include <machine/clock.h> 121 #include <machine/cpu.h> 122 #include <machine/cputypes.h> 123 #include <machine/frame.h> 124 #include <machine/intr_machdep.h> 125 #include <x86/mca.h> 126 #include <machine/md_var.h> 127 #include <machine/metadata.h> 128 #include <machine/pc/bios.h> 129 #include <machine/pcb.h> 130 #include <machine/proc.h> 131 #include <machine/sigframe.h> 132 #include <machine/specialreg.h> 133 #include <machine/trap.h> 134 #include <machine/tss.h> 135 #include <x86/ucode.h> 136 #include <x86/ifunc.h> 137 #ifdef SMP 138 #include <machine/smp.h> 139 #endif 140 #ifdef FDT 141 #include <x86/fdt.h> 142 #endif 143 144 #ifdef DEV_ATPIC 145 #include <x86/isa/icu.h> 146 #else 147 #include <x86/apicvar.h> 148 #endif 149 150 #include <isa/isareg.h> 151 #include <isa/rtc.h> 152 #include <x86/init.h> 153 154 /* Sanity check for __curthread() */ 155 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); 156 157 /* 158 * The PTI trampoline stack needs enough space for a hardware trapframe and a 159 * couple of scratch registers, as well as the trapframe left behind after an 160 * iret fault. 161 */ 162 CTASSERT(PC_PTI_STACK_SZ * sizeof(register_t) >= 2 * sizeof(struct pti_frame) - 163 offsetof(struct pti_frame, pti_rip)); 164 165 extern u_int64_t hammer_time(u_int64_t, u_int64_t); 166 167 static void cpu_startup(void *); 168 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); 169 170 /* Probe 8254 PIT and TSC. 
*/ 171 static void native_clock_source_init(void); 172 173 /* Preload data parse function */ 174 static caddr_t native_parse_preload_data(u_int64_t); 175 176 /* Native function to fetch and parse the e820 map */ 177 static void native_parse_memmap(caddr_t, vm_paddr_t *, int *); 178 179 /* Default init_ops implementation. */ 180 struct init_ops init_ops = { 181 .parse_preload_data = native_parse_preload_data, 182 .early_clock_source_init = native_clock_source_init, 183 .early_delay = i8254_delay, 184 .parse_memmap = native_parse_memmap, 185 }; 186 187 /* 188 * Physical address of the EFI System Table. Stashed from the metadata hints 189 * passed into the kernel and used by the EFI code to call runtime services. 190 */ 191 vm_paddr_t efi_systbl_phys; 192 193 /* Intel ICH registers */ 194 #define ICH_PMBASE 0x400 195 #define ICH_SMI_EN ICH_PMBASE + 0x30 196 197 int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; 198 199 int cold = 1; 200 201 long Maxmem = 0; 202 long realmem = 0; 203 int late_console = 1; 204 205 struct kva_md_info kmi; 206 207 struct region_descriptor r_idt; 208 209 struct pcpu *__pcpu; 210 struct pcpu temp_bsp_pcpu; 211 212 struct mtx icu_lock; 213 214 struct mem_range_softc mem_range_softc; 215 216 struct mtx dt_lock; /* lock for GDT and LDT */ 217 218 void (*vmm_resume_p)(void); 219 220 bool efi_boot; 221 222 static void 223 cpu_startup(void *dummy) 224 { 225 uintmax_t memsize; 226 char *sysenv; 227 228 /* 229 * On MacBooks, we need to disallow the legacy USB circuit to 230 * generate an SMI# because this can cause several problems, 231 * namely: incorrect CPU frequency detection and failure to 232 * start the APs. 233 * We do this by disabling a bit in the SMI_EN (SMI Control and 234 * Enable register) of the Intel ICH LPC Interface Bridge. 
235 */ 236 sysenv = kern_getenv("smbios.system.product"); 237 if (sysenv != NULL) { 238 if (strncmp(sysenv, "MacBook1,1", 10) == 0 || 239 strncmp(sysenv, "MacBook3,1", 10) == 0 || 240 strncmp(sysenv, "MacBook4,1", 10) == 0 || 241 strncmp(sysenv, "MacBookPro1,1", 13) == 0 || 242 strncmp(sysenv, "MacBookPro1,2", 13) == 0 || 243 strncmp(sysenv, "MacBookPro3,1", 13) == 0 || 244 strncmp(sysenv, "MacBookPro4,1", 13) == 0 || 245 strncmp(sysenv, "Macmini1,1", 10) == 0) { 246 if (bootverbose) 247 printf("Disabling LEGACY_USB_EN bit on " 248 "Intel ICH.\n"); 249 outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); 250 } 251 freeenv(sysenv); 252 } 253 254 /* 255 * Good {morning,afternoon,evening,night}. 256 */ 257 startrtclock(); 258 printcpuinfo(); 259 260 /* 261 * Display physical memory if SMBIOS reports reasonable amount. 262 */ 263 memsize = 0; 264 sysenv = kern_getenv("smbios.memory.enabled"); 265 if (sysenv != NULL) { 266 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; 267 freeenv(sysenv); 268 } 269 if (memsize < ptoa((uintmax_t)vm_free_count())) 270 memsize = ptoa((uintmax_t)Maxmem); 271 printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); 272 realmem = atop(memsize); 273 274 /* 275 * Display any holes after the first chunk of extended memory. 
276 */ 277 if (bootverbose) { 278 int indx; 279 280 printf("Physical memory chunk(s):\n"); 281 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { 282 vm_paddr_t size; 283 284 size = phys_avail[indx + 1] - phys_avail[indx]; 285 printf( 286 "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", 287 (uintmax_t)phys_avail[indx], 288 (uintmax_t)phys_avail[indx + 1] - 1, 289 (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); 290 } 291 } 292 293 vm_ksubmap_init(&kmi); 294 295 printf("avail memory = %ju (%ju MB)\n", 296 ptoa((uintmax_t)vm_free_count()), 297 ptoa((uintmax_t)vm_free_count()) / 1048576); 298 #ifdef DEV_PCI 299 if (bootverbose && intel_graphics_stolen_base != 0) 300 printf("intel stolen mem: base %#jx size %ju MB\n", 301 (uintmax_t)intel_graphics_stolen_base, 302 (uintmax_t)intel_graphics_stolen_size / 1024 / 1024); 303 #endif 304 305 /* 306 * Set up buffers, so they can be used to read disk labels. 307 */ 308 bufinit(); 309 vm_pager_bufferinit(); 310 311 cpu_setregs(); 312 } 313 314 static void 315 late_ifunc_resolve(void *dummy __unused) 316 { 317 link_elf_late_ireloc(); 318 } 319 SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL); 320 321 322 void 323 cpu_setregs(void) 324 { 325 register_t cr0; 326 327 cr0 = rcr0(); 328 /* 329 * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the 330 * BSP. See the comments there about why we set them. 
331 */ 332 cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; 333 load_cr0(cr0); 334 } 335 336 /* 337 * Initialize amd64 and configure to run kernel 338 */ 339 340 /* 341 * Initialize segments & interrupt table 342 */ 343 static struct gate_descriptor idt0[NIDT]; 344 struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ 345 346 static char dblfault_stack[DBLFAULT_STACK_SIZE] __aligned(16); 347 static char mce0_stack[MCE_STACK_SIZE] __aligned(16); 348 static char nmi0_stack[NMI_STACK_SIZE] __aligned(16); 349 static char dbg0_stack[DBG_STACK_SIZE] __aligned(16); 350 CTASSERT(sizeof(struct nmi_pcpu) == 16); 351 352 /* 353 * Software prototypes -- in more palatable form. 354 * 355 * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same 356 * slots as corresponding segments for i386 kernel. 357 */ 358 struct soft_segment_descriptor gdt_segs[] = { 359 /* GNULL_SEL 0 Null Descriptor */ 360 { .ssd_base = 0x0, 361 .ssd_limit = 0x0, 362 .ssd_type = 0, 363 .ssd_dpl = 0, 364 .ssd_p = 0, 365 .ssd_long = 0, 366 .ssd_def32 = 0, 367 .ssd_gran = 0 }, 368 /* GNULL2_SEL 1 Null Descriptor */ 369 { .ssd_base = 0x0, 370 .ssd_limit = 0x0, 371 .ssd_type = 0, 372 .ssd_dpl = 0, 373 .ssd_p = 0, 374 .ssd_long = 0, 375 .ssd_def32 = 0, 376 .ssd_gran = 0 }, 377 /* GUFS32_SEL 2 32 bit %gs Descriptor for user */ 378 { .ssd_base = 0x0, 379 .ssd_limit = 0xfffff, 380 .ssd_type = SDT_MEMRWA, 381 .ssd_dpl = SEL_UPL, 382 .ssd_p = 1, 383 .ssd_long = 0, 384 .ssd_def32 = 1, 385 .ssd_gran = 1 }, 386 /* GUGS32_SEL 3 32 bit %fs Descriptor for user */ 387 { .ssd_base = 0x0, 388 .ssd_limit = 0xfffff, 389 .ssd_type = SDT_MEMRWA, 390 .ssd_dpl = SEL_UPL, 391 .ssd_p = 1, 392 .ssd_long = 0, 393 .ssd_def32 = 1, 394 .ssd_gran = 1 }, 395 /* GCODE_SEL 4 Code Descriptor for kernel */ 396 { .ssd_base = 0x0, 397 .ssd_limit = 0xfffff, 398 .ssd_type = SDT_MEMERA, 399 .ssd_dpl = SEL_KPL, 400 .ssd_p = 1, 401 .ssd_long = 1, 402 .ssd_def32 = 0, 403 .ssd_gran = 1 }, 404 /* GDATA_SEL 5 Data Descriptor for kernel 
*/ 405 { .ssd_base = 0x0, 406 .ssd_limit = 0xfffff, 407 .ssd_type = SDT_MEMRWA, 408 .ssd_dpl = SEL_KPL, 409 .ssd_p = 1, 410 .ssd_long = 1, 411 .ssd_def32 = 0, 412 .ssd_gran = 1 }, 413 /* GUCODE32_SEL 6 32 bit Code Descriptor for user */ 414 { .ssd_base = 0x0, 415 .ssd_limit = 0xfffff, 416 .ssd_type = SDT_MEMERA, 417 .ssd_dpl = SEL_UPL, 418 .ssd_p = 1, 419 .ssd_long = 0, 420 .ssd_def32 = 1, 421 .ssd_gran = 1 }, 422 /* GUDATA_SEL 7 32/64 bit Data Descriptor for user */ 423 { .ssd_base = 0x0, 424 .ssd_limit = 0xfffff, 425 .ssd_type = SDT_MEMRWA, 426 .ssd_dpl = SEL_UPL, 427 .ssd_p = 1, 428 .ssd_long = 0, 429 .ssd_def32 = 1, 430 .ssd_gran = 1 }, 431 /* GUCODE_SEL 8 64 bit Code Descriptor for user */ 432 { .ssd_base = 0x0, 433 .ssd_limit = 0xfffff, 434 .ssd_type = SDT_MEMERA, 435 .ssd_dpl = SEL_UPL, 436 .ssd_p = 1, 437 .ssd_long = 1, 438 .ssd_def32 = 0, 439 .ssd_gran = 1 }, 440 /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ 441 { .ssd_base = 0x0, 442 .ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1, 443 .ssd_type = SDT_SYSTSS, 444 .ssd_dpl = SEL_KPL, 445 .ssd_p = 1, 446 .ssd_long = 0, 447 .ssd_def32 = 0, 448 .ssd_gran = 0 }, 449 /* Actually, the TSS is a system descriptor which is double size */ 450 { .ssd_base = 0x0, 451 .ssd_limit = 0x0, 452 .ssd_type = 0, 453 .ssd_dpl = 0, 454 .ssd_p = 0, 455 .ssd_long = 0, 456 .ssd_def32 = 0, 457 .ssd_gran = 0 }, 458 /* GUSERLDT_SEL 11 LDT Descriptor */ 459 { .ssd_base = 0x0, 460 .ssd_limit = 0x0, 461 .ssd_type = 0, 462 .ssd_dpl = 0, 463 .ssd_p = 0, 464 .ssd_long = 0, 465 .ssd_def32 = 0, 466 .ssd_gran = 0 }, 467 /* GUSERLDT_SEL 12 LDT Descriptor, double size */ 468 { .ssd_base = 0x0, 469 .ssd_limit = 0x0, 470 .ssd_type = 0, 471 .ssd_dpl = 0, 472 .ssd_p = 0, 473 .ssd_long = 0, 474 .ssd_def32 = 0, 475 .ssd_gran = 0 }, 476 }; 477 _Static_assert(nitems(gdt_segs) == NGDT, "Stale NGDT"); 478 479 void 480 setidt(int idx, inthand_t *func, int typ, int dpl, int ist) 481 { 482 struct gate_descriptor *ip; 483 484 ip = idt + idx; 485 
	ip->gd_looffset = (uintptr_t)func;
	ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
	ip->gd_ist = ist;
	ip->gd_xx = 0;
	ip->gd_type = typ;
	ip->gd_dpl = dpl;
	ip->gd_p = 1;
	/* Upper 48 bits of the handler address go into gd_hioffset. */
	ip->gd_hioffset = ((uintptr_t)func)>>16 ;
}

/* Assembler entry points for the exception/interrupt handlers. */
extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(xmm), IDTVEC(dblfault),
	IDTVEC(div_pti), IDTVEC(bpt_pti),
	IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti),
	IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti),
	IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti),
	IDTVEC(rsvd_pti), IDTVEC(fpu_pti), IDTVEC(align_pti),
	IDTVEC(xmm_pti),
#ifdef KDTRACE_HOOKS
	IDTVEC(dtrace_ret), IDTVEC(dtrace_ret_pti),
#endif
#ifdef XENHVM
	IDTVEC(xen_intr_upcall), IDTVEC(xen_intr_upcall_pti),
#endif
	IDTVEC(fast_syscall), IDTVEC(fast_syscall32),
	IDTVEC(fast_syscall_pti);

#ifdef DDB
/*
 * Display the index and function name of any IDT entries that don't use
 * the default 'rsvd' entry point.
 */
DB_SHOW_COMMAND_FLAGS(idt, db_show_idt, DB_CMD_MEMSAFE)
{
	struct gate_descriptor *ip;
	int idx;
	uintptr_t func;

	ip = idt;
	for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
		/* Reassemble the split 16/48-bit handler offset. */
		func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
		if (func != (uintptr_t)&IDTVEC(rsvd)) {
			db_printf("%3d\t", idx);
			db_printsym(func, DB_STGY_PROC);
			db_printf("\n");
		}
		ip++;
	}
}

/* Show privileged registers. */
DB_SHOW_COMMAND_FLAGS(sysregs, db_show_sysregs, DB_CMD_MEMSAFE)
{
	struct {
		uint16_t limit;
		uint64_t base;
	} __packed idtr, gdtr;
	uint16_t ldt, tr;

	__asm __volatile("sidt %0" : "=m" (idtr));
	db_printf("idtr\t0x%016lx/%04x\n",
	    (u_long)idtr.base, (u_int)idtr.limit);
	__asm __volatile("sgdt %0" : "=m" (gdtr));
	db_printf("gdtr\t0x%016lx/%04x\n",
	    (u_long)gdtr.base, (u_int)gdtr.limit);
	__asm __volatile("sldt %0" : "=r" (ldt));
	db_printf("ldtr\t0x%04x\n", ldt);
	__asm __volatile("str %0" : "=r" (tr));
	db_printf("tr\t0x%04x\n", tr);
	db_printf("cr0\t0x%016lx\n", rcr0());
	db_printf("cr2\t0x%016lx\n", rcr2());
	db_printf("cr3\t0x%016lx\n", rcr3());
	db_printf("cr4\t0x%016lx\n", rcr4());
	if (rcr4() & CR4_XSAVE)
		db_printf("xcr0\t0x%016lx\n", rxcr(0));
	db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER));
	if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX))
		db_printf("FEATURES_CTL\t%016lx\n",
		    rdmsr(MSR_IA32_FEATURE_CONTROL));
	db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR));
	db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT));
	db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE));
}

/* Show hardware debug registers dr0-dr3, dr6 and dr7. */
DB_SHOW_COMMAND_FLAGS(dbregs, db_show_dbregs, DB_CMD_MEMSAFE)
{

	db_printf("dr0\t0x%016lx\n", rdr0());
	db_printf("dr1\t0x%016lx\n", rdr1());
	db_printf("dr2\t0x%016lx\n", rdr2());
	db_printf("dr3\t0x%016lx\n", rdr3());
	db_printf("dr6\t0x%016lx\n", rdr6());
	db_printf("dr7\t0x%016lx\n", rdr7());
}
#endif

/*
 * Unpack a hardware user segment descriptor into the software
 * (soft_segment_descriptor) representation.
 */
void
sdtossd(struct user_segment_descriptor *sd, struct soft_segment_descriptor *ssd)
{

	ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase;
	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
	ssd->ssd_type = sd->sd_type;
	ssd->ssd_dpl = sd->sd_dpl;
	ssd->ssd_p = sd->sd_p;
	ssd->ssd_long = sd->sd_long;
	ssd->ssd_def32 = sd->sd_def32;
	ssd->ssd_gran = sd->sd_gran;
}

void
ssdtosd(struct soft_segment_descriptor *ssd, struct user_segment_descriptor *sd)
{

	/* Pack the soft descriptor back into hardware user-segment form. */
	sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
	sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
	sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
	sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
	sd->sd_type = ssd->ssd_type;
	sd->sd_dpl = ssd->ssd_dpl;
	sd->sd_p = ssd->ssd_p;
	sd->sd_long = ssd->ssd_long;
	sd->sd_def32 = ssd->ssd_def32;
	sd->sd_gran = ssd->ssd_gran;
}

/*
 * Pack a soft descriptor into a hardware system-segment descriptor
 * (TSS/LDT), which carries a wider 40-bit high base.
 */
void
ssdtosyssd(struct soft_segment_descriptor *ssd, struct system_segment_descriptor *sd)
{

	sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
	sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
	sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
	sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
	sd->sd_type = ssd->ssd_type;
	sd->sd_dpl = ssd->ssd_dpl;
	sd->sd_p = ssd->ssd_p;
	sd->sd_gran = ssd->ssd_gran;
}

u_int basemem;

/*
 * Insert the region [base, base + length) into the sorted physmap array,
 * coalescing with an adjacent entry when possible.  Returns 0 when the
 * map is full, 1 otherwise (including when the entry is dropped as empty
 * or overlapping).
 */
static int
add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
    int *physmap_idxp)
{
	int i, insert_idx, physmap_idx;

	physmap_idx = *physmap_idxp;

	if (length == 0)
		return (1);

	/*
	 * Find insertion point while checking for overlap.  Start off by
	 * assuming the new entry will be added to the end.
	 *
	 * NB: physmap_idx points to the next free slot.
	 */
	insert_idx = physmap_idx;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (base < physmap[i + 1]) {
			if (base + length <= physmap[i]) {
				insert_idx = i;
				break;
			}
			if (boothowto & RB_VERBOSE)
				printf(
		    "Overlapping memory regions, ignoring second region\n");
			return (1);
		}
	}

	/* See if we can prepend to the next entry. */
	if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) {
		physmap[insert_idx] = base;
		return (1);
	}

	/* See if we can append to the previous entry. */
	if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
		physmap[insert_idx - 1] += length;
		return (1);
	}

	physmap_idx += 2;
	*physmap_idxp = physmap_idx;
	if (physmap_idx == PHYS_AVAIL_ENTRIES) {
		printf(
		"Too many segments in the physical address map, giving up\n");
		return (0);
	}

	/*
	 * Move the last 'N' entries down to make room for the new
	 * entry if needed.
	 */
	for (i = (physmap_idx - 2); i > insert_idx; i -= 2) {
		physmap[i] = physmap[i - 2];
		physmap[i + 1] = physmap[i - 1];
	}

	/* Insert the new entry. */
	physmap[insert_idx] = base;
	physmap[insert_idx + 1] = base + length;
	return (1);
}

/*
 * Walk the BIOS INT 15h E820 system memory map and add each usable
 * (SMAP_TYPE_MEMORY) range to the physmap array.
 */
void
bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize,
    vm_paddr_t *physmap, int *physmap_idx)
{
	struct bios_smap *smap, *smapend;

	smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);

	for (smap = smapbase; smap < smapend; smap++) {
		if (boothowto & RB_VERBOSE)
			printf("SMAP type=%02x base=%016lx len=%016lx\n",
			    smap->type, smap->base, smap->length);

		if (smap->type != SMAP_TYPE_MEMORY)
			continue;

		if (!add_physmap_entry(smap->base, smap->length, physmap,
		    physmap_idx))
			break;
	}
}

/*
 * Walk the UEFI memory map and add each usable descriptor to the physmap
 * array, optionally dumping the whole map when booted verbose.
 */
static void
add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap,
    int *physmap_idx)
{
	struct efi_md *map, *p;
	const char *type;
	size_t efisz;
	int ndesc, i;

	static const char *types[] = {
		"Reserved",
		"LoaderCode",
		"LoaderData",
		"BootServicesCode",
		"BootServicesData",
		"RuntimeServicesCode",
		"RuntimeServicesData",
		"ConventionalMemory",
		"UnusableMemory",
		"ACPIReclaimMemory",
		"ACPIMemoryNVS",
		"MemoryMappedIO",
		"MemoryMappedIOPortSpace",
		"PalCode",
		"PersistentMemory"
	};

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return;
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	if (boothowto & RB_VERBOSE)
		printf("%23s %12s %12s %8s %4s\n",
		    "Type", "Physical", "Virtual", "#Pages", "Attr");

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (boothowto & RB_VERBOSE) {
			if (p->md_type < nitems(types))
				type = types[p->md_type];
			else
				type = "<INVALID>";
			printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
			    p->md_virt, p->md_pages);
			if (p->md_attr & EFI_MD_ATTR_UC)
				printf("UC ");
			if (p->md_attr & EFI_MD_ATTR_WC)
				printf("WC ");
			if (p->md_attr & EFI_MD_ATTR_WT)
				printf("WT ");
			if (p->md_attr & EFI_MD_ATTR_WB)
				printf("WB ");
			if (p->md_attr & EFI_MD_ATTR_UCE)
				printf("UCE ");
			if (p->md_attr & EFI_MD_ATTR_WP)
				printf("WP ");
			if (p->md_attr & EFI_MD_ATTR_RP)
				printf("RP ");
			if (p->md_attr & EFI_MD_ATTR_XP)
				printf("XP ");
			if (p->md_attr & EFI_MD_ATTR_NV)
				printf("NV ");
			if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
				printf("MORE_RELIABLE ");
			if (p->md_attr & EFI_MD_ATTR_RO)
				printf("RO ");
			if (p->md_attr & EFI_MD_ATTR_RT)
				printf("RUNTIME");
			printf("\n");
		}

		switch (p->md_type) {
		case EFI_MD_TYPE_CODE:
		case EFI_MD_TYPE_DATA:
		case EFI_MD_TYPE_BS_CODE:
		case EFI_MD_TYPE_BS_DATA:
		case EFI_MD_TYPE_FREE:
			/*
			 * We're allowed to use any entry with these types.
			 */
			break;
		default:
			continue;
		}

		if (!add_physmap_entry(p->md_phys, p->md_pages * EFI_PAGE_SIZE,
		    physmap, physmap_idx))
			break;
	}
}

/*
 * Locate the loader-provided memory map (EFI map preferred, BIOS SMAP
 * fallback) in the preload metadata and feed it into physmap.
 */
static void
native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx)
{
	struct bios_smap *smap;
	struct efi_map_header *efihdr;
	u_int32_t size;

	/*
	 * Memory map from INT 15:E820.
	 *
	 * subr_module.c says:
	 * "Consumer may safely assume that size value precedes data."
	 * ie: an int32_t immediately precedes smap.
	 */

	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	smap = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (efihdr == NULL && smap == NULL)
		panic("No BIOS smap or EFI map info from loader!");

	if (efihdr != NULL) {
		add_efi_map_entries(efihdr, physmap, physmap_idx);
		strlcpy(bootmethod, "UEFI", sizeof(bootmethod));
	} else {
		size = *((u_int32_t *)smap - 1);
		bios_add_smap_entries(smap, size, physmap, physmap_idx);
		strlcpy(bootmethod, "BIOS", sizeof(bootmethod));
	}
}

#define	PAGES_PER_GB	(1024 * 1024 * 1024 / PAGE_SIZE)

/*
 * Populate the (physmap) array with base/bound pairs describing the
 * available physical memory in the system, then test this memory and
 * build the phys_avail array describing the actually-available memory.
 *
 * Total memory size may be set by the kernel environment variable
 * hw.physmem or the compile-time define MAXMEM.
 *
 * XXX first should be vm_paddr_t.
860 */ 861 static void 862 getmemsize(caddr_t kmdp, u_int64_t first) 863 { 864 int i, physmap_idx, pa_indx, da_indx; 865 vm_paddr_t pa, physmap[PHYS_AVAIL_ENTRIES]; 866 u_long physmem_start, physmem_tunable, memtest; 867 pt_entry_t *pte; 868 quad_t dcons_addr, dcons_size; 869 int page_counter; 870 871 /* 872 * Tell the physical memory allocator about pages used to store 873 * the kernel and preloaded data. See kmem_bootstrap_free(). 874 */ 875 vm_phys_early_add_seg((vm_paddr_t)kernphys, trunc_page(first)); 876 877 bzero(physmap, sizeof(physmap)); 878 physmap_idx = 0; 879 880 init_ops.parse_memmap(kmdp, physmap, &physmap_idx); 881 physmap_idx -= 2; 882 883 /* 884 * Find the 'base memory' segment for SMP 885 */ 886 basemem = 0; 887 for (i = 0; i <= physmap_idx; i += 2) { 888 if (physmap[i] <= 0xA0000) { 889 basemem = physmap[i + 1] / 1024; 890 break; 891 } 892 } 893 if (basemem == 0 || basemem > 640) { 894 if (bootverbose) 895 printf( 896 "Memory map doesn't contain a basemem segment, faking it"); 897 basemem = 640; 898 } 899 900 /* 901 * Maxmem isn't the "maximum memory", it's one larger than the 902 * highest page of the physical address space. It should be 903 * called something like "Maxphyspage". We may adjust this 904 * based on ``hw.physmem'' and the results of the memory test. 905 */ 906 Maxmem = atop(physmap[physmap_idx + 1]); 907 908 #ifdef MAXMEM 909 Maxmem = MAXMEM / 4; 910 #endif 911 912 if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) 913 Maxmem = atop(physmem_tunable); 914 915 /* 916 * The boot memory test is disabled by default, as it takes a 917 * significant amount of time on large-memory systems, and is 918 * unfriendly to virtual machines as it unnecessarily touches all 919 * pages. 920 * 921 * A general name is used as the code may be extended to support 922 * additional tests beyond the current "page present" test. 
923 */ 924 memtest = 0; 925 TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); 926 927 /* 928 * Don't allow MAXMEM or hw.physmem to extend the amount of memory 929 * in the system. 930 */ 931 if (Maxmem > atop(physmap[physmap_idx + 1])) 932 Maxmem = atop(physmap[physmap_idx + 1]); 933 934 if (atop(physmap[physmap_idx + 1]) != Maxmem && 935 (boothowto & RB_VERBOSE)) 936 printf("Physical memory use set to %ldK\n", Maxmem * 4); 937 938 /* call pmap initialization to make new kernel address space */ 939 pmap_bootstrap(&first); 940 941 /* 942 * Size up each available chunk of physical memory. 943 * 944 * XXX Some BIOSes corrupt low 64KB between suspend and resume. 945 * By default, mask off the first 16 pages unless we appear to be 946 * running in a VM. 947 */ 948 physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT; 949 TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start); 950 if (physmap[0] < physmem_start) { 951 if (physmem_start < PAGE_SIZE) 952 physmap[0] = PAGE_SIZE; 953 else if (physmem_start >= physmap[1]) 954 physmap[0] = round_page(physmap[1] - PAGE_SIZE); 955 else 956 physmap[0] = round_page(physmem_start); 957 } 958 pa_indx = 0; 959 da_indx = 1; 960 phys_avail[pa_indx++] = physmap[0]; 961 phys_avail[pa_indx] = physmap[0]; 962 dump_avail[da_indx] = physmap[0]; 963 pte = CMAP1; 964 965 /* 966 * Get dcons buffer address 967 */ 968 if (getenv_quad("dcons.addr", &dcons_addr) == 0 || 969 getenv_quad("dcons.size", &dcons_size) == 0) 970 dcons_addr = 0; 971 972 /* 973 * physmap is in bytes, so when converting to page boundaries, 974 * round up the start address and round down the end address. 
975 */ 976 page_counter = 0; 977 if (memtest != 0) 978 printf("Testing system memory"); 979 for (i = 0; i <= physmap_idx; i += 2) { 980 vm_paddr_t end; 981 982 end = ptoa((vm_paddr_t)Maxmem); 983 if (physmap[i + 1] < end) 984 end = trunc_page(physmap[i + 1]); 985 for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { 986 int tmp, page_bad, full; 987 int *ptr = (int *)CADDR1; 988 989 full = FALSE; 990 /* 991 * block out kernel memory as not available. 992 */ 993 if (pa >= (vm_paddr_t)kernphys && pa < first) 994 goto do_dump_avail; 995 996 /* 997 * block out dcons buffer 998 */ 999 if (dcons_addr > 0 1000 && pa >= trunc_page(dcons_addr) 1001 && pa < dcons_addr + dcons_size) 1002 goto do_dump_avail; 1003 1004 page_bad = FALSE; 1005 if (memtest == 0) 1006 goto skip_memtest; 1007 1008 /* 1009 * Print a "." every GB to show we're making 1010 * progress. 1011 */ 1012 page_counter++; 1013 if ((page_counter % PAGES_PER_GB) == 0) 1014 printf("."); 1015 1016 /* 1017 * map page into kernel: valid, read/write,non-cacheable 1018 */ 1019 *pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD; 1020 invltlb(); 1021 1022 tmp = *(int *)ptr; 1023 /* 1024 * Test for alternating 1's and 0's 1025 */ 1026 *(volatile int *)ptr = 0xaaaaaaaa; 1027 if (*(volatile int *)ptr != 0xaaaaaaaa) 1028 page_bad = TRUE; 1029 /* 1030 * Test for alternating 0's and 1's 1031 */ 1032 *(volatile int *)ptr = 0x55555555; 1033 if (*(volatile int *)ptr != 0x55555555) 1034 page_bad = TRUE; 1035 /* 1036 * Test for all 1's 1037 */ 1038 *(volatile int *)ptr = 0xffffffff; 1039 if (*(volatile int *)ptr != 0xffffffff) 1040 page_bad = TRUE; 1041 /* 1042 * Test for all 0's 1043 */ 1044 *(volatile int *)ptr = 0x0; 1045 if (*(volatile int *)ptr != 0x0) 1046 page_bad = TRUE; 1047 /* 1048 * Restore original value. 1049 */ 1050 *(int *)ptr = tmp; 1051 1052 skip_memtest: 1053 /* 1054 * Adjust array of valid/good pages. 
1055 */ 1056 if (page_bad == TRUE) 1057 continue; 1058 /* 1059 * If this good page is a continuation of the 1060 * previous set of good pages, then just increase 1061 * the end pointer. Otherwise start a new chunk. 1062 * Note that "end" points one higher than end, 1063 * making the range >= start and < end. 1064 * If we're also doing a speculative memory 1065 * test and we at or past the end, bump up Maxmem 1066 * so that we keep going. The first bad page 1067 * will terminate the loop. 1068 */ 1069 if (phys_avail[pa_indx] == pa) { 1070 phys_avail[pa_indx] += PAGE_SIZE; 1071 } else { 1072 pa_indx++; 1073 if (pa_indx == PHYS_AVAIL_ENTRIES) { 1074 printf( 1075 "Too many holes in the physical address space, giving up\n"); 1076 pa_indx--; 1077 full = TRUE; 1078 goto do_dump_avail; 1079 } 1080 phys_avail[pa_indx++] = pa; /* start */ 1081 phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ 1082 } 1083 physmem++; 1084 do_dump_avail: 1085 if (dump_avail[da_indx] == pa) { 1086 dump_avail[da_indx] += PAGE_SIZE; 1087 } else { 1088 da_indx++; 1089 if (da_indx == PHYS_AVAIL_ENTRIES) { 1090 da_indx--; 1091 goto do_next; 1092 } 1093 dump_avail[da_indx++] = pa; /* start */ 1094 dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ 1095 } 1096 do_next: 1097 if (full) 1098 break; 1099 } 1100 } 1101 *pte = 0; 1102 invltlb(); 1103 if (memtest != 0) 1104 printf("\n"); 1105 1106 /* 1107 * XXX 1108 * The last chunk must contain at least one page plus the message 1109 * buffer to avoid complicating other code (message buffer address 1110 * calculation, etc.). 1111 */ 1112 while (phys_avail[pa_indx - 1] + PAGE_SIZE + 1113 round_page(msgbufsize) >= phys_avail[pa_indx]) { 1114 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); 1115 phys_avail[pa_indx--] = 0; 1116 phys_avail[pa_indx--] = 0; 1117 } 1118 1119 Maxmem = atop(phys_avail[pa_indx]); 1120 1121 /* Trim off space for the message buffer. */ 1122 phys_avail[pa_indx] -= round_page(msgbufsize); 1123 1124 /* Map the message buffer. 
 */
	msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]);
}

/*
 * Parse the preload metadata supplied by the native boot loader:
 * relocate the metadata list to its KERNBASE-relative address, locate
 * the kernel's module entry, and pull out boothowto, the static kernel
 * environment, the debugger symbol table bounds (DDB only) and the EFI
 * system table physical address.  Returns the kernel module metadata
 * pointer (kmdp).
 */
static caddr_t
native_parse_preload_data(u_int64_t modulep)
{
	caddr_t kmdp;
	char *envp;
#ifdef DDB
	vm_offset_t ksym_start;
	vm_offset_t ksym_end;
#endif

	preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE);
	preload_bootstrap_relocate(KERNBASE);
	/* Older loaders used "elf kernel"; newer ones use "elf64 kernel". */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
	envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
	if (envp != NULL)
		envp += KERNBASE;	/* loader stored a physical address */
	init_static_kenv(envp, 0);
#ifdef DDB
	ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
	ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
	db_fetch_ksymtab(ksym_start, ksym_end, 0);
#endif
	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	return (kmdp);
}

/* Native hardware: program the i8254 PIT as the early clock source. */
static void
native_clock_source_init(void)
{
	i8254_init();
}

/*
 * Initialize the kernel debugger and drop into it immediately when the
 * loader passed the RB_KDB boot flag.
 */
static void
amd64_kdb_init(void)
{
	kdb_init();
#ifdef KDB
	if (boothowto & RB_KDB)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
}

/* Set up the fast syscall stuff */
void
amd64_conf_fast_syscall(void)
{
	uint64_t msr;

	/* EFER.SCE enables the SYSCALL/SYSRET instructions. */
	msr = rdmsr(MSR_EFER) | EFER_SCE;
	wrmsr(MSR_EFER, msr);
	/*
	 * LSTAR is the 64-bit SYSCALL entry point; select the PTI
	 * trampoline variant when page table isolation is enabled.
	 */
	wrmsr(MSR_LSTAR, pti ?
	    (u_int64_t)IDTVEC(fast_syscall_pti) :
	    (u_int64_t)IDTVEC(fast_syscall));
	/* CSTAR is the compatibility-mode (32-bit) SYSCALL entry point. */
	wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
	/* STAR holds the kernel/user segment selector bases for SYSCALL. */
	msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
	    ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
	wrmsr(MSR_STAR, msr);
	/* SF_MASK: RFLAGS bits cleared on SYSCALL entry. */
	wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC);
}

/*
 * First-stage BSP per-CPU setup: record the pcpu area, make thread0 the
 * current thread, and point the TSS/LDT and 32-bit %fs/%gs descriptor
 * pointers at the corresponding slots of this CPU's GDT.
 */
void
amd64_bsp_pcpu_init1(struct pcpu *pc)
{
	struct user_segment_descriptor *gdt;

	PCPU_SET(prvspace, pc);
	gdt = *PCPU_PTR(gdt);
	PCPU_SET(curthread, &thread0);
	PCPU_SET(tssp, PCPU_PTR(common_tss));
	PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
	PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
	PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
	PCPU_SET(gs32p, &gdt[GUGS32_SEL]);
	PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK);
	PCPU_SET(smp_tlb_gen, 1);
}

/*
 * Second-stage BSP per-CPU setup, run once thread0's pcb exists: record
 * the ring-0 stack pointer and the top of the PTI trampoline stack
 * (forced to 16-byte alignment by the ~0xf mask).
 */
void
amd64_bsp_pcpu_init2(uint64_t rsp0)
{

	PCPU_SET(rsp0, rsp0);
	PCPU_SET(pti_rsp0, ((vm_offset_t)PCPU_PTR(pti_stack) +
	    PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
	PCPU_SET(curpcb, thread0.td_pcb);
}

/*
 * Wire up the four IST (interrupt stack table) entries in this CPU's
 * TSS.  Each points just below a struct nmi_pcpu placed at the top of
 * its statically allocated stack, so the handlers can recover the pcpu
 * pointer from the stack itself.
 */
void
amd64_bsp_ist_init(struct pcpu *pc)
{
	struct nmi_pcpu *np;
	struct amd64tss *tssp;

	tssp = &pc->pc_common_tss;

	/* doublefault stack space, runs on ist1 */
	np = ((struct nmi_pcpu *)&dblfault_stack[sizeof(dblfault_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist1 = (long)np;

	/*
	 * NMI stack, runs on ist2.  The pcpu pointer is stored just
	 * above the start of the ist2 stack.
	 */
	np = ((struct nmi_pcpu *)&nmi0_stack[sizeof(nmi0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist2 = (long)np;

	/*
	 * MC# stack, runs on ist3.  The pcpu pointer is stored just
	 * above the start of the ist3 stack.
	 */
	np = ((struct nmi_pcpu *)&mce0_stack[sizeof(mce0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist3 = (long)np;

	/*
	 * DB# stack, runs on ist4.
	 */
	np = ((struct nmi_pcpu *)&dbg0_stack[sizeof(dbg0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	tssp->tss_ist4 = (long)np;
}

/*
 * Calculate the kernel load address by inspecting page table created by loader.
 * The assumptions:
 * - kernel is mapped at KERNBASE, backed by contiguous phys memory
 *   aligned at 2M, below 4G (the latter is important for AP startup)
 * - there is a 2M hole at KERNBASE (KERNSTART = KERNBASE + 2M)
 * - kernel is mapped with 2M superpages
 * - all participating memory, i.e. kernel, modules, metadata,
 *   page table is accessible by pre-created 1:1 mapping
 *   (right now loader creates 1:1 mapping for lower 4G, and all
 *   memory is from there)
 * - there is a usable memory block right after the end of the
 *   mapped kernel and all modules/metadata, pointed to by
 *   physfree, for early allocations
 */
vm_paddr_t __nosanitizeaddress __nosanitizememory
amd64_loadaddr(void)
{
	pml4_entry_t *pml4e;
	pdp_entry_t *pdpe;
	pd_entry_t *pde;
	uint64_t cr3;

	/* Walk the loader's page table: PML4 -> PDP -> PD for KERNSTART. */
	cr3 = rcr3();
	pml4e = (pml4_entry_t *)cr3 + pmap_pml4e_index(KERNSTART);
	pdpe = (pdp_entry_t *)(*pml4e & PG_FRAME) + pmap_pdpe_index(KERNSTART);
	pde = (pd_entry_t *)(*pdpe & PG_FRAME) + pmap_pde_index(KERNSTART);
	return (*pde & PG_FRAME);
}

/*
 * Early machine-dependent initialization for the bootstrap processor.
 * modulep is the loader's module metadata pointer and physfree the
 * first free physical address, both supplied by the loader.  The
 * return value is the top of thread0's kernel stack, which locore
 * switches onto (see the return statement at the end).
 */
u_int64_t
hammer_time(u_int64_t modulep, u_int64_t physfree)
{
	caddr_t kmdp;
	int gsel_tss, x;
	struct pcpu *pc;
	uint64_t rsp0;
	char *env;
	struct user_segment_descriptor *gdt;
	struct region_descriptor r_gdt;
	size_t kstack0_sz;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	kernphys = amd64_loadaddr();

	/* physfree arrives kernel-relative; make it a physical address. */
	physfree += kernphys;

	kmdp =
	    init_ops.parse_preload_data(modulep);

	/* UEFI boot is detected by the presence of an EFI memory map. */
	efi_boot = preload_search_info(kmdp, MODINFO_METADATA |
	    MODINFOMD_EFI_MAP) != NULL;

	if (!efi_boot) {
		/* Tell the bios to warmboot next time */
		atomic_store_short((u_short *)0x472, 0x1234);
	}

	/* Apply any CPU microcode update the loader staged for us. */
	physfree += ucode_load_bsp(physfree - kernphys + KERNSTART);
	physfree = roundup2(physfree, PAGE_SIZE);

	/* Gather CPUID-derived identification for the BSP. */
	identify_cpu1();
	identify_hypervisor();
	identify_cpu_fixup_bsp();
	identify_cpu2();
	initializecpucache();

	/*
	 * Check for pti, pcid, and invpcid before ifuncs are
	 * resolved, to correctly select the implementation for
	 * pmap_activate_sw_mode().
	 */
	pti = pti_get_default();
	TUNABLE_INT_FETCH("vm.pmap.pti", &pti);
	TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
	if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
		invpcid_works = (cpu_stdext_feature &
		    CPUID_STDEXT_INVPCID) != 0;
	} else {
		pmap_pcid_enabled = 0;
	}

	/*
	 * Now we can do small core initialization, after the PCID
	 * CPU features and user knobs are evaluated.
	 */
	TUNABLE_INT_FETCH("vm.pmap.pcid_invlpg_workaround",
	    &pmap_pcid_invlpg_workaround_uena);
	cpu_init_small_core();

	link_elf_ireloc(kmdp);

	/*
	 * This may be done better later if it gets more high level
	 * components in it. If so just link td->td_proc here.
	 */
	proc_linkup0(&proc0, &thread0);

	/* Init basic tunables, hz etc */
	init_param1();

	/* Carve thread0's kernel stack out of the early allocation area. */
	thread0.td_kstack = physfree - kernphys + KERNSTART;
	thread0.td_kstack_pages = kstack_pages;
	kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
	bzero((void *)thread0.td_kstack, kstack0_sz);
	physfree += kstack0_sz;

	/*
	 * Initialize enough of thread0 for delayed invalidation to
	 * work very early.  Rely on thread0.td_base_pri
	 * zero-initialization, it is reset to PVM at proc0_init().
	 */
	pmap_thread_init_invl_gen(&thread0);

	pc = &temp_bsp_pcpu;
	pcpu_init(pc, 0, sizeof(struct pcpu));
	gdt = &temp_bsp_pcpu.pc_gdt[0];

	/*
	 * make gdt memory segments.  The TSS and LDT slots are system
	 * descriptors (two slots each) and are filled in separately.
	 */
	for (x = 0; x < NGDT; x++) {
		if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
		    x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1)
			ssdtosd(&gdt_segs[x], &gdt[x]);
	}
	gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&pc->pc_common_tss;
	ssdtosyssd(&gdt_segs[GPROC0_SEL],
	    (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);

	/* Install the temporary BSP GDT. */
	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (long)gdt;
	lgdt(&r_gdt);

	wrmsr(MSR_FSBASE, 0);		/* User value */
	wrmsr(MSR_GSBASE, (u_int64_t)pc);
	wrmsr(MSR_KGSBASE, 0);		/* User value while in the kernel */

	dpcpu_init((void *)(physfree - kernphys + KERNSTART), 0);
	physfree += DPCPU_SIZE;
	amd64_bsp_pcpu_init1(pc);
	/* Non-late cninit() and printf() can be moved up to here. */

	/*
	 * Initialize mutexes.
	 *
	 * icu_lock: in order to allow an interrupt to occur in a critical
	 *	      section, to set pcpu->ipending (etc...) properly, we
	 *	      must be able to get the icu lock, so it can't be
	 *	      under witness.
	 */
	mutex_init();
	mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
	mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);

	/*
	 * exceptions: first point every vector at the reserved-vector
	 * handler, then install the real handlers.  The PTI variants go
	 * through the usermode trampoline page.
	 */
	for (x = 0; x < NIDT; x++)
		setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT,
		    SEL_KPL, 0);
	setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT,
	    SEL_KPL, 0);
	/* DB#/NMI run on dedicated IST stacks (cf. amd64_bsp_ist_init()). */
	setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4);
	setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2);
	/* BP and OF are SEL_UPL so userland may raise them directly. */
	setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_OF, pti ?
	    &IDTVEC(ofl_pti) : &IDTVEC(ofl), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_BR, pti ? &IDTVEC(bnd_pti) : &IDTVEC(bnd), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_UD, pti ? &IDTVEC(ill_pti) : &IDTVEC(ill), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NM, pti ? &IDTVEC(dna_pti) : &IDTVEC(dna), SDT_SYSIGT,
	    SEL_KPL, 0);
	/* Double fault and MC# also get dedicated IST stacks. */
	setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
	setidt(IDT_FPUGP, pti ? &IDTVEC(fpusegm_pti) : &IDTVEC(fpusegm),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_TS, pti ? &IDTVEC(tss_pti) : &IDTVEC(tss), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NP, pti ? &IDTVEC(missing_pti) : &IDTVEC(missing),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_SS, pti ? &IDTVEC(stk_pti) : &IDTVEC(stk), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_GP, pti ? &IDTVEC(prot_pti) : &IDTVEC(prot), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_PF, pti ? &IDTVEC(page_pti) : &IDTVEC(page), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MF, pti ? &IDTVEC(fpu_pti) : &IDTVEC(fpu), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_AC, pti ? &IDTVEC(align_pti) : &IDTVEC(align), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 3);
	setidt(IDT_XF, pti ? &IDTVEC(xmm_pti) : &IDTVEC(xmm), SDT_SYSIGT,
	    SEL_KPL, 0);
#ifdef KDTRACE_HOOKS
	setidt(IDT_DTRACE_RET, pti ? &IDTVEC(dtrace_ret_pti) :
	    &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0);
#endif
#ifdef XENHVM
	setidt(IDT_EVTCHN, pti ? &IDTVEC(xen_intr_upcall_pti) :
	    &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_KPL, 0);
#endif
	/* Load the IDT register. */
	r_idt.rd_limit = sizeof(idt0) - 1;
	r_idt.rd_base = (long) idt;
	lidt(&r_idt);

	/*
	 * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4)
	 * transition).
	 * Once bootblocks have updated, we can test directly for
	 * efi_systbl != NULL here...
	 */
	if (efi_boot)
		vty_set_preferred(VTY_VT);

	/*
	 * Speculative-execution mitigation knobs; each accepts both its
	 * legacy hw.* name and the machdep.mitigations.* name.
	 */
	TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.ibrs.disable", &hw_ibrs_disable);

	TUNABLE_INT_FETCH("hw.spec_store_bypass_disable", &hw_ssb_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.ssb.disable", &hw_ssb_disable);

	TUNABLE_INT_FETCH("machdep.syscall_ret_l1d_flush",
	    &syscall_ret_l1d_flush_mode);

	TUNABLE_INT_FETCH("hw.mds_disable", &hw_mds_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.mds.disable", &hw_mds_disable);

	TUNABLE_INT_FETCH("machdep.mitigations.taa.enable", &x86_taa_enable);

	TUNABLE_INT_FETCH("machdep.mitigations.rndgs.enable",
	    &x86_rngds_mitg_enable);

	finishidentcpu();	/* Final stage of CPU initialization */

	/*
	 * Initialize the clock before the console so that console
	 * initialization can use DELAY().
	 */
	clock_init();

	initializecpu();	/* Initialize CPU registers */

	amd64_bsp_ist_init(pc);

	/* Set the IO permission bitmap (empty due to tss seg limit) */
	pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) +
	    IOPERM_BITMAP_SIZE;

	/* Load the task register with the BSP's TSS selector. */
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	ltr(gsel_tss);

	amd64_conf_fast_syscall();

	/*
	 * We initialize the PCB pointer early so that exception
	 * handlers will work.  Also set up td_critnest to short-cut
	 * the page fault handler.
	 */
	cpu_max_ext_state_size = sizeof(struct savefpu);
	set_top_of_stack_td(&thread0);
	thread0.td_pcb = get_pcb_td(&thread0);
	thread0.td_critnest = 1;

	/*
	 * The console and kdb should be initialized even earlier than here,
	 * but some console drivers don't work until after getmemsize().
	 * Default to late console initialization to support these drivers.
	 * This loses mainly printf()s in getmemsize() and early debugging.
	 */
	TUNABLE_INT_FETCH("debug.late_console", &late_console);
	if (!late_console) {
		cninit();
		amd64_kdb_init();
	}

	getmemsize(kmdp, physfree);
	init_param2(physmem);

	/* now running on new page tables, configured, and u/iom is accessible */

#ifdef DEV_PCI
	/* This call might adjust phys_avail[]. */
	pci_early_quirks();
#endif

	if (late_console)
		cninit();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

#ifdef DEV_ISA
#ifdef DEV_ATPIC
	elcr_probe();
	atpic_startup();
#else
	/* Reset and mask the atpics and leave them shut down. */
	atpic_reset();

	/*
	 * Point the ICU spurious interrupt vectors at the APIC spurious
	 * interrupt handler.
	 */
	setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
#endif
#else
#error "have you forgotten the isa device?"
#endif

	if (late_console)
		amd64_kdb_init();

	msgbufinit(msgbufp, msgbufsize);
	fpuinit();

	/*
	 * Make an initial tss so the cpu can get an interrupt stack on
	 * syscall.
	 */
	rsp0 = thread0.td_md.md_stack_base;
	/* Ensure the stack is aligned to 16 bytes */
	rsp0 &= ~0xFul;
	PCPU_PTR(common_tss)->tss_rsp0 = rsp0;
	amd64_bsp_pcpu_init2(rsp0);

	/* transfer to user mode */

	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
	_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
	_ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
	_ufssel = GSEL(GUFS32_SEL, SEL_UPL);
	_ugssel = GSEL(GUGS32_SEL, SEL_UPL);

	load_ds(_udatasel);
	load_es(_udatasel);
	load_fs(_ufssel);

	/* setup proc 0's pcb */
	thread0.td_pcb->pcb_flags = 0;

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

	kcsan_cpu_init(0);

#ifdef FDT
	x86_init_fdt();
#endif
	thread0.td_critnest = 0;

	/* Sanitizer runtimes come up last, once the VM layout is known. */
	kasan_init();
	kmsan_init();

	TSEXIT();

	/* Location of kernel stack for locore */
	return (thread0.td_md.md_stack_base);
}

/*
 * Per-CPU area constructor hook: default the ACPI id to an invalid
 * marker; it is filled in later by platform code.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
}

/*
 * sysctl handler exporting the raw BIOS SMAP from the loader metadata,
 * augmented with extended attributes when the loader provided them.
 */
static int
smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct bios_smap *smapbase;
	struct bios_smap_xattr smap;
	caddr_t kmdp;
	uint32_t *smapattr;
	int count, error, i;

	/* Retrieve the system memory map from the loader.
 */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	smapbase = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (smapbase == NULL)
		return (0);	/* no SMAP provided (e.g. pure EFI boot) */
	smapattr = (uint32_t *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP_XATTR);
	/* The metadata word preceding the array holds its byte length. */
	count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase);
	error = 0;
	for (i = 0; i < count; i++) {
		smap.base = smapbase[i].base;
		smap.length = smapbase[i].length;
		smap.type = smapbase[i].type;
		if (smapattr != NULL)
			smap.xattr = smapattr[i];
		else
			smap.xattr = 0;
		error = SYSCTL_OUT(req, &smap, sizeof(smap));
	}
	return (error);
}
SYSCTL_PROC(_machdep, OID_AUTO, smap,
    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    smap_sysctl_handler, "S,bios_smap_xattr",
    "Raw BIOS SMAP data");

/*
 * sysctl handler exporting the raw EFI memory map from the loader
 * metadata, if one was supplied.
 */
static int
efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct efi_map_header *efihdr;
	caddr_t kmdp;
	uint32_t efisize;

	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr == NULL)
		return (0);	/* not an EFI boot */
	/* The metadata word preceding the header holds its byte length. */
	efisize = *((uint32_t *)efihdr - 1);
	return (SYSCTL_OUT(req, efihdr, efisize));
}
SYSCTL_PROC(_machdep, OID_AUTO, efi_map,
    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    efi_map_sysctl_handler, "S,efi_map_header",
    "Raw EFI Memory Map");

/*
 * Enter a spinlock section: on the outermost entry, disable interrupts,
 * remember the previous interrupt state in the thread, and enter a
 * critical section.  Nested entries just bump the per-thread count.
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t flags;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		flags = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_flags = flags;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}

/*
 * Leave a spinlock section; the outermost exit leaves the critical
 * section and restores the interrupt state saved by spinlock_enter().
 */
void
spinlock_exit(void)
{
	struct thread *td;
	register_t flags;

	td = curthread;
	flags = td->td_md.md_saved_flags;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(flags);
	}
}

/*
 * Construct a PCB from a trapframe. This is called from kdb_trap() where
 * we want to start a backtrace from the function that caused us to enter
 * the debugger. We have the context in the trapframe, but base the trace
 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 * enough for a backtrace.
 */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{

	/* Copy the callee-saved registers plus %rip/%rsp. */
	pcb->pcb_r12 = tf->tf_r12;
	pcb->pcb_r13 = tf->tf_r13;
	pcb->pcb_r14 = tf->tf_r14;
	pcb->pcb_r15 = tf->tf_r15;
	pcb->pcb_rbp = tf->tf_rbp;
	pcb->pcb_rbx = tf->tf_rbx;
	pcb->pcb_rip = tf->tf_rip;
	pcb->pcb_rsp = tf->tf_rsp;
}

/*
 * The pcb_flags is only modified by current thread, or by other threads
 * when current thread is stopped.  However, current thread may change it
 * from the interrupt context in cpu_switch(), or in the trap handler.
 * When we read-modify-write pcb_flags from C sources, compiler may generate
 * code that is not atomic regarding the interrupt handler.  If a trap or
 * interrupt happens and any flag is modified from the handler, it can be
 * clobbered with the cached value later.  Therefore, we implement setting
 * and clearing flags with single-instruction functions, which do not race
 * with possible modification of the flags from the trap or interrupt context,
 * because traps and interrupts are executed only on instruction boundary.
 */
/* Set flag bits in pcb_flags with a single "orl" instruction. */
void
set_pcb_flags_raw(struct pcb *pcb, const u_int flags)
{

	__asm __volatile("orl %1,%0"
	    : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags)
	    : "cc", "memory");

}

/*
 * The support for RDFSBASE, WRFSBASE and similar instructions for %gs
 * base requires that kernel saves MSR_FSBASE and MSR_{K,}GSBASE into
 * pcb if user space modified the bases.  We must save on the context
 * switch or if the return to usermode happens through the doreti.
 *
 * Tracking of both events is performed by the pcb flag PCB_FULL_IRET,
 * which have a consequence that the base MSRs must be saved each time
 * the PCB_FULL_IRET flag is set.  We disable interrupts to sync with
 * context switches.
 */
static void
set_pcb_flags_fsgsbase(struct pcb *pcb, const u_int flags)
{
	register_t r;

	/*
	 * Only the transition of PCB_FULL_IRET from clear to set on the
	 * current pcb needs the base-save treatment; anything else is a
	 * plain flag update.
	 */
	if (curpcb == pcb &&
	    (flags & PCB_FULL_IRET) != 0 &&
	    (pcb->pcb_flags & PCB_FULL_IRET) == 0) {
		r = intr_disable();
		/* Re-check: an interrupt may have set the flag already. */
		if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) {
			if (rfs() == _ufssel)
				pcb->pcb_fsbase = rdfsbase();
			if (rgs() == _ugssel)
				pcb->pcb_gsbase = rdmsr(MSR_KGSBASE);
		}
		set_pcb_flags_raw(pcb, flags);
		intr_restore(r);
	} else {
		set_pcb_flags_raw(pcb, flags);
	}
}

/*
 * Resolve set_pcb_flags at boot: the FSGSBASE-aware variant is only
 * needed when the CPU supports the RD/WRFSBASE instruction family.
 */
DEFINE_IFUNC(, void, set_pcb_flags, (struct pcb *, const u_int))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0 ?
	    set_pcb_flags_fsgsbase : set_pcb_flags_raw);
}

/* Clear flag bits in pcb_flags with a single "andl" instruction. */
void
clear_pcb_flags(struct pcb *pcb, const u_int flags)
{

	__asm __volatile("andl %1,%0"
	    : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags)
	    : "cc", "memory");
}

#ifdef KDB

/*
 * Provide inb() and outb() as functions.  They are normally only available as
 * inline functions, thus cannot be called from the debugger.
 */

/* silence compiler warnings */
u_char inb_(u_short);
void outb_(u_short, u_char);

/* Out-of-line wrapper for inb(), callable from the debugger. */
u_char
inb_(u_short port)
{
	return inb(port);
}

/* Out-of-line wrapper for outb(), callable from the debugger. */
void
outb_(u_short port, u_char data)
{
	outb(port, data);
}

#endif /* KDB */

/*
 * Drop any macro definitions so the function/ifunc definitions below
 * bind to the real names.
 */
#undef memset
#undef memmove
#undef memcpy

void	*memset_std(void *buf, int c, size_t len);
void	*memset_erms(void *buf, int c, size_t len);
void	*memmove_std(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);
void	*memmove_erms(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);
void	*memcpy_std(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);
void	*memcpy_erms(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);

#ifdef KCSAN
/*
 * These fail to build as ifuncs when used with KCSAN.
 */
void *
memset(void *buf, int c, size_t len)
{

	return (memset_std(buf, c, len));
}

void *
memmove(void * _Nonnull dst, const void * _Nonnull src, size_t len)
{

	return (memmove_std(dst, src, len));
}

void *
memcpy(void * _Nonnull dst, const void * _Nonnull src, size_t len)
{

	return (memcpy_std(dst, src, len));
}
#else
/*
 * Resolve the bulk-memory primitives at boot: prefer the *_erms
 * variants when CPUID advertises Enhanced REP MOVSB/STOSB.
 */
DEFINE_IFUNC(, void *, memset, (void *, int, size_t))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
	    memset_erms : memset_std);
}

DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull,
    size_t))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
	    memmove_erms : memmove_std);
}

DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,size_t))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
	    memcpy_erms : memcpy_std);
}
#endif

void	pagezero_std(void *addr);
void	pagezero_erms(void *addr);
/* Same ERMS-based selection for the page-zeroing primitive. */
DEFINE_IFUNC(, void , pagezero, (void *))
{

	return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
	    pagezero_erms : pagezero_std);
}