1 /*- 2 * Copyright (c) 2014 Andrew Turner 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
 *
 */

#include "opt_acpi.h"
#include "opt_kstack_pages.h"
#include "opt_platform.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/asan.h>
#include <sys/buf.h>
#include <sys/bus.h>
#include <sys/cons.h>
#include <sys/cpu.h>
#include <sys/csan.h>
#include <sys/devmap.h>
#include <sys/efi.h>
#include <sys/exec.h>
#include <sys/imgact.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/msan.h>
#include <sys/msgbuf.h>
#include <sys/pcpu.h>
#include <sys/physmem.h>
#include <sys/proc.h>
#include <sys/ptrace.h>
#include <sys/reboot.h>
#include <sys/reg.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/ucontext.h>
#include <sys/vdso.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/debug_monitor.h>
#include <machine/hypervisor.h>
#include <machine/kdb.h>
#include <machine/machdep.h>
#include <machine/metadata.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/undefined.h>
#include <machine/vmparam.h>

#ifdef VFP
#include <machine/vfp.h>
#endif

#ifdef DEV_ACPI
#include <contrib/dev/acpica/include/acpi.h>
#include <machine/acpica_machdep.h>
#endif

#ifdef FDT
#include <dev/fdt/fdt_common.h>
#include <dev/ofw/openfirm.h>
#endif

#include <dev/smbios/smbios.h>

/*
 * struct pcb layout is shared with assembly stubs; these asserts catch
 * any accidental ABI change.
 */
_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
    "pcb_fpusaved changed offset");
_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
    "pcb_fpustate changed offset");

enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;

/*
 * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
 * could relocate this, but will need to keep the same virtual address as
 * it's referenced by the EARLY_COUNTER macro.
 */
struct pcpu pcpu0;

#if defined(PERTHREAD_SSP)
/*
 * The boot SSP canary. Will be replaced with a per-thread canary when
 * scheduling has started.
 */
uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
#endif

static struct trapframe proc0_tf;

int early_boot = 1;
int cold = 1;
static int boot_el;		/* Exception level we were entered at */
static uint64_t hcr_el2;	/* HCR_EL2 value saved from the boot params */

struct kva_md_info kmi;

int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
int has_pan;			/* Non-zero when PAN is implemented */

#if defined(SOCDEV_PA)
/*
 * This is the virtual address used to access SOCDEV_PA. As it's set before
 * .bss is cleared we need to ensure it's preserved. To do this use
 * __read_mostly as it's only ever set once but read in the putc functions.
 */
uintptr_t socdev_va __read_mostly;
#endif

/*
 * Physical address of the EFI System Table. Stashed from the metadata hints
 * passed into the kernel and used by the EFI code to call runtime services.
 */
vm_paddr_t efi_systbl_phys;
static struct efi_map_header *efihdr;

/* pagezero_* implementations are provided in support.S */
void pagezero_simple(void *);
void pagezero_cache(void *);

/* pagezero_simple is default pagezero */
void (*pagezero)(void *p) = pagezero_simple;

int (*apei_nmi)(void);

#if defined(PERTHREAD_SSP_WARNING)
static void
print_ssp_warning(void *data __unused)
{
	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif

/* Record whether the CPU implements Privileged Access Never (PAN). */
static void
pan_setup(void)
{
	uint64_t id_aa64mfr1;

	id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
	if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE)
		has_pan = 1;
}

void
pan_enable(void)
{

	/*
	 * The LLVM integrated assembler doesn't understand the PAN
	 * PSTATE field. Because of this we need to manually create
	 * the instruction in an asm block. This is equivalent to:
	 * msr pan, #1
	 *
	 * This sets the PAN bit, stopping the kernel from accessing
	 * memory when userspace can also access it unless the kernel
	 * uses the userspace load/store instructions.
	 */
	if (has_pan) {
		WRITE_SPECIALREG(sctlr_el1,
		    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
		__asm __volatile(".inst 0xd500409f | (0x1 << 8)");
	}
}

bool
has_hyp(void)
{

	/*
	 * XXX The E2H check is wrong, but it's close enough for now. Needs to
	 * be re-evaluated once we're running regularly in EL2.
	 */
	return (boot_el == CURRENTEL_EL_EL2 && (hcr_el2 & HCR_E2H) == 0);
}

bool
in_vhe(void)
{
	/* If we are currently in EL2 then must be in VHE */
	return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) ==
	    CURRENTEL_EL_EL2);
}

/* Late boot-time setup: report memory, init buffers and the kernel submaps. */
static void
cpu_startup(void *dummy)
{
	vm_paddr_t size;
	int i;

	printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
	    ptoa((uintmax_t)realmem) / 1024 / 1024);

	if (bootverbose) {
		printf("Physical memory chunk(s):\n");
		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
			size = phys_avail[i + 1] - phys_avail[i];
			printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
			    (uintmax_t)phys_avail[i],
			    (uintmax_t)phys_avail[i + 1] - 1,
			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
		}
	}

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)vm_free_count()),
	    ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);

	undef_init();
	install_cpu_errata();

	vm_ksubmap_init(&kmi);
	bufinit();
	vm_pager_bufferinit();
}

SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);

static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);

int
cpu_idle_wakeup(int cpu)
{

	return (0);
}

void
cpu_idle(int busy)
{

	spinlock_enter();
	if (!busy)
		cpu_idleclock();
	if (!sched_runnable())
		__asm __volatile(
		    "dsb sy \n"
		    "wfi \n");
	if (!busy)
		cpu_activeclock();
	spinlock_exit();
}

void
cpu_halt(void)
{

	/* We should have shutdown by now, if not enter a low power sleep */
	intr_disable();
	while (1) {
		__asm __volatile("wfi");
	}
}

/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{

	/* ARM64TODO TBD */
}

/* Get current clock frequency for the given CPU ID. */
int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{
	struct pcpu *pc;

	pc = pcpu_find(cpu_id);
	if (pc == NULL || rate == NULL)
		return (EINVAL);

	if (pc->pc_clock == 0)
		return (EOPNOTSUPP);

	*rate = pc->pc_clock;
	return (0);
}

void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	/* Mark as unknown until the platform code fills these in. */
	pcpu->pc_acpi_id = 0xffffffff;
	pcpu->pc_mpidr = UINT64_MAX;
}

void
spinlock_enter(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		/* Disable interrupts on first entry; save DAIF to restore. */
		daif = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_daif = daif;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}

void
spinlock_exit(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	daif = td->td_md.md_saved_daif;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(daif);
	}
}

/*
 * Construct a PCB from a trapframe. This is called from kdb_trap() where
 * we want to start a backtrace from the function that caused us to enter
 * the debugger. We have the context in the trapframe, but base the trace
 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 * enough for a backtrace.
 */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{
	int i;

	/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
	for (i = 0; i < nitems(pcb->pcb_x); i++) {
		if (i == PCB_LR)
			pcb->pcb_x[i] = tf->tf_elr;
		else
			pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
	}

	pcb->pcb_sp = tf->tf_sp;
}

/* Initialize proc0/thread0 state: kernel stack, PCB and trapframe. */
static void
init_proc0(vm_offset_t kstack)
{
	struct pcpu *pcpup;

	pcpup = cpuid_to_pcpu[0];
	MPASS(pcpup != NULL);

	proc_linkup0(&proc0, &thread0);
	thread0.td_kstack = kstack;
	thread0.td_kstack_pages = KSTACK_PAGES;
#if defined(PERTHREAD_SSP)
	thread0.td_md.md_canary = boot_canary;
#endif
	/* The PCB lives at the top of the kernel stack. */
	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_fpflags = 0;
	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
	thread0.td_frame = &proc0_tf;
	ptrauth_thread0(&thread0);
	pcpup->pc_curpcb = thread0.td_pcb;

	/*
	 * Unmask SError exceptions. They are used to signal a RAS failure,
	 * or other hardware error.
	 */
	serror_enable();
}

/*
 * Get an address to be used to write to kernel data that may be mapped
 * read-only, e.g. to patch kernel code.
 */
bool
arm64_get_writable_addr(void *addr, void **out)
{
	vm_paddr_t pa;

	/* Check if the page is writable */
	if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) {
		*out = addr;
		return (true);
	}

	/*
	 * Find the physical address of the given page.
	 */
	if (!pmap_klookup((vm_offset_t)addr, &pa)) {
		return (false);
	}

	/*
	 * If it is within the DMAP region and is writable use that.
	 */
	if (PHYS_IN_DMAP_RANGE(pa)) {
		addr = (void *)PHYS_TO_DMAP(pa);
		if (PAR_SUCCESS(arm64_address_translate_s1e1w(
		    (vm_offset_t)addr))) {
			*out = addr;
			return (true);
		}
	}

	return (false);
}

typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);

/* Invoke cb on every descriptor of the EFI memory map. */
static void
foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return;
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		cb(p, argp);
	}
}

/*
 * Handle the EFI memory map list.
 *
 * We will make two passes at this, the first (exclude == false) to populate
 * physmem with valid physical memory ranges from recognized map entry types.
 * In the second pass we will exclude memory ranges from physmem which must not
 * be used for general allocations, either because they are used by runtime
 * firmware or otherwise reserved.
 *
 * Adding the runtime-reserved memory ranges to physmem and excluding them
 * later ensures that they are included in the DMAP, but excluded from
 * phys_avail[].
 *
 * Entry types not explicitly listed here are ignored and not mapped.
 */
static void
handle_efi_map_entry(struct efi_md *p, void *argp)
{
	bool exclude = *(bool *)argp;

	switch (p->md_type) {
	case EFI_MD_TYPE_RECLAIM:
		/*
		 * The recommended location for ACPI tables. Map into the
		 * DMAP so we can access them from userspace via /dev/mem.
		 */
	case EFI_MD_TYPE_RT_CODE:
		/*
		 * Some UEFI implementations put the system table in the
		 * runtime code section. Include it in the DMAP, but will
		 * be excluded from phys_avail.
		 */
	case EFI_MD_TYPE_RT_DATA:
		/*
		 * Runtime data will be excluded after the DMAP
		 * region is created to stop it from being added
		 * to phys_avail.
		 */
		if (exclude) {
			physmem_exclude_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
			break;
		}
		/* FALLTHROUGH */
	case EFI_MD_TYPE_CODE:
	case EFI_MD_TYPE_DATA:
	case EFI_MD_TYPE_BS_CODE:
	case EFI_MD_TYPE_BS_DATA:
	case EFI_MD_TYPE_FREE:
		/*
		 * We're allowed to use any entry with these types.
		 */
		if (!exclude)
			physmem_hardware_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE);
		break;
	default:
		/* Other types shall not be handled by physmem. */
		break;
	}
}

/* First pass over the EFI map: register usable/runtime regions. */
static void
add_efi_map_entries(struct efi_map_header *efihdr)
{
	bool exclude = false;
	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
}

/* Second pass over the EFI map: pull runtime regions out of phys_avail. */
static void
exclude_efi_map_entries(struct efi_map_header *efihdr)
{
	bool exclude = true;
	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
}

/* Pretty-print a single EFI map descriptor (bootverbose only). */
static void
print_efi_map_entry(struct efi_md *p, void *argp __unused)
{
	const char *type;
	static const char *types[] = {
		"Reserved",
		"LoaderCode",
		"LoaderData",
		"BootServicesCode",
		"BootServicesData",
		"RuntimeServicesCode",
		"RuntimeServicesData",
		"ConventionalMemory",
		"UnusableMemory",
		"ACPIReclaimMemory",
		"ACPIMemoryNVS",
		"MemoryMappedIO",
		"MemoryMappedIOPortSpace",
		"PalCode",
		"PersistentMemory"
	};

	if (p->md_type < nitems(types))
		type = types[p->md_type];
	else
		type = "<INVALID>";
	printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
	    p->md_virt, p->md_pages);
	if (p->md_attr & EFI_MD_ATTR_UC)
		printf("UC ");
	if (p->md_attr & EFI_MD_ATTR_WC)
		printf("WC ");
	if (p->md_attr & EFI_MD_ATTR_WT)
		printf("WT ");
	if (p->md_attr & EFI_MD_ATTR_WB)
		printf("WB ");
	if (p->md_attr & EFI_MD_ATTR_UCE)
		printf("UCE ");
	if (p->md_attr & EFI_MD_ATTR_WP)
		printf("WP ");
	if (p->md_attr & EFI_MD_ATTR_RP)
		printf("RP ");
	if (p->md_attr & EFI_MD_ATTR_XP)
		printf("XP ");
	if (p->md_attr & EFI_MD_ATTR_NV)
		printf("NV ");
	if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
		printf("MORE_RELIABLE ");
	if (p->md_attr & EFI_MD_ATTR_RO)
		printf("RO ");
	if (p->md_attr & EFI_MD_ATTR_RT)
		printf("RUNTIME");
	printf("\n");
}

static void
print_efi_map_entries(struct efi_map_header *efihdr)
{

	printf("%23s %12s %12s %8s %4s\n",
	    "Type", "Physical", "Virtual", "#Pages", "Attr");
	foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
}

/*
 * Map the passed in VA in EFI space to a void * using the efi memory table to
 * find the PA and return it in the DMAP, if it exists. We're used between the
 * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
 * tables. We assume that either the entry you are mapping fits within its
 * page, or if it spills to the next page, that's contiguous in PA and in the
 * DMAP. All observed tables obey the first part of this precondition.
 */
struct early_map_data
{
	vm_offset_t va;
	vm_offset_t pa;
};

/* Callback: resolve emdp->va to a PA using a runtime map entry. */
static void
efi_early_map_entry(struct efi_md *p, void *argp)
{
	struct early_map_data *emdp = argp;
	vm_offset_t s, e;

	if (emdp->pa != 0)
		return;
	if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
		return;
	s = p->md_virt;
	e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
	if (emdp->va < s || emdp->va >= e)
		return;
	emdp->pa = p->md_phys + (emdp->va - p->md_virt);
}

static void *
efi_early_map(vm_offset_t va)
{
	struct early_map_data emd = { .va = va };

	foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
	if (emd.pa == 0)
		return NULL;
	return (void *)PHYS_TO_DMAP(emd.pa);
}


/*
 * When booted via kboot, the prior kernel will pass in reserved memory areas in
 * a EFI config table. We need to find that table and walk through it excluding
 * the memory ranges in it. btw, this is called too early for the printf to do
 * anything since msgbufp isn't initialized, let alone a console...
 */
static void
exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
{
	struct efi_systbl *systbl;
	struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;

	systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
	if (systbl == NULL) {
		printf("can't map systbl\n");
		return;
	}
	if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
		printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
		return;
	}

	/*
	 * We don't yet have the pmap system booted enough to create a pmap for
	 * the efi firmware's preferred address space from the GetMemoryMap()
	 * table. The st_cfgtbl is a VA in this space, so we need to do the
	 * mapping ourselves to a kernel VA with efi_early_map. We assume that
	 * the cfgtbl entries don't span a page. Other pointers are PAs, as
	 * noted below.
697 */ 698 if (systbl->st_cfgtbl == 0) /* Failsafe st_entries should == 0 in this case */ 699 return; 700 for (int i = 0; i < systbl->st_entries; i++) { 701 struct efi_cfgtbl *cfgtbl; 702 struct linux_efi_memreserve *mr; 703 704 cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl)); 705 if (cfgtbl == NULL) 706 panic("Can't map the config table entry %d\n", i); 707 if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0) 708 continue; 709 710 /* 711 * cfgtbl points are either VA or PA, depending on the GUID of 712 * the table. memreserve GUID pointers are PA and not converted 713 * after a SetVirtualAddressMap(). The list's mr_next pointer 714 * is also a PA. 715 */ 716 mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP( 717 (vm_offset_t)cfgtbl->ct_data); 718 while (true) { 719 for (int j = 0; j < mr->mr_count; j++) { 720 struct linux_efi_memreserve_entry *mre; 721 722 mre = &mr->mr_entry[j]; 723 physmem_exclude_region(mre->mre_base, mre->mre_size, 724 EXFLAG_NODUMP | EXFLAG_NOALLOC); 725 } 726 if (mr->mr_next == 0) 727 break; 728 mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next); 729 }; 730 } 731 732 } 733 734 #ifdef FDT 735 static void 736 try_load_dtb(caddr_t kmdp) 737 { 738 vm_offset_t dtbp; 739 740 dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); 741 #if defined(FDT_DTB_STATIC) 742 /* 743 * In case the device tree blob was not retrieved (from metadata) try 744 * to use the statically embedded one. 
745 */ 746 if (dtbp == 0) 747 dtbp = (vm_offset_t)&fdt_static_dtb; 748 #endif 749 750 if (dtbp == (vm_offset_t)NULL) { 751 #ifndef TSLOG 752 printf("ERROR loading DTB\n"); 753 #endif 754 return; 755 } 756 757 if (OF_install(OFW_FDT, 0) == FALSE) 758 panic("Cannot install FDT"); 759 760 if (OF_init((void *)dtbp) != 0) 761 panic("OF_init failed with the found device tree"); 762 763 parse_fdt_bootargs(); 764 } 765 #endif 766 767 static bool 768 bus_probe(void) 769 { 770 bool has_acpi, has_fdt; 771 char *order, *env; 772 773 has_acpi = has_fdt = false; 774 775 #ifdef FDT 776 has_fdt = (OF_peer(0) != 0); 777 #endif 778 #ifdef DEV_ACPI 779 has_acpi = (AcpiOsGetRootPointer() != 0); 780 #endif 781 782 env = kern_getenv("kern.cfg.order"); 783 if (env != NULL) { 784 order = env; 785 while (order != NULL) { 786 if (has_acpi && 787 strncmp(order, "acpi", 4) == 0 && 788 (order[4] == ',' || order[4] == '\0')) { 789 arm64_bus_method = ARM64_BUS_ACPI; 790 break; 791 } 792 if (has_fdt && 793 strncmp(order, "fdt", 3) == 0 && 794 (order[3] == ',' || order[3] == '\0')) { 795 arm64_bus_method = ARM64_BUS_FDT; 796 break; 797 } 798 order = strchr(order, ','); 799 if (order != NULL) 800 order++; /* Skip comma */ 801 } 802 freeenv(env); 803 804 /* If we set the bus method it is valid */ 805 if (arm64_bus_method != ARM64_BUS_NONE) 806 return (true); 807 } 808 /* If no order or an invalid order was set use the default */ 809 if (arm64_bus_method == ARM64_BUS_NONE) { 810 if (has_fdt) 811 arm64_bus_method = ARM64_BUS_FDT; 812 else if (has_acpi) 813 arm64_bus_method = ARM64_BUS_ACPI; 814 } 815 816 /* 817 * If no option was set the default is valid, otherwise we are 818 * setting one to get cninit() working, then calling panic to tell 819 * the user about the invalid bus setup. 
 */
	return (env == NULL);
}

/* Detect cache geometry and pick the dc zva based pagezero when permitted. */
static void
cache_setup(void)
{
	int dczva_line_shift;
	uint32_t dczid_el0;

	identify_cache(READ_SPECIALREG(ctr_el0));

	dczid_el0 = READ_SPECIALREG(dczid_el0);

	/* Check if dc zva is not prohibited */
	if (dczid_el0 & DCZID_DZP)
		dczva_line_size = 0;
	else {
		/* Same as with above calculations */
		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
		dczva_line_size = sizeof(int) << dczva_line_shift;

		/* Change pagezero function */
		pagezero = pagezero_cache;
	}
}

/*
 * Select the VM_MEMATTR_* memory attribute for a physical address, based on
 * the EFI memory map when one was provided by the loader.
 */
int
memory_mapping_mode(vm_paddr_t pa)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	if (efihdr == NULL)
		return (VM_MEMATTR_WRITE_BACK);

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return (VM_MEMATTR_WRITE_BACK);
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (pa < p->md_phys ||
		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
			continue;
		if (p->md_type == EFI_MD_TYPE_IOMEM ||
		    p->md_type == EFI_MD_TYPE_IOPORT)
			return (VM_MEMATTR_DEVICE);
		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
		    p->md_type == EFI_MD_TYPE_RECLAIM)
			return (VM_MEMATTR_WRITE_BACK);
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			return (VM_MEMATTR_WRITE_THROUGH);
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			return (VM_MEMATTR_WRITE_COMBINING);
		break;
	}

	return (VM_MEMATTR_DEVICE);
}

/*
 * Machine-dependent early boot entry point, called from locore.S. Order of
 * operations here is delicate: pcpu/pmap/physmem must be set up before the
 * console, sanitizers and proc0.
 */
void
initarm(struct arm64_bootparams *abp)
{
	struct efi_fb *efifb;
	struct pcpu *pcpup;
	char *env;
#ifdef FDT
	struct mem_region mem_regions[FDT_MEM_REGIONS];
	int mem_regions_sz;
	phandle_t root;
	char dts_version[255];
#endif
	vm_offset_t lastaddr;
	caddr_t kmdp;
	bool valid;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	boot_el = abp->boot_el;
	hcr_el2 = abp->hcr_el2;

	/* Parse loader or FDT boot parameters. Determine last used address. */
	lastaddr = parse_boot_param(abp);

	/* Find the kernel address */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");

	identify_cpu(0);
	identify_hypervisor_smbios();

	update_special_regs(0);

	/* Set the pcpu data, this is needed by pmap_bootstrap */
	pcpup = &pcpu0;
	pcpu_init(pcpup, 0, sizeof(struct pcpu));

	/*
	 * Set the pcpu pointer with a backup in tpidr_el1 to be
	 * loaded when entering the kernel from userland.
	 */
	__asm __volatile(
	    "mov x18, %0 \n"
	    "msr tpidr_el1, %0" :: "r"(pcpup));

	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
	PCPU_SET(curthread, &thread0);
	PCPU_SET(midr, get_midr());

	link_elf_ireloc(kmdp);
#ifdef FDT
	try_load_dtb(kmdp);
#endif

	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	/* Load the physical memory ranges */
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr != NULL)
		add_efi_map_entries(efihdr);
#ifdef FDT
	else {
		/* Grab physical memory regions information from device tree. */
		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
		    NULL) != 0)
			panic("Cannot get physical memory regions");
		physmem_hardware_regions(mem_regions, mem_regions_sz);
	}
	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
		physmem_exclude_regions(mem_regions, mem_regions_sz,
		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
#endif

	/* Exclude the EFI framebuffer from our view of physical memory. */
	efifb = (struct efi_fb *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_FB);
	if (efifb != NULL)
		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
		    EXFLAG_NOALLOC);

	/* Do basic tuning, hz etc */
	init_param1();

	cache_setup();
	pan_setup();

	/* Bootstrap enough of pmap to enter the kernel proper */
	pmap_bootstrap(lastaddr - KERNBASE);
	/* Exclude entries needed in the DMAP region, but not phys_avail */
	if (efihdr != NULL)
		exclude_efi_map_entries(efihdr);
	/* Do the same for reserve entries in the EFI MEMRESERVE table */
	if (efi_systbl_phys != 0)
		exclude_efi_memreserve(efi_systbl_phys);

	/*
	 * We carefully bootstrap the sanitizer map after we've excluded
	 * absolutely everything else that could impact phys_avail. There's not
	 * always enough room for the initial shadow map after the kernel, so
	 * we'll end up searching for segments that we can safely use. Those
	 * segments also get excluded from phys_avail.
	 */
#if defined(KASAN) || defined(KMSAN)
	pmap_bootstrap_san();
#endif

	physmem_init_kernel_globals();

	devmap_bootstrap(0, NULL);

	valid = bus_probe();

	cninit();
	set_ttbr0(abp->kern_ttbr0);
	cpu_tlb_flushID();

	if (!valid)
		panic("Invalid bus configuration: %s",
		    kern_getenv("kern.cfg.order"));

	/*
	 * Check if pointer authentication is available on this system, and
	 * if so enable its use. This needs to be called before init_proc0
	 * as that will configure the thread0 pointer authentication keys.
	 */
	ptrauth_init();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

	init_proc0(abp->kern_stack);
	msgbufinit(msgbufp, msgbufsize);
	mutex_init();
	init_param2(physmem);

	dbg_init();
	kdb_init();
#ifdef KDB
	if ((boothowto & RB_KDB) != 0)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
	pan_enable();

	kcsan_cpu_init(0);
	kasan_init();
	kmsan_init();

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	if (arm64_bus_method == ARM64_BUS_FDT) {
		root = OF_finddevice("/");
		if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
				printf("WARNING: DTB version is %s while kernel expects %s, "
				    "please update the DTB in the ESP\n",
				    dts_version,
				    LINUX_DTS_VERSION);
		} else {
			printf("WARNING: Cannot find freebsd,dts-version property, "
			    "cannot check DTB compliance\n");
		}
	}
#endif

	if (boothowto & RB_VERBOSE) {
		if (efihdr != NULL)
			print_efi_map_entries(efihdr);
		physmem_print_tables();
	}

	early_boot = 0;

	if (bootverbose && kstack_pages != KSTACK_PAGES)
		printf("kern.kstack_pages = %d ignored for thread0\n",
		    kstack_pages);

	TSEXIT();
}

void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here. */
}

#ifdef DDB
#include <ddb/ddb.h>

/* Dump all readable AArch64 special registers from the debugger. */
DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
#define	PRINT_REG(reg)	\
    db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}

/* Translate a virtual address via the AT instructions from the debugger. */
DB_SHOW_COMMAND(vtop, db_show_vtop)
{
	uint64_t phys;

	if (have_addr) {
		phys = arm64_address_translate_s1e1r(addr);
		db_printf("EL1 physical address reg (read): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e1w(addr);
		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0r(addr);
		db_printf("EL0 physical address reg (read): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0w(addr);
		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
	} else
		db_printf("show vtop <virt_addr>\n");
}
#endif