1 /*- 2 * Copyright (c) 2014 Andrew Turner 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
 *
 */

#include "opt_acpi.h"
#include "opt_kstack_pages.h"
#include "opt_platform.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/asan.h>
#include <sys/buf.h>
#include <sys/bus.h>
#include <sys/cons.h>
#include <sys/cpu.h>
#include <sys/csan.h>
#include <sys/devmap.h>
#include <sys/efi.h>
#include <sys/exec.h>
#include <sys/imgact.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/msan.h>
#include <sys/msgbuf.h>
#include <sys/pcpu.h>
#include <sys/physmem.h>
#include <sys/proc.h>
#include <sys/ptrace.h>
#include <sys/reboot.h>
#include <sys/reg.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/ucontext.h>
#include <sys/vdso.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/debug_monitor.h>
#include <machine/hypervisor.h>
#include <machine/kdb.h>
#include <machine/machdep.h>
#include <machine/metadata.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/undefined.h>
#include <machine/vmparam.h>

#ifdef VFP
#include <machine/vfp.h>
#endif

#ifdef DEV_ACPI
#include <contrib/dev/acpica/include/acpi.h>
#include <machine/acpica_machdep.h>
#endif

#ifdef FDT
#include <dev/fdt/fdt_common.h>
#include <dev/ofw/openfirm.h>
#endif

#include <dev/smbios/smbios.h>

/*
 * The pcb layout is consumed outside this file (e.g. by assembly offsets);
 * these asserts catch accidental layout changes at compile time.
 */
_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
    "pcb_fpusaved changed offset");
_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
    "pcb_fpustate changed offset");

/* Bus enumeration method (FDT or ACPI); chosen by bus_probe() below. */
enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;

/*
 * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
 * could relocate this, but will need to keep the same virtual address as
 * it's referenced by the EARLY_COUNTER macro.
 */
struct pcpu pcpu0;

#if defined(PERTHREAD_SSP)
/*
 * The boot SSP canary. Will be replaced with a per-thread canary when
 * scheduling has started.
 */
uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
#endif

/* Trapframe for thread0; installed as td_frame in init_proc0(). */
static struct trapframe proc0_tf;

int early_boot = 1;
int cold = 1;
/* Exception level we booted at, and the EL2 config; saved in initarm(). */
static int boot_el;
static uint64_t hcr_el2;

struct kva_md_info kmi;

int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
int has_pan;

#if defined(SOCDEV_PA)
/*
 * This is the virtual address used to access SOCDEV_PA. As it's set before
 * .bss is cleared we need to ensure it's preserved. To do this use
 * __read_mostly as it's only ever set once but read in the putc functions.
 */
uintptr_t socdev_va __read_mostly;
#endif

/*
 * Physical address of the EFI System Table. Stashed from the metadata hints
 * passed into the kernel and used by the EFI code to call runtime services.
 */
vm_paddr_t efi_systbl_phys;
/* Cached pointer to the loader-provided EFI memory map metadata. */
static struct efi_map_header *efihdr;

/* pagezero_* implementations are provided in support.S */
void pagezero_simple(void *);
void pagezero_cache(void *);

/* pagezero_simple is default pagezero */
void (*pagezero)(void *p) = pagezero_simple;

int (*apei_nmi)(void);

#if defined(PERTHREAD_SSP_WARNING)
static void
print_ssp_warning(void *data __unused)
{
	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
/* Print the warning both very early and very late so it is hard to miss. */
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif

/* Detect whether the CPU implements Privileged Access Never (PAN). */
static void
pan_setup(void)
{
	uint64_t id_aa64mfr1;

	id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
	if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE)
		has_pan = 1;
}

void
pan_enable(void)
{

	/*
	 * The LLVM integrated assembler doesn't understand the PAN
	 * PSTATE field. Because of this we need to manually create
	 * the instruction in an asm block. This is equivalent to:
	 * msr pan, #1
	 *
	 * This sets the PAN bit, stopping the kernel from accessing
	 * memory when userspace can also access it unless the kernel
	 * uses the userspace load/store instructions.
	 */
	if (has_pan) {
		WRITE_SPECIALREG(sctlr_el1,
		    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
		__asm __volatile(".inst 0xd500409f | (0x1 << 8)");
	}
}

/* True if we were booted at EL2 with the hypervisor extensions usable. */
bool
has_hyp(void)
{

	/*
	 * XXX The E2H check is wrong, but it's close enough for now. Needs to
	 * be re-evaluated once we're running regularly in EL2.
	 */
	return (boot_el == 2 && (hcr_el2 & HCR_E2H) == 0);
}

/* Report memory, finish VM bootstrap and install CPU errata workarounds. */
static void
cpu_startup(void *dummy)
{
	vm_paddr_t size;
	int i;

	printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
	    ptoa((uintmax_t)realmem) / 1024 / 1024);

	if (bootverbose) {
		printf("Physical memory chunk(s):\n");
		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
			size = phys_avail[i + 1] - phys_avail[i];
			printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
			    (uintmax_t)phys_avail[i],
			    (uintmax_t)phys_avail[i + 1] - 1,
			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
		}
	}

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)vm_free_count()),
	    ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);

	undef_init();
	install_cpu_errata();

	vm_ksubmap_init(&kmi);
	bufinit();
	vm_pager_bufferinit();
}

SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);

static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);

/* WFI wakes on interrupt by itself; no explicit IPI is required. */
int
cpu_idle_wakeup(int cpu)
{

	return (0);
}

void
cpu_idle(int busy)
{

	spinlock_enter();
	if (!busy)
		cpu_idleclock();
	if (!sched_runnable())
		__asm __volatile(
		    "dsb sy \n"
		    "wfi \n");
	if (!busy)
		cpu_activeclock();
	spinlock_exit();
}

void
cpu_halt(void)
{

	/* We should have shutdown by now, if not enter a low power sleep */
	intr_disable();
	while (1) {
		__asm __volatile("wfi");
	}
}

/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{

	/* ARM64TODO TBD */
}

/* Get current clock frequency for the given CPU ID.
 */
int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{
	struct pcpu *pc;

	pc = pcpu_find(cpu_id);
	if (pc == NULL || rate == NULL)
		return (EINVAL);

	/* The per-CPU clock is filled in elsewhere; 0 means "unknown". */
	if (pc->pc_clock == 0)
		return (EOPNOTSUPP);

	*rate = pc->pc_clock;
	return (0);
}

/* MD per-CPU init: mark the ACPI id and MPIDR as not-yet-known. */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
	pcpu->pc_mpidr = UINT64_MAX;
}

/*
 * Enter a spinlock section: on first entry disable interrupts, saving the
 * DAIF state to restore on the matching final spinlock_exit().
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		daif = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_daif = daif;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}

void
spinlock_exit(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	daif = td->td_md.md_saved_daif;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(daif);
	}
}

/*
 * Construct a PCB from a trapframe. This is called from kdb_trap() where
 * we want to start a backtrace from the function that caused us to enter
 * the debugger. We have the context in the trapframe, but base the trace
 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 * enough for a backtrace.
 */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{
	int i;

	/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
	for (i = 0; i < nitems(pcb->pcb_x); i++) {
		if (i == PCB_LR)
			pcb->pcb_x[i] = tf->tf_elr;
		else
			pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
	}

	pcb->pcb_sp = tf->tf_sp;
}

/* Wire up proc0/thread0: kernel stack, PCB at the stack top, trapframe. */
static void
init_proc0(vm_offset_t kstack)
{
	struct pcpu *pcpup;

	pcpup = cpuid_to_pcpu[0];
	MPASS(pcpup != NULL);

	proc_linkup0(&proc0, &thread0);
	thread0.td_kstack = kstack;
	thread0.td_kstack_pages = KSTACK_PAGES;
#if defined(PERTHREAD_SSP)
	thread0.td_md.md_canary = boot_canary;
#endif
	/* The PCB lives at the top of the kernel stack. */
	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_fpflags = 0;
	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
	thread0.td_frame = &proc0_tf;
	ptrauth_thread0(&thread0);
	pcpup->pc_curpcb = thread0.td_pcb;

	/*
	 * Unmask SError exceptions. They are used to signal a RAS failure,
	 * or other hardware error.
	 */
	serror_enable();
}

/*
 * Get an address to be used to write to kernel data that may be mapped
 * read-only, e.g. to patch kernel code.
 */
bool
arm64_get_writable_addr(vm_offset_t addr, vm_offset_t *out)
{
	vm_paddr_t pa;

	/* Check if the page is writable */
	if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
		*out = addr;
		return (true);
	}

	/*
	 * Find the physical address of the given page.
	 */
	if (!pmap_klookup(addr, &pa)) {
		return (false);
	}

	/*
	 * If it is within the DMAP region and is writable use that.
	 */
	if (PHYS_IN_DMAP(pa)) {
		addr = PHYS_TO_DMAP(pa);
		if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
			*out = addr;
			return (true);
		}
	}

	return (false);
}

typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);

/* Invoke cb(entry, argp) on every descriptor in the EFI memory map. */
static void
foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	/* Guard against a malformed header before dividing by it. */
	if (efihdr->descriptor_size == 0)
		return;
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		cb(p, argp);
	}
}

/*
 * Handle the EFI memory map list.
 *
 * We will make two passes at this, the first (exclude == false) to populate
 * physmem with valid physical memory ranges from recognized map entry types.
 * In the second pass we will exclude memory ranges from physmem which must not
 * be used for general allocations, either because they are used by runtime
 * firmware or otherwise reserved.
 *
 * Adding the runtime-reserved memory ranges to physmem and excluding them
 * later ensures that they are included in the DMAP, but excluded from
 * phys_avail[].
 *
 * Entry types not explicitly listed here are ignored and not mapped.
 */
static void
handle_efi_map_entry(struct efi_md *p, void *argp)
{
	bool exclude = *(bool *)argp;

	switch (p->md_type) {
	case EFI_MD_TYPE_RECLAIM:
		/*
		 * The recommended location for ACPI tables. Map into the
		 * DMAP so we can access them from userspace via /dev/mem.
		 */
	case EFI_MD_TYPE_RT_CODE:
		/*
		 * Some UEFI implementations put the system table in the
		 * runtime code section. Include it in the DMAP, but will
		 * be excluded from phys_avail.
		 */
	case EFI_MD_TYPE_RT_DATA:
		/*
		 * Runtime data will be excluded after the DMAP
		 * region is created to stop it from being added
		 * to phys_avail.
		 */
		if (exclude) {
			physmem_exclude_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
			break;
		}
		/* FALLTHROUGH */
	case EFI_MD_TYPE_CODE:
	case EFI_MD_TYPE_DATA:
	case EFI_MD_TYPE_BS_CODE:
	case EFI_MD_TYPE_BS_DATA:
	case EFI_MD_TYPE_FREE:
		/*
		 * We're allowed to use any entry with these types.
		 */
		if (!exclude)
			physmem_hardware_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE);
		break;
	default:
		/* Other types shall not be handled by physmem. */
		break;
	}
}

/* First pass: add usable and runtime ranges to physmem. */
static void
add_efi_map_entries(struct efi_map_header *efihdr)
{
	bool exclude = false;
	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
}

/* Second pass: carve runtime-reserved ranges back out of phys_avail. */
static void
exclude_efi_map_entries(struct efi_map_header *efihdr)
{
	bool exclude = true;
	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
}

/* Print one EFI map descriptor: type name, phys, virt, pages, attributes. */
static void
print_efi_map_entry(struct efi_md *p, void *argp __unused)
{
	const char *type;
	static const char *types[] = {
		"Reserved",
		"LoaderCode",
		"LoaderData",
		"BootServicesCode",
		"BootServicesData",
		"RuntimeServicesCode",
		"RuntimeServicesData",
		"ConventionalMemory",
		"UnusableMemory",
		"ACPIReclaimMemory",
		"ACPIMemoryNVS",
		"MemoryMappedIO",
		"MemoryMappedIOPortSpace",
		"PalCode",
		"PersistentMemory"
	};

	if (p->md_type < nitems(types))
		type = types[p->md_type];
	else
		type = "<INVALID>";
	printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
	    p->md_virt, p->md_pages);
	if (p->md_attr &
	    EFI_MD_ATTR_UC)
		printf("UC ");
	if (p->md_attr & EFI_MD_ATTR_WC)
		printf("WC ");
	if (p->md_attr & EFI_MD_ATTR_WT)
		printf("WT ");
	if (p->md_attr & EFI_MD_ATTR_WB)
		printf("WB ");
	if (p->md_attr & EFI_MD_ATTR_UCE)
		printf("UCE ");
	if (p->md_attr & EFI_MD_ATTR_WP)
		printf("WP ");
	if (p->md_attr & EFI_MD_ATTR_RP)
		printf("RP ");
	if (p->md_attr & EFI_MD_ATTR_XP)
		printf("XP ");
	if (p->md_attr & EFI_MD_ATTR_NV)
		printf("NV ");
	if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
		printf("MORE_RELIABLE ");
	if (p->md_attr & EFI_MD_ATTR_RO)
		printf("RO ");
	if (p->md_attr & EFI_MD_ATTR_RT)
		printf("RUNTIME");
	printf("\n");
}

/* Dump the whole EFI memory map; used under boot -v. */
static void
print_efi_map_entries(struct efi_map_header *efihdr)
{

	printf("%23s %12s %12s %8s %4s\n",
	    "Type", "Physical", "Virtual", "#Pages", "Attr");
	foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
}

/*
 * Map the passed in VA in EFI space to a void * using the efi memory table to
 * find the PA and return it in the DMAP, if it exists. We're used between the
 * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
 * tables. We assume that either the entry you are mapping fits within its page,
 * or if it spills to the next page, that's contiguous in PA and in the DMAP.
 * All observed tables obey the first part of this precondition.
 */
struct early_map_data
{
	vm_offset_t va;		/* in: VA to look up */
	vm_offset_t pa;		/* out: backing PA, 0 until found */
};

/* Callback: record the PA backing emdp->va when this RT entry covers it. */
static void
efi_early_map_entry(struct efi_md *p, void *argp)
{
	struct early_map_data *emdp = argp;
	vm_offset_t s, e;

	if (emdp->pa != 0)
		return;
	if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
		return;
	s = p->md_virt;
	e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
	if (emdp->va < s || emdp->va >= e)
		return;
	emdp->pa = p->md_phys + (emdp->va - p->md_virt);
}

static void *
efi_early_map(vm_offset_t va)
{
	struct early_map_data emd = { .va = va };

	foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
	if (emd.pa == 0)
		return NULL;
	return (void *)PHYS_TO_DMAP(emd.pa);
}


/*
 * When booted via kboot, the prior kernel will pass in reserved memory areas in
 * a EFI config table. We need to find that table and walk through it excluding
 * the memory ranges in it. btw, this is called too early for the printf to do
 * anything since msgbufp isn't initialized, let alone a console...
 */
static void
exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
{
	struct efi_systbl *systbl;
	struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;

	systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
	if (systbl == NULL) {
		printf("can't map systbl\n");
		return;
	}
	if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
		printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
		return;
	}

	/*
	 * We don't yet have the pmap system booted enough to create a pmap for
	 * the efi firmware's preferred address space from the GetMemoryMap()
	 * table. The st_cfgtbl is a VA in this space, so we need to do the
	 * mapping ourselves to a kernel VA with efi_early_map. We assume that
	 * the cfgtbl entries don't span a page. Other pointers are PAs, as
	 * noted below.
	 */
	if (systbl->st_cfgtbl == 0)	/* Failsafe st_entries should == 0 in this case */
		return;
	for (int i = 0; i < systbl->st_entries; i++) {
		struct efi_cfgtbl *cfgtbl;
		struct linux_efi_memreserve *mr;

		cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
		if (cfgtbl == NULL)
			panic("Can't map the config table entry %d\n", i);
		if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0)
			continue;

		/*
		 * cfgtbl pointers are either VA or PA, depending on the GUID of
		 * the table. memreserve GUID pointers are PA and not converted
		 * after a SetVirtualAddressMap(). The list's mr_next pointer
		 * is also a PA.
		 */
		mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
		    (vm_offset_t)cfgtbl->ct_data);
		/* Walk the PA-linked list of memreserve blocks. */
		while (true) {
			for (int j = 0; j < mr->mr_count; j++) {
				struct linux_efi_memreserve_entry *mre;

				mre = &mr->mr_entry[j];
				physmem_exclude_region(mre->mre_base, mre->mre_size,
				    EXFLAG_NODUMP | EXFLAG_NOALLOC);
			}
			if (mr->mr_next == 0)
				break;
			mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
		};
	}

}

#ifdef FDT
/* Find the DTB (loader metadata or static blob) and bring up OFW on it. */
static void
try_load_dtb(caddr_t kmdp)
{
	vm_offset_t dtbp;

	dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
	/*
	 * In case the device tree blob was not retrieved (from metadata) try
	 * to use the statically embedded one.
	 */
	if (dtbp == 0)
		dtbp = (vm_offset_t)&fdt_static_dtb;
#endif

	if (dtbp == (vm_offset_t)NULL) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (OF_install(OFW_FDT, 0) == FALSE)
		panic("Cannot install FDT");

	if (OF_init((void *)dtbp) != 0)
		panic("OF_init failed with the found device tree");

	parse_fdt_bootargs();
}
#endif

/*
 * Pick the bus enumeration method (FDT or ACPI), honouring the
 * kern.cfg.order environment variable when it names an available method.
 */
static bool
bus_probe(void)
{
	bool has_acpi, has_fdt;
	char *order, *env;

	has_acpi = has_fdt = false;

#ifdef FDT
	has_fdt = (OF_peer(0) != 0);
#endif
#ifdef DEV_ACPI
	has_acpi = (AcpiOsGetRootPointer() != 0);
#endif

	env = kern_getenv("kern.cfg.order");
	if (env != NULL) {
		/* Walk the comma-separated list, first available method wins. */
		order = env;
		while (order != NULL) {
			if (has_acpi &&
			    strncmp(order, "acpi", 4) == 0 &&
			    (order[4] == ',' || order[4] == '\0')) {
				arm64_bus_method = ARM64_BUS_ACPI;
				break;
			}
			if (has_fdt &&
			    strncmp(order, "fdt", 3) == 0 &&
			    (order[3] == ',' || order[3] == '\0')) {
				arm64_bus_method = ARM64_BUS_FDT;
				break;
			}
			order = strchr(order, ',');
			if (order != NULL)
				order++;	/* Skip comma */
		}
		freeenv(env);

		/* If we set the bus method it is valid */
		if (arm64_bus_method != ARM64_BUS_NONE)
			return (true);
	}
	/* If no order or an invalid order was set use the default */
	if (arm64_bus_method == ARM64_BUS_NONE) {
		if (has_fdt)
			arm64_bus_method = ARM64_BUS_FDT;
		else if (has_acpi)
			arm64_bus_method = ARM64_BUS_ACPI;
	}

	/*
	 * If no option was set the default is valid, otherwise we are
	 * setting one to get cninit() working, then calling panic to tell
	 * the user about the invalid bus setup.
	 */
	return (env == NULL);
}

/* Probe cache geometry and switch pagezero to the "dc zva" variant if legal. */
static void
cache_setup(void)
{
	int dczva_line_shift;
	uint32_t dczid_el0;

	identify_cache(READ_SPECIALREG(ctr_el0));

	dczid_el0 = READ_SPECIALREG(dczid_el0);

	/* Check if dc zva is not prohibited */
	if (dczid_el0 & DCZID_DZP)
		dczva_line_size = 0;
	else {
		/* Same as with above calculations */
		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
		dczva_line_size = sizeof(int) << dczva_line_shift;

		/* Change pagezero function */
		pagezero = pagezero_cache;
	}
}

/*
 * Derive the VM memory attribute for a physical address from the EFI map:
 * device for I/O ranges, write-back/-through/-combining per entry attributes,
 * and device for anything not found in the map.
 */
int
memory_mapping_mode(vm_paddr_t pa)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	if (efihdr == NULL)
		return (VM_MEMATTR_WRITE_BACK);

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return (VM_MEMATTR_WRITE_BACK);
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (pa < p->md_phys ||
		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
			continue;
		if (p->md_type == EFI_MD_TYPE_IOMEM ||
		    p->md_type == EFI_MD_TYPE_IOPORT)
			return (VM_MEMATTR_DEVICE);
		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
		    p->md_type == EFI_MD_TYPE_RECLAIM)
			return (VM_MEMATTR_WRITE_BACK);
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			return (VM_MEMATTR_WRITE_THROUGH);
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			return (VM_MEMATTR_WRITE_COMBINING);
		break;
	}

	return (VM_MEMATTR_DEVICE);
}

/*
 * Machine-dependent boot entry point, called from locore.S once the MMU is
 * on. Ordering here is delicate; see the inline comments.
 */
void
initarm(struct arm64_bootparams *abp)
{
	struct efi_fb *efifb;
	struct pcpu *pcpup;
	char *env;
#ifdef FDT
	struct mem_region mem_regions[FDT_MEM_REGIONS];
	int mem_regions_sz;
	phandle_t root;
	char dts_version[255];
#endif
	vm_offset_t lastaddr;
	caddr_t kmdp;
	bool valid;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	boot_el = abp->boot_el;
	hcr_el2 = abp->hcr_el2;

	/* Parse loader or FDT boot parameters. Determine last used address. */
	lastaddr = parse_boot_param(abp);

	/* Find the kernel address */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");

	identify_cpu(0);
	identify_hypervisor_smbios();

	update_special_regs(0);

	link_elf_ireloc(kmdp);
#ifdef FDT
	try_load_dtb(kmdp);
#endif

	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	/* Load the physical memory ranges */
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr != NULL)
		add_efi_map_entries(efihdr);
#ifdef FDT
	else {
		/* Grab physical memory regions information from device tree. */
		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
		    NULL) != 0)
			panic("Cannot get physical memory regions");
		physmem_hardware_regions(mem_regions, mem_regions_sz);
	}
	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
		physmem_exclude_regions(mem_regions, mem_regions_sz,
		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
#endif

	/* Exclude the EFI framebuffer from our view of physical memory. */
	efifb = (struct efi_fb *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_FB);
	if (efifb != NULL)
		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
		    EXFLAG_NOALLOC);

	/* Set the pcpu data, this is needed by pmap_bootstrap */
	pcpup = &pcpu0;
	pcpu_init(pcpup, 0, sizeof(struct pcpu));

	/*
	 * Set the pcpu pointer with a backup in tpidr_el1 to be
	 * loaded when entering the kernel from userland.
	 */
	__asm __volatile(
	    "mov x18, %0 \n"
	    "msr tpidr_el1, %0" :: "r"(pcpup));

	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
	PCPU_SET(curthread, &thread0);
	PCPU_SET(midr, get_midr());

	/* Do basic tuning, hz etc */
	init_param1();

	cache_setup();
	pan_setup();

	/* Bootstrap enough of pmap to enter the kernel proper */
	pmap_bootstrap(lastaddr - KERNBASE);
	/* Exclude entries needed in the DMAP region, but not phys_avail */
	if (efihdr != NULL)
		exclude_efi_map_entries(efihdr);
	/* Do the same for reserve entries in the EFI MEMRESERVE table */
	if (efi_systbl_phys != 0)
		exclude_efi_memreserve(efi_systbl_phys);

	/*
	 * We carefully bootstrap the sanitizer map after we've excluded
	 * absolutely everything else that could impact phys_avail. There's not
	 * always enough room for the initial shadow map after the kernel, so
	 * we'll end up searching for segments that we can safely use. Those
	 * segments also get excluded from phys_avail.
	 */
#if defined(KASAN) || defined(KMSAN)
	pmap_bootstrap_san();
#endif

	physmem_init_kernel_globals();

	devmap_bootstrap(0, NULL);

	valid = bus_probe();

	cninit();
	set_ttbr0(abp->kern_ttbr0);
	cpu_tlb_flushID();

	/* Deferred until after cninit() so the panic message is visible. */
	if (!valid)
		panic("Invalid bus configuration: %s",
		    kern_getenv("kern.cfg.order"));

	/*
	 * Check if pointer authentication is available on this system, and
	 * if so enable its use. This needs to be called before init_proc0
	 * as that will configure the thread0 pointer authentication keys.
	 */
	ptrauth_init();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

	init_proc0(abp->kern_stack);
	msgbufinit(msgbufp, msgbufsize);
	mutex_init();
	init_param2(physmem);

	dbg_init();
	kdb_init();
#ifdef KDB
	if ((boothowto & RB_KDB) != 0)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
	pan_enable();

	kcsan_cpu_init(0);
	kasan_init();
	kmsan_init();

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	if (arm64_bus_method == ARM64_BUS_FDT) {
		root = OF_finddevice("/");
		if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
				printf("WARNING: DTB version is %s while kernel expects %s, "
				    "please update the DTB in the ESP\n",
				    dts_version,
				    LINUX_DTS_VERSION);
		} else {
			printf("WARNING: Cannot find freebsd,dts-version property, "
			    "cannot check DTB compliance\n");
		}
	}
#endif

	if (boothowto & RB_VERBOSE) {
		if (efihdr != NULL)
			print_efi_map_entries(efihdr);
		physmem_print_tables();
	}

	early_boot = 0;

	if (bootverbose && kstack_pages != KSTACK_PAGES)
		printf("kern.kstack_pages = %d ignored for thread0\n",
		    kstack_pages);

	TSEXIT();
}

void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here.
 */
}

#ifdef DDB
#include <ddb/ddb.h>

/* DDB "show specialregs": dump a selection of arm64 system registers. */
DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
#define	PRINT_REG(reg)	\
    db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}

/* DDB "show vtop <va>": run the AT translations and print the PAR results. */
DB_SHOW_COMMAND(vtop, db_show_vtop)
{
	uint64_t phys;

	if (have_addr) {
		phys = arm64_address_translate_s1e1r(addr);
		db_printf("EL1 physical address reg (read): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e1w(addr);
		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0r(addr);
		db_printf("EL0 physical address reg (read): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0w(addr);
		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
	} else
		db_printf("show vtop <virt_addr>\n");
}
#endif