1 /*-
2 * Copyright (c) 2014 Andrew Turner
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28 #include "opt_acpi.h"
29 #include "opt_kstack_pages.h"
30 #include "opt_platform.h"
31 #include "opt_ddb.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/asan.h>
36 #include <sys/buf.h>
37 #include <sys/bus.h>
38 #include <sys/cons.h>
39 #include <sys/cpu.h>
40 #include <sys/csan.h>
41 #include <sys/devmap.h>
42 #include <sys/efi.h>
43 #include <sys/exec.h>
44 #include <sys/imgact.h>
45 #include <sys/kdb.h>
46 #include <sys/kernel.h>
47 #include <sys/ktr.h>
48 #include <sys/limits.h>
49 #include <sys/linker.h>
50 #include <sys/msan.h>
51 #include <sys/msgbuf.h>
52 #include <sys/pcpu.h>
53 #include <sys/physmem.h>
54 #include <sys/proc.h>
55 #include <sys/ptrace.h>
56 #include <sys/reboot.h>
57 #include <sys/reg.h>
58 #include <sys/rwlock.h>
59 #include <sys/sched.h>
60 #include <sys/signalvar.h>
61 #include <sys/syscallsubr.h>
62 #include <sys/sysent.h>
63 #include <sys/sysproto.h>
64 #include <sys/ucontext.h>
65 #include <sys/vdso.h>
66 #include <sys/vmmeter.h>
67
68 #include <vm/vm.h>
69 #include <vm/vm_param.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_phys.h>
74 #include <vm/pmap.h>
75 #include <vm/vm_map.h>
76 #include <vm/vm_pager.h>
77
78 #include <machine/armreg.h>
79 #include <machine/cpu.h>
80 #include <machine/debug_monitor.h>
81 #include <machine/hypervisor.h>
82 #include <machine/kdb.h>
83 #include <machine/machdep.h>
84 #include <machine/metadata.h>
85 #include <machine/md_var.h>
86 #include <machine/pcb.h>
87 #include <machine/undefined.h>
88 #include <machine/vmparam.h>
89
90 #ifdef VFP
91 #include <machine/vfp.h>
92 #endif
93
94 #ifdef DEV_ACPI
95 #include <contrib/dev/acpica/include/acpi.h>
96 #include <machine/acpica_machdep.h>
97 #endif
98
99 #ifdef FDT
100 #include <dev/fdt/fdt_common.h>
101 #include <dev/ofw/openfirm.h>
102 #endif
103
104 #include <dev/smbios/smbios.h>
105
/*
 * NOTE(review): struct pcb's size and these member offsets appear to be
 * relied on outside this file (e.g. by assembly offsets); verify before
 * changing struct pcb.
 */
_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
    "pcb_fpusaved changed offset");
_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
    "pcb_fpustate changed offset");

/* How the kernel enumerates devices: ACPI, FDT, or not yet decided. */
enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;

/*
 * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
 * could relocate this, but will need to keep the same virtual address as
 * it's referenced by the EARLY_COUNTER macro.
 */
struct pcpu pcpu0;

#if defined(PERTHREAD_SSP)
/*
 * The boot SSP canary. Will be replaced with a per-thread canary when
 * scheduling has started.
 */
uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
#endif

/* Trapframe for thread0; installed as td_frame in init_proc0(). */
static struct trapframe proc0_tf;

int early_boot = 1;		/* Cleared at the end of initarm(). */
int cold = 1;
static int boot_el;		/* EL we booted at, from arm64_bootparams. */
static uint64_t hcr_el2;	/* HCR_EL2 from arm64_bootparams; see has_hyp(). */

struct kva_md_info kmi;

int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
int has_pan;			/* Non-zero when PAN is advertised; see pan_setup(). */

#if defined(SOCDEV_PA)
/*
 * This is the virtual address used to access SOCDEV_PA. As it's set before
 * .bss is cleared we need to ensure it's preserved. To do this use
 * __read_mostly as it's only ever set once but read in the putc functions.
 */
uintptr_t socdev_va __read_mostly;
#endif

/*
 * Physical address of the EFI System Table. Stashed from the metadata hints
 * passed into the kernel and used by the EFI code to call runtime services.
 */
vm_paddr_t efi_systbl_phys;
static struct efi_map_header *efihdr;	/* EFI memory map from module metadata. */

/* pagezero_* implementations are provided in support.S */
void pagezero_simple(void *);
void pagezero_cache(void *);

/* pagezero_simple is default pagezero */
void (*pagezero)(void *p) = pagezero_simple;

int (*apei_nmi)(void);
165
#if defined(PERTHREAD_SSP_WARNING)
/*
 * Warn that the per-thread SSP option is ineffective with this toolchain.
 * Registered twice so the warning appears both with the copyright banner
 * and again at the very end of boot (SI_SUB_LAST) where it is harder to miss.
 */
static void
print_ssp_warning(void *data __unused)
{
	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif
175
176 static void
pan_setup(void)177 pan_setup(void)
178 {
179 uint64_t id_aa64mfr1;
180
181 id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
182 if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE)
183 has_pan = 1;
184 }
185
void
pan_enable(void)
{

	/*
	 * The LLVM integrated assembler doesn't understand the PAN
	 * PSTATE field. Because of this we need to manually create
	 * the instruction in an asm block. This is equivalent to:
	 * msr pan, #1
	 *
	 * This sets the PAN bit, stopping the kernel from accessing
	 * memory when userspace can also access it unless the kernel
	 * uses the userspace load/store instructions.
	 */
	if (has_pan) {
		/*
		 * Clear SCTLR_EL1.SPAN so the hardware sets PSTATE.PAN
		 * automatically on exception entry to EL1.
		 */
		WRITE_SPECIALREG(sctlr_el1,
		    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
		__asm __volatile(".inst 0xd500409f | (0x1 << 8)");
	}
}
206
207 bool
has_hyp(void)208 has_hyp(void)
209 {
210
211 /*
212 * XXX The E2H check is wrong, but it's close enough for now. Needs to
213 * be re-evaluated once we're running regularly in EL2.
214 */
215 return (boot_el == CURRENTEL_EL_EL2 && (hcr_el2 & HCR_E2H) == 0);
216 }
217
218 bool
in_vhe(void)219 in_vhe(void)
220 {
221 /* If we are currently in EL2 then must be in VHE */
222 return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) ==
223 CURRENTEL_EL_EL2);
224 }
225
226 static void
cpu_startup(void * dummy)227 cpu_startup(void *dummy)
228 {
229 vm_paddr_t size;
230 int i;
231
232 printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
233 ptoa((uintmax_t)realmem) / 1024 / 1024);
234
235 if (bootverbose) {
236 printf("Physical memory chunk(s):\n");
237 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
238 size = phys_avail[i + 1] - phys_avail[i];
239 printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
240 (uintmax_t)phys_avail[i],
241 (uintmax_t)phys_avail[i + 1] - 1,
242 (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
243 }
244 }
245
246 printf("avail memory = %ju (%ju MB)\n",
247 ptoa((uintmax_t)vm_free_count()),
248 ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);
249
250 undef_init();
251 install_cpu_errata();
252
253 vm_ksubmap_init(&kmi);
254 bufinit();
255 vm_pager_bufferinit();
256 }
257
258 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
259
/*
 * Apply the kernel's late ifunc relocations; ordered after cpu_startup()
 * via SI_SUB_CPU/SI_ORDER_ANY.
 */
static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);
266
int
cpu_idle_wakeup(int cpu)
{

	/*
	 * No machine-dependent action; the interrupt delivery itself is
	 * assumed to wake the wfi in cpu_idle().
	 */
	return (0);
}
273
void
cpu_idle(int busy)
{

	spinlock_enter();
	if (!busy)
		cpu_idleclock();
	/*
	 * If nothing is runnable wait for an interrupt; the dsb ensures
	 * prior memory accesses complete before the wfi.
	 */
	if (!sched_runnable())
		__asm __volatile(
		    "dsb sy \n"
		    "wfi \n");
	if (!busy)
		cpu_activeclock();
	spinlock_exit();
}
289
void
cpu_halt(void)
{

	/* We should have shutdown by now, if not enter a low power sleep */
	intr_disable();
	for (;;)
		__asm __volatile("wfi");
}
300
/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{

	/* ARM64TODO TBD -- currently a no-op; ptr and len are unused. */
}
311
312 /* Get current clock frequency for the given CPU ID. */
313 int
cpu_est_clockrate(int cpu_id,uint64_t * rate)314 cpu_est_clockrate(int cpu_id, uint64_t *rate)
315 {
316 struct pcpu *pc;
317
318 pc = pcpu_find(cpu_id);
319 if (pc == NULL || rate == NULL)
320 return (EINVAL);
321
322 if (pc->pc_clock == 0)
323 return (EOPNOTSUPP);
324
325 *rate = pc->pc_clock;
326 return (0);
327 }
328
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	/*
	 * Start with sentinel "invalid" values; presumably replaced later
	 * during CPU enumeration -- verify against the SMP start-up code.
	 */
	pcpu->pc_acpi_id = 0xffffffff;
	pcpu->pc_mpidr = UINT64_MAX;
}
336
void
spinlock_enter(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		/*
		 * On the outermost entry disable interrupts first and save
		 * the prior DAIF state for spinlock_exit() to restore.
		 */
		daif = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_daif = daif;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}
352
void
spinlock_exit(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	/* Read the saved state before dropping the count. */
	daif = td->td_md.md_saved_daif;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		/* Leave the critical section before re-enabling interrupts. */
		critical_exit();
		intr_restore(daif);
	}
}
367
368 /*
369 * Construct a PCB from a trapframe. This is called from kdb_trap() where
370 * we want to start a backtrace from the function that caused us to enter
371 * the debugger. We have the context in the trapframe, but base the trace
372 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
373 * enough for a backtrace.
374 */
375 void
makectx(struct trapframe * tf,struct pcb * pcb)376 makectx(struct trapframe *tf, struct pcb *pcb)
377 {
378 int i;
379
380 /* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
381 for (i = 0; i < nitems(pcb->pcb_x); i++) {
382 if (i == PCB_LR)
383 pcb->pcb_x[i] = tf->tf_elr;
384 else
385 pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
386 }
387
388 pcb->pcb_sp = tf->tf_sp;
389 }
390
/*
 * Wire up proc0/thread0: link them, point thread0 at the supplied kernel
 * stack, carve its pcb out of the top of that stack, and publish the pcb
 * in the boot CPU's pcpu.
 */
static void
init_proc0(vm_offset_t kstack)
{
	struct pcpu *pcpup;

	pcpup = cpuid_to_pcpu[0];
	MPASS(pcpup != NULL);

	proc_linkup0(&proc0, &thread0);
	thread0.td_kstack = kstack;
	thread0.td_kstack_pages = KSTACK_PAGES;
#if defined(PERTHREAD_SSP)
	thread0.td_md.md_canary = boot_canary;
#endif
	/* The pcb occupies the top of the kernel stack. */
	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_fpflags = 0;
	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
	thread0.td_frame = &proc0_tf;
	ptrauth_thread0(&thread0);
	pcpup->pc_curpcb = thread0.td_pcb;

	/*
	 * Unmask SError exceptions. They are used to signal a RAS failure,
	 * or other hardware error.
	 */
	serror_enable();
}
421
422 /*
423 * Get an address to be used to write to kernel data that may be mapped
424 * read-only, e.g. to patch kernel code.
425 */
426 bool
arm64_get_writable_addr(void * addr,void ** out)427 arm64_get_writable_addr(void *addr, void **out)
428 {
429 vm_paddr_t pa;
430
431 /* Check if the page is writable */
432 if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) {
433 *out = addr;
434 return (true);
435 }
436
437 /*
438 * Find the physical address of the given page.
439 */
440 if (!pmap_klookup((vm_offset_t)addr, &pa)) {
441 return (false);
442 }
443
444 /*
445 * If it is within the DMAP region and is writable use that.
446 */
447 if (PHYS_IN_DMAP_RANGE(pa)) {
448 addr = (void *)PHYS_TO_DMAP(pa);
449 if (PAR_SUCCESS(arm64_address_translate_s1e1w(
450 (vm_offset_t)addr))) {
451 *out = addr;
452 return (true);
453 }
454 }
455
456 return (false);
457 }
458
459 typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);
460
461 static void
foreach_efi_map_entry(struct efi_map_header * efihdr,efi_map_entry_cb cb,void * argp)462 foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
463 {
464 struct efi_md *map, *p;
465 size_t efisz;
466 int ndesc, i;
467
468 /*
469 * Memory map data provided by UEFI via the GetMemoryMap
470 * Boot Services API.
471 */
472 efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
473 map = (struct efi_md *)((uint8_t *)efihdr + efisz);
474
475 if (efihdr->descriptor_size == 0)
476 return;
477 ndesc = efihdr->memory_size / efihdr->descriptor_size;
478
479 for (i = 0, p = map; i < ndesc; i++,
480 p = efi_next_descriptor(p, efihdr->descriptor_size)) {
481 cb(p, argp);
482 }
483 }
484
485 /*
486 * Handle the EFI memory map list.
487 *
488 * We will make two passes at this, the first (exclude == false) to populate
489 * physmem with valid physical memory ranges from recognized map entry types.
490 * In the second pass we will exclude memory ranges from physmem which must not
491 * be used for general allocations, either because they are used by runtime
492 * firmware or otherwise reserved.
493 *
494 * Adding the runtime-reserved memory ranges to physmem and excluding them
495 * later ensures that they are included in the DMAP, but excluded from
496 * phys_avail[].
497 *
498 * Entry types not explicitly listed here are ignored and not mapped.
499 */
/* Per-entry callback for both passes; argp points to the exclude flag. */
static void
handle_efi_map_entry(struct efi_md *p, void *argp)
{
	bool exclude = *(bool *)argp;

	switch (p->md_type) {
	case EFI_MD_TYPE_RECLAIM:
		/*
		 * The recommended location for ACPI tables. Map into the
		 * DMAP so we can access them from userspace via /dev/mem.
		 */
	case EFI_MD_TYPE_RT_CODE:
		/*
		 * Some UEFI implementations put the system table in the
		 * runtime code section. Include it in the DMAP, but will
		 * be excluded from phys_avail.
		 */
	case EFI_MD_TYPE_RT_DATA:
		/*
		 * Runtime data will be excluded after the DMAP
		 * region is created to stop it from being added
		 * to phys_avail.
		 */
		if (exclude) {
			physmem_exclude_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
			break;
		}
		/* FALLTHROUGH */
	case EFI_MD_TYPE_CODE:
	case EFI_MD_TYPE_DATA:
	case EFI_MD_TYPE_BS_CODE:
	case EFI_MD_TYPE_BS_DATA:
	case EFI_MD_TYPE_FREE:
		/*
		 * We're allowed to use any entry with these types.
		 */
		if (!exclude)
			physmem_hardware_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE);
		break;
	default:
		/* Other types shall not be handled by physmem. */
		break;
	}
}
546
547 static void
add_efi_map_entries(struct efi_map_header * efihdr)548 add_efi_map_entries(struct efi_map_header *efihdr)
549 {
550 bool exclude = false;
551 foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
552 }
553
554 static void
exclude_efi_map_entries(struct efi_map_header * efihdr)555 exclude_efi_map_entries(struct efi_map_header *efihdr)
556 {
557 bool exclude = true;
558 foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
559 }
560
561 static void
print_efi_map_entry(struct efi_md * p,void * argp __unused)562 print_efi_map_entry(struct efi_md *p, void *argp __unused)
563 {
564 const char *type;
565 static const char *types[] = {
566 "Reserved",
567 "LoaderCode",
568 "LoaderData",
569 "BootServicesCode",
570 "BootServicesData",
571 "RuntimeServicesCode",
572 "RuntimeServicesData",
573 "ConventionalMemory",
574 "UnusableMemory",
575 "ACPIReclaimMemory",
576 "ACPIMemoryNVS",
577 "MemoryMappedIO",
578 "MemoryMappedIOPortSpace",
579 "PalCode",
580 "PersistentMemory"
581 };
582
583 if (p->md_type < nitems(types))
584 type = types[p->md_type];
585 else
586 type = "<INVALID>";
587 printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
588 p->md_virt, p->md_pages);
589 if (p->md_attr & EFI_MD_ATTR_UC)
590 printf("UC ");
591 if (p->md_attr & EFI_MD_ATTR_WC)
592 printf("WC ");
593 if (p->md_attr & EFI_MD_ATTR_WT)
594 printf("WT ");
595 if (p->md_attr & EFI_MD_ATTR_WB)
596 printf("WB ");
597 if (p->md_attr & EFI_MD_ATTR_UCE)
598 printf("UCE ");
599 if (p->md_attr & EFI_MD_ATTR_WP)
600 printf("WP ");
601 if (p->md_attr & EFI_MD_ATTR_RP)
602 printf("RP ");
603 if (p->md_attr & EFI_MD_ATTR_XP)
604 printf("XP ");
605 if (p->md_attr & EFI_MD_ATTR_NV)
606 printf("NV ");
607 if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
608 printf("MORE_RELIABLE ");
609 if (p->md_attr & EFI_MD_ATTR_RO)
610 printf("RO ");
611 if (p->md_attr & EFI_MD_ATTR_RT)
612 printf("RUNTIME");
613 printf("\n");
614 }
615
/* Dump the whole EFI memory map to the console, one entry per line. */
static void
print_efi_map_entries(struct efi_map_header *efihdr)
{

	printf("%23s %12s %12s %8s %4s\n",
	    "Type", "Physical", "Virtual", "#Pages", "Attr");
	foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
}
624
625 /*
626 * Map the passed in VA in EFI space to a void * using the efi memory table to
627 * find the PA and return it in the DMAP, if it exists. We're used between the
628 * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
629 * tables We assume that either the entry you are mapping fits within its page,
630 * or if it spills to the next page, that's contiguous in PA and in the DMAP.
631 * All observed tables obey the first part of this precondition.
632 */
struct early_map_data
{
	vm_offset_t va;	/* In: EFI runtime VA to translate. */
	vm_offset_t pa;	/* Out: resolved PA, or 0 if no entry matched. */
};
638
639 static void
efi_early_map_entry(struct efi_md * p,void * argp)640 efi_early_map_entry(struct efi_md *p, void *argp)
641 {
642 struct early_map_data *emdp = argp;
643 vm_offset_t s, e;
644
645 if (emdp->pa != 0)
646 return;
647 if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
648 return;
649 s = p->md_virt;
650 e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
651 if (emdp->va < s || emdp->va >= e)
652 return;
653 emdp->pa = p->md_phys + (emdp->va - p->md_virt);
654 }
655
656 static void *
efi_early_map(vm_offset_t va)657 efi_early_map(vm_offset_t va)
658 {
659 struct early_map_data emd = { .va = va };
660
661 foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
662 if (emd.pa == 0)
663 return NULL;
664 return (void *)PHYS_TO_DMAP(emd.pa);
665 }
666
667
668 /*
669 * When booted via kboot, the prior kernel will pass in reserved memory areas in
670 * a EFI config table. We need to find that table and walk through it excluding
671 * the memory ranges in it. btw, this is called too early for the printf to do
672 * anything since msgbufp isn't initialized, let alone a console...
673 */
674 static void
exclude_efi_memreserve(vm_offset_t efi_systbl_phys)675 exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
676 {
677 struct efi_systbl *systbl;
678 struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;
679
680 systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
681 if (systbl == NULL) {
682 printf("can't map systbl\n");
683 return;
684 }
685 if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
686 printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
687 return;
688 }
689
690 /*
691 * We don't yet have the pmap system booted enough to create a pmap for
692 * the efi firmware's preferred address space from the GetMemoryMap()
693 * table. The st_cfgtbl is a VA in this space, so we need to do the
694 * mapping ourselves to a kernel VA with efi_early_map. We assume that
695 * the cfgtbl entries don't span a page. Other pointers are PAs, as
696 * noted below.
697 */
698 if (systbl->st_cfgtbl == 0) /* Failsafe st_entries should == 0 in this case */
699 return;
700 for (int i = 0; i < systbl->st_entries; i++) {
701 struct efi_cfgtbl *cfgtbl;
702 struct linux_efi_memreserve *mr;
703
704 cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
705 if (cfgtbl == NULL)
706 panic("Can't map the config table entry %d\n", i);
707 if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0)
708 continue;
709
710 /*
711 * cfgtbl points are either VA or PA, depending on the GUID of
712 * the table. memreserve GUID pointers are PA and not converted
713 * after a SetVirtualAddressMap(). The list's mr_next pointer
714 * is also a PA.
715 */
716 mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
717 (vm_offset_t)cfgtbl->ct_data);
718 while (true) {
719 for (int j = 0; j < mr->mr_count; j++) {
720 struct linux_efi_memreserve_entry *mre;
721
722 mre = &mr->mr_entry[j];
723 physmem_exclude_region(mre->mre_base, mre->mre_size,
724 EXFLAG_NODUMP | EXFLAG_NOALLOC);
725 }
726 if (mr->mr_next == 0)
727 break;
728 mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
729 };
730 }
731
732 }
733
734 #ifdef FDT
/*
 * Locate a device tree blob (from loader metadata, or the statically
 * embedded copy when built with FDT_DTB_STATIC) and initialise the
 * OpenFirmware FDT layer with it.
 */
static void
try_load_dtb(caddr_t kmdp)
{
	vm_offset_t dtbp;

	dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
	/*
	 * In case the device tree blob was not retrieved (from metadata) try
	 * to use the statically embedded one.
	 */
	if (dtbp == 0)
		dtbp = (vm_offset_t)&fdt_static_dtb;
#endif

	if (dtbp == (vm_offset_t)NULL) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (OF_install(OFW_FDT, 0) == FALSE)
		panic("Cannot install FDT");

	if (OF_init((void *)dtbp) != 0)
		panic("OF_init failed with the found device tree");

	parse_fdt_bootargs();
}
765 #endif
766
767 static bool
bus_probe(void)768 bus_probe(void)
769 {
770 bool has_acpi, has_fdt;
771 char *order, *env;
772
773 has_acpi = has_fdt = false;
774
775 #ifdef FDT
776 has_fdt = (OF_peer(0) != 0);
777 #endif
778 #ifdef DEV_ACPI
779 has_acpi = (AcpiOsGetRootPointer() != 0);
780 #endif
781
782 env = kern_getenv("kern.cfg.order");
783 if (env != NULL) {
784 order = env;
785 while (order != NULL) {
786 if (has_acpi &&
787 strncmp(order, "acpi", 4) == 0 &&
788 (order[4] == ',' || order[4] == '\0')) {
789 arm64_bus_method = ARM64_BUS_ACPI;
790 break;
791 }
792 if (has_fdt &&
793 strncmp(order, "fdt", 3) == 0 &&
794 (order[3] == ',' || order[3] == '\0')) {
795 arm64_bus_method = ARM64_BUS_FDT;
796 break;
797 }
798 order = strchr(order, ',');
799 if (order != NULL)
800 order++; /* Skip comma */
801 }
802 freeenv(env);
803
804 /* If we set the bus method it is valid */
805 if (arm64_bus_method != ARM64_BUS_NONE)
806 return (true);
807 }
808 /* If no order or an invalid order was set use the default */
809 if (arm64_bus_method == ARM64_BUS_NONE) {
810 if (has_fdt)
811 arm64_bus_method = ARM64_BUS_FDT;
812 else if (has_acpi)
813 arm64_bus_method = ARM64_BUS_ACPI;
814 }
815
816 /*
817 * If no option was set the default is valid, otherwise we are
818 * setting one to get cninit() working, then calling panic to tell
819 * the user about the invalid bus setup.
820 */
821 return (env == NULL);
822 }
823
824 static void
cache_setup(void)825 cache_setup(void)
826 {
827 int dczva_line_shift;
828 uint32_t dczid_el0;
829
830 identify_cache(READ_SPECIALREG(ctr_el0));
831
832 dczid_el0 = READ_SPECIALREG(dczid_el0);
833
834 /* Check if dc zva is not prohibited */
835 if (dczid_el0 & DCZID_DZP)
836 dczva_line_size = 0;
837 else {
838 /* Same as with above calculations */
839 dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
840 dczva_line_size = sizeof(int) << dczva_line_shift;
841
842 /* Change pagezero function */
843 pagezero = pagezero_cache;
844 }
845 }
846
/*
 * Choose the memory attribute for a physical address from the EFI memory
 * map: device memory for I/O entries, otherwise the strongest cacheability
 * the descriptor's attributes allow (WB > WT > WC).  Falls back to
 * write-back when there is no usable EFI map, and to device memory for
 * addresses not covered by any entry.
 */
int
memory_mapping_mode(vm_paddr_t pa)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	if (efihdr == NULL)
		return (VM_MEMATTR_WRITE_BACK);

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return (VM_MEMATTR_WRITE_BACK);
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		/* Skip descriptors that do not cover pa. */
		if (pa < p->md_phys ||
		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
			continue;
		if (p->md_type == EFI_MD_TYPE_IOMEM ||
		    p->md_type == EFI_MD_TYPE_IOPORT)
			return (VM_MEMATTR_DEVICE);
		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
		    p->md_type == EFI_MD_TYPE_RECLAIM)
			return (VM_MEMATTR_WRITE_BACK);
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			return (VM_MEMATTR_WRITE_THROUGH);
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			return (VM_MEMATTR_WRITE_COMBINING);
		/* Covered, but with no recognised attribute: treat as device. */
		break;
	}

	return (VM_MEMATTR_DEVICE);
}
888
/*
 * Machine-dependent early boot: record the boot state, set up the boot
 * CPU's pcpu and thread0, discover physical memory (EFI map or FDT),
 * bootstrap the pmap, console and debug hardware, and process boot-time
 * tunables.  The ordering of the calls below is deliberate.
 */
void
initarm(struct arm64_bootparams *abp)
{
	struct efi_fb *efifb;
	struct pcpu *pcpup;
	char *env;
#ifdef FDT
	struct mem_region mem_regions[FDT_MEM_REGIONS];
	int mem_regions_sz;
	phandle_t root;
	char dts_version[255];
#endif
	vm_offset_t lastaddr;
	caddr_t kmdp;
	bool valid;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	/* Stash boot EL state for has_hyp()/in_vhe(). */
	boot_el = abp->boot_el;
	hcr_el2 = abp->hcr_el2;

	/* Parse loader or FDT boot parameters. Determine last used address. */
	lastaddr = parse_boot_param(abp);

	/* Find the kernel address */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");

	identify_cpu(0);
	identify_hypervisor_smbios();

	update_special_regs(0);

	/* Set the pcpu data, this is needed by pmap_bootstrap */
	pcpup = &pcpu0;
	pcpu_init(pcpup, 0, sizeof(struct pcpu));

	/*
	 * Set the pcpu pointer with a backup in tpidr_el1 to be
	 * loaded when entering the kernel from userland.
	 */
	__asm __volatile(
	    "mov x18, %0 \n"
	    "msr tpidr_el1, %0" :: "r"(pcpup));

	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
	PCPU_SET(curthread, &thread0);
	PCPU_SET(midr, get_midr());

	link_elf_ireloc(kmdp);
#ifdef FDT
	try_load_dtb(kmdp);
#endif

	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	/* Load the physical memory ranges */
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr != NULL)
		add_efi_map_entries(efihdr);
#ifdef FDT
	else {
		/* Grab physical memory regions information from device tree. */
		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
		    NULL) != 0)
			panic("Cannot get physical memory regions");
		physmem_hardware_regions(mem_regions, mem_regions_sz);
	}
	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
		physmem_exclude_regions(mem_regions, mem_regions_sz,
		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
#endif

	/* Exclude the EFI framebuffer from our view of physical memory. */
	efifb = (struct efi_fb *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_FB);
	if (efifb != NULL)
		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
		    EXFLAG_NOALLOC);

	/* Do basic tuning, hz etc */
	init_param1();

	cache_setup();
	pan_setup();

	/* Bootstrap enough of pmap to enter the kernel proper */
	pmap_bootstrap(lastaddr - KERNBASE);
	/* Exclude entries needed in the DMAP region, but not phys_avail */
	if (efihdr != NULL)
		exclude_efi_map_entries(efihdr);
	/* Do the same for reserve entries in the EFI MEMRESERVE table */
	if (efi_systbl_phys != 0)
		exclude_efi_memreserve(efi_systbl_phys);

	/*
	 * We carefully bootstrap the sanitizer map after we've excluded
	 * absolutely everything else that could impact phys_avail. There's not
	 * always enough room for the initial shadow map after the kernel, so
	 * we'll end up searching for segments that we can safely use. Those
	 * segments also get excluded from phys_avail.
	 */
#if defined(KASAN) || defined(KMSAN)
	pmap_bootstrap_san();
#endif

	physmem_init_kernel_globals();

	devmap_bootstrap();

	valid = bus_probe();

	cninit();
	set_ttbr0(abp->kern_ttbr0);
	cpu_tlb_flushID();

	if (!valid)
		panic("Invalid bus configuration: %s",
		    kern_getenv("kern.cfg.order"));

	/*
	 * Check if pointer authentication is available on this system, and
	 * if so enable its use. This needs to be called before init_proc0
	 * as that will configure the thread0 pointer authentication keys.
	 */
	ptrauth_init();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

	init_proc0(abp->kern_stack);
	msgbufinit(msgbufp, msgbufsize);
	mutex_init();
	init_param2(physmem);

	dbg_init();
	kdb_init();
#ifdef KDB
	if ((boothowto & RB_KDB) != 0)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
	pan_enable();

	kcsan_cpu_init(0);
	kasan_init();
	kmsan_init();

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	if (arm64_bus_method == ARM64_BUS_FDT) {
		root = OF_finddevice("/");
		if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
				printf("WARNING: DTB version is %s while kernel expects %s, "
				    "please update the DTB in the ESP\n",
				    dts_version,
				    LINUX_DTS_VERSION);
		} else {
			printf("WARNING: Cannot find freebsd,dts-version property, "
			    "cannot check DTB compliance\n");
		}
	}
#endif

	if (boothowto & RB_VERBOSE) {
		if (efihdr != NULL)
			print_efi_map_entries(efihdr);
		physmem_print_tables();
	}

	/* Boot is far enough along that non-early code paths may run. */
	early_boot = 0;

	if (bootverbose && kstack_pages != KSTACK_PAGES)
		printf("kern.kstack_pages = %d ignored for thread0\n",
		    kstack_pages);

	TSEXIT();
}
1077
/* Prepare the debug hardware: clear the OS lock and set up the monitor. */
void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here. */
}
1090
1091 #ifdef DDB
1092 #include <ddb/ddb.h>
1093
/* DDB "show specialregs": dump the EL0/EL1 system registers of interest. */
DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
#define	PRINT_REG(reg)	\
    db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}
1168
DB_SHOW_COMMAND(vtop,db_show_vtop)1169 DB_SHOW_COMMAND(vtop, db_show_vtop)
1170 {
1171 uint64_t phys;
1172
1173 if (have_addr) {
1174 phys = arm64_address_translate_s1e1r(addr);
1175 db_printf("EL1 physical address reg (read): 0x%016lx\n", phys);
1176 phys = arm64_address_translate_s1e1w(addr);
1177 db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
1178 phys = arm64_address_translate_s1e0r(addr);
1179 db_printf("EL0 physical address reg (read): 0x%016lx\n", phys);
1180 phys = arm64_address_translate_s1e0w(addr);
1181 db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
1182 } else
1183 db_printf("show vtop <virt_addr>\n");
1184 }
1185 #endif
1186