xref: /freebsd/sys/arm64/arm64/machdep.c (revision 069ac184)
1 /*-
2  * Copyright (c) 2014 Andrew Turner
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 #include "opt_acpi.h"
29 #include "opt_kstack_pages.h"
30 #include "opt_platform.h"
31 #include "opt_ddb.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/asan.h>
36 #include <sys/buf.h>
37 #include <sys/bus.h>
38 #include <sys/cons.h>
39 #include <sys/cpu.h>
40 #include <sys/csan.h>
41 #include <sys/devmap.h>
42 #include <sys/efi.h>
43 #include <sys/exec.h>
44 #include <sys/imgact.h>
45 #include <sys/kdb.h>
46 #include <sys/kernel.h>
47 #include <sys/ktr.h>
48 #include <sys/limits.h>
49 #include <sys/linker.h>
50 #include <sys/msgbuf.h>
51 #include <sys/pcpu.h>
52 #include <sys/physmem.h>
53 #include <sys/proc.h>
54 #include <sys/ptrace.h>
55 #include <sys/reboot.h>
56 #include <sys/reg.h>
57 #include <sys/rwlock.h>
58 #include <sys/sched.h>
59 #include <sys/signalvar.h>
60 #include <sys/syscallsubr.h>
61 #include <sys/sysent.h>
62 #include <sys/sysproto.h>
63 #include <sys/ucontext.h>
64 #include <sys/vdso.h>
65 #include <sys/vmmeter.h>
66 
67 #include <vm/vm.h>
68 #include <vm/vm_param.h>
69 #include <vm/vm_kern.h>
70 #include <vm/vm_object.h>
71 #include <vm/vm_page.h>
72 #include <vm/vm_phys.h>
73 #include <vm/pmap.h>
74 #include <vm/vm_map.h>
75 #include <vm/vm_pager.h>
76 
77 #include <machine/armreg.h>
78 #include <machine/cpu.h>
79 #include <machine/debug_monitor.h>
80 #include <machine/hypervisor.h>
81 #include <machine/kdb.h>
82 #include <machine/machdep.h>
83 #include <machine/metadata.h>
84 #include <machine/md_var.h>
85 #include <machine/pcb.h>
86 #include <machine/undefined.h>
87 #include <machine/vmparam.h>
88 
89 #ifdef VFP
90 #include <machine/vfp.h>
91 #endif
92 
93 #ifdef DEV_ACPI
94 #include <contrib/dev/acpica/include/acpi.h>
95 #include <machine/acpica_machdep.h>
96 #endif
97 
98 #ifdef FDT
99 #include <dev/fdt/fdt_common.h>
100 #include <dev/ofw/openfirm.h>
101 #endif
102 
103 #include <dev/smbios/smbios.h>
104 
105 _Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
106 _Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
107     "pcb_fpusaved changed offset");
108 _Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
109     "pcb_fpustate changed offset");
110 
111 enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;
112 
113 /*
114  * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
115  * could relocate this, but will need to keep the same virtual address as
 * it's referenced by the EARLY_COUNTER macro.
117  */
118 struct pcpu pcpu0;
119 
120 #if defined(PERTHREAD_SSP)
121 /*
122  * The boot SSP canary. Will be replaced with a per-thread canary when
123  * scheduling has started.
124  */
125 uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
126 #endif
127 
128 static struct trapframe proc0_tf;
129 
130 int early_boot = 1;
131 int cold = 1;
132 static int boot_el;
133 static uint64_t hcr_el2;
134 
135 struct kva_md_info kmi;
136 
137 int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
138 int has_pan;
139 
140 /*
141  * Physical address of the EFI System Table. Stashed from the metadata hints
142  * passed into the kernel and used by the EFI code to call runtime services.
143  */
144 vm_paddr_t efi_systbl_phys;
145 static struct efi_map_header *efihdr;
146 
147 /* pagezero_* implementations are provided in support.S */
148 void pagezero_simple(void *);
149 void pagezero_cache(void *);
150 
151 /* pagezero_simple is default pagezero */
152 void (*pagezero)(void *p) = pagezero_simple;
153 
154 int (*apei_nmi)(void);
155 
#if defined(PERTHREAD_SSP_WARNING)
/*
 * Warn that per-thread SSP was configured but the toolchain cannot emit it.
 * Registered twice: once very early (SI_SUB_COPYRIGHT) and once at the end
 * of boot (SI_SUB_LAST) so the warning is not lost in the boot messages.
 */
static void
print_ssp_warning(void *data __unused)
{
	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif
165 
166 static void
167 pan_setup(void)
168 {
169 	uint64_t id_aa64mfr1;
170 
171 	id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
172 	if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE)
173 		has_pan = 1;
174 }
175 
/*
 * Turn on Privileged Access Never if pan_setup() detected support for it.
 * Must be called on each CPU after the exception vectors are installed.
 */
void
pan_enable(void)
{

	/*
	 * The LLVM integrated assembler doesn't understand the PAN
	 * PSTATE field. Because of this we need to manually create
	 * the instruction in an asm block. This is equivalent to:
	 * msr pan, #1
	 *
	 * This sets the PAN bit, stopping the kernel from accessing
	 * memory when userspace can also access it unless the kernel
	 * uses the userspace load/store instructions.
	 */
	if (has_pan) {
		/* Clear SPAN so exceptions do not re-enable kernel access. */
		WRITE_SPECIALREG(sctlr_el1,
		    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
		__asm __volatile(".inst 0xd500409f | (0x1 << 8)");
	}
}
196 
197 bool
198 has_hyp(void)
199 {
200 
201 	/*
202 	 * XXX The E2H check is wrong, but it's close enough for now.  Needs to
203 	 * be re-evaluated once we're running regularly in EL2.
204 	 */
205 	return (boot_el == 2 && (hcr_el2 & HCR_E2H) == 0);
206 }
207 
/*
 * Late CPU startup: report memory totals and finish VM/buffer-cache setup.
 * Runs at SI_SUB_CPU, after the VM system is up.
 */
static void
cpu_startup(void *dummy)
{
	vm_paddr_t size;
	int i;

	printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
	    ptoa((uintmax_t)realmem) / 1024 / 1024);

	if (bootverbose) {
		printf("Physical memory chunk(s):\n");
		/* phys_avail[] holds (start, end) pairs terminated by 0. */
		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
			size = phys_avail[i + 1] - phys_avail[i];
			printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
			    (uintmax_t)phys_avail[i],
			    (uintmax_t)phys_avail[i + 1] - 1,
			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
		}
	}

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)vm_free_count()),
	    ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);

	/* Install undefined-instruction handlers and CPU errata workarounds. */
	undef_init();
	install_cpu_errata();

	/* Initialize kernel submaps and the buffer cache. */
	vm_ksubmap_init(&kmi);
	bufinit();
	vm_pager_bufferinit();
}

SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
241 
/* Re-run ifunc relocations once all CPU features have been identified. */
static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);
248 
/*
 * Called by the scheduler to wake an idle CPU; returning 0 indicates no
 * machine-dependent wakeup was performed here.
 */
int
cpu_idle_wakeup(int cpu)
{

	return (0);
}
255 
/*
 * Idle loop body: stop the idle clock if possible and wait for an
 * interrupt with wfi when nothing is runnable.
 */
void
cpu_idle(int busy)
{

	spinlock_enter();
	if (!busy)
		cpu_idleclock();
	if (!sched_runnable())
		/* Complete outstanding memory accesses, then wait. */
		__asm __volatile(
		    "dsb sy \n"
		    "wfi    \n");
	if (!busy)
		cpu_activeclock();
	spinlock_exit();
}
271 
/* Disable interrupts and spin in wfi forever; never returns. */
void
cpu_halt(void)
{

	/* We should have shutdown by now, if not enter a low power sleep */
	intr_disable();
	while (1) {
		__asm __volatile("wfi");
	}
}
282 
/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{

	/* ARM64TODO TBD */
	/* NOTE(review): intentionally a no-op stub on arm64 for now. */
}
293 
294 /* Get current clock frequency for the given CPU ID. */
295 int
296 cpu_est_clockrate(int cpu_id, uint64_t *rate)
297 {
298 	struct pcpu *pc;
299 
300 	pc = pcpu_find(cpu_id);
301 	if (pc == NULL || rate == NULL)
302 		return (EINVAL);
303 
304 	if (pc->pc_clock == 0)
305 		return (EOPNOTSUPP);
306 
307 	*rate = pc->pc_clock;
308 	return (0);
309 }
310 
/*
 * Machine-dependent per-CPU data initialization: mark the ACPI ID and
 * MPIDR as invalid until the CPU is properly enumerated.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
	pcpu->pc_mpidr = UINT64_MAX;
}
318 
/*
 * Enter a spinlock section: on first entry disable interrupts, saving the
 * previous DAIF state for spinlock_exit() to restore; nested entries only
 * bump the count.
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		daif = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_daif = daif;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}
334 
/*
 * Leave a spinlock section; on the outermost exit restore the DAIF state
 * saved by spinlock_enter(). Note the saved state is read before the count
 * is dropped so a preemption cannot clobber it.
 */
void
spinlock_exit(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	daif = td->td_md.md_saved_daif;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(daif);
	}
}
349 
/*
 * Construct a PCB from a trapframe. This is called from kdb_trap() where
 * we want to start a backtrace from the function that caused us to enter
 * the debugger. We have the context in the trapframe, but base the trace
 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 * enough for a backtrace.
 */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{
	int i;

	/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
	for (i = 0; i < nitems(pcb->pcb_x); i++) {
		if (i == PCB_LR)
			pcb->pcb_x[i] = tf->tf_elr;
		else
			pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
	}

	pcb->pcb_sp = tf->tf_sp;
}
372 
/*
 * Set up proc0/thread0: link them, place the PCB at the top of the kernel
 * stack, and initialize the FP/ptrauth state, using the given stack base.
 */
static void
init_proc0(vm_offset_t kstack)
{
	struct pcpu *pcpup;

	pcpup = cpuid_to_pcpu[0];
	MPASS(pcpup != NULL);

	proc_linkup0(&proc0, &thread0);
	thread0.td_kstack = kstack;
	thread0.td_kstack_pages = KSTACK_PAGES;
#if defined(PERTHREAD_SSP)
	thread0.td_md.md_canary = boot_canary;
#endif
	/* The PCB lives at the very top of thread0's kernel stack. */
	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_fpflags = 0;
	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
	thread0.td_frame = &proc0_tf;
	ptrauth_thread0(&thread0);
	pcpup->pc_curpcb = thread0.td_pcb;

	/*
	 * Unmask SError exceptions. They are used to signal a RAS failure,
	 * or other hardware error.
	 */
	serror_enable();
}
403 
/*
 * Get an address to be used to write to kernel data that may be mapped
 * read-only, e.g. to patch kernel code. Returns true with *out set to a
 * writable alias on success, false if no writable mapping could be found.
 */
bool
arm64_get_writable_addr(vm_offset_t addr, vm_offset_t *out)
{
	vm_paddr_t pa;

	/* Check if the page is writable */
	if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
		*out = addr;
		return (true);
	}

	/*
	 * Find the physical address of the given page.
	 */
	if (!pmap_klookup(addr, &pa)) {
		return (false);
	}

	/*
	 * If it is within the DMAP region and is writable use that.
	 */
	if (PHYS_IN_DMAP(pa)) {
		addr = PHYS_TO_DMAP(pa);
		if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
			*out = addr;
			return (true);
		}
	}

	return (false);
}
439 
440 typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);
441 
442 static void
443 foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
444 {
445 	struct efi_md *map, *p;
446 	size_t efisz;
447 	int ndesc, i;
448 
449 	/*
450 	 * Memory map data provided by UEFI via the GetMemoryMap
451 	 * Boot Services API.
452 	 */
453 	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
454 	map = (struct efi_md *)((uint8_t *)efihdr + efisz);
455 
456 	if (efihdr->descriptor_size == 0)
457 		return;
458 	ndesc = efihdr->memory_size / efihdr->descriptor_size;
459 
460 	for (i = 0, p = map; i < ndesc; i++,
461 	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
462 		cb(p, argp);
463 	}
464 }
465 
/*
 * Handle the EFI memory map list.
 *
 * We will make two passes at this, the first (exclude == false) to populate
 * physmem with valid physical memory ranges from recognized map entry types.
 * In the second pass we will exclude memory ranges from physmem which must not
 * be used for general allocations, either because they are used by runtime
 * firmware or otherwise reserved.
 *
 * Adding the runtime-reserved memory ranges to physmem and excluding them
 * later ensures that they are included in the DMAP, but excluded from
 * phys_avail[].
 *
 * Entry types not explicitly listed here are ignored and not mapped.
 */
static void
handle_efi_map_entry(struct efi_md *p, void *argp)
{
	bool exclude = *(bool *)argp;

	switch (p->md_type) {
	case EFI_MD_TYPE_RECLAIM:
		/*
		 * The recommended location for ACPI tables. Map into the
		 * DMAP so we can access them from userspace via /dev/mem.
		 */
	case EFI_MD_TYPE_RT_CODE:
		/*
		 * Some UEFI implementations put the system table in the
		 * runtime code section. Include it in the DMAP, but will
		 * be excluded from phys_avail.
		 */
	case EFI_MD_TYPE_RT_DATA:
		/*
		 * Runtime data will be excluded after the DMAP
		 * region is created to stop it from being added
		 * to phys_avail.
		 */
		if (exclude) {
			physmem_exclude_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
			break;
		}
		/* FALLTHROUGH */
	case EFI_MD_TYPE_CODE:
	case EFI_MD_TYPE_DATA:
	case EFI_MD_TYPE_BS_CODE:
	case EFI_MD_TYPE_BS_DATA:
	case EFI_MD_TYPE_FREE:
		/*
		 * We're allowed to use any entry with these types.
		 */
		if (!exclude)
			physmem_hardware_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE);
		break;
	default:
		/* Other types shall not be handled by physmem. */
		break;
	}
}
527 
/* First pass: register usable/mappable EFI ranges with physmem. */
static void
add_efi_map_entries(struct efi_map_header *efihdr)
{
	bool exclude = false;
	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
}
534 
/* Second pass: exclude runtime-reserved EFI ranges from phys_avail[]. */
static void
exclude_efi_map_entries(struct efi_map_header *efihdr)
{
	bool exclude = true;
	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
}
541 
542 static void
543 print_efi_map_entry(struct efi_md *p, void *argp __unused)
544 {
545 	const char *type;
546 	static const char *types[] = {
547 		"Reserved",
548 		"LoaderCode",
549 		"LoaderData",
550 		"BootServicesCode",
551 		"BootServicesData",
552 		"RuntimeServicesCode",
553 		"RuntimeServicesData",
554 		"ConventionalMemory",
555 		"UnusableMemory",
556 		"ACPIReclaimMemory",
557 		"ACPIMemoryNVS",
558 		"MemoryMappedIO",
559 		"MemoryMappedIOPortSpace",
560 		"PalCode",
561 		"PersistentMemory"
562 	};
563 
564 	if (p->md_type < nitems(types))
565 		type = types[p->md_type];
566 	else
567 		type = "<INVALID>";
568 	printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
569 	    p->md_virt, p->md_pages);
570 	if (p->md_attr & EFI_MD_ATTR_UC)
571 		printf("UC ");
572 	if (p->md_attr & EFI_MD_ATTR_WC)
573 		printf("WC ");
574 	if (p->md_attr & EFI_MD_ATTR_WT)
575 		printf("WT ");
576 	if (p->md_attr & EFI_MD_ATTR_WB)
577 		printf("WB ");
578 	if (p->md_attr & EFI_MD_ATTR_UCE)
579 		printf("UCE ");
580 	if (p->md_attr & EFI_MD_ATTR_WP)
581 		printf("WP ");
582 	if (p->md_attr & EFI_MD_ATTR_RP)
583 		printf("RP ");
584 	if (p->md_attr & EFI_MD_ATTR_XP)
585 		printf("XP ");
586 	if (p->md_attr & EFI_MD_ATTR_NV)
587 		printf("NV ");
588 	if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
589 		printf("MORE_RELIABLE ");
590 	if (p->md_attr & EFI_MD_ATTR_RO)
591 		printf("RO ");
592 	if (p->md_attr & EFI_MD_ATTR_RT)
593 		printf("RUNTIME");
594 	printf("\n");
595 }
596 
/* Print a header line, then dump every EFI memory map descriptor. */
static void
print_efi_map_entries(struct efi_map_header *efihdr)
{

	printf("%23s %12s %12s %8s %4s\n",
	    "Type", "Physical", "Virtual", "#Pages", "Attr");
	foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
}
605 
/*
 * Map the passed in VA in EFI space to a void * using the efi memory table to
 * find the PA and return it in the DMAP, if it exists. We're used between the
 * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
 * tables We assume that either the entry you are mapping fits within its page,
 * or if it spills to the next page, that's contiguous in PA and in the DMAP.
 * All observed tables obey the first part of this precondition.
 */
struct early_map_data
{
	vm_offset_t va;		/* in: EFI-space VA to translate */
	vm_offset_t pa;		/* out: 0 until a matching entry is found */
};

/*
 * foreach_efi_map_entry() callback: if this runtime descriptor covers the
 * requested VA, record the corresponding PA. First match wins.
 */
static void
efi_early_map_entry(struct efi_md *p, void *argp)
{
	struct early_map_data *emdp = argp;
	vm_offset_t s, e;

	if (emdp->pa != 0)
		return;
	if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
		return;
	s = p->md_virt;
	e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
	if (emdp->va < s  || emdp->va >= e)
		return;
	emdp->pa = p->md_phys + (emdp->va - p->md_virt);
}
636 
637 static void *
638 efi_early_map(vm_offset_t va)
639 {
640 	struct early_map_data emd = { .va = va };
641 
642 	foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
643 	if (emd.pa == 0)
644 		return NULL;
645 	return (void *)PHYS_TO_DMAP(emd.pa);
646 }
647 
648 
/*
 * When booted via kboot, the prior kernel will pass in reserved memory areas in
 * a EFI config table. We need to find that table and walk through it excluding
 * the memory ranges in it. btw, this is called too early for the printf to do
 * anything since msgbufp isn't initialized, let alone a console...
 */
static void
exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
{
	struct efi_systbl *systbl;
	struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;

	systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
	if (systbl == NULL) {
		printf("can't map systbl\n");
		return;
	}
	if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
		printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
		return;
	}

	/*
	 * We don't yet have the pmap system booted enough to create a pmap for
	 * the efi firmware's preferred address space from the GetMemoryMap()
	 * table. The st_cfgtbl is a VA in this space, so we need to do the
	 * mapping ourselves to a kernel VA with efi_early_map. We assume that
	 * the cfgtbl entries don't span a page. Other pointers are PAs, as
	 * noted below.
	 */
	if (systbl->st_cfgtbl == 0)	/* Failsafe st_entries should == 0 in this case */
		return;
	for (int i = 0; i < systbl->st_entries; i++) {
		struct efi_cfgtbl *cfgtbl;
		struct linux_efi_memreserve *mr;

		cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
		if (cfgtbl == NULL)
			panic("Can't map the config table entry %d\n", i);
		/* Only the Linux memreserve table is of interest here. */
		if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0)
			continue;

		/*
		 * cfgtbl points are either VA or PA, depending on the GUID of
		 * the table. memreserve GUID pointers are PA and not converted
		 * after a SetVirtualAddressMap(). The list's mr_next pointer
		 * is also a PA.
		 */
		mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
			(vm_offset_t)cfgtbl->ct_data);
		/* Walk the linked list of memreserve blocks, excluding each. */
		while (true) {
			for (int j = 0; j < mr->mr_count; j++) {
				struct linux_efi_memreserve_entry *mre;

				mre = &mr->mr_entry[j];
				physmem_exclude_region(mre->mre_base, mre->mre_size,
				    EXFLAG_NODUMP | EXFLAG_NOALLOC);
			}
			if (mr->mr_next == 0)
				break;
			mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
		};
	}

}
714 
715 #ifdef FDT
/*
 * Locate the device tree blob (from loader metadata, falling back to a
 * statically linked DTB if configured) and initialize OFW/FDT from it.
 */
static void
try_load_dtb(caddr_t kmdp)
{
	vm_offset_t dtbp;

	dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
	/*
	 * In case the device tree blob was not retrieved (from metadata) try
	 * to use the statically embedded one.
	 */
	if (dtbp == 0)
		dtbp = (vm_offset_t)&fdt_static_dtb;
#endif

	if (dtbp == (vm_offset_t)NULL) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (OF_install(OFW_FDT, 0) == FALSE)
		panic("Cannot install FDT");

	if (OF_init((void *)dtbp) != 0)
		panic("OF_init failed with the found device tree");

	parse_fdt_bootargs();
}
746 #endif
747 
/*
 * Choose the bus enumeration method (FDT or ACPI). The kern.cfg.order
 * environment variable, a comma-separated preference list, is consulted
 * first; otherwise FDT is preferred over ACPI. Returns false when the
 * user supplied an order that named no available method, so the caller
 * can panic after the console is up.
 */
static bool
bus_probe(void)
{
	bool has_acpi, has_fdt;
	char *order, *env;

	has_acpi = has_fdt = false;

#ifdef FDT
	has_fdt = (OF_peer(0) != 0);
#endif
#ifdef DEV_ACPI
	has_acpi = (AcpiOsGetRootPointer() != 0);
#endif

	env = kern_getenv("kern.cfg.order");
	if (env != NULL) {
		order = env;
		/* Take the first entry in the list that is available. */
		while (order != NULL) {
			if (has_acpi &&
			    strncmp(order, "acpi", 4) == 0 &&
			    (order[4] == ',' || order[4] == '\0')) {
				arm64_bus_method = ARM64_BUS_ACPI;
				break;
			}
			if (has_fdt &&
			    strncmp(order, "fdt", 3) == 0 &&
			    (order[3] == ',' || order[3] == '\0')) {
				arm64_bus_method = ARM64_BUS_FDT;
				break;
			}
			order = strchr(order, ',');
			if (order != NULL)
				order++;	/* Skip comma */
		}
		freeenv(env);

		/* If we set the bus method it is valid */
		if (arm64_bus_method != ARM64_BUS_NONE)
			return (true);
	}
	/* If no order or an invalid order was set use the default */
	if (arm64_bus_method == ARM64_BUS_NONE) {
		if (has_fdt)
			arm64_bus_method = ARM64_BUS_FDT;
		else if (has_acpi)
			arm64_bus_method = ARM64_BUS_ACPI;
	}

	/*
	 * If no option was set the default is valid, otherwise we are
	 * setting one to get cninit() working, then calling panic to tell
	 * the user about the invalid bus setup.
	 */
	return (env == NULL);
}
804 
/*
 * Identify the cache geometry and, when "dc zva" is permitted, record its
 * block size and switch pagezero to the cache-assisted implementation.
 */
static void
cache_setup(void)
{
	int dczva_line_shift;
	uint32_t dczid_el0;

	identify_cache(READ_SPECIALREG(ctr_el0));

	dczid_el0 = READ_SPECIALREG(dczid_el0);

	/* Check if dc zva is not prohibited */
	if (dczid_el0 & DCZID_DZP)
		dczva_line_size = 0;
	else {
		/* DCZID_EL0.BS is log2 of the block size in 4-byte words. */
		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
		dczva_line_size = sizeof(int) << dczva_line_shift;

		/* Change pagezero function */
		pagezero = pagezero_cache;
	}
}
827 
/*
 * Decide the VM memory attribute for a physical address using the EFI
 * memory map; defaults to write-back when no map is present and to
 * device memory for addresses outside every descriptor.
 */
int
memory_mapping_mode(vm_paddr_t pa)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	if (efihdr == NULL)
		return (VM_MEMATTR_WRITE_BACK);

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return (VM_MEMATTR_WRITE_BACK);
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (pa < p->md_phys ||
		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
			continue;
		if (p->md_type == EFI_MD_TYPE_IOMEM ||
		    p->md_type == EFI_MD_TYPE_IOPORT)
			return (VM_MEMATTR_DEVICE);
		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
		    p->md_type == EFI_MD_TYPE_RECLAIM)
			return (VM_MEMATTR_WRITE_BACK);
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			return (VM_MEMATTR_WRITE_THROUGH);
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			return (VM_MEMATTR_WRITE_COMBINING);
		break;
	}

	return (VM_MEMATTR_DEVICE);
}
869 
/*
 * Machine-dependent boot entry point, called from locore.S with the boot
 * parameters collected there. Brings up the console, pmap, physmem tables
 * and thread0; the ordering of the steps below is significant.
 */
void
initarm(struct arm64_bootparams *abp)
{
	struct efi_fb *efifb;
	struct pcpu *pcpup;
	char *env;
#ifdef FDT
	struct mem_region mem_regions[FDT_MEM_REGIONS];
	int mem_regions_sz;
	phandle_t root;
	char dts_version[255];
#endif
	vm_offset_t lastaddr;
	caddr_t kmdp;
	bool valid;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	boot_el = abp->boot_el;
	hcr_el2 = abp->hcr_el2;

	/* Parse loader or FDT boot parameters. Determine last used address. */
	lastaddr = parse_boot_param(abp);

	/* Find the kernel address */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");

	identify_cpu(0);
	identify_hypervisor_smbios();

	update_special_regs(0);

	link_elf_ireloc(kmdp);
#ifdef FDT
	try_load_dtb(kmdp);
#endif

	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	/* Load the physical memory ranges */
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr != NULL)
		add_efi_map_entries(efihdr);
#ifdef FDT
	else {
		/* Grab physical memory regions information from device tree. */
		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
		    NULL) != 0)
			panic("Cannot get physical memory regions");
		physmem_hardware_regions(mem_regions, mem_regions_sz);
	}
	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
		physmem_exclude_regions(mem_regions, mem_regions_sz,
		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
#endif

	/* Exclude the EFI framebuffer from our view of physical memory. */
	efifb = (struct efi_fb *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_FB);
	if (efifb != NULL)
		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
		    EXFLAG_NOALLOC);

	/* Set the pcpu data, this is needed by pmap_bootstrap */
	pcpup = &pcpu0;
	pcpu_init(pcpup, 0, sizeof(struct pcpu));

	/*
	 * Set the pcpu pointer with a backup in tpidr_el1 to be
	 * loaded when entering the kernel from userland.
	 */
	__asm __volatile(
	    "mov x18, %0 \n"
	    "msr tpidr_el1, %0" :: "r"(pcpup));

	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
	PCPU_SET(curthread, &thread0);
	PCPU_SET(midr, get_midr());

	/* Do basic tuning, hz etc */
	init_param1();

	cache_setup();
	pan_setup();

	/* Bootstrap enough of pmap to enter the kernel proper */
	pmap_bootstrap(lastaddr - KERNBASE);
	/* Exclude entries needed in the DMAP region, but not phys_avail */
	if (efihdr != NULL)
		exclude_efi_map_entries(efihdr);
	/*  Do the same for reserve entries in the EFI MEMRESERVE table */
	if (efi_systbl_phys != 0)
		exclude_efi_memreserve(efi_systbl_phys);

	/*
	 * We carefully bootstrap the sanitizer map after we've excluded
	 * absolutely everything else that could impact phys_avail.  There's not
	 * always enough room for the initial shadow map after the kernel, so
	 * we'll end up searching for segments that we can safely use.  Those
	 * segments also get excluded from phys_avail.
	 */
#if defined(KASAN)
	pmap_bootstrap_san();
#endif

	physmem_init_kernel_globals();

	devmap_bootstrap(0, NULL);

	valid = bus_probe();

	cninit();
	set_ttbr0(abp->kern_ttbr0);
	cpu_tlb_flushID();

	if (!valid)
		panic("Invalid bus configuration: %s",
		    kern_getenv("kern.cfg.order"));

	/*
	 * Check if pointer authentication is available on this system, and
	 * if so enable its use. This needs to be called before init_proc0
	 * as that will configure the thread0 pointer authentication keys.
	 */
	ptrauth_init();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

	init_proc0(abp->kern_stack);
	msgbufinit(msgbufp, msgbufsize);
	mutex_init();
	init_param2(physmem);

	dbg_init();
	kdb_init();
#ifdef KDB
	if ((boothowto & RB_KDB) != 0)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
	pan_enable();

	kcsan_cpu_init(0);
	kasan_init();

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	if (arm64_bus_method == ARM64_BUS_FDT) {
		root = OF_finddevice("/");
		if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
				printf("WARNING: DTB version is %s while kernel expects %s, "
				    "please update the DTB in the ESP\n",
				    dts_version,
				    LINUX_DTS_VERSION);
		} else {
			printf("WARNING: Cannot find freebsd,dts-version property, "
			    "cannot check DTB compliance\n");
		}
	}
#endif

	if (boothowto & RB_VERBOSE) {
		if (efihdr != NULL)
			print_efi_map_entries(efihdr);
		physmem_print_tables();
	}

	early_boot = 0;

	if (bootverbose && kstack_pages != KSTACK_PAGES)
		printf("kern.kstack_pages = %d ignored for thread0\n",
		    kstack_pages);

	TSEXIT();
}
1057 
/* Unlock the debug registers and prepare the hardware debug monitor. */
void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here. */
}
1070 
1071 #ifdef DDB
1072 #include <ddb/ddb.h>
1073 
/* DDB "show specialregs": dump the architectural system registers. */
DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
#define	PRINT_REG(reg)	\
    db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}
1148 
/*
 * DDB "show vtop <virt_addr>": translate a VA with the AT instruction for
 * both EL1 and EL0, read and write permission checks, and print the raw
 * PAR_EL1 results.
 */
DB_SHOW_COMMAND(vtop, db_show_vtop)
{
	uint64_t phys;

	if (have_addr) {
		phys = arm64_address_translate_s1e1r(addr);
		db_printf("EL1 physical address reg (read):  0x%016lx\n", phys);
		phys = arm64_address_translate_s1e1w(addr);
		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0r(addr);
		db_printf("EL0 physical address reg (read):  0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0w(addr);
		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
	} else
		db_printf("show vtop <virt_addr>\n");
}
1165 #endif
1166