xref: /freebsd/sys/arm64/arm64/machdep.c (revision 2b833162)
1 /*-
2  * Copyright (c) 2014 Andrew Turner
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 #include "opt_acpi.h"
29 #include "opt_platform.h"
30 #include "opt_ddb.h"
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/asan.h>
38 #include <sys/buf.h>
39 #include <sys/bus.h>
40 #include <sys/cons.h>
41 #include <sys/cpu.h>
42 #include <sys/csan.h>
43 #include <sys/devmap.h>
44 #include <sys/efi.h>
45 #include <sys/exec.h>
46 #include <sys/imgact.h>
47 #include <sys/kdb.h>
48 #include <sys/kernel.h>
49 #include <sys/ktr.h>
50 #include <sys/limits.h>
51 #include <sys/linker.h>
52 #include <sys/msgbuf.h>
53 #include <sys/pcpu.h>
54 #include <sys/physmem.h>
55 #include <sys/proc.h>
56 #include <sys/ptrace.h>
57 #include <sys/reboot.h>
58 #include <sys/reg.h>
59 #include <sys/rwlock.h>
60 #include <sys/sched.h>
61 #include <sys/signalvar.h>
62 #include <sys/syscallsubr.h>
63 #include <sys/sysent.h>
64 #include <sys/sysproto.h>
65 #include <sys/ucontext.h>
66 #include <sys/vdso.h>
67 #include <sys/vmmeter.h>
68 
69 #include <vm/vm.h>
70 #include <vm/vm_param.h>
71 #include <vm/vm_kern.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_phys.h>
75 #include <vm/pmap.h>
76 #include <vm/vm_map.h>
77 #include <vm/vm_pager.h>
78 
79 #include <machine/armreg.h>
80 #include <machine/cpu.h>
81 #include <machine/debug_monitor.h>
82 #include <machine/hypervisor.h>
83 #include <machine/kdb.h>
84 #include <machine/machdep.h>
85 #include <machine/metadata.h>
86 #include <machine/md_var.h>
87 #include <machine/pcb.h>
88 #include <machine/undefined.h>
89 #include <machine/vmparam.h>
90 
91 #ifdef VFP
92 #include <machine/vfp.h>
93 #endif
94 
95 #ifdef DEV_ACPI
96 #include <contrib/dev/acpica/include/acpi.h>
97 #include <machine/acpica_machdep.h>
98 #endif
99 
100 #ifdef FDT
101 #include <dev/fdt/fdt_common.h>
102 #include <dev/ofw/openfirm.h>
103 #endif
104 
105 #include <dev/smbios/smbios.h>
106 
107 enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;
108 
109 /*
110  * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
111  * could relocate this, but will need to keep the same virtual address as
 * it's referenced by the EARLY_COUNTER macro.
113  */
114 struct pcpu pcpu0;
115 
116 #if defined(PERTHREAD_SSP)
117 /*
118  * The boot SSP canary. Will be replaced with a per-thread canary when
119  * scheduling has started.
120  */
121 uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
122 #endif
123 
124 static struct trapframe proc0_tf;
125 
126 int early_boot = 1;
127 int cold = 1;
128 static int boot_el;
129 static uint64_t hcr_el2;
130 
131 struct kva_md_info kmi;
132 
133 int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
134 int has_pan;
135 
136 /*
137  * Physical address of the EFI System Table. Stashed from the metadata hints
138  * passed into the kernel and used by the EFI code to call runtime services.
139  */
140 vm_paddr_t efi_systbl_phys;
141 static struct efi_map_header *efihdr;
142 
143 /* pagezero_* implementations are provided in support.S */
144 void pagezero_simple(void *);
145 void pagezero_cache(void *);
146 
147 /* pagezero_simple is default pagezero */
148 void (*pagezero)(void *p) = pagezero_simple;
149 
150 int (*apei_nmi)(void);
151 
152 #if defined(PERTHREAD_SSP_WARNING)
/*
 * Warn that per-thread SSP was requested but the toolchain cannot emit it.
 * Registered twice (SI_SUB_COPYRIGHT and SI_SUB_LAST), presumably so the
 * warning appears both near the top and the bottom of the boot messages.
 */
static void
print_ssp_warning(void *data __unused)
{
	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
160 #endif
161 
/*
 * Detect the Privileged Access Never (PAN) extension by reading the
 * ID_AA64MMFR1_EL1 feature register and record the result in has_pan,
 * for later use by pan_enable().
 */
static void
pan_setup(void)
{
	uint64_t id_aa64mfr1;

	id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
	if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE)
		has_pan = 1;
}
171 
/*
 * Turn on PAN if pan_setup() found hardware support for it.
 */
void
pan_enable(void)
{

	/*
	 * The LLVM integrated assembler doesn't understand the PAN
	 * PSTATE field. Because of this we need to manually create
	 * the instruction in an asm block. This is equivalent to:
	 * msr pan, #1
	 *
	 * This sets the PAN bit, stopping the kernel from accessing
	 * memory when userspace can also access it unless the kernel
	 * uses the userspace load/store instructions.
	 */
	if (has_pan) {
		/*
		 * Clear SCTLR_EL1.SPAN first; per the ARMv8.1-PAN
		 * description this should make the hardware set
		 * PSTATE.PAN on exception entry to EL1 — confirm
		 * against the ARM ARM if changing this.
		 */
		WRITE_SPECIALREG(sctlr_el1,
		    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
		__asm __volatile(".inst 0xd500409f | (0x1 << 8)");
	}
}
192 
193 bool
194 has_hyp(void)
195 {
196 
197 	/*
198 	 * XXX The E2H check is wrong, but it's close enough for now.  Needs to
199 	 * be re-evaluated once we're running regularly in EL2.
200 	 */
201 	return (boot_el == 2 && (hcr_el2 & HCR_E2H) == 0);
202 }
203 
/*
 * SI_SUB_CPU startup hook: report physical memory totals, install the
 * undefined-instruction handlers and CPU errata workarounds, then set up
 * the kernel submaps and the buffer cache.
 */
static void
cpu_startup(void *dummy)
{
	vm_paddr_t size;
	int i;

	printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
	    ptoa((uintmax_t)realmem) / 1024 / 1024);

	if (bootverbose) {
		printf("Physical memory chunk(s):\n");
		/* phys_avail[] is {start, end} pairs, terminated by 0. */
		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
			size = phys_avail[i + 1] - phys_avail[i];
			printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
			    (uintmax_t)phys_avail[i],
			    (uintmax_t)phys_avail[i + 1] - 1,
			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
		}
	}

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)vm_free_count()),
	    ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);

	undef_init();
	install_cpu_errata();

	vm_ksubmap_init(&kmi);
	bufinit();
	vm_pager_bufferinit();
}
235 
236 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
237 
/*
 * Perform the late pass of ifunc relocation; runs at the very end of
 * SI_SUB_CPU (SI_ORDER_ANY).
 */
static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);
244 
/*
 * Machine-dependent idle wakeup hook.  No action is taken for the given
 * CPU; always reports 0 (no wakeup was performed).
 */
int
cpu_idle_wakeup(int cpu)
{
	return (0);
}
251 
/*
 * Idle the current CPU using WFI.  spinlock_enter() disables interrupts
 * and enters a critical section for the duration; "busy" indicates the
 * caller expects imminent work, in which case the idle clock handoff
 * (cpu_idleclock()/cpu_activeclock()) is skipped.
 */
void
cpu_idle(int busy)
{

	spinlock_enter();
	if (!busy)
		cpu_idleclock();
	/* Only sleep if nothing became runnable in the meantime. */
	if (!sched_runnable())
		__asm __volatile(
		    /* Complete outstanding memory accesses, then wait. */
		    "dsb sy \n"
		    "wfi    \n");
	if (!busy)
		cpu_activeclock();
	spinlock_exit();
}
267 
/*
 * Final halt: disable interrupts and spin in WFI forever.
 */
void
cpu_halt(void)
{

	/* We should have shutdown by now, if not enter a low power sleep */
	intr_disable();
	for (;;)
		__asm __volatile("wfi");
}
278 
279 /*
280  * Flush the D-cache for non-DMA I/O so that the I-cache can
281  * be made coherent later.
282  */
void
cpu_flush_dcache(void *ptr, size_t len)
{

	/* ARM64TODO TBD -- currently a no-op; both arguments are unused. */
}
289 
290 /* Get current clock frequency for the given CPU ID. */
291 int
292 cpu_est_clockrate(int cpu_id, uint64_t *rate)
293 {
294 	struct pcpu *pc;
295 
296 	pc = pcpu_find(cpu_id);
297 	if (pc == NULL || rate == NULL)
298 		return (EINVAL);
299 
300 	if (pc->pc_clock == 0)
301 		return (EOPNOTSUPP);
302 
303 	*rate = pc->pc_clock;
304 	return (0);
305 }
306 
/*
 * Machine-dependent per-CPU structure initialization.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	/* 0xffffffff marks the ACPI id and MPIDR halves as not yet known. */
	pcpu->pc_acpi_id = 0xffffffff;
	pcpu->pc_mpidr_low = 0xffffffff;
	pcpu->pc_mpidr_high = 0xffffffff;
}
315 
/*
 * Enter a spinlock section.  On the outermost entry, disable interrupts
 * (saving the previous DAIF state for spinlock_exit() to restore) and
 * enter a critical section; nested entries only increment the count.
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		/* Interrupts are disabled before the count is published. */
		daif = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_daif = daif;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}
331 
/*
 * Leave a spinlock section.  On the outermost exit, leave the critical
 * section and restore the DAIF state saved by spinlock_enter().
 */
void
spinlock_exit(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	/* Read the saved state before dropping the count. */
	daif = td->td_md.md_saved_daif;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(daif);
	}
}
346 
347 /*
348  * Construct a PCB from a trapframe. This is called from kdb_trap() where
349  * we want to start a backtrace from the function that caused us to enter
350  * the debugger. We have the context in the trapframe, but base the trace
351  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
352  * enough for a backtrace.
353  */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{
	int i;

	/*
	 * Copy the register window the PCB tracks; it starts at
	 * tf_x[PCB_X_START] rather than x0.
	 */
	for (i = 0; i < nitems(pcb->pcb_x); i++)
		pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];

	/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
	pcb->pcb_x[PCB_LR] = tf->tf_elr;
	pcb->pcb_sp = tf->tf_sp;
}
366 
/*
 * Finish bootstrapping proc0/thread0: attach the kernel stack, place the
 * PCB at the top of that stack, initialize FP/pointer-auth state, and
 * point the boot CPU's curpcb at thread0's PCB.
 */
static void
init_proc0(vm_offset_t kstack)
{
	struct pcpu *pcpup;

	pcpup = cpuid_to_pcpu[0];
	MPASS(pcpup != NULL);

	proc_linkup0(&proc0, &thread0);
	thread0.td_kstack = kstack;
	thread0.td_kstack_pages = kstack_pages;
#if defined(PERTHREAD_SSP)
	thread0.td_md.md_canary = boot_canary;
#endif
	/* The PCB sits at the very top of the kernel stack. */
	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_fpflags = 0;
	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
	/* UINT_MAX: presumably "VFP state not live on any CPU" — see vfp.c. */
	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
	thread0.td_frame = &proc0_tf;
	ptrauth_thread0(&thread0);
	pcpup->pc_curpcb = thread0.td_pcb;

	/*
	 * Unmask SError exceptions. They are used to signal a RAS failure,
	 * or other hardware error.
	 */
	serror_enable();
}
397 
398 /*
399  * Get an address to be used to write to kernel data that may be mapped
400  * read-only, e.g. to patch kernel code.
401  */
402 bool
403 arm64_get_writable_addr(vm_offset_t addr, vm_offset_t *out)
404 {
405 	vm_paddr_t pa;
406 
407 	/* Check if the page is writable */
408 	if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
409 		*out = addr;
410 		return (true);
411 	}
412 
413 	/*
414 	 * Find the physical address of the given page.
415 	 */
416 	if (!pmap_klookup(addr, &pa)) {
417 		return (false);
418 	}
419 
420 	/*
421 	 * If it is within the DMAP region and is writable use that.
422 	 */
423 	if (PHYS_IN_DMAP(pa)) {
424 		addr = PHYS_TO_DMAP(pa);
425 		if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
426 			*out = addr;
427 			return (true);
428 		}
429 	}
430 
431 	return (false);
432 }
433 
434 typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);
435 
/*
 * Invoke "cb" on every descriptor of the EFI memory map following
 * "efihdr", passing "argp" through unchanged.  The map consists of the
 * header, padded to 16 bytes, followed by memory_size bytes of
 * descriptors spaced descriptor_size bytes apart (which may be larger
 * than sizeof(struct efi_md)).
 */
static void
foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	/* Guard against a malformed header (avoids dividing by zero). */
	if (efihdr->descriptor_size == 0)
		return;
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		cb(p, argp);
	}
}
459 
460 /*
461  * Handle the EFI memory map list.
462  *
463  * We will make two passes at this, the first (exclude == false) to populate
464  * physmem with valid physical memory ranges from recognized map entry types.
465  * In the second pass we will exclude memory ranges from physmem which must not
466  * be used for general allocations, either because they are used by runtime
467  * firmware or otherwise reserved.
468  *
469  * Adding the runtime-reserved memory ranges to physmem and excluding them
470  * later ensures that they are included in the DMAP, but excluded from
471  * phys_avail[].
472  *
473  * Entry types not explicitly listed here are ignored and not mapped.
474  */
static void
handle_efi_map_entry(struct efi_md *p, void *argp)
{
	/* argp selects the pass: false = add to physmem, true = exclude. */
	bool exclude = *(bool *)argp;

	switch (p->md_type) {
	case EFI_MD_TYPE_RECLAIM:
		/*
		 * The recommended location for ACPI tables. Map into the
		 * DMAP so we can access them from userspace via /dev/mem.
		 */
	case EFI_MD_TYPE_RT_CODE:
		/*
		 * Some UEFI implementations put the system table in the
		 * runtime code section. Include it in the DMAP, but will
		 * be excluded from phys_avail.
		 */
	case EFI_MD_TYPE_RT_DATA:
		/*
		 * Runtime data will be excluded after the DMAP
		 * region is created to stop it from being added
		 * to phys_avail.
		 */
		if (exclude) {
			physmem_exclude_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
			break;
		}
		/* FALLTHROUGH */
	case EFI_MD_TYPE_CODE:
	case EFI_MD_TYPE_DATA:
	case EFI_MD_TYPE_BS_CODE:
	case EFI_MD_TYPE_BS_DATA:
	case EFI_MD_TYPE_FREE:
		/*
		 * We're allowed to use any entry with these types.
		 */
		if (!exclude)
			physmem_hardware_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE);
		break;
	default:
		/* Other types shall not be handled by physmem. */
		break;
	}
}
521 
522 static void
523 add_efi_map_entries(struct efi_map_header *efihdr)
524 {
525 	bool exclude = false;
526 	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
527 }
528 
529 static void
530 exclude_efi_map_entries(struct efi_map_header *efihdr)
531 {
532 	bool exclude = true;
533 	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
534 }
535 
536 static void
537 print_efi_map_entry(struct efi_md *p, void *argp __unused)
538 {
539 	const char *type;
540 	static const char *types[] = {
541 		"Reserved",
542 		"LoaderCode",
543 		"LoaderData",
544 		"BootServicesCode",
545 		"BootServicesData",
546 		"RuntimeServicesCode",
547 		"RuntimeServicesData",
548 		"ConventionalMemory",
549 		"UnusableMemory",
550 		"ACPIReclaimMemory",
551 		"ACPIMemoryNVS",
552 		"MemoryMappedIO",
553 		"MemoryMappedIOPortSpace",
554 		"PalCode",
555 		"PersistentMemory"
556 	};
557 
558 	if (p->md_type < nitems(types))
559 		type = types[p->md_type];
560 	else
561 		type = "<INVALID>";
562 	printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
563 	    p->md_virt, p->md_pages);
564 	if (p->md_attr & EFI_MD_ATTR_UC)
565 		printf("UC ");
566 	if (p->md_attr & EFI_MD_ATTR_WC)
567 		printf("WC ");
568 	if (p->md_attr & EFI_MD_ATTR_WT)
569 		printf("WT ");
570 	if (p->md_attr & EFI_MD_ATTR_WB)
571 		printf("WB ");
572 	if (p->md_attr & EFI_MD_ATTR_UCE)
573 		printf("UCE ");
574 	if (p->md_attr & EFI_MD_ATTR_WP)
575 		printf("WP ");
576 	if (p->md_attr & EFI_MD_ATTR_RP)
577 		printf("RP ");
578 	if (p->md_attr & EFI_MD_ATTR_XP)
579 		printf("XP ");
580 	if (p->md_attr & EFI_MD_ATTR_NV)
581 		printf("NV ");
582 	if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
583 		printf("MORE_RELIABLE ");
584 	if (p->md_attr & EFI_MD_ATTR_RO)
585 		printf("RO ");
586 	if (p->md_attr & EFI_MD_ATTR_RT)
587 		printf("RUNTIME");
588 	printf("\n");
589 }
590 
/*
 * Print the whole EFI memory map as a table; used for verbose boots.
 */
static void
print_efi_map_entries(struct efi_map_header *efihdr)
{

	/* Column headers matching print_efi_map_entry()'s row format. */
	printf("%23s %12s %12s %8s %4s\n",
	    "Type", "Physical", "Virtual", "#Pages", "Attr");
	foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
}
599 
600 /*
601  * Map the passed in VA in EFI space to a void * using the efi memory table to
602  * find the PA and return it in the DMAP, if it exists. We're used between the
603  * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
 * tables. We assume that either the entry you are mapping fits within its page,
605  * or if it spills to the next page, that's contiguous in PA and in the DMAP.
606  * All observed tables obey the first part of this precondition.
607  */
/* Argument bundle for efi_early_map_entry(): "va" in, "pa" out. */
struct early_map_data
{
	vm_offset_t va;		/* EFI runtime VA to translate */
	vm_offset_t pa;		/* resulting PA; stays 0 if no entry matched */
};
613 
614 static void
615 efi_early_map_entry(struct efi_md *p, void *argp)
616 {
617 	struct early_map_data *emdp = argp;
618 	vm_offset_t s, e;
619 
620 	if (emdp->pa != 0)
621 		return;
622 	if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
623 		return;
624 	s = p->md_virt;
625 	e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
626 	if (emdp->va < s  || emdp->va >= e)
627 		return;
628 	emdp->pa = p->md_phys + (emdp->va - p->md_virt);
629 }
630 
631 static void *
632 efi_early_map(vm_offset_t va)
633 {
634 	struct early_map_data emd = { .va = va };
635 
636 	foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
637 	if (emd.pa == 0)
638 		return NULL;
639 	return (void *)PHYS_TO_DMAP(emd.pa);
640 }
641 
642 
643 /*
644  * When booted via kboot, the prior kernel will pass in reserved memory areas in
 * an EFI config table. We need to find that table and walk through it excluding
646  * the memory ranges in it. btw, this is called too early for the printf to do
647  * anything since msgbufp isn't initialized, let alone a console...
648  */
649 static void
650 exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
651 {
652 	struct efi_systbl *systbl;
653 	struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;
654 
655 	systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
656 	if (systbl == NULL) {
657 		printf("can't map systbl\n");
658 		return;
659 	}
660 	if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
661 		printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
662 		return;
663 	}
664 
665 	/*
666 	 * We don't yet have the pmap system booted enough to create a pmap for
667 	 * the efi firmware's preferred address space from the GetMemoryMap()
668 	 * table. The st_cfgtbl is a VA in this space, so we need to do the
669 	 * mapping ourselves to a kernel VA with efi_early_map. We assume that
670 	 * the cfgtbl entries don't span a page. Other pointers are PAs, as
671 	 * noted below.
672 	 */
673 	if (systbl->st_cfgtbl == 0)	/* Failsafe st_entries should == 0 in this case */
674 		return;
675 	for (int i = 0; i < systbl->st_entries; i++) {
676 		struct efi_cfgtbl *cfgtbl;
677 		struct linux_efi_memreserve *mr;
678 
679 		cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
680 		if (cfgtbl == NULL)
681 			panic("Can't map the config table entry %d\n", i);
682 		if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0)
683 			continue;
684 
685 		/*
686 		 * cfgtbl points are either VA or PA, depending on the GUID of
687 		 * the table. memreserve GUID pointers are PA and not converted
688 		 * after a SetVirtualAddressMap(). The list's mr_next pointer
689 		 * is also a PA.
690 		 */
691 		mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
692 			(vm_offset_t)cfgtbl->ct_data);
693 		while (true) {
694 			for (int j = 0; j < mr->mr_count; j++) {
695 				struct linux_efi_memreserve_entry *mre;
696 
697 				mre = &mr->mr_entry[j];
698 				physmem_exclude_region(mre->mre_base, mre->mre_size,
699 				    EXFLAG_NODUMP | EXFLAG_NOALLOC);
700 			}
701 			if (mr->mr_next == 0)
702 				break;
703 			mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
704 		};
705 	}
706 
707 }
708 
709 #ifdef FDT
/*
 * Locate the device tree blob (from loader metadata, falling back to the
 * statically linked copy with FDT_DTB_STATIC) and initialize the Open
 * Firmware layer with it.  Panics if OFW setup fails; only warns when no
 * DTB can be found at all.
 */
static void
try_load_dtb(caddr_t kmdp)
{
	vm_offset_t dtbp;

	dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
	/*
	 * In case the device tree blob was not retrieved (from metadata) try
	 * to use the statically embedded one.
	 */
	if (dtbp == 0)
		dtbp = (vm_offset_t)&fdt_static_dtb;
#endif

	if (dtbp == (vm_offset_t)NULL) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (OF_install(OFW_FDT, 0) == FALSE)
		panic("Cannot install FDT");

	if (OF_init((void *)dtbp) != 0)
		panic("OF_init failed with the found device tree");

	parse_fdt_bootargs();
}
740 #endif
741 
/*
 * Decide whether the platform is enumerated via FDT or ACPI and record
 * the choice in arm64_bus_method.  "kern.cfg.order" is a comma-separated
 * preference list ("acpi"/"fdt"); when it is unset, or names nothing
 * usable, the default is FDT if a device tree is present, else ACPI.
 *
 * Returns true when the resulting configuration is valid; false when the
 * user's order matched no available bus (the caller panics after the
 * console is up).
 */
static bool
bus_probe(void)
{
	bool has_acpi, has_fdt;
	char *order, *env;

	has_acpi = has_fdt = false;

#ifdef FDT
	/* A device tree is available if the OFW root node exists. */
	has_fdt = (OF_peer(0) != 0);
#endif
#ifdef DEV_ACPI
	/* ACPI is available if an ACPI root pointer is found. */
	has_acpi = (AcpiOsGetRootPointer() != 0);
#endif

	env = kern_getenv("kern.cfg.order");
	if (env != NULL) {
		order = env;
		while (order != NULL) {
			/* Match each name only as a complete list element. */
			if (has_acpi &&
			    strncmp(order, "acpi", 4) == 0 &&
			    (order[4] == ',' || order[4] == '\0')) {
				arm64_bus_method = ARM64_BUS_ACPI;
				break;
			}
			if (has_fdt &&
			    strncmp(order, "fdt", 3) == 0 &&
			    (order[3] == ',' || order[3] == '\0')) {
				arm64_bus_method = ARM64_BUS_FDT;
				break;
			}
			order = strchr(order, ',');
			if (order != NULL)
				order++;	/* Skip comma */
		}
		freeenv(env);

		/* If we set the bus method it is valid */
		if (arm64_bus_method != ARM64_BUS_NONE)
			return (true);
	}
	/* If no order or an invalid order was set use the default */
	if (arm64_bus_method == ARM64_BUS_NONE) {
		if (has_fdt)
			arm64_bus_method = ARM64_BUS_FDT;
		else if (has_acpi)
			arm64_bus_method = ARM64_BUS_ACPI;
	}

	/*
	 * If no option was set the default is valid, otherwise we are
	 * setting one to get cninit() working, then calling panic to tell
	 * the user about the invalid bus setup.
	 */
	return (env == NULL);
}
798 
/*
 * Probe the cache geometry and the "dc zva" block size.  When "dc zva"
 * is permitted, record its line size and switch pagezero to the
 * cache-assisted implementation.
 */
static void
cache_setup(void)
{
	int dczva_line_shift;
	uint32_t dczid_el0;

	identify_cache(READ_SPECIALREG(ctr_el0));

	dczid_el0 = READ_SPECIALREG(dczid_el0);

	/* Check if dc zva is not prohibited */
	if (dczid_el0 & DCZID_DZP)
		dczva_line_size = 0;
	else {
		/*
		 * DCZID_EL0.BS is log2 of the number of 4-byte words
		 * zeroed by "dc zva", so the line size in bytes is
		 * sizeof(int) << BS.
		 */
		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
		dczva_line_size = sizeof(int) << dczva_line_shift;

		/* Change pagezero function */
		pagezero = pagezero_cache;
	}
}
821 
/*
 * Pick a VM memory attribute for the physical address "pa" from the EFI
 * memory map: DEVICE for I/O ranges and for addresses not in the map,
 * otherwise the cacheability implied by the matching entry's attributes.
 * Falls back to WRITE_BACK when the loader supplied no EFI map.
 */
int
memory_mapping_mode(vm_paddr_t pa)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	if (efihdr == NULL)
		return (VM_MEMATTR_WRITE_BACK);

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	/* A zero descriptor size would make the division below trap. */
	if (efihdr->descriptor_size == 0)
		return (VM_MEMATTR_WRITE_BACK);
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (pa < p->md_phys ||
		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
			continue;
		if (p->md_type == EFI_MD_TYPE_IOMEM ||
		    p->md_type == EFI_MD_TYPE_IOPORT)
			return (VM_MEMATTR_DEVICE);
		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
		    p->md_type == EFI_MD_TYPE_RECLAIM)
			return (VM_MEMATTR_WRITE_BACK);
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			return (VM_MEMATTR_WRITE_THROUGH);
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			return (VM_MEMATTR_WRITE_COMBINING);
		break;
	}

	return (VM_MEMATTR_DEVICE);
}
863 
/*
 * Main arm64 machine-dependent initialization, run once on the boot CPU
 * with the parameters handed over in struct arm64_bootparams.  Parses
 * loader/FDT metadata, builds the physical memory lists, bootstraps pmap
 * and pcpu0, probes the bus method, brings up the console, then finishes
 * early-boot setup (proc0, debug hardware, KASAN/KCSAN).  The statement
 * order here is deliberate — later steps depend on earlier ones.
 */
void
initarm(struct arm64_bootparams *abp)
{
	struct efi_fb *efifb;
	struct pcpu *pcpup;
	char *env;
#ifdef FDT
	struct mem_region mem_regions[FDT_MEM_REGIONS];
	int mem_regions_sz;
	phandle_t root;
	char dts_version[255];
#endif
	vm_offset_t lastaddr;
	caddr_t kmdp;
	bool valid;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	/* Stash the boot exception level for has_hyp(). */
	boot_el = abp->boot_el;
	hcr_el2 = abp->hcr_el2;

	/* Parse loader or FDT boot parameters. Determine last used address. */
	lastaddr = parse_boot_param(abp);

	/* Find the kernel address */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");

	identify_cpu(0);
	identify_hypervisor_smbios();

	update_special_regs(0);

	link_elf_ireloc(kmdp);
#ifdef FDT
	try_load_dtb(kmdp);
#endif

	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	/* Load the physical memory ranges */
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr != NULL)
		add_efi_map_entries(efihdr);
#ifdef FDT
	else {
		/* Grab physical memory regions information from device tree. */
		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
		    NULL) != 0)
			panic("Cannot get physical memory regions");
		physmem_hardware_regions(mem_regions, mem_regions_sz);
	}
	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
		physmem_exclude_regions(mem_regions, mem_regions_sz,
		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
#endif

	/* Exclude the EFI framebuffer from our view of physical memory. */
	efifb = (struct efi_fb *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_FB);
	if (efifb != NULL)
		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
		    EXFLAG_NOALLOC);

	/* Set the pcpu data, this is needed by pmap_bootstrap */
	pcpup = &pcpu0;
	pcpu_init(pcpup, 0, sizeof(struct pcpu));

	/*
	 * Set the pcpu pointer with a backup in tpidr_el1 to be
	 * loaded when entering the kernel from userland.
	 */
	__asm __volatile(
	    "mov x18, %0 \n"
	    "msr tpidr_el1, %0" :: "r"(pcpup));

	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
	PCPU_SET(curthread, &thread0);
	PCPU_SET(midr, get_midr());

	/* Do basic tuning, hz etc */
	init_param1();

	cache_setup();
	pan_setup();

	/* Bootstrap enough of pmap to enter the kernel proper */
	pmap_bootstrap(KERNBASE - abp->kern_delta, lastaddr - KERNBASE);
	/* Exclude entries needed in the DMAP region, but not phys_avail */
	if (efihdr != NULL)
		exclude_efi_map_entries(efihdr);
	/* Do the same for reserve entries in the EFI MEMRESERVE table */
	if (efi_systbl_phys != 0)
		exclude_efi_memreserve(efi_systbl_phys);

	/*
	 * We carefully bootstrap the sanitizer map after we've excluded
	 * absolutely everything else that could impact phys_avail.  There's not
	 * always enough room for the initial shadow map after the kernel, so
	 * we'll end up searching for segments that we can safely use.  Those
	 * segments also get excluded from phys_avail.
	 */
#if defined(KASAN)
	pmap_bootstrap_san(KERNBASE - abp->kern_delta);
#endif

	physmem_init_kernel_globals();

	devmap_bootstrap(0, NULL);

	valid = bus_probe();

	cninit();
	set_ttbr0(abp->kern_ttbr0);
	cpu_tlb_flushID();

	/* Deferred until after cninit() so the message is visible. */
	if (!valid)
		panic("Invalid bus configuration: %s",
		    kern_getenv("kern.cfg.order"));

	/*
	 * Check if pointer authentication is available on this system, and
	 * if so enable its use. This needs to be called before init_proc0
	 * as that will configure the thread0 pointer authentication keys.
	 */
	ptrauth_init();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

	init_proc0(abp->kern_stack);
	msgbufinit(msgbufp, msgbufsize);
	mutex_init();
	init_param2(physmem);

	dbg_init();
	kdb_init();
#ifdef KDB
	if ((boothowto & RB_KDB) != 0)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
	pan_enable();

	kcsan_cpu_init(0);
	kasan_init();

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	/* Warn if the loaded DTB doesn't match the version the kernel expects. */
	if (arm64_bus_method == ARM64_BUS_FDT) {
		root = OF_finddevice("/");
		if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
				printf("WARNING: DTB version is %s while kernel expects %s, "
				    "please update the DTB in the ESP\n",
				    dts_version,
				    LINUX_DTS_VERSION);
		} else {
			printf("WARNING: Cannot find freebsd,dts-version property, "
			    "cannot check DTB compliance\n");
		}
	}
#endif

	if (boothowto & RB_VERBOSE) {
		if (efihdr != NULL)
			print_efi_map_entries(efihdr);
		physmem_print_tables();
	}

	early_boot = 0;

	TSEXIT();
}
1047 
/*
 * Initialize the self-hosted debug hardware for this CPU.
 */
void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here. */
}
1060 
1061 #ifdef DDB
1062 #include <ddb/ddb.h>
1063 
/*
 * DDB "show specialregs": dump the readable system/special registers of
 * the current CPU, one per line.
 */
DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
#define	PRINT_REG(reg)	\
    db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}
1138 
1139 DB_SHOW_COMMAND(vtop, db_show_vtop)
1140 {
1141 	uint64_t phys;
1142 
1143 	if (have_addr) {
1144 		phys = arm64_address_translate_s1e1r(addr);
1145 		db_printf("EL1 physical address reg (read):  0x%016lx\n", phys);
1146 		phys = arm64_address_translate_s1e1w(addr);
1147 		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
1148 		phys = arm64_address_translate_s1e0r(addr);
1149 		db_printf("EL0 physical address reg (read):  0x%016lx\n", phys);
1150 		phys = arm64_address_translate_s1e0w(addr);
1151 		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
1152 	} else
1153 		db_printf("show vtop <virt_addr>\n");
1154 }
1155 #endif
1156