xref: /freebsd/sys/arm64/arm64/machdep.c (revision bdd1243d)
1 /*-
2  * Copyright (c) 2014 Andrew Turner
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 #include "opt_acpi.h"
29 #include "opt_platform.h"
30 #include "opt_ddb.h"
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/asan.h>
38 #include <sys/buf.h>
39 #include <sys/bus.h>
40 #include <sys/cons.h>
41 #include <sys/cpu.h>
42 #include <sys/csan.h>
43 #include <sys/devmap.h>
44 #include <sys/efi.h>
45 #include <sys/exec.h>
46 #include <sys/imgact.h>
47 #include <sys/kdb.h>
48 #include <sys/kernel.h>
49 #include <sys/ktr.h>
50 #include <sys/limits.h>
51 #include <sys/linker.h>
52 #include <sys/msgbuf.h>
53 #include <sys/pcpu.h>
54 #include <sys/physmem.h>
55 #include <sys/proc.h>
56 #include <sys/ptrace.h>
57 #include <sys/reboot.h>
58 #include <sys/reg.h>
59 #include <sys/rwlock.h>
60 #include <sys/sched.h>
61 #include <sys/signalvar.h>
62 #include <sys/syscallsubr.h>
63 #include <sys/sysent.h>
64 #include <sys/sysproto.h>
65 #include <sys/ucontext.h>
66 #include <sys/vdso.h>
67 #include <sys/vmmeter.h>
68 
69 #include <vm/vm.h>
70 #include <vm/vm_param.h>
71 #include <vm/vm_kern.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_phys.h>
75 #include <vm/pmap.h>
76 #include <vm/vm_map.h>
77 #include <vm/vm_pager.h>
78 
79 #include <machine/armreg.h>
80 #include <machine/cpu.h>
81 #include <machine/debug_monitor.h>
82 #include <machine/hypervisor.h>
83 #include <machine/kdb.h>
84 #include <machine/machdep.h>
85 #include <machine/metadata.h>
86 #include <machine/md_var.h>
87 #include <machine/pcb.h>
88 #include <machine/undefined.h>
89 #include <machine/vmparam.h>
90 
91 #ifdef VFP
92 #include <machine/vfp.h>
93 #endif
94 
95 #ifdef DEV_ACPI
96 #include <contrib/dev/acpica/include/acpi.h>
97 #include <machine/acpica_machdep.h>
98 #endif
99 
100 #ifdef FDT
101 #include <dev/fdt/fdt_common.h>
102 #include <dev/ofw/openfirm.h>
103 #endif
104 
105 #include <dev/smbios/smbios.h>
106 
107 _Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
108 _Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
109     "pcb_fpusaved changed offset");
110 _Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
111     "pcb_fpustate changed offset");
112 
113 enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;
114 
115 /*
116  * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
117  * could relocate this, but will need to keep the same virtual address as
 * it's referenced by the EARLY_COUNTER macro.
119  */
120 struct pcpu pcpu0;
121 
122 #if defined(PERTHREAD_SSP)
123 /*
124  * The boot SSP canary. Will be replaced with a per-thread canary when
125  * scheduling has started.
126  */
127 uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
128 #endif
129 
130 static struct trapframe proc0_tf;
131 
132 int early_boot = 1;
133 int cold = 1;
134 static int boot_el;
135 static uint64_t hcr_el2;
136 
137 struct kva_md_info kmi;
138 
139 int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
140 int has_pan;
141 
142 /*
143  * Physical address of the EFI System Table. Stashed from the metadata hints
144  * passed into the kernel and used by the EFI code to call runtime services.
145  */
146 vm_paddr_t efi_systbl_phys;
147 static struct efi_map_header *efihdr;
148 
149 /* pagezero_* implementations are provided in support.S */
150 void pagezero_simple(void *);
151 void pagezero_cache(void *);
152 
153 /* pagezero_simple is default pagezero */
154 void (*pagezero)(void *p) = pagezero_simple;
155 
156 int (*apei_nmi)(void);
157 
#if defined(PERTHREAD_SSP_WARNING)
/*
 * Warn that the kernel was built with per-thread SSP enabled but the
 * compiler is too old to support it.  Registered twice so the warning
 * appears both at the very start of boot (with the copyright banner)
 * and again at the end where it is harder to miss.
 */
static void
print_ssp_warning(void *data __unused)
{
	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif
167 
168 static void
169 pan_setup(void)
170 {
171 	uint64_t id_aa64mfr1;
172 
173 	id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
174 	if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE)
175 		has_pan = 1;
176 }
177 
/*
 * Turn on PAN for the current exception level when pan_setup() found
 * hardware support for it.
 */
void
pan_enable(void)
{

	/*
	 * The LLVM integrated assembler doesn't understand the PAN
	 * PSTATE field. Because of this we need to manually create
	 * the instruction in an asm block. This is equivalent to:
	 * msr pan, #1
	 *
	 * This sets the PAN bit, stopping the kernel from accessing
	 * memory when userspace can also access it unless the kernel
	 * uses the userspace load/store instructions.
	 */
	if (has_pan) {
		/* Clear SCTLR_EL1.SPAN so exception entry sets PSTATE.PAN. */
		WRITE_SPECIALREG(sctlr_el1,
		    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
		__asm __volatile(".inst 0xd500409f | (0x1 << 8)");
	}
}
198 
199 bool
200 has_hyp(void)
201 {
202 
203 	/*
204 	 * XXX The E2H check is wrong, but it's close enough for now.  Needs to
205 	 * be re-evaluated once we're running regularly in EL2.
206 	 */
207 	return (boot_el == 2 && (hcr_el2 & HCR_E2H) == 0);
208 }
209 
/*
 * SI_SUB_CPU initialisation: report physical/available memory, install
 * undefined-instruction handlers and CPU errata workarounds, and set up
 * the kernel VM submaps and buffer cache.
 */
static void
cpu_startup(void *dummy)
{
	vm_paddr_t size;
	int i;

	printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
	    ptoa((uintmax_t)realmem) / 1024 / 1024);

	if (bootverbose) {
		printf("Physical memory chunk(s):\n");
		/* phys_avail[] is start/end pairs, terminated by a 0 end. */
		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
			size = phys_avail[i + 1] - phys_avail[i];
			printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
			    (uintmax_t)phys_avail[i],
			    (uintmax_t)phys_avail[i + 1] - 1,
			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
		}
	}

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)vm_free_count()),
	    ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);

	undef_init();
	install_cpu_errata();

	/* Kernel submaps, then the buffer cache on top of them. */
	vm_ksubmap_init(&kmi);
	bufinit();
	vm_pager_bufferinit();
}
241 
242 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
243 
static void
late_ifunc_resolve(void *dummy __unused)
{
	/* Perform the second, late ifunc relocation pass for the kernel. */
	link_elf_late_ireloc();
}
249 SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);
250 
/*
 * MD hook to kick an idle CPU.  Nothing is required here: the idle loop
 * waits in WFI, which completes when an interrupt becomes pending.
 * Always reports that no explicit wakeup was performed.
 */
int
cpu_idle_wakeup(int cpu)
{
	return (0);
}
257 
/*
 * Idle the current CPU.  Interrupts are disabled for the whole body via
 * spinlock_enter(), so the runnable check and the wfi cannot race with a
 * wakeup: a pending interrupt still terminates the wfi.
 */
void
cpu_idle(int busy)
{

	spinlock_enter();
	if (!busy)
		cpu_idleclock();
	if (!sched_runnable())
		/* dsb orders prior memory accesses before waiting. */
		__asm __volatile(
		    "dsb sy \n"
		    "wfi    \n");
	if (!busy)
		cpu_activeclock();
	spinlock_exit();
}
273 
/*
 * Final halt path: disable interrupts and spin in a low-power wait
 * forever.  Never returns.
 */
void
cpu_halt(void)
{
	/* We should have shutdown by now, if not enter a low power sleep */
	intr_disable();
	for (;;)
		__asm __volatile("wfi");
}
284 
/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{

	/* ARM64TODO TBD -- currently a no-op on arm64. */
}
295 
296 /* Get current clock frequency for the given CPU ID. */
297 int
298 cpu_est_clockrate(int cpu_id, uint64_t *rate)
299 {
300 	struct pcpu *pc;
301 
302 	pc = pcpu_find(cpu_id);
303 	if (pc == NULL || rate == NULL)
304 		return (EINVAL);
305 
306 	if (pc->pc_clock == 0)
307 		return (EOPNOTSUPP);
308 
309 	*rate = pc->pc_clock;
310 	return (0);
311 }
312 
313 void
314 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
315 {
316 
317 	pcpu->pc_acpi_id = 0xffffffff;
318 	pcpu->pc_mpidr = UINT64_MAX;
319 }
320 
/*
 * Enter a spinlock section: on the first (outermost) entry disable
 * interrupts, saving the DAIF state for spinlock_exit() to restore, and
 * pin the thread with critical_enter().  Nested entries only bump the
 * per-thread count.
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		/* Disable interrupts before publishing the count. */
		daif = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_daif = daif;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}
336 
/*
 * Leave a spinlock section: drop one nesting level and, when the
 * outermost level is released, exit the critical section and restore
 * the DAIF state saved by spinlock_enter().
 */
void
spinlock_exit(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	/* Read the saved state before the count is decremented. */
	daif = td->td_md.md_saved_daif;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(daif);
	}
}
351 
/*
 * Construct a PCB from a trapframe. This is called from kdb_trap() where
 * we want to start a backtrace from the function that caused us to enter
 * the debugger. We have the context in the trapframe, but base the trace
 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 * enough for a backtrace.
 */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{
	int i;

	/* Copy the subset of x registers the PCB tracks (from PCB_X_START). */
	for (i = 0; i < nitems(pcb->pcb_x); i++)
		pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];

	/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
	pcb->pcb_x[PCB_LR] = tf->tf_elr;
	pcb->pcb_sp = tf->tf_sp;
}
371 
/*
 * Initialise proc0/thread0: link them together, install the kernel
 * stack, place the PCB at the top of that stack, and point the boot
 * CPU's pcpu at the new PCB.
 */
static void
init_proc0(vm_offset_t kstack)
{
	struct pcpu *pcpup;

	pcpup = cpuid_to_pcpu[0];
	MPASS(pcpup != NULL);

	proc_linkup0(&proc0, &thread0);
	thread0.td_kstack = kstack;
	thread0.td_kstack_pages = kstack_pages;
#if defined(PERTHREAD_SSP)
	thread0.td_md.md_canary = boot_canary;
#endif
	/* The PCB occupies the top of the kernel stack. */
	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_fpflags = 0;
	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
	/* UINT_MAX: no CPU holds saved VFP state for thread0 yet. */
	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
	thread0.td_frame = &proc0_tf;
	ptrauth_thread0(&thread0);
	pcpup->pc_curpcb = thread0.td_pcb;

	/*
	 * Unmask SError exceptions. They are used to signal a RAS failure,
	 * or other hardware error.
	 */
	serror_enable();
}
402 
403 /*
404  * Get an address to be used to write to kernel data that may be mapped
405  * read-only, e.g. to patch kernel code.
406  */
407 bool
408 arm64_get_writable_addr(vm_offset_t addr, vm_offset_t *out)
409 {
410 	vm_paddr_t pa;
411 
412 	/* Check if the page is writable */
413 	if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
414 		*out = addr;
415 		return (true);
416 	}
417 
418 	/*
419 	 * Find the physical address of the given page.
420 	 */
421 	if (!pmap_klookup(addr, &pa)) {
422 		return (false);
423 	}
424 
425 	/*
426 	 * If it is within the DMAP region and is writable use that.
427 	 */
428 	if (PHYS_IN_DMAP(pa)) {
429 		addr = PHYS_TO_DMAP(pa);
430 		if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
431 			*out = addr;
432 			return (true);
433 		}
434 	}
435 
436 	return (false);
437 }
438 
439 typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);
440 
441 static void
442 foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
443 {
444 	struct efi_md *map, *p;
445 	size_t efisz;
446 	int ndesc, i;
447 
448 	/*
449 	 * Memory map data provided by UEFI via the GetMemoryMap
450 	 * Boot Services API.
451 	 */
452 	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
453 	map = (struct efi_md *)((uint8_t *)efihdr + efisz);
454 
455 	if (efihdr->descriptor_size == 0)
456 		return;
457 	ndesc = efihdr->memory_size / efihdr->descriptor_size;
458 
459 	for (i = 0, p = map; i < ndesc; i++,
460 	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
461 		cb(p, argp);
462 	}
463 }
464 
/*
 * Handle the EFI memory map list.
 *
 * We will make two passes at this, the first (exclude == false) to populate
 * physmem with valid physical memory ranges from recognized map entry types.
 * In the second pass we will exclude memory ranges from physmem which must not
 * be used for general allocations, either because they are used by runtime
 * firmware or otherwise reserved.
 *
 * Adding the runtime-reserved memory ranges to physmem and excluding them
 * later ensures that they are included in the DMAP, but excluded from
 * phys_avail[].
 *
 * Entry types not explicitly listed here are ignored and not mapped.
 */
static void
handle_efi_map_entry(struct efi_md *p, void *argp)
{
	/* argp selects which pass we are in; see the comment above. */
	bool exclude = *(bool *)argp;

	switch (p->md_type) {
	case EFI_MD_TYPE_RECLAIM:
		/*
		 * The recommended location for ACPI tables. Map into the
		 * DMAP so we can access them from userspace via /dev/mem.
		 */
	case EFI_MD_TYPE_RT_CODE:
		/*
		 * Some UEFI implementations put the system table in the
		 * runtime code section. Include it in the DMAP, but will
		 * be excluded from phys_avail.
		 */
	case EFI_MD_TYPE_RT_DATA:
		/*
		 * Runtime data will be excluded after the DMAP
		 * region is created to stop it from being added
		 * to phys_avail.
		 */
		if (exclude) {
			physmem_exclude_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
			break;
		}
		/* FALLTHROUGH */
	case EFI_MD_TYPE_CODE:
	case EFI_MD_TYPE_DATA:
	case EFI_MD_TYPE_BS_CODE:
	case EFI_MD_TYPE_BS_DATA:
	case EFI_MD_TYPE_FREE:
		/*
		 * We're allowed to use any entry with these types.
		 */
		if (!exclude)
			physmem_hardware_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE);
		break;
	default:
		/* Other types shall not be handled by physmem. */
		break;
	}
}
526 
527 static void
528 add_efi_map_entries(struct efi_map_header *efihdr)
529 {
530 	bool exclude = false;
531 	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
532 }
533 
534 static void
535 exclude_efi_map_entries(struct efi_map_header *efihdr)
536 {
537 	bool exclude = true;
538 	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
539 }
540 
541 static void
542 print_efi_map_entry(struct efi_md *p, void *argp __unused)
543 {
544 	const char *type;
545 	static const char *types[] = {
546 		"Reserved",
547 		"LoaderCode",
548 		"LoaderData",
549 		"BootServicesCode",
550 		"BootServicesData",
551 		"RuntimeServicesCode",
552 		"RuntimeServicesData",
553 		"ConventionalMemory",
554 		"UnusableMemory",
555 		"ACPIReclaimMemory",
556 		"ACPIMemoryNVS",
557 		"MemoryMappedIO",
558 		"MemoryMappedIOPortSpace",
559 		"PalCode",
560 		"PersistentMemory"
561 	};
562 
563 	if (p->md_type < nitems(types))
564 		type = types[p->md_type];
565 	else
566 		type = "<INVALID>";
567 	printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
568 	    p->md_virt, p->md_pages);
569 	if (p->md_attr & EFI_MD_ATTR_UC)
570 		printf("UC ");
571 	if (p->md_attr & EFI_MD_ATTR_WC)
572 		printf("WC ");
573 	if (p->md_attr & EFI_MD_ATTR_WT)
574 		printf("WT ");
575 	if (p->md_attr & EFI_MD_ATTR_WB)
576 		printf("WB ");
577 	if (p->md_attr & EFI_MD_ATTR_UCE)
578 		printf("UCE ");
579 	if (p->md_attr & EFI_MD_ATTR_WP)
580 		printf("WP ");
581 	if (p->md_attr & EFI_MD_ATTR_RP)
582 		printf("RP ");
583 	if (p->md_attr & EFI_MD_ATTR_XP)
584 		printf("XP ");
585 	if (p->md_attr & EFI_MD_ATTR_NV)
586 		printf("NV ");
587 	if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
588 		printf("MORE_RELIABLE ");
589 	if (p->md_attr & EFI_MD_ATTR_RO)
590 		printf("RO ");
591 	if (p->md_attr & EFI_MD_ATTR_RT)
592 		printf("RUNTIME");
593 	printf("\n");
594 }
595 
/*
 * Print the whole EFI memory map in human-readable form: a column
 * header followed by one line per descriptor (see print_efi_map_entry).
 */
static void
print_efi_map_entries(struct efi_map_header *efihdr)
{

	printf("%23s %12s %12s %8s %4s\n",
	    "Type", "Physical", "Virtual", "#Pages", "Attr");
	foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
}
604 
605 /*
606  * Map the passed in VA in EFI space to a void * using the efi memory table to
607  * find the PA and return it in the DMAP, if it exists. We're used between the
 * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
 * tables.  We assume that either the entry you are mapping fits within its page,
610  * or if it spills to the next page, that's contiguous in PA and in the DMAP.
611  * All observed tables obey the first part of this precondition.
612  */
613 struct early_map_data
614 {
615 	vm_offset_t va;
616 	vm_offset_t pa;
617 };
618 
619 static void
620 efi_early_map_entry(struct efi_md *p, void *argp)
621 {
622 	struct early_map_data *emdp = argp;
623 	vm_offset_t s, e;
624 
625 	if (emdp->pa != 0)
626 		return;
627 	if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
628 		return;
629 	s = p->md_virt;
630 	e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
631 	if (emdp->va < s  || emdp->va >= e)
632 		return;
633 	emdp->pa = p->md_phys + (emdp->va - p->md_virt);
634 }
635 
636 static void *
637 efi_early_map(vm_offset_t va)
638 {
639 	struct early_map_data emd = { .va = va };
640 
641 	foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
642 	if (emd.pa == 0)
643 		return NULL;
644 	return (void *)PHYS_TO_DMAP(emd.pa);
645 }
646 
647 
648 /*
649  * When booted via kboot, the prior kernel will pass in reserved memory areas in
650  * a EFI config table. We need to find that table and walk through it excluding
651  * the memory ranges in it. btw, this is called too early for the printf to do
652  * anything since msgbufp isn't initialized, let alone a console...
653  */
654 static void
655 exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
656 {
657 	struct efi_systbl *systbl;
658 	struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;
659 
660 	systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
661 	if (systbl == NULL) {
662 		printf("can't map systbl\n");
663 		return;
664 	}
665 	if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
666 		printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
667 		return;
668 	}
669 
670 	/*
671 	 * We don't yet have the pmap system booted enough to create a pmap for
672 	 * the efi firmware's preferred address space from the GetMemoryMap()
673 	 * table. The st_cfgtbl is a VA in this space, so we need to do the
674 	 * mapping ourselves to a kernel VA with efi_early_map. We assume that
675 	 * the cfgtbl entries don't span a page. Other pointers are PAs, as
676 	 * noted below.
677 	 */
678 	if (systbl->st_cfgtbl == 0)	/* Failsafe st_entries should == 0 in this case */
679 		return;
680 	for (int i = 0; i < systbl->st_entries; i++) {
681 		struct efi_cfgtbl *cfgtbl;
682 		struct linux_efi_memreserve *mr;
683 
684 		cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
685 		if (cfgtbl == NULL)
686 			panic("Can't map the config table entry %d\n", i);
687 		if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0)
688 			continue;
689 
690 		/*
691 		 * cfgtbl points are either VA or PA, depending on the GUID of
692 		 * the table. memreserve GUID pointers are PA and not converted
693 		 * after a SetVirtualAddressMap(). The list's mr_next pointer
694 		 * is also a PA.
695 		 */
696 		mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
697 			(vm_offset_t)cfgtbl->ct_data);
698 		while (true) {
699 			for (int j = 0; j < mr->mr_count; j++) {
700 				struct linux_efi_memreserve_entry *mre;
701 
702 				mre = &mr->mr_entry[j];
703 				physmem_exclude_region(mre->mre_base, mre->mre_size,
704 				    EXFLAG_NODUMP | EXFLAG_NOALLOC);
705 			}
706 			if (mr->mr_next == 0)
707 				break;
708 			mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
709 		};
710 	}
711 
712 }
713 
#ifdef FDT
/*
 * Locate the flattened device tree blob -- from loader metadata, or the
 * statically embedded copy when built with FDT_DTB_STATIC -- and use it
 * to initialise the Open Firmware FDT backend.
 */
static void
try_load_dtb(caddr_t kmdp)
{
	vm_offset_t dtbp;

	dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
	/*
	 * In case the device tree blob was not retrieved (from metadata) try
	 * to use the statically embedded one.
	 */
	if (dtbp == 0)
		dtbp = (vm_offset_t)&fdt_static_dtb;
#endif

	if (dtbp == (vm_offset_t)NULL) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (OF_install(OFW_FDT, 0) == FALSE)
		panic("Cannot install FDT");

	if (OF_init((void *)dtbp) != 0)
		panic("OF_init failed with the found device tree");

	/* Parse boot arguments supplied via the device tree. */
	parse_fdt_bootargs();
}
#endif
746 
/*
 * Decide whether device enumeration uses ACPI or FDT.  Honours the
 * comma-separated "kern.cfg.order" environment variable when it names an
 * available method; otherwise defaults to FDT, then ACPI.  Returns false
 * when an order was given but matched nothing usable, so the caller can
 * panic once the console is up.
 */
static bool
bus_probe(void)
{
	bool has_acpi, has_fdt;
	char *order, *env;

	has_acpi = has_fdt = false;

#ifdef FDT
	has_fdt = (OF_peer(0) != 0);
#endif
#ifdef DEV_ACPI
	has_acpi = (AcpiOsGetRootPointer() != 0);
#endif

	env = kern_getenv("kern.cfg.order");
	if (env != NULL) {
		order = env;
		/* Walk the list until a token names an available method. */
		while (order != NULL) {
			if (has_acpi &&
			    strncmp(order, "acpi", 4) == 0 &&
			    (order[4] == ',' || order[4] == '\0')) {
				arm64_bus_method = ARM64_BUS_ACPI;
				break;
			}
			if (has_fdt &&
			    strncmp(order, "fdt", 3) == 0 &&
			    (order[3] == ',' || order[3] == '\0')) {
				arm64_bus_method = ARM64_BUS_FDT;
				break;
			}
			order = strchr(order, ',');
			if (order != NULL)
				order++;	/* Skip comma */
		}
		freeenv(env);

		/* If we set the bus method it is valid */
		if (arm64_bus_method != ARM64_BUS_NONE)
			return (true);
	}
	/* If no order or an invalid order was set use the default */
	if (arm64_bus_method == ARM64_BUS_NONE) {
		if (has_fdt)
			arm64_bus_method = ARM64_BUS_FDT;
		else if (has_acpi)
			arm64_bus_method = ARM64_BUS_ACPI;
	}

	/*
	 * If no option was set the default is valid, otherwise we are
	 * setting one to get cninit() working, then calling panic to tell
	 * the user about the invalid bus setup.
	 * (env is only compared against NULL here, never dereferenced
	 * after freeenv(), so this use is safe.)
	 */
	return (env == NULL);
}
803 
/*
 * Discover cache geometry and, when the CPU permits "dc zva", record its
 * block size and switch the pagezero implementation to the variant that
 * uses it.
 */
static void
cache_setup(void)
{
	int dczva_line_shift;
	uint32_t dczid_el0;

	identify_cache(READ_SPECIALREG(ctr_el0));

	dczid_el0 = READ_SPECIALREG(dczid_el0);

	/* Check if dc zva is not prohibited */
	if (dczid_el0 & DCZID_DZP)
		dczva_line_size = 0;
	else {
		/* DCZID_EL0.BS gives log2 of the block size in 4-byte words. */
		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
		dczva_line_size = sizeof(int) << dczva_line_shift;

		/* Change pagezero function */
		pagezero = pagezero_cache;
	}
}
826 
/*
 * Return the VM memory attribute to use when mapping the physical
 * address 'pa', derived from the EFI memory map when the loader passed
 * one.  Falls back to write-back when no map exists, and to device
 * memory for addresses outside (or uncacheable within) the map.
 */
int
memory_mapping_mode(vm_paddr_t pa)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	if (efihdr == NULL)
		return (VM_MEMATTR_WRITE_BACK);

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return (VM_MEMATTR_WRITE_BACK);
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		/* Skip descriptors that don't cover 'pa'. */
		if (pa < p->md_phys ||
		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
			continue;
		if (p->md_type == EFI_MD_TYPE_IOMEM ||
		    p->md_type == EFI_MD_TYPE_IOPORT)
			return (VM_MEMATTR_DEVICE);
		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
		    p->md_type == EFI_MD_TYPE_RECLAIM)
			return (VM_MEMATTR_WRITE_BACK);
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			return (VM_MEMATTR_WRITE_THROUGH);
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			return (VM_MEMATTR_WRITE_COMBINING);
		break;
	}

	return (VM_MEMATTR_DEVICE);
}
868 
/*
 * Early machine-dependent initialisation for the boot CPU: parse loader
 * metadata, discover physical memory (EFI map or FDT), bootstrap the
 * pmap and pcpu, bring up the console, and prepare proc0 so mi_startup()
 * can take over.
 */
void
initarm(struct arm64_bootparams *abp)
{
	struct efi_fb *efifb;
	struct pcpu *pcpup;
	char *env;
#ifdef FDT
	struct mem_region mem_regions[FDT_MEM_REGIONS];
	int mem_regions_sz;
	phandle_t root;
	char dts_version[255];
#endif
	vm_offset_t lastaddr;
	caddr_t kmdp;
	bool valid;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	/* Stash the boot EL and HCR_EL2 for has_hyp() and friends. */
	boot_el = abp->boot_el;
	hcr_el2 = abp->hcr_el2;

	/* Parse loader or FDT boot parameters. Determine last used address. */
	lastaddr = parse_boot_param(abp);

	/* Find the kernel address */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");

	identify_cpu(0);
	identify_hypervisor_smbios();

	update_special_regs(0);

	link_elf_ireloc(kmdp);
#ifdef FDT
	try_load_dtb(kmdp);
#endif

	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	/* Load the physical memory ranges */
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr != NULL)
		add_efi_map_entries(efihdr);
#ifdef FDT
	else {
		/* Grab physical memory regions information from device tree. */
		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
		    NULL) != 0)
			panic("Cannot get physical memory regions");
		physmem_hardware_regions(mem_regions, mem_regions_sz);
	}
	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
		physmem_exclude_regions(mem_regions, mem_regions_sz,
		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
#endif

	/* Exclude the EFI framebuffer from our view of physical memory. */
	efifb = (struct efi_fb *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_FB);
	if (efifb != NULL)
		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
		    EXFLAG_NOALLOC);

	/* Set the pcpu data, this is needed by pmap_bootstrap */
	pcpup = &pcpu0;
	pcpu_init(pcpup, 0, sizeof(struct pcpu));

	/*
	 * Set the pcpu pointer with a backup in tpidr_el1 to be
	 * loaded when entering the kernel from userland.
	 */
	__asm __volatile(
	    "mov x18, %0 \n"
	    "msr tpidr_el1, %0" :: "r"(pcpup));

	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
	PCPU_SET(curthread, &thread0);
	PCPU_SET(midr, get_midr());

	/* Do basic tuning, hz etc */
	init_param1();

	cache_setup();
	pan_setup();

	/* Bootstrap enough of pmap  to enter the kernel proper */
	pmap_bootstrap(KERNBASE - abp->kern_delta, lastaddr - KERNBASE);
	/* Exclude entries needed in the DMAP region, but not phys_avail */
	if (efihdr != NULL)
		exclude_efi_map_entries(efihdr);
	/*  Do the same for reserve entries in the EFI MEMRESERVE table */
	if (efi_systbl_phys != 0)
		exclude_efi_memreserve(efi_systbl_phys);

	/*
	 * We carefully bootstrap the sanitizer map after we've excluded
	 * absolutely everything else that could impact phys_avail.  There's not
	 * always enough room for the initial shadow map after the kernel, so
	 * we'll end up searching for segments that we can safely use.  Those
	 * segments also get excluded from phys_avail.
	 */
#if defined(KASAN)
	pmap_bootstrap_san(KERNBASE - abp->kern_delta);
#endif

	physmem_init_kernel_globals();

	devmap_bootstrap(0, NULL);

	valid = bus_probe();

	cninit();
	set_ttbr0(abp->kern_ttbr0);
	cpu_tlb_flushID();

	/* Only panic now that the console can report why. */
	if (!valid)
		panic("Invalid bus configuration: %s",
		    kern_getenv("kern.cfg.order"));

	/*
	 * Check if pointer authentication is available on this system, and
	 * if so enable its use. This needs to be called before init_proc0
	 * as that will configure the thread0 pointer authentication keys.
	 */
	ptrauth_init();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

	init_proc0(abp->kern_stack);
	msgbufinit(msgbufp, msgbufsize);
	mutex_init();
	init_param2(physmem);

	dbg_init();
	kdb_init();
#ifdef KDB
	if ((boothowto & RB_KDB) != 0)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
	pan_enable();

	kcsan_cpu_init(0);
	kasan_init();

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	if (arm64_bus_method == ARM64_BUS_FDT) {
		root = OF_finddevice("/");
		if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
				printf("WARNING: DTB version is %s while kernel expects %s, "
				    "please update the DTB in the ESP\n",
				    dts_version,
				    LINUX_DTS_VERSION);
		} else {
			printf("WARNING: Cannot find freebsd,dts-version property, "
			    "cannot check DTB compliance\n");
		}
	}
#endif

	if (boothowto & RB_VERBOSE) {
		if (efihdr != NULL)
			print_efi_map_entries(efihdr);
		physmem_print_tables();
	}

	early_boot = 0;

	TSEXIT();
}
1052 
/*
 * Initialise the self-hosted debug hardware on the boot CPU.
 */
void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here. */
}
1065 
1066 #ifdef DDB
1067 #include <ddb/ddb.h>
1068 
/* DDB "show specialregs": dump the EL0/EL1 system registers. */
DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
#define	PRINT_REG(reg)	\
    db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}
1143 
1144 DB_SHOW_COMMAND(vtop, db_show_vtop)
1145 {
1146 	uint64_t phys;
1147 
1148 	if (have_addr) {
1149 		phys = arm64_address_translate_s1e1r(addr);
1150 		db_printf("EL1 physical address reg (read):  0x%016lx\n", phys);
1151 		phys = arm64_address_translate_s1e1w(addr);
1152 		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
1153 		phys = arm64_address_translate_s1e0r(addr);
1154 		db_printf("EL0 physical address reg (read):  0x%016lx\n", phys);
1155 		phys = arm64_address_translate_s1e0w(addr);
1156 		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
1157 	} else
1158 		db_printf("show vtop <virt_addr>\n");
1159 }
1160 #endif
1161