xref: /freebsd/sys/arm64/arm64/machdep.c (revision 1edb7116)
1 /*-
2  * Copyright (c) 2014 Andrew Turner
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 #include "opt_acpi.h"
29 #include "opt_kstack_pages.h"
30 #include "opt_platform.h"
31 #include "opt_ddb.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/asan.h>
36 #include <sys/buf.h>
37 #include <sys/bus.h>
38 #include <sys/cons.h>
39 #include <sys/cpu.h>
40 #include <sys/csan.h>
41 #include <sys/devmap.h>
42 #include <sys/efi.h>
43 #include <sys/exec.h>
44 #include <sys/imgact.h>
45 #include <sys/kdb.h>
46 #include <sys/kernel.h>
47 #include <sys/ktr.h>
48 #include <sys/limits.h>
49 #include <sys/linker.h>
50 #include <sys/msan.h>
51 #include <sys/msgbuf.h>
52 #include <sys/pcpu.h>
53 #include <sys/physmem.h>
54 #include <sys/proc.h>
55 #include <sys/ptrace.h>
56 #include <sys/reboot.h>
57 #include <sys/reg.h>
58 #include <sys/rwlock.h>
59 #include <sys/sched.h>
60 #include <sys/signalvar.h>
61 #include <sys/syscallsubr.h>
62 #include <sys/sysent.h>
63 #include <sys/sysproto.h>
64 #include <sys/ucontext.h>
65 #include <sys/vdso.h>
66 #include <sys/vmmeter.h>
67 
68 #include <vm/vm.h>
69 #include <vm/vm_param.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_phys.h>
74 #include <vm/pmap.h>
75 #include <vm/vm_map.h>
76 #include <vm/vm_pager.h>
77 
78 #include <machine/armreg.h>
79 #include <machine/cpu.h>
80 #include <machine/debug_monitor.h>
81 #include <machine/hypervisor.h>
82 #include <machine/kdb.h>
83 #include <machine/machdep.h>
84 #include <machine/metadata.h>
85 #include <machine/md_var.h>
86 #include <machine/pcb.h>
87 #include <machine/undefined.h>
88 #include <machine/vmparam.h>
89 
90 #ifdef VFP
91 #include <machine/vfp.h>
92 #endif
93 
94 #ifdef DEV_ACPI
95 #include <contrib/dev/acpica/include/acpi.h>
96 #include <machine/acpica_machdep.h>
97 #endif
98 
99 #ifdef FDT
100 #include <dev/fdt/fdt_common.h>
101 #include <dev/ofw/openfirm.h>
102 #endif
103 
104 #include <dev/smbios/smbios.h>
105 
/*
 * Pin the pcb layout: the size and these field offsets are relied upon
 * outside this file. NOTE(review): the exact consumers are not visible
 * here -- presumably assembly offsets/ABI consumers; confirm before
 * changing struct pcb.
 */
_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
    "pcb_fpusaved changed offset");
_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
    "pcb_fpustate changed offset");
111 
/* Chosen enumeration method (FDT or ACPI); selected in bus_probe(). */
enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;

/*
 * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
 * could relocate this, but will need to keep the same virtual address as
 * it's referenced by the EARLY_COUNTER macro.
 */
struct pcpu pcpu0;

#if defined(PERTHREAD_SSP)
/*
 * The boot SSP canary. Will be replaced with a per-thread canary when
 * scheduling has started.
 */
uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
#endif

/* Trap frame for thread0; installed as td_frame in init_proc0(). */
static struct trapframe proc0_tf;

/* Cleared at the end of initarm() once early setup is complete. */
int early_boot = 1;
int cold = 1;
/* Exception level we booted at and the EL2 config; stashed in initarm(). */
static int boot_el;
static uint64_t hcr_el2;

struct kva_md_info kmi;

int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
int has_pan;			/* Set in pan_setup() when PAN is present. */

#if defined(SOCDEV_PA)
/*
 * This is the virtual address used to access SOCDEV_PA. As it's set before
 * .bss is cleared we need to ensure it's preserved. To do this use
 * __read_mostly as it's only ever set once but read in the putc functions.
 */
uintptr_t socdev_va __read_mostly;
#endif

/*
 * Physical address of the EFI System Table. Stashed from the metadata hints
 * passed into the kernel and used by the EFI code to call runtime services.
 */
vm_paddr_t efi_systbl_phys;
/* EFI memory map from the loader metadata; NULL when booted without one. */
static struct efi_map_header *efihdr;

/* pagezero_* implementations are provided in support.S */
void pagezero_simple(void *);
void pagezero_cache(void *);

/* pagezero_simple is default pagezero */
void (*pagezero)(void *p) = pagezero_simple;

/* NMI handler hook; presumably set by ACPI APEI code, not in this file. */
int (*apei_nmi)(void);
165 
#if defined(PERTHREAD_SSP_WARNING)
/*
 * Warn that the per-thread SSP option is configured but unusable.
 * Registered twice so the warning appears both early (with the
 * copyright banner) and at the end of boot where it is easy to spot.
 */
static void
print_ssp_warning(void *data __unused)
{
	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif
175 
176 static void
177 pan_setup(void)
178 {
179 	uint64_t id_aa64mfr1;
180 
181 	id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
182 	if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE)
183 		has_pan = 1;
184 }
185 
void
pan_enable(void)
{

	/*
	 * The LLVM integrated assembler doesn't understand the PAN
	 * PSTATE field. Because of this we need to manually create
	 * the instruction in an asm block. This is equivalent to:
	 * msr pan, #1
	 *
	 * This sets the PAN bit, stopping the kernel from accessing
	 * memory when userspace can also access it unless the kernel
	 * uses the userspace load/store instructions.
	 */
	if (has_pan) {
		/* Clear SCTLR_EL1.SPAN so PSTATE.PAN is set on exceptions. */
		WRITE_SPECIALREG(sctlr_el1,
		    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
		__asm __volatile(".inst 0xd500409f | (0x1 << 8)");
	}
}
206 
207 bool
208 has_hyp(void)
209 {
210 
211 	/*
212 	 * XXX The E2H check is wrong, but it's close enough for now.  Needs to
213 	 * be re-evaluated once we're running regularly in EL2.
214 	 */
215 	return (boot_el == 2 && (hcr_el2 & HCR_E2H) == 0);
216 }
217 
218 static void
219 cpu_startup(void *dummy)
220 {
221 	vm_paddr_t size;
222 	int i;
223 
224 	printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
225 	    ptoa((uintmax_t)realmem) / 1024 / 1024);
226 
227 	if (bootverbose) {
228 		printf("Physical memory chunk(s):\n");
229 		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
230 			size = phys_avail[i + 1] - phys_avail[i];
231 			printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
232 			    (uintmax_t)phys_avail[i],
233 			    (uintmax_t)phys_avail[i + 1] - 1,
234 			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
235 		}
236 	}
237 
238 	printf("avail memory = %ju (%ju MB)\n",
239 	    ptoa((uintmax_t)vm_free_count()),
240 	    ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);
241 
242 	undef_init();
243 	install_cpu_errata();
244 
245 	vm_ksubmap_init(&kmi);
246 	bufinit();
247 	vm_pager_bufferinit();
248 }
249 
250 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
251 
/* Resolve any remaining kernel ifunc relocations once CPUs are identified. */
static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);
258 
/*
 * MD hook to wake an idle CPU. Always reports failure here; presumably
 * the wfi in cpu_idle() is interrupted by other means (e.g. an IPI).
 */
int
cpu_idle_wakeup(int cpu)
{

	return (0);
}
265 
/*
 * Idle the CPU: wait for an interrupt with wfi. The event-timer clock is
 * stopped/restarted around the wait when not busy, and spinlocks are held
 * so the runnable check and wfi are not raced by an interrupt.
 */
void
cpu_idle(int busy)
{

	spinlock_enter();
	if (!busy)
		cpu_idleclock();
	if (!sched_runnable())
		/* dsb orders prior accesses before we wait for an interrupt */
		__asm __volatile(
		    "dsb sy \n"
		    "wfi    \n");
	if (!busy)
		cpu_activeclock();
	spinlock_exit();
}
281 
/*
 * Last-resort halt: disable interrupts and sit in wfi forever.
 */
void
cpu_halt(void)
{

	/* We should have shutdown by now, if not enter a low power sleep */
	intr_disable();
	for (;;)
		__asm __volatile("wfi");
}
292 
/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{

	/* ARM64TODO TBD */
}
303 
304 /* Get current clock frequency for the given CPU ID. */
305 int
306 cpu_est_clockrate(int cpu_id, uint64_t *rate)
307 {
308 	struct pcpu *pc;
309 
310 	pc = pcpu_find(cpu_id);
311 	if (pc == NULL || rate == NULL)
312 		return (EINVAL);
313 
314 	if (pc->pc_clock == 0)
315 		return (EOPNOTSUPP);
316 
317 	*rate = pc->pc_clock;
318 	return (0);
319 }
320 
/*
 * MD per-CPU data initialization: mark the ACPI id and MPIDR as unknown
 * until they are filled in by later CPU enumeration.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
	pcpu->pc_mpidr = UINT64_MAX;
}
328 
/*
 * Enter a spinlock section: on first entry disable interrupts, saving the
 * DAIF state for the matching spinlock_exit(); nested entries only bump
 * the count. Interrupts must be off before the count is set so the state
 * is consistent if we are preempted between the two.
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		daif = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_daif = daif;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}
344 
/*
 * Leave a spinlock section; on the outermost exit restore the DAIF state
 * saved by spinlock_enter(). The saved state is read before the count is
 * dropped so a preemption after re-enabling cannot clobber it.
 */
void
spinlock_exit(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	daif = td->td_md.md_saved_daif;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(daif);
	}
}
359 
360 /*
361  * Construct a PCB from a trapframe. This is called from kdb_trap() where
362  * we want to start a backtrace from the function that caused us to enter
363  * the debugger. We have the context in the trapframe, but base the trace
364  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
365  * enough for a backtrace.
366  */
367 void
368 makectx(struct trapframe *tf, struct pcb *pcb)
369 {
370 	int i;
371 
372 	/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
373 	for (i = 0; i < nitems(pcb->pcb_x); i++) {
374 		if (i == PCB_LR)
375 			pcb->pcb_x[i] = tf->tf_elr;
376 		else
377 			pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
378 	}
379 
380 	pcb->pcb_sp = tf->tf_sp;
381 }
382 
/*
 * Wire up proc0/thread0: link the process and thread structures, install
 * the kernel stack, place the PCB at the top of that stack, and point the
 * boot CPU's pcpu at it. Called from initarm() before scheduling exists.
 */
static void
init_proc0(vm_offset_t kstack)
{
	struct pcpu *pcpup;

	pcpup = cpuid_to_pcpu[0];
	MPASS(pcpup != NULL);

	proc_linkup0(&proc0, &thread0);
	thread0.td_kstack = kstack;
	thread0.td_kstack_pages = KSTACK_PAGES;
#if defined(PERTHREAD_SSP)
	thread0.td_md.md_canary = boot_canary;
#endif
	/* The PCB lives in the last bytes of the kernel stack. */
	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_fpflags = 0;
	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
	thread0.td_frame = &proc0_tf;
	ptrauth_thread0(&thread0);
	pcpup->pc_curpcb = thread0.td_pcb;

	/*
	 * Unmask SError exceptions. They are used to signal a RAS failure,
	 * or other hardware error.
	 */
	serror_enable();
}
413 
414 /*
415  * Get an address to be used to write to kernel data that may be mapped
416  * read-only, e.g. to patch kernel code.
417  */
418 bool
419 arm64_get_writable_addr(vm_offset_t addr, vm_offset_t *out)
420 {
421 	vm_paddr_t pa;
422 
423 	/* Check if the page is writable */
424 	if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
425 		*out = addr;
426 		return (true);
427 	}
428 
429 	/*
430 	 * Find the physical address of the given page.
431 	 */
432 	if (!pmap_klookup(addr, &pa)) {
433 		return (false);
434 	}
435 
436 	/*
437 	 * If it is within the DMAP region and is writable use that.
438 	 */
439 	if (PHYS_IN_DMAP(pa)) {
440 		addr = PHYS_TO_DMAP(pa);
441 		if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
442 			*out = addr;
443 			return (true);
444 		}
445 	}
446 
447 	return (false);
448 }
449 
450 typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);
451 
452 static void
453 foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
454 {
455 	struct efi_md *map, *p;
456 	size_t efisz;
457 	int ndesc, i;
458 
459 	/*
460 	 * Memory map data provided by UEFI via the GetMemoryMap
461 	 * Boot Services API.
462 	 */
463 	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
464 	map = (struct efi_md *)((uint8_t *)efihdr + efisz);
465 
466 	if (efihdr->descriptor_size == 0)
467 		return;
468 	ndesc = efihdr->memory_size / efihdr->descriptor_size;
469 
470 	for (i = 0, p = map; i < ndesc; i++,
471 	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
472 		cb(p, argp);
473 	}
474 }
475 
/*
 * Handle the EFI memory map list.
 *
 * We will make two passes at this, the first (exclude == false) to populate
 * physmem with valid physical memory ranges from recognized map entry types.
 * In the second pass we will exclude memory ranges from physmem which must not
 * be used for general allocations, either because they are used by runtime
 * firmware or otherwise reserved.
 *
 * Adding the runtime-reserved memory ranges to physmem and excluding them
 * later ensures that they are included in the DMAP, but excluded from
 * phys_avail[].
 *
 * Entry types not explicitly listed here are ignored and not mapped.
 */
static void
handle_efi_map_entry(struct efi_md *p, void *argp)
{
	/* argp selects which of the two passes this is. */
	bool exclude = *(bool *)argp;

	switch (p->md_type) {
	case EFI_MD_TYPE_RECLAIM:
		/*
		 * The recommended location for ACPI tables. Map into the
		 * DMAP so we can access them from userspace via /dev/mem.
		 */
	case EFI_MD_TYPE_RT_CODE:
		/*
		 * Some UEFI implementations put the system table in the
		 * runtime code section. Include it in the DMAP, but will
		 * be excluded from phys_avail.
		 */
	case EFI_MD_TYPE_RT_DATA:
		/*
		 * Runtime data will be excluded after the DMAP
		 * region is created to stop it from being added
		 * to phys_avail.
		 */
		if (exclude) {
			physmem_exclude_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
			break;
		}
		/* FALLTHROUGH */
	case EFI_MD_TYPE_CODE:
	case EFI_MD_TYPE_DATA:
	case EFI_MD_TYPE_BS_CODE:
	case EFI_MD_TYPE_BS_DATA:
	case EFI_MD_TYPE_FREE:
		/*
		 * We're allowed to use any entry with these types.
		 */
		if (!exclude)
			physmem_hardware_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE);
		break;
	default:
		/* Other types shall not be handled by physmem. */
		break;
	}
}
537 
538 static void
539 add_efi_map_entries(struct efi_map_header *efihdr)
540 {
541 	bool exclude = false;
542 	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
543 }
544 
545 static void
546 exclude_efi_map_entries(struct efi_map_header *efihdr)
547 {
548 	bool exclude = true;
549 	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
550 }
551 
552 static void
553 print_efi_map_entry(struct efi_md *p, void *argp __unused)
554 {
555 	const char *type;
556 	static const char *types[] = {
557 		"Reserved",
558 		"LoaderCode",
559 		"LoaderData",
560 		"BootServicesCode",
561 		"BootServicesData",
562 		"RuntimeServicesCode",
563 		"RuntimeServicesData",
564 		"ConventionalMemory",
565 		"UnusableMemory",
566 		"ACPIReclaimMemory",
567 		"ACPIMemoryNVS",
568 		"MemoryMappedIO",
569 		"MemoryMappedIOPortSpace",
570 		"PalCode",
571 		"PersistentMemory"
572 	};
573 
574 	if (p->md_type < nitems(types))
575 		type = types[p->md_type];
576 	else
577 		type = "<INVALID>";
578 	printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
579 	    p->md_virt, p->md_pages);
580 	if (p->md_attr & EFI_MD_ATTR_UC)
581 		printf("UC ");
582 	if (p->md_attr & EFI_MD_ATTR_WC)
583 		printf("WC ");
584 	if (p->md_attr & EFI_MD_ATTR_WT)
585 		printf("WT ");
586 	if (p->md_attr & EFI_MD_ATTR_WB)
587 		printf("WB ");
588 	if (p->md_attr & EFI_MD_ATTR_UCE)
589 		printf("UCE ");
590 	if (p->md_attr & EFI_MD_ATTR_WP)
591 		printf("WP ");
592 	if (p->md_attr & EFI_MD_ATTR_RP)
593 		printf("RP ");
594 	if (p->md_attr & EFI_MD_ATTR_XP)
595 		printf("XP ");
596 	if (p->md_attr & EFI_MD_ATTR_NV)
597 		printf("NV ");
598 	if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
599 		printf("MORE_RELIABLE ");
600 	if (p->md_attr & EFI_MD_ATTR_RO)
601 		printf("RO ");
602 	if (p->md_attr & EFI_MD_ATTR_RT)
603 		printf("RUNTIME");
604 	printf("\n");
605 }
606 
/* Print a header line then every descriptor in the EFI memory map. */
static void
print_efi_map_entries(struct efi_map_header *efihdr)
{

	printf("%23s %12s %12s %8s %4s\n",
	    "Type", "Physical", "Virtual", "#Pages", "Attr");
	foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
}
615 
/*
 * Map the passed in VA in EFI space to a void * using the efi memory table to
 * find the PA and return it in the DMAP, if it exists. We're used between the
 * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
 * tables We assume that either the entry you are mapping fits within its page,
 * or if it spills to the next page, that's contiguous in PA and in the DMAP.
 * All observed tables obey the first part of this precondition.
 */
struct early_map_data
{
	vm_offset_t va;		/* Input: EFI virtual address to look up. */
	vm_offset_t pa;		/* Output: matching PA, 0 if not found. */
};
629 
630 static void
631 efi_early_map_entry(struct efi_md *p, void *argp)
632 {
633 	struct early_map_data *emdp = argp;
634 	vm_offset_t s, e;
635 
636 	if (emdp->pa != 0)
637 		return;
638 	if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
639 		return;
640 	s = p->md_virt;
641 	e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
642 	if (emdp->va < s  || emdp->va >= e)
643 		return;
644 	emdp->pa = p->md_phys + (emdp->va - p->md_virt);
645 }
646 
647 static void *
648 efi_early_map(vm_offset_t va)
649 {
650 	struct early_map_data emd = { .va = va };
651 
652 	foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
653 	if (emd.pa == 0)
654 		return NULL;
655 	return (void *)PHYS_TO_DMAP(emd.pa);
656 }
657 
658 
659 /*
660  * When booted via kboot, the prior kernel will pass in reserved memory areas in
661  * a EFI config table. We need to find that table and walk through it excluding
662  * the memory ranges in it. btw, this is called too early for the printf to do
663  * anything since msgbufp isn't initialized, let alone a console...
664  */
665 static void
666 exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
667 {
668 	struct efi_systbl *systbl;
669 	struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;
670 
671 	systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
672 	if (systbl == NULL) {
673 		printf("can't map systbl\n");
674 		return;
675 	}
676 	if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
677 		printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
678 		return;
679 	}
680 
681 	/*
682 	 * We don't yet have the pmap system booted enough to create a pmap for
683 	 * the efi firmware's preferred address space from the GetMemoryMap()
684 	 * table. The st_cfgtbl is a VA in this space, so we need to do the
685 	 * mapping ourselves to a kernel VA with efi_early_map. We assume that
686 	 * the cfgtbl entries don't span a page. Other pointers are PAs, as
687 	 * noted below.
688 	 */
689 	if (systbl->st_cfgtbl == 0)	/* Failsafe st_entries should == 0 in this case */
690 		return;
691 	for (int i = 0; i < systbl->st_entries; i++) {
692 		struct efi_cfgtbl *cfgtbl;
693 		struct linux_efi_memreserve *mr;
694 
695 		cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
696 		if (cfgtbl == NULL)
697 			panic("Can't map the config table entry %d\n", i);
698 		if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0)
699 			continue;
700 
701 		/*
702 		 * cfgtbl points are either VA or PA, depending on the GUID of
703 		 * the table. memreserve GUID pointers are PA and not converted
704 		 * after a SetVirtualAddressMap(). The list's mr_next pointer
705 		 * is also a PA.
706 		 */
707 		mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
708 			(vm_offset_t)cfgtbl->ct_data);
709 		while (true) {
710 			for (int j = 0; j < mr->mr_count; j++) {
711 				struct linux_efi_memreserve_entry *mre;
712 
713 				mre = &mr->mr_entry[j];
714 				physmem_exclude_region(mre->mre_base, mre->mre_size,
715 				    EXFLAG_NODUMP | EXFLAG_NOALLOC);
716 			}
717 			if (mr->mr_next == 0)
718 				break;
719 			mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
720 		};
721 	}
722 
723 }
724 
#ifdef FDT
/*
 * Locate a device tree blob (from loader metadata or the statically
 * embedded copy) and hand it to the Open Firmware FDT layer, then pick
 * up any bootargs it carries.
 */
static void
try_load_dtb(caddr_t kmdp)
{
	vm_offset_t dtbp;

	dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
	/*
	 * In case the device tree blob was not retrieved (from metadata) try
	 * to use the statically embedded one.
	 */
	if (dtbp == 0)
		dtbp = (vm_offset_t)&fdt_static_dtb;
#endif

	if (dtbp == (vm_offset_t)NULL) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (OF_install(OFW_FDT, 0) == FALSE)
		panic("Cannot install FDT");

	if (OF_init((void *)dtbp) != 0)
		panic("OF_init failed with the found device tree");

	parse_fdt_bootargs();
}
#endif
757 
/*
 * Choose the bus enumeration method (FDT vs ACPI). The kern.cfg.order
 * environment variable, a comma-separated preference list, is consulted
 * first; otherwise FDT is preferred when a device tree is present, then
 * ACPI. Returns false when kern.cfg.order was set but named no usable
 * method -- the caller then panics after the console is up.
 */
static bool
bus_probe(void)
{
	bool has_acpi, has_fdt;
	char *order, *env;

	has_acpi = has_fdt = false;

#ifdef FDT
	has_fdt = (OF_peer(0) != 0);
#endif
#ifdef DEV_ACPI
	has_acpi = (AcpiOsGetRootPointer() != 0);
#endif

	env = kern_getenv("kern.cfg.order");
	if (env != NULL) {
		/* Walk the comma-separated list, first usable entry wins. */
		order = env;
		while (order != NULL) {
			if (has_acpi &&
			    strncmp(order, "acpi", 4) == 0 &&
			    (order[4] == ',' || order[4] == '\0')) {
				arm64_bus_method = ARM64_BUS_ACPI;
				break;
			}
			if (has_fdt &&
			    strncmp(order, "fdt", 3) == 0 &&
			    (order[3] == ',' || order[3] == '\0')) {
				arm64_bus_method = ARM64_BUS_FDT;
				break;
			}
			order = strchr(order, ',');
			if (order != NULL)
				order++;	/* Skip comma */
		}
		freeenv(env);

		/* If we set the bus method it is valid */
		if (arm64_bus_method != ARM64_BUS_NONE)
			return (true);
	}
	/* If no order or an invalid order was set use the default */
	if (arm64_bus_method == ARM64_BUS_NONE) {
		if (has_fdt)
			arm64_bus_method = ARM64_BUS_FDT;
		else if (has_acpi)
			arm64_bus_method = ARM64_BUS_ACPI;
	}

	/*
	 * If no option was set the default is valid, otherwise we are
	 * setting one to get cninit() working, then calling panic to tell
	 * the user about the invalid bus setup.
	 */
	return (env == NULL);
}
814 
/*
 * Identify the cache geometry and configure the "dc zva" based page
 * zeroing function when the instruction is permitted.
 */
static void
cache_setup(void)
{
	int dczva_line_shift;
	uint32_t dczid_el0;

	identify_cache(READ_SPECIALREG(ctr_el0));

	dczid_el0 = READ_SPECIALREG(dczid_el0);

	/* Check if dc zva is not prohibited */
	if (dczid_el0 & DCZID_DZP)
		dczva_line_size = 0;
	else {
		/* DCZID_EL0.BS encodes the block size in 4-byte words. */
		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
		dczva_line_size = sizeof(int) << dczva_line_shift;

		/* Change pagezero function */
		pagezero = pagezero_cache;
	}
}
837 
/*
 * Pick the memory attribute to map the given physical address with, based
 * on the EFI memory map. Without a map, everything is assumed to be
 * normal write-back memory; an address covered by no descriptor is
 * treated as device memory.
 */
int
memory_mapping_mode(vm_paddr_t pa)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	if (efihdr == NULL)
		return (VM_MEMATTR_WRITE_BACK);

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return (VM_MEMATTR_WRITE_BACK);
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (pa < p->md_phys ||
		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
			continue;
		/* Found the covering descriptor; derive the attribute. */
		if (p->md_type == EFI_MD_TYPE_IOMEM ||
		    p->md_type == EFI_MD_TYPE_IOPORT)
			return (VM_MEMATTR_DEVICE);
		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
		    p->md_type == EFI_MD_TYPE_RECLAIM)
			return (VM_MEMATTR_WRITE_BACK);
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			return (VM_MEMATTR_WRITE_THROUGH);
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			return (VM_MEMATTR_WRITE_COMBINING);
		break;
	}

	return (VM_MEMATTR_DEVICE);
}
879 
/*
 * Machine-dependent early initialization, called from locore with the
 * boot parameters assembled there. Establishes physical memory ranges,
 * bootstraps pmap and pcpu, selects the bus method, brings up the
 * console and prepares proc0/thread0 for mi_startup(). The ordering of
 * the calls below is significant.
 */
void
initarm(struct arm64_bootparams *abp)
{
	struct efi_fb *efifb;
	struct pcpu *pcpup;
	char *env;
#ifdef FDT
	struct mem_region mem_regions[FDT_MEM_REGIONS];
	int mem_regions_sz;
	phandle_t root;
	char dts_version[255];
#endif
	vm_offset_t lastaddr;
	caddr_t kmdp;
	bool valid;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	boot_el = abp->boot_el;
	hcr_el2 = abp->hcr_el2;

	/* Parse loader or FDT boot parameters. Determine last used address. */
	lastaddr = parse_boot_param(abp);

	/* Find the kernel address */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");

	identify_cpu(0);
	identify_hypervisor_smbios();

	update_special_regs(0);

	link_elf_ireloc(kmdp);
#ifdef FDT
	try_load_dtb(kmdp);
#endif

	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	/* Load the physical memory ranges */
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr != NULL)
		add_efi_map_entries(efihdr);
#ifdef FDT
	else {
		/* Grab physical memory regions information from device tree. */
		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
		    NULL) != 0)
			panic("Cannot get physical memory regions");
		physmem_hardware_regions(mem_regions, mem_regions_sz);
	}
	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
		physmem_exclude_regions(mem_regions, mem_regions_sz,
		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
#endif

	/* Exclude the EFI framebuffer from our view of physical memory. */
	efifb = (struct efi_fb *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_FB);
	if (efifb != NULL)
		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
		    EXFLAG_NOALLOC);

	/* Set the pcpu data, this is needed by pmap_bootstrap */
	pcpup = &pcpu0;
	pcpu_init(pcpup, 0, sizeof(struct pcpu));

	/*
	 * Set the pcpu pointer with a backup in tpidr_el1 to be
	 * loaded when entering the kernel from userland.
	 */
	__asm __volatile(
	    "mov x18, %0 \n"
	    "msr tpidr_el1, %0" :: "r"(pcpup));

	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
	PCPU_SET(curthread, &thread0);
	PCPU_SET(midr, get_midr());

	/* Do basic tuning, hz etc */
	init_param1();

	cache_setup();
	pan_setup();

	/* Bootstrap enough of pmap to enter the kernel proper */
	pmap_bootstrap(lastaddr - KERNBASE);
	/* Exclude entries needed in the DMAP region, but not phys_avail */
	if (efihdr != NULL)
		exclude_efi_map_entries(efihdr);
	/*  Do the same for reserve entries in the EFI MEMRESERVE table */
	if (efi_systbl_phys != 0)
		exclude_efi_memreserve(efi_systbl_phys);

	/*
	 * We carefully bootstrap the sanitizer map after we've excluded
	 * absolutely everything else that could impact phys_avail.  There's not
	 * always enough room for the initial shadow map after the kernel, so
	 * we'll end up searching for segments that we can safely use.  Those
	 * segments also get excluded from phys_avail.
	 */
#if defined(KASAN) || defined(KMSAN)
	pmap_bootstrap_san();
#endif

	physmem_init_kernel_globals();

	devmap_bootstrap(0, NULL);

	valid = bus_probe();

	cninit();
	set_ttbr0(abp->kern_ttbr0);
	cpu_tlb_flushID();

	/* Deferred from bus_probe() so the message reaches the console. */
	if (!valid)
		panic("Invalid bus configuration: %s",
		    kern_getenv("kern.cfg.order"));

	/*
	 * Check if pointer authentication is available on this system, and
	 * if so enable its use. This needs to be called before init_proc0
	 * as that will configure the thread0 pointer authentication keys.
	 */
	ptrauth_init();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

	init_proc0(abp->kern_stack);
	msgbufinit(msgbufp, msgbufsize);
	mutex_init();
	init_param2(physmem);

	dbg_init();
	kdb_init();
#ifdef KDB
	if ((boothowto & RB_KDB) != 0)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
	pan_enable();

	kcsan_cpu_init(0);
	kasan_init();
	kmsan_init();

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	if (arm64_bus_method == ARM64_BUS_FDT) {
		root = OF_finddevice("/");
		if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
				printf("WARNING: DTB version is %s while kernel expects %s, "
				    "please update the DTB in the ESP\n",
				    dts_version,
				    LINUX_DTS_VERSION);
		} else {
			printf("WARNING: Cannot find freebsd,dts-version property, "
			    "cannot check DTB compliance\n");
		}
	}
#endif

	if (boothowto & RB_VERBOSE) {
		if (efihdr != NULL)
			print_efi_map_entries(efihdr);
		physmem_print_tables();
	}

	early_boot = 0;

	if (bootverbose && kstack_pages != KSTACK_PAGES)
		printf("kern.kstack_pages = %d ignored for thread0\n",
		    kstack_pages);

	TSEXIT();
}
1068 
/*
 * Prepare the self-hosted debug hardware for kernel use.
 */
void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here. */
}
1081 
1082 #ifdef DDB
1083 #include <ddb/ddb.h>
1084 
/* DDB "show specialregs": dump the accessible system registers. */
DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
#define	PRINT_REG(reg)	\
    db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}
1159 
/*
 * DDB "show vtop <virt_addr>": run the address-translation instructions
 * for the given VA and print the resulting PAR values for EL1/EL0
 * read and write accesses.
 */
DB_SHOW_COMMAND(vtop, db_show_vtop)
{
	uint64_t phys;

	if (have_addr) {
		phys = arm64_address_translate_s1e1r(addr);
		db_printf("EL1 physical address reg (read):  0x%016lx\n", phys);
		phys = arm64_address_translate_s1e1w(addr);
		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0r(addr);
		db_printf("EL0 physical address reg (read):  0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0w(addr);
		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
	} else
		db_printf("show vtop <virt_addr>\n");
}
1176 #endif
1177