xref: /freebsd/sys/arm64/arm64/machdep.c (revision b985c9ca)
1 /*-
2  * Copyright (c) 2014 Andrew Turner
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 #include "opt_acpi.h"
29 #include "opt_kstack_pages.h"
30 #include "opt_platform.h"
31 #include "opt_ddb.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/asan.h>
36 #include <sys/buf.h>
37 #include <sys/bus.h>
38 #include <sys/cons.h>
39 #include <sys/cpu.h>
40 #include <sys/csan.h>
41 #include <sys/devmap.h>
42 #include <sys/efi.h>
43 #include <sys/exec.h>
44 #include <sys/imgact.h>
45 #include <sys/kdb.h>
46 #include <sys/kernel.h>
47 #include <sys/ktr.h>
48 #include <sys/limits.h>
49 #include <sys/linker.h>
50 #include <sys/msan.h>
51 #include <sys/msgbuf.h>
52 #include <sys/pcpu.h>
53 #include <sys/physmem.h>
54 #include <sys/proc.h>
55 #include <sys/ptrace.h>
56 #include <sys/reboot.h>
57 #include <sys/reg.h>
58 #include <sys/rwlock.h>
59 #include <sys/sched.h>
60 #include <sys/signalvar.h>
61 #include <sys/syscallsubr.h>
62 #include <sys/sysent.h>
63 #include <sys/sysproto.h>
64 #include <sys/ucontext.h>
65 #include <sys/vdso.h>
66 #include <sys/vmmeter.h>
67 
68 #include <vm/vm.h>
69 #include <vm/vm_param.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_phys.h>
74 #include <vm/pmap.h>
75 #include <vm/vm_map.h>
76 #include <vm/vm_pager.h>
77 
78 #include <machine/armreg.h>
79 #include <machine/cpu.h>
80 #include <machine/debug_monitor.h>
81 #include <machine/hypervisor.h>
82 #include <machine/kdb.h>
83 #include <machine/machdep.h>
84 #include <machine/metadata.h>
85 #include <machine/md_var.h>
86 #include <machine/pcb.h>
87 #include <machine/undefined.h>
88 #include <machine/vmparam.h>
89 
90 #ifdef VFP
91 #include <machine/vfp.h>
92 #endif
93 
94 #ifdef DEV_ACPI
95 #include <contrib/dev/acpica/include/acpi.h>
96 #include <machine/acpica_machdep.h>
97 #endif
98 
99 #ifdef FDT
100 #include <dev/fdt/fdt_common.h>
101 #include <dev/ofw/openfirm.h>
102 #endif
103 
104 #include <dev/smbios/smbios.h>
105 
/*
 * Assembly (e.g. swtch.S, exception.S) hard-codes these offsets/sizes;
 * fail the build if struct pcb changes shape unexpectedly.
 */
_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
    "pcb_fpusaved changed offset");
_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
    "pcb_fpustate changed offset");

/* Which bus enumeration method (FDT or ACPI) was chosen; see bus_probe(). */
enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;

/*
 * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
 * could relocate this, but will need to keep the same virtual address as
 * it's referenced by the EARLY_COUNTER macro.
 */
struct pcpu pcpu0;

#if defined(PERTHREAD_SSP)
/*
 * The boot SSP canary. Will be replaced with a per-thread canary when
 * scheduling has started.
 */
uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
#endif

/* Trapframe for thread0; installed as td_frame in init_proc0(). */
static struct trapframe proc0_tf;

int early_boot = 1;
int cold = 1;
/* Exception level the kernel was entered at and the EL2 HCR value, saved
 * from the boot parameters in initarm() for has_hyp()/in_vhe(). */
static int boot_el;
static uint64_t hcr_el2;

struct kva_md_info kmi;

int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
int has_pan;			/* Non-zero if CPU supports PAN; see pan_setup() */

#if defined(SOCDEV_PA)
/*
 * This is the virtual address used to access SOCDEV_PA. As it's set before
 * .bss is cleared we need to ensure it's preserved. To do this use
 * __read_mostly as it's only ever set once but read in the putc functions.
 */
uintptr_t socdev_va __read_mostly;
#endif

/*
 * Physical address of the EFI System Table. Stashed from the metadata hints
 * passed into the kernel and used by the EFI code to call runtime services.
 */
vm_paddr_t efi_systbl_phys;
/* EFI memory map header from loader metadata; NULL when not booted via EFI. */
static struct efi_map_header *efihdr;

/* pagezero_* implementations are provided in support.S */
void pagezero_simple(void *);
void pagezero_cache(void *);

/* pagezero_simple is default pagezero */
void (*pagezero)(void *p) = pagezero_simple;

/* Optional ACPI APEI NMI handler hook; set by the APEI driver if present. */
int (*apei_nmi)(void);
165 
#if defined(PERTHREAD_SSP_WARNING)
/*
 * Warn that the per-thread SSP option is compiled in but unusable.
 * Printed twice (early and late in boot) so it is visible both on the
 * console and near the end of dmesg.
 */
static void
print_ssp_warning(void *data __unused)
{
	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif
175 
176 static void
177 pan_setup(void)
178 {
179 	uint64_t id_aa64mfr1;
180 
181 	id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
182 	if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE)
183 		has_pan = 1;
184 }
185 
/*
 * Turn on Privileged Access Never if pan_setup() detected support for it.
 * Also clears SCTLR_EL1.SPAN so PSTATE.PAN is set automatically on
 * exception entry to EL1.
 */
void
pan_enable(void)
{

	/*
	 * The LLVM integrated assembler doesn't understand the PAN
	 * PSTATE field. Because of this we need to manually create
	 * the instruction in an asm block. This is equivalent to:
	 * msr pan, #1
	 *
	 * This sets the PAN bit, stopping the kernel from accessing
	 * memory when userspace can also access it unless the kernel
	 * uses the userspace load/store instructions.
	 */
	if (has_pan) {
		WRITE_SPECIALREG(sctlr_el1,
		    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
		/* Hand-encoded "msr pan, #1"; bit 8 is the immediate. */
		__asm __volatile(".inst 0xd500409f | (0x1 << 8)");
	}
}
206 
/*
 * Return true when the kernel was entered at EL2 without VHE, i.e. a
 * hypervisor (bhyve) can be started by dropping to EL1 and trapping back.
 */
bool
has_hyp(void)
{

	/*
	 * XXX The E2H check is wrong, but it's close enough for now.  Needs to
	 * be re-evaluated once we're running regularly in EL2.
	 */
	return (boot_el == CURRENTEL_EL_EL2 && (hcr_el2 & HCR_E2H) == 0);
}
217 
218 bool
219 in_vhe(void)
220 {
221 	/* If we are currently in EL2 then must be in VHE */
222 	return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) ==
223 	    CURRENTEL_EL_EL2);
224 }
225 
226 static void
227 cpu_startup(void *dummy)
228 {
229 	vm_paddr_t size;
230 	int i;
231 
232 	printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
233 	    ptoa((uintmax_t)realmem) / 1024 / 1024);
234 
235 	if (bootverbose) {
236 		printf("Physical memory chunk(s):\n");
237 		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
238 			size = phys_avail[i + 1] - phys_avail[i];
239 			printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
240 			    (uintmax_t)phys_avail[i],
241 			    (uintmax_t)phys_avail[i + 1] - 1,
242 			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
243 		}
244 	}
245 
246 	printf("avail memory = %ju (%ju MB)\n",
247 	    ptoa((uintmax_t)vm_free_count()),
248 	    ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);
249 
250 	undef_init();
251 	install_cpu_errata();
252 
253 	vm_ksubmap_init(&kmi);
254 	bufinit();
255 	vm_pager_bufferinit();
256 }
257 
258 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
259 
/*
 * Re-resolve ELF ifuncs once all CPU features are known; some resolvers
 * depend on state that is only available after SI_SUB_CPU setup.
 */
static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);
266 
/*
 * Wake an idle CPU. Nothing to do here: cpu_idle() waits in WFI, and any
 * interrupt (e.g. an IPI) already breaks it out. Returns 0 to indicate
 * no wakeup action was performed.
 */
int
cpu_idle_wakeup(int cpu)
{

	return (0);
}
273 
/*
 * Idle loop body. With interrupts held off via spinlock_enter(), stop the
 * tick clock if nothing is pending, then wait-for-interrupt. The DSB
 * ensures all prior memory accesses complete before the core sleeps.
 */
void
cpu_idle(int busy)
{

	spinlock_enter();
	if (!busy)
		cpu_idleclock();
	if (!sched_runnable())
		__asm __volatile(
		    "dsb sy \n"
		    "wfi    \n");
	if (!busy)
		cpu_activeclock();
	/* spinlock_exit() re-enables interrupts, delivering any wakeup. */
	spinlock_exit();
}
289 
/*
 * Final halt: disable interrupts and park the CPU in a low-power
 * wait-for-interrupt loop forever. Never returns.
 */
void
cpu_halt(void)
{

	/* We should have shutdown by now, if not enter a low power sleep */
	intr_disable();
	for (;;)
		__asm __volatile("wfi");
}
300 
301 /*
302  * Flush the D-cache for non-DMA I/O so that the I-cache can
303  * be made coherent later.
304  */
305 void
306 cpu_flush_dcache(void *ptr, size_t len)
307 {
308 
309 	/* ARM64TODO TBD */
310 }
311 
312 /* Get current clock frequency for the given CPU ID. */
313 int
314 cpu_est_clockrate(int cpu_id, uint64_t *rate)
315 {
316 	struct pcpu *pc;
317 
318 	pc = pcpu_find(cpu_id);
319 	if (pc == NULL || rate == NULL)
320 		return (EINVAL);
321 
322 	if (pc->pc_clock == 0)
323 		return (EOPNOTSUPP);
324 
325 	*rate = pc->pc_clock;
326 	return (0);
327 }
328 
/*
 * MD per-CPU structure initialization. Mark the ACPI id and MPIDR as
 * invalid sentinels until the platform code fills in the real values.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
	pcpu->pc_mpidr = UINT64_MAX;
}
336 
/*
 * Enter a spinlock section: on the first (outermost) entry disable
 * interrupts, remembering the prior DAIF state so spinlock_exit() can
 * restore it; nested entries only bump the count.
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		/* Disable interrupts before publishing the count. */
		daif = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_daif = daif;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}
352 
/*
 * Leave a spinlock section; on the outermost exit restore the DAIF
 * interrupt state saved by the matching spinlock_enter().
 */
void
spinlock_exit(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	/* Read the saved state before dropping the count. */
	daif = td->td_md.md_saved_daif;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(daif);
	}
}
367 
368 /*
369  * Construct a PCB from a trapframe. This is called from kdb_trap() where
370  * we want to start a backtrace from the function that caused us to enter
371  * the debugger. We have the context in the trapframe, but base the trace
372  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
373  * enough for a backtrace.
374  */
375 void
376 makectx(struct trapframe *tf, struct pcb *pcb)
377 {
378 	int i;
379 
380 	/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
381 	for (i = 0; i < nitems(pcb->pcb_x); i++) {
382 		if (i == PCB_LR)
383 			pcb->pcb_x[i] = tf->tf_elr;
384 		else
385 			pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
386 	}
387 
388 	pcb->pcb_sp = tf->tf_sp;
389 }
390 
/*
 * Wire up proc0/thread0: link them together, point thread0 at its kernel
 * stack and place its PCB at the top of that stack, then publish the PCB
 * via the boot CPU's pcpu.
 */
static void
init_proc0(vm_offset_t kstack)
{
	struct pcpu *pcpup;

	pcpup = cpuid_to_pcpu[0];
	MPASS(pcpup != NULL);

	proc_linkup0(&proc0, &thread0);
	thread0.td_kstack = kstack;
	thread0.td_kstack_pages = KSTACK_PAGES;
#if defined(PERTHREAD_SSP)
	thread0.td_md.md_canary = boot_canary;
#endif
	/* The PCB lives at the very top of the kernel stack. */
	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_fpflags = 0;
	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
	/* UINT_MAX: the FP state is not resident on any CPU yet. */
	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
	thread0.td_frame = &proc0_tf;
	ptrauth_thread0(&thread0);
	pcpup->pc_curpcb = thread0.td_pcb;

	/*
	 * Unmask SError exceptions. They are used to signal a RAS failure,
	 * or other hardware error.
	 */
	serror_enable();
}
421 
422 /*
423  * Get an address to be used to write to kernel data that may be mapped
424  * read-only, e.g. to patch kernel code.
425  */
426 bool
427 arm64_get_writable_addr(void *addr, void **out)
428 {
429 	vm_paddr_t pa;
430 
431 	/* Check if the page is writable */
432 	if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) {
433 		*out = addr;
434 		return (true);
435 	}
436 
437 	/*
438 	 * Find the physical address of the given page.
439 	 */
440 	if (!pmap_klookup((vm_offset_t)addr, &pa)) {
441 		return (false);
442 	}
443 
444 	/*
445 	 * If it is within the DMAP region and is writable use that.
446 	 */
447 	if (PHYS_IN_DMAP_RANGE(pa)) {
448 		addr = (void *)PHYS_TO_DMAP(pa);
449 		if (PAR_SUCCESS(arm64_address_translate_s1e1w(
450 		    (vm_offset_t)addr))) {
451 			*out = addr;
452 			return (true);
453 		}
454 	}
455 
456 	return (false);
457 }
458 
459 typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);
460 
461 static void
462 foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
463 {
464 	struct efi_md *map, *p;
465 	size_t efisz;
466 	int ndesc, i;
467 
468 	/*
469 	 * Memory map data provided by UEFI via the GetMemoryMap
470 	 * Boot Services API.
471 	 */
472 	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
473 	map = (struct efi_md *)((uint8_t *)efihdr + efisz);
474 
475 	if (efihdr->descriptor_size == 0)
476 		return;
477 	ndesc = efihdr->memory_size / efihdr->descriptor_size;
478 
479 	for (i = 0, p = map; i < ndesc; i++,
480 	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
481 		cb(p, argp);
482 	}
483 }
484 
485 /*
486  * Handle the EFI memory map list.
487  *
488  * We will make two passes at this, the first (exclude == false) to populate
489  * physmem with valid physical memory ranges from recognized map entry types.
490  * In the second pass we will exclude memory ranges from physmem which must not
491  * be used for general allocations, either because they are used by runtime
492  * firmware or otherwise reserved.
493  *
494  * Adding the runtime-reserved memory ranges to physmem and excluding them
495  * later ensures that they are included in the DMAP, but excluded from
496  * phys_avail[].
497  *
498  * Entry types not explicitly listed here are ignored and not mapped.
499  */
500 static void
501 handle_efi_map_entry(struct efi_md *p, void *argp)
502 {
503 	bool exclude = *(bool *)argp;
504 
505 	switch (p->md_type) {
506 	case EFI_MD_TYPE_RECLAIM:
507 		/*
508 		 * The recomended location for ACPI tables. Map into the
509 		 * DMAP so we can access them from userspace via /dev/mem.
510 		 */
511 	case EFI_MD_TYPE_RT_CODE:
512 		/*
513 		 * Some UEFI implementations put the system table in the
514 		 * runtime code section. Include it in the DMAP, but will
515 		 * be excluded from phys_avail.
516 		 */
517 	case EFI_MD_TYPE_RT_DATA:
518 		/*
519 		 * Runtime data will be excluded after the DMAP
520 		 * region is created to stop it from being added
521 		 * to phys_avail.
522 		 */
523 		if (exclude) {
524 			physmem_exclude_region(p->md_phys,
525 			    p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
526 			break;
527 		}
528 		/* FALLTHROUGH */
529 	case EFI_MD_TYPE_CODE:
530 	case EFI_MD_TYPE_DATA:
531 	case EFI_MD_TYPE_BS_CODE:
532 	case EFI_MD_TYPE_BS_DATA:
533 	case EFI_MD_TYPE_FREE:
534 		/*
535 		 * We're allowed to use any entry with these types.
536 		 */
537 		if (!exclude)
538 			physmem_hardware_region(p->md_phys,
539 			    p->md_pages * EFI_PAGE_SIZE);
540 		break;
541 	default:
542 		/* Other types shall not be handled by physmem. */
543 		break;
544 	}
545 }
546 
547 static void
548 add_efi_map_entries(struct efi_map_header *efihdr)
549 {
550 	bool exclude = false;
551 	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
552 }
553 
554 static void
555 exclude_efi_map_entries(struct efi_map_header *efihdr)
556 {
557 	bool exclude = true;
558 	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
559 }
560 
561 static void
562 print_efi_map_entry(struct efi_md *p, void *argp __unused)
563 {
564 	const char *type;
565 	static const char *types[] = {
566 		"Reserved",
567 		"LoaderCode",
568 		"LoaderData",
569 		"BootServicesCode",
570 		"BootServicesData",
571 		"RuntimeServicesCode",
572 		"RuntimeServicesData",
573 		"ConventionalMemory",
574 		"UnusableMemory",
575 		"ACPIReclaimMemory",
576 		"ACPIMemoryNVS",
577 		"MemoryMappedIO",
578 		"MemoryMappedIOPortSpace",
579 		"PalCode",
580 		"PersistentMemory"
581 	};
582 
583 	if (p->md_type < nitems(types))
584 		type = types[p->md_type];
585 	else
586 		type = "<INVALID>";
587 	printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
588 	    p->md_virt, p->md_pages);
589 	if (p->md_attr & EFI_MD_ATTR_UC)
590 		printf("UC ");
591 	if (p->md_attr & EFI_MD_ATTR_WC)
592 		printf("WC ");
593 	if (p->md_attr & EFI_MD_ATTR_WT)
594 		printf("WT ");
595 	if (p->md_attr & EFI_MD_ATTR_WB)
596 		printf("WB ");
597 	if (p->md_attr & EFI_MD_ATTR_UCE)
598 		printf("UCE ");
599 	if (p->md_attr & EFI_MD_ATTR_WP)
600 		printf("WP ");
601 	if (p->md_attr & EFI_MD_ATTR_RP)
602 		printf("RP ");
603 	if (p->md_attr & EFI_MD_ATTR_XP)
604 		printf("XP ");
605 	if (p->md_attr & EFI_MD_ATTR_NV)
606 		printf("NV ");
607 	if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
608 		printf("MORE_RELIABLE ");
609 	if (p->md_attr & EFI_MD_ATTR_RO)
610 		printf("RO ");
611 	if (p->md_attr & EFI_MD_ATTR_RT)
612 		printf("RUNTIME");
613 	printf("\n");
614 }
615 
/*
 * Dump the entire EFI memory map (bootverbose); prints a column header
 * then one line per descriptor via print_efi_map_entry().
 */
static void
print_efi_map_entries(struct efi_map_header *efihdr)
{

	printf("%23s %12s %12s %8s %4s\n",
	    "Type", "Physical", "Virtual", "#Pages", "Attr");
	foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
}
624 
625 /*
626  * Map the passed in VA in EFI space to a void * using the efi memory table to
627  * find the PA and return it in the DMAP, if it exists. We're used between the
628  * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
629  * tables We assume that either the entry you are mapping fits within its page,
630  * or if it spills to the next page, that's contiguous in PA and in the DMAP.
631  * All observed tables obey the first part of this precondition.
632  */
633 struct early_map_data
634 {
635 	vm_offset_t va;
636 	vm_offset_t pa;
637 };
638 
639 static void
640 efi_early_map_entry(struct efi_md *p, void *argp)
641 {
642 	struct early_map_data *emdp = argp;
643 	vm_offset_t s, e;
644 
645 	if (emdp->pa != 0)
646 		return;
647 	if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
648 		return;
649 	s = p->md_virt;
650 	e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
651 	if (emdp->va < s  || emdp->va >= e)
652 		return;
653 	emdp->pa = p->md_phys + (emdp->va - p->md_virt);
654 }
655 
656 static void *
657 efi_early_map(vm_offset_t va)
658 {
659 	struct early_map_data emd = { .va = va };
660 
661 	foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
662 	if (emd.pa == 0)
663 		return NULL;
664 	return (void *)PHYS_TO_DMAP(emd.pa);
665 }
666 
667 
668 /*
669  * When booted via kboot, the prior kernel will pass in reserved memory areas in
670  * a EFI config table. We need to find that table and walk through it excluding
671  * the memory ranges in it. btw, this is called too early for the printf to do
672  * anything since msgbufp isn't initialized, let alone a console...
673  */
674 static void
675 exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
676 {
677 	struct efi_systbl *systbl;
678 	struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;
679 
680 	systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
681 	if (systbl == NULL) {
682 		printf("can't map systbl\n");
683 		return;
684 	}
685 	if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
686 		printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
687 		return;
688 	}
689 
690 	/*
691 	 * We don't yet have the pmap system booted enough to create a pmap for
692 	 * the efi firmware's preferred address space from the GetMemoryMap()
693 	 * table. The st_cfgtbl is a VA in this space, so we need to do the
694 	 * mapping ourselves to a kernel VA with efi_early_map. We assume that
695 	 * the cfgtbl entries don't span a page. Other pointers are PAs, as
696 	 * noted below.
697 	 */
698 	if (systbl->st_cfgtbl == 0)	/* Failsafe st_entries should == 0 in this case */
699 		return;
700 	for (int i = 0; i < systbl->st_entries; i++) {
701 		struct efi_cfgtbl *cfgtbl;
702 		struct linux_efi_memreserve *mr;
703 
704 		cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
705 		if (cfgtbl == NULL)
706 			panic("Can't map the config table entry %d\n", i);
707 		if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0)
708 			continue;
709 
710 		/*
711 		 * cfgtbl points are either VA or PA, depending on the GUID of
712 		 * the table. memreserve GUID pointers are PA and not converted
713 		 * after a SetVirtualAddressMap(). The list's mr_next pointer
714 		 * is also a PA.
715 		 */
716 		mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
717 			(vm_offset_t)cfgtbl->ct_data);
718 		while (true) {
719 			for (int j = 0; j < mr->mr_count; j++) {
720 				struct linux_efi_memreserve_entry *mre;
721 
722 				mre = &mr->mr_entry[j];
723 				physmem_exclude_region(mre->mre_base, mre->mre_size,
724 				    EXFLAG_NODUMP | EXFLAG_NOALLOC);
725 			}
726 			if (mr->mr_next == 0)
727 				break;
728 			mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
729 		};
730 	}
731 
732 }
733 
#ifdef FDT
/*
 * Locate the flattened device tree blob (from loader metadata, or the
 * statically compiled-in copy if enabled) and install it as the Open
 * Firmware backend, then parse boot arguments from /chosen.
 */
static void
try_load_dtb(caddr_t kmdp)
{
	vm_offset_t dtbp;

	dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
	/*
	 * In case the device tree blob was not retrieved (from metadata) try
	 * to use the statically embedded one.
	 */
	if (dtbp == 0)
		dtbp = (vm_offset_t)&fdt_static_dtb;
#endif

	if (dtbp == (vm_offset_t)NULL) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (OF_install(OFW_FDT, 0) == FALSE)
		panic("Cannot install FDT");

	if (OF_init((void *)dtbp) != 0)
		panic("OF_init failed with the found device tree");

	parse_fdt_bootargs();
}
#endif
766 
/*
 * Decide between FDT and ACPI bus enumeration. The kern.cfg.order tunable
 * (a comma-separated preference list, e.g. "acpi,fdt") is honored first;
 * otherwise prefer FDT when a device tree is present, else ACPI. Returns
 * false when the user requested an order that names no available method,
 * so initarm() can panic after the console is up.
 */
static bool
bus_probe(void)
{
	bool has_acpi, has_fdt;
	char *order, *env;

	has_acpi = has_fdt = false;

#ifdef FDT
	/* A device tree is present if Open Firmware has a root node. */
	has_fdt = (OF_peer(0) != 0);
#endif
#ifdef DEV_ACPI
	has_acpi = (AcpiOsGetRootPointer() != 0);
#endif

	env = kern_getenv("kern.cfg.order");
	if (env != NULL) {
		order = env;
		/* Walk the comma-separated list, taking the first match. */
		while (order != NULL) {
			if (has_acpi &&
			    strncmp(order, "acpi", 4) == 0 &&
			    (order[4] == ',' || order[4] == '\0')) {
				arm64_bus_method = ARM64_BUS_ACPI;
				break;
			}
			if (has_fdt &&
			    strncmp(order, "fdt", 3) == 0 &&
			    (order[3] == ',' || order[3] == '\0')) {
				arm64_bus_method = ARM64_BUS_FDT;
				break;
			}
			order = strchr(order, ',');
			if (order != NULL)
				order++;	/* Skip comma */
		}
		freeenv(env);

		/* If we set the bus method it is valid */
		if (arm64_bus_method != ARM64_BUS_NONE)
			return (true);
	}
	/* If no order or an invalid order was set use the default */
	if (arm64_bus_method == ARM64_BUS_NONE) {
		if (has_fdt)
			arm64_bus_method = ARM64_BUS_FDT;
		else if (has_acpi)
			arm64_bus_method = ARM64_BUS_ACPI;
	}

	/*
	 * If no option was set the default is valid, otherwise we are
	 * setting one to get cninit() working, then calling panic to tell
	 * the user about the invalid bus setup.
	 */
	return (env == NULL);
}
823 
/*
 * Discover cache geometry and configure "dc zva" based page zeroing.
 * If DCZID_EL0 permits the zeroing instruction, compute the block size
 * it clears and switch pagezero to the cache-assisted implementation.
 */
static void
cache_setup(void)
{
	int dczva_line_shift;
	uint32_t dczid_el0;

	identify_cache(READ_SPECIALREG(ctr_el0));

	dczid_el0 = READ_SPECIALREG(dczid_el0);

	/* Check if dc zva is not prohibited */
	if (dczid_el0 & DCZID_DZP)
		dczva_line_size = 0;
	else {
		/* BS field is log2 of the block size in 4-byte words. */
		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
		dczva_line_size = sizeof(int) << dczva_line_shift;

		/* Change pagezero function */
		pagezero = pagezero_cache;
	}
}
846 
/*
 * Choose a VM memory attribute for the physical address based on the EFI
 * memory map entry covering it: device memory for I/O ranges, otherwise
 * the strongest cacheability the firmware advertises (WB > WT > WC).
 * Defaults to write-back without an EFI map, and to device memory for
 * addresses not covered by any entry.
 */
int
memory_mapping_mode(vm_paddr_t pa)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	if (efihdr == NULL)
		return (VM_MEMATTR_WRITE_BACK);

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return (VM_MEMATTR_WRITE_BACK);
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (pa < p->md_phys ||
		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
			continue;
		if (p->md_type == EFI_MD_TYPE_IOMEM ||
		    p->md_type == EFI_MD_TYPE_IOPORT)
			return (VM_MEMATTR_DEVICE);
		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
		    p->md_type == EFI_MD_TYPE_RECLAIM)
			return (VM_MEMATTR_WRITE_BACK);
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			return (VM_MEMATTR_WRITE_THROUGH);
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			return (VM_MEMATTR_WRITE_COMBINING);
		break;
	}

	return (VM_MEMATTR_DEVICE);
}
888 
/*
 * Machine-dependent boot entry, called from locore.S with the boot
 * parameters assembled by the loader shim. Order here is critical:
 * per-CPU data before pmap_bootstrap, physmem population before
 * exclusions, console before any panic that reports to the user.
 */
void
initarm(struct arm64_bootparams *abp)
{
	struct efi_fb *efifb;
	struct pcpu *pcpup;
	char *env;
#ifdef FDT
	struct mem_region mem_regions[FDT_MEM_REGIONS];
	int mem_regions_sz;
	phandle_t root;
	char dts_version[255];
#endif
	vm_offset_t lastaddr;
	caddr_t kmdp;
	bool valid;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	/* Stash boot EL state for has_hyp()/in_vhe(). */
	boot_el = abp->boot_el;
	hcr_el2 = abp->hcr_el2;

	/* Parse loader or FDT boot parameters. Determine last used address. */
	lastaddr = parse_boot_param(abp);

	/* Find the kernel address */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");

	identify_cpu(0);
	identify_hypervisor_smbios();

	update_special_regs(0);

	/* Set the pcpu data, this is needed by pmap_bootstrap */
	pcpup = &pcpu0;
	pcpu_init(pcpup, 0, sizeof(struct pcpu));

	/*
	 * Set the pcpu pointer with a backup in tpidr_el1 to be
	 * loaded when entering the kernel from userland.
	 */
	__asm __volatile(
	    "mov x18, %0 \n"
	    "msr tpidr_el1, %0" :: "r"(pcpup));

	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
	PCPU_SET(curthread, &thread0);
	PCPU_SET(midr, get_midr());

	link_elf_ireloc(kmdp);
#ifdef FDT
	try_load_dtb(kmdp);
#endif

	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	/* Load the physical memory ranges */
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr != NULL)
		add_efi_map_entries(efihdr);
#ifdef FDT
	else {
		/* Grab physical memory regions information from device tree. */
		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
		    NULL) != 0)
			panic("Cannot get physical memory regions");
		physmem_hardware_regions(mem_regions, mem_regions_sz);
	}
	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
		physmem_exclude_regions(mem_regions, mem_regions_sz,
		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
#endif

	/* Exclude the EFI framebuffer from our view of physical memory. */
	efifb = (struct efi_fb *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_FB);
	if (efifb != NULL)
		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
		    EXFLAG_NOALLOC);

	/* Do basic tuning, hz etc */
	init_param1();

	cache_setup();
	pan_setup();

	/* Bootstrap enough of pmap to enter the kernel proper */
	pmap_bootstrap(lastaddr - KERNBASE);
	/* Exclude entries needed in the DMAP region, but not phys_avail */
	if (efihdr != NULL)
		exclude_efi_map_entries(efihdr);
	/* Do the same for reserve entries in the EFI MEMRESERVE table */
	if (efi_systbl_phys != 0)
		exclude_efi_memreserve(efi_systbl_phys);

	/*
	 * We carefully bootstrap the sanitizer map after we've excluded
	 * absolutely everything else that could impact phys_avail.  There's not
	 * always enough room for the initial shadow map after the kernel, so
	 * we'll end up searching for segments that we can safely use.  Those
	 * segments also get excluded from phys_avail.
	 */
#if defined(KASAN) || defined(KMSAN)
	pmap_bootstrap_san();
#endif

	physmem_init_kernel_globals();

	devmap_bootstrap(0, NULL);

	valid = bus_probe();

	cninit();
	set_ttbr0(abp->kern_ttbr0);
	cpu_tlb_flushID();

	/* Deferred until now so the panic message reaches the console. */
	if (!valid)
		panic("Invalid bus configuration: %s",
		    kern_getenv("kern.cfg.order"));

	/*
	 * Check if pointer authentication is available on this system, and
	 * if so enable its use. This needs to be called before init_proc0
	 * as that will configure the thread0 pointer authentication keys.
	 */
	ptrauth_init();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

	init_proc0(abp->kern_stack);
	msgbufinit(msgbufp, msgbufsize);
	mutex_init();
	init_param2(physmem);

	dbg_init();
	kdb_init();
#ifdef KDB
	if ((boothowto & RB_KDB) != 0)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
	pan_enable();

	kcsan_cpu_init(0);
	kasan_init();
	kmsan_init();

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	if (arm64_bus_method == ARM64_BUS_FDT) {
		/* Warn if the DTB doesn't match the version this kernel expects. */
		root = OF_finddevice("/");
		if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
				printf("WARNING: DTB version is %s while kernel expects %s, "
				    "please update the DTB in the ESP\n",
				    dts_version,
				    LINUX_DTS_VERSION);
		} else {
			printf("WARNING: Cannot find freebsd,dts-version property, "
			    "cannot check DTB compliance\n");
		}
	}
#endif

	if (boothowto & RB_VERBOSE) {
		if (efihdr != NULL)
			print_efi_map_entries(efihdr);
		physmem_print_tables();
	}

	early_boot = 0;

	/* thread0's stack was sized before the tunable could take effect. */
	if (bootverbose && kstack_pages != KSTACK_PAGES)
		printf("kern.kstack_pages = %d ignored for thread0\n",
		    kstack_pages);

	TSEXIT();
}
1077 
/*
 * Initialize the self-hosted debug hardware: release the OS lock so
 * debug exceptions can be taken, and set up the monitor for DDB
 * hardware watchpoints.
 */
void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here. */
}
1090 
1091 #ifdef DDB
1092 #include <ddb/ddb.h>
1093 
/* ddb "show specialregs": dump the readable EL0/EL1 system registers. */
DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
#define	PRINT_REG(reg)	\
    db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}
1168 
/*
 * ddb "show vtop <virt_addr>": run the four AT (address translate)
 * instructions for the address and display the resulting PAR_EL1 values
 * for EL1/EL0 read and write accesses.
 */
DB_SHOW_COMMAND(vtop, db_show_vtop)
{
	uint64_t phys;

	if (have_addr) {
		phys = arm64_address_translate_s1e1r(addr);
		db_printf("EL1 physical address reg (read):  0x%016lx\n", phys);
		phys = arm64_address_translate_s1e1w(addr);
		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0r(addr);
		db_printf("EL0 physical address reg (read):  0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0w(addr);
		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
	} else
		db_printf("show vtop <virt_addr>\n");
}
1185 #endif
1186