xref: /freebsd/sys/arm64/arm64/mp_machdep.c (revision 9768746b)
1 /*-
2  * Copyright (c) 2015-2016 The FreeBSD Foundation
3  *
4  * This software was developed by Andrew Turner under
5  * sponsorship from the FreeBSD Foundation.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 
30 #include "opt_acpi.h"
31 #include "opt_ddb.h"
32 #include "opt_kstack_pages.h"
33 #include "opt_platform.h"
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/bus.h>
41 #include <sys/cpu.h>
42 #include <sys/csan.h>
43 #include <sys/domainset.h>
44 #include <sys/kernel.h>
45 #include <sys/ktr.h>
46 #include <sys/malloc.h>
47 #include <sys/module.h>
48 #include <sys/mutex.h>
49 #include <sys/pcpu.h>
50 #include <sys/proc.h>
51 #include <sys/sched.h>
52 #include <sys/smp.h>
53 
54 #include <vm/vm.h>
55 #include <vm/pmap.h>
56 #include <vm/vm_extern.h>
57 #include <vm/vm_kern.h>
58 #include <vm/vm_map.h>
59 
60 #include <machine/machdep.h>
61 #include <machine/cpu.h>
62 #include <machine/debug_monitor.h>
63 #include <machine/intr.h>
64 #include <machine/smp.h>
65 #ifdef VFP
66 #include <machine/vfp.h>
67 #endif
68 
69 #ifdef DEV_ACPI
70 #include <contrib/dev/acpica/include/acpi.h>
71 #include <dev/acpica/acpivar.h>
72 #endif
73 
74 #ifdef FDT
75 #include <dev/ofw/openfirm.h>
76 #include <dev/ofw/ofw_bus.h>
77 #include <dev/ofw/ofw_bus_subr.h>
78 #include <dev/ofw/ofw_cpu.h>
79 #endif
80 
81 #include <dev/psci/psci.h>
82 
83 #include "pic_if.h"
84 
85 #define	MP_BOOTSTACK_SIZE	(kstack_pages * PAGE_SIZE)
86 
87 #define	MP_QUIRK_CPULIST	0x01	/* The list of cpus may be wrong, */
88 					/* don't panic if one fails to start */
89 static uint32_t mp_quirks;
90 
91 #ifdef FDT
92 static struct {
93 	const char *compat;
94 	uint32_t quirks;
95 } fdt_quirks[] = {
96 	{ "arm,foundation-aarch64",	MP_QUIRK_CPULIST },
97 	{ "arm,fvp-base",		MP_QUIRK_CPULIST },
98 	/* This is incorrect in some DTS files */
99 	{ "arm,vfp-base",		MP_QUIRK_CPULIST },
100 	{ NULL, 0 },
101 };
102 #endif
103 
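/*
 * Glue between the MI IPI layer and the root interrupt controller: each IPI
 * has a send method supplied by the PIC driver and a handler that runs on
 * the target CPU when the IPI is dispatched.
 */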
104 typedef void intr_ipi_send_t(void *, cpuset_t, u_int);
105 typedef void intr_ipi_handler_t(void *);
106 
107 #define INTR_IPI_NAMELEN	(MAXCOMLEN + 1)
108 struct intr_ipi {
109 	intr_ipi_handler_t *	ii_handler;
110 	void *			ii_handler_arg;
111 	intr_ipi_send_t *	ii_send;
112 	void *			ii_send_arg;
113 	char			ii_name[INTR_IPI_NAMELEN];
114 	u_long *		ii_count;
115 };
116 
117 static struct intr_ipi ipi_sources[INTR_IPI_COUNT];
118 
119 static struct intr_ipi *intr_ipi_lookup(u_int);
120 static void intr_pic_ipi_setup(u_int, const char *, intr_ipi_handler_t *,
121     void *);
122 
123 static void ipi_ast(void *);
124 static void ipi_hardclock(void *);
125 static void ipi_preempt(void *);
126 static void ipi_rendezvous(void *);
127 static void ipi_stop(void *);
128 
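/*
 * Register state saved by each CPU in ipi_stop() so that a parked CPU's
 * state can still be inspected, e.g. by the debugger.
 */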
129 struct pcb stoppcbs[MAXCPU];
130 
131 #ifdef FDT
132 static u_int fdt_cpuid;
133 #endif
134 
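/*
 * AP entry points: mpentry() is the low-level entry point reached from the
 * firmware, which switches to the boot stack and calls init_secondary().
 */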
135 void mpentry(unsigned long cpuid);
136 void init_secondary(uint64_t);
137 
138 /* Synchronize AP startup. */
139 static struct mtx ap_boot_mtx;
140 
141 /* Stacks for AP initialization, discarded once idle threads are started. */
142 void *bootstack;
143 static void *bootstacks[MAXCPU];
144 
145 /* Count of started APs, used to synchronize access to bootstack. */
146 static volatile int aps_started;
147 
148 /* Set to 1 once we're ready to let the APs out of the pen. */
149 static volatile int aps_ready;
150 
151 /* Temporary variables for init_secondary()  */
152 void *dpcpu[MAXCPU - 1];
153 
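/*
 * Check whether the MPIDR affinity fields of 'target_cpu' match those of
 * the boot CPU (always CPU 0).
 */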
154 static bool
155 is_boot_cpu(uint64_t target_cpu)
156 {
157 
158 	return (PCPU_GET_MPIDR(cpuid_to_pcpu[0]) == (target_cpu & CPU_AFF_MASK));
159 }
160 
161 static void
162 release_aps(void *dummy __unused)
163 {
164 	int i, started;
165 
166 	/* Only release CPUs if they exist */
167 	if (mp_ncpus == 1)
168 		return;
169 
170 	intr_pic_ipi_setup(IPI_AST, "ast", ipi_ast, NULL);
171 	intr_pic_ipi_setup(IPI_PREEMPT, "preempt", ipi_preempt, NULL);
172 	intr_pic_ipi_setup(IPI_RENDEZVOUS, "rendezvous", ipi_rendezvous, NULL);
173 	intr_pic_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL);
174 	intr_pic_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL);
175 	intr_pic_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL);
176 
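	/*
	 * The APs spin in init_secondary() executing WFE until aps_ready is
	 * set.  Publish the flag with release semantics, then make the store
	 * visible and send an event to wake them.
	 */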
177 	atomic_store_rel_int(&aps_ready, 1);
178 	/* Wake up the other CPUs */
179 	__asm __volatile(
180 	    "dsb ishst	\n"
181 	    "sev	\n"
182 	    ::: "memory");
183 
184 	printf("Release APs...");
185 
186 	started = 0;
187 	for (i = 0; i < 2000; i++) {
188 		if (atomic_load_acq_int(&smp_started) != 0) {
189 			printf("done\n");
190 			return;
191 		}
192 		/*
193 		 * Don't time out while we are making progress. Some large
194 		 * systems can take a while to start all CPUs.
195 		 */
196 		if (smp_cpus > started) {
197 			i = 0;
198 			started = smp_cpus;
199 		}
200 		DELAY(1000);
201 	}
202 
203 	printf("APs not started\n");
204 }
205 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
206 
207 void
208 init_secondary(uint64_t cpu)
209 {
210 	struct pcpu *pcpup;
211 	pmap_t pmap0;
212 	uint64_t mpidr;
213 
214 	ptrauth_mp_start(cpu);
215 
216 	/*
217 	 * Verify that the value passed in the 'cpu' argument (aka context_id)
218 	 * is valid. Some older U-Boot based PSCI implementations are buggy and
219 	 * can pass a random value in it.
220 	 */
221 	mpidr = READ_SPECIALREG(mpidr_el1) & CPU_AFF_MASK;
222 	if (cpu >= MAXCPU || cpuid_to_pcpu[cpu] == NULL ||
223 	    PCPU_GET_MPIDR(cpuid_to_pcpu[cpu]) != mpidr) {
224 		for (cpu = 0; cpu < mp_maxid; cpu++)
225 			if (cpuid_to_pcpu[cpu] != NULL &&
226 			    PCPU_GET_MPIDR(cpuid_to_pcpu[cpu]) == mpidr)
227 				break;
228 		if (cpu >= MAXCPU)
229 			panic("MPIDR for this CPU is not in pcpu table");
230 	}
231 
232 	pcpup = cpuid_to_pcpu[cpu];
233 	/*
234 	 * Set the pcpu pointer with a backup in tpidr_el1 to be
235 	 * loaded when entering the kernel from userland.
236 	 */
237 	__asm __volatile(
238 	    "mov x18, %0 \n"
239 	    "msr tpidr_el1, %0" :: "r"(pcpup));
240 
241 	/*
242 	 * Identify the current CPU. This is necessary to set up
243 	 * affinity registers and to provide support for
244 	 * runtime chip identification.
245 	 *
246 	 * We need this before signalling the CPU is ready to
247 	 * let the boot CPU use the results.
248 	 */
249 	pcpup->pc_midr = get_midr();
250 	identify_cpu(cpu);
251 
252 	/* Ensure the stores in identify_cpu have completed */
253 	atomic_thread_fence_acq_rel();
254 
255 	/* Signal the BSP and spin until it has released all APs. */
256 	atomic_add_int(&aps_started, 1);
257 	while (!atomic_load_int(&aps_ready))
258 		__asm __volatile("wfe");
259 
260 	/* Initialize curthread */
261 	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
262 	pcpup->pc_curthread = pcpup->pc_idlethread;
263 	schedinit_ap();
264 
265 	/* Initialize curpmap to match TTBR0's current setting. */
266 	pmap0 = vmspace_pmap(&vmspace0);
267 	KASSERT(pmap_to_ttbr0(pmap0) == READ_SPECIALREG(ttbr0_el1),
268 	    ("pmap0 doesn't match cpu %ld's ttbr0", cpu));
269 	pcpup->pc_curpmap = pmap0;
270 
271 	install_cpu_errata();
272 
273 	intr_pic_init_secondary();
274 
275 	/* Start per-CPU event timers. */
276 	cpu_initclocks_ap();
277 
278 #ifdef VFP
279 	vfp_init();
280 #endif
281 
282 	dbg_init();
283 	pan_enable();
284 
285 	mtx_lock_spin(&ap_boot_mtx);
286 	atomic_add_rel_32(&smp_cpus, 1);
287 	if (smp_cpus == mp_ncpus) {
288 		/* Enable IPIs, TLB shootdowns, freezes, etc. */
289 		atomic_store_rel_int(&smp_started, 1);
290 	}
291 	mtx_unlock_spin(&ap_boot_mtx);
292 
293 	kcsan_cpu_init(cpu);
294 
295 	/* Enter the scheduler */
296 	sched_ap_entry();
297 
298 	panic("scheduler returned us to init_secondary");
299 	/* NOTREACHED */
300 }
301 
302 static void
303 smp_after_idle_runnable(void *arg __unused)
304 {
305 	int cpu;
306 
307 	if (mp_ncpus == 1)
308 		return;
309 
310 	KASSERT(smp_started != 0, ("%s: SMP not started yet", __func__));
311 
312 	/*
313 	 * Wait for all APs to handle an interrupt.  After that, we know that
314 	 * the APs have entered the scheduler at least once, so the boot stacks
315 	 * are safe to free.
316 	 */
317 	smp_rendezvous(smp_no_rendezvous_barrier, NULL,
318 	    smp_no_rendezvous_barrier, NULL);
319 
320 	for (cpu = 1; cpu < mp_ncpus; cpu++) {
321 		if (bootstacks[cpu] != NULL)
322 			kmem_free(bootstacks[cpu], MP_BOOTSTACK_SIZE);
323 	}
324 }
325 SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY,
326     smp_after_idle_runnable, NULL);
327 
328 /*
329  *  Send an IPI through the interrupt controller.
330  */
331 static void
332 pic_ipi_send(void *arg, cpuset_t cpus, u_int ipi)
333 {
334 
335 	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
336 
337 	/*
338 	 * Ensure that this CPU's stores will be visible to IPI
339 	 * recipients before starting to send the interrupts.
340 	 */
341 	dsb(ishst);
342 
343 	PIC_IPI_SEND(intr_irq_root_dev, arg, cpus, ipi);
344 }
345 
346 /*
347  *  Set up an IPI handler on the interrupt controller.
348  *
349  *  Not SMP coherent.
350  */
351 static void
352 intr_pic_ipi_setup(u_int ipi, const char *name, intr_ipi_handler_t *hand,
353     void *arg)
354 {
355 	struct intr_irqsrc *isrc;
356 	struct intr_ipi *ii;
357 	int error;
358 
359 	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
360 	KASSERT(hand != NULL, ("%s: ipi %u no handler", __func__, ipi));
361 
362 	error = PIC_IPI_SETUP(intr_irq_root_dev, ipi, &isrc);
363 	if (error != 0)
364 		return;
365 
366 	isrc->isrc_handlers++;
367 
368 	ii = intr_ipi_lookup(ipi);
369 	KASSERT(ii->ii_count == NULL, ("%s: ipi %u reused", __func__, ipi));
370 
371 	ii->ii_handler = hand;
372 	ii->ii_handler_arg = arg;
373 	ii->ii_send = pic_ipi_send;
374 	ii->ii_send_arg = isrc;
375 	strlcpy(ii->ii_name, name, INTR_IPI_NAMELEN);
376 	ii->ii_count = intr_ipi_setup_counters(name);
377 
378 	PIC_ENABLE_INTR(intr_irq_root_dev, isrc);
379 }
380 
381 static void
382 intr_ipi_send(cpuset_t cpus, u_int ipi)
383 {
384 	struct intr_ipi *ii;
385 
386 	ii = intr_ipi_lookup(ipi);
387 	if (ii->ii_count == NULL)
388 		panic("%s: not setup IPI %u", __func__, ipi);
389 
390 	ii->ii_send(ii->ii_send_arg, cpus, ipi);
391 }
392 
393 static void
394 ipi_ast(void *dummy __unused)
395 {
396 
397 	CTR0(KTR_SMP, "IPI_AST");
398 }
399 
400 static void
401 ipi_hardclock(void *dummy __unused)
402 {
403 
404 	CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
405 	hardclockintr();
406 }
407 
408 static void
409 ipi_preempt(void *dummy __unused)
410 {
411 	CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__);
412 	sched_preempt(curthread);
413 }
414 
415 static void
416 ipi_rendezvous(void *dummy __unused)
417 {
418 
419 	CTR0(KTR_SMP, "IPI_RENDEZVOUS");
420 	smp_rendezvous_action();
421 }
422 
423 static void
424 ipi_stop(void *dummy __unused)
425 {
426 	u_int cpu;
427 
428 	CTR0(KTR_SMP, "IPI_STOP");
429 
430 	cpu = PCPU_GET(cpuid);
431 	savectx(&stoppcbs[cpu]);
432 
433 	/* Indicate we are stopped */
434 	CPU_SET_ATOMIC(cpu, &stopped_cpus);
435 
436 	/* Wait for restart */
437 	while (!CPU_ISSET(cpu, &started_cpus))
438 		cpu_spinwait();
439 
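	/*
	 * The debugger may have changed the hardware breakpoint and
	 * watchpoint state while this CPU was stopped; resync the debug
	 * registers before resuming.
	 */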
440 #ifdef DDB
441 	dbg_register_sync(NULL);
442 #endif
443 
444 	CPU_CLR_ATOMIC(cpu, &started_cpus);
445 	CPU_CLR_ATOMIC(cpu, &stopped_cpus);
446 	CTR0(KTR_SMP, "IPI_STOP (restart)");
447 }
448 
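/*
 * Build the scheduler topology: a single root group holding all CPUs with
 * one child group per VM (NUMA) domain.  CPUs within a domain are treated
 * as sharing a last-level cache.
 */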
449 struct cpu_group *
450 cpu_topo(void)
451 {
452 	struct cpu_group *dom, *root;
453 	int i;
454 
455 	root = smp_topo_alloc(1);
456 	dom = smp_topo_alloc(vm_ndomains);
457 
458 	root->cg_parent = NULL;
459 	root->cg_child = dom;
460 	CPU_COPY(&all_cpus, &root->cg_mask);
461 	root->cg_count = mp_ncpus;
462 	root->cg_children = vm_ndomains;
463 	root->cg_level = CG_SHARE_NONE;
464 	root->cg_flags = 0;
465 
466 	/*
467 	 * Redundant layers will be collapsed by the caller so we don't need a
468 	 * special case for a single domain.
469 	 */
470 	for (i = 0; i < vm_ndomains; i++, dom++) {
471 		dom->cg_parent = root;
472 		dom->cg_child = NULL;
473 		CPU_COPY(&cpuset_domain[i], &dom->cg_mask);
474 		dom->cg_count = CPU_COUNT(&dom->cg_mask);
475 		dom->cg_children = 0;
476 		dom->cg_level = CG_SHARE_L3;
477 		dom->cg_flags = 0;
478 	}
479 
480 	return (root);
481 }
482 
483 /* Determine if we are running on an MP machine */
484 int
485 cpu_mp_probe(void)
486 {
487 
488 	/* ARM64TODO: Read the u bit of mpidr_el1 to determine this */
489 	return (1);
490 }
491 
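/*
 * Ask the PSCI firmware to power on 'target_cpu', entering the kernel at
 * physical address 'entry' with 'cpuid' as the context_id argument that
 * init_secondary() later validates.
 */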
492 static int
493 enable_cpu_psci(uint64_t target_cpu, vm_paddr_t entry, u_int cpuid)
494 {
495 	int err;
496 
497 	err = psci_cpu_on(target_cpu, entry, cpuid);
498 	if (err != PSCI_RETVAL_SUCCESS) {
499 		/*
500 		 * Panic here if INVARIANTS are enabled and PSCI failed to
501 		 * start the requested CPU.  psci_cpu_on() returns PSCI_MISSING
502 		 * to indicate we are unable to use it to start the given CPU.
503 		 */
504 		KASSERT(err == PSCI_MISSING ||
505 		    (mp_quirks & MP_QUIRK_CPULIST) == MP_QUIRK_CPULIST,
506 		    ("Failed to start CPU %u (%lx), error %d\n",
507 		    cpuid, target_cpu, err));
508 		return (EINVAL);
509 	}
510 
511 	return (0);
512 }
513 
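/*
 * Spin-table startup: the firmware parks the CPU (typically in WFE) waiting
 * for a non-zero value at the release address published in the device tree.
 * Write the kernel entry point there and wake the CPU.
 */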
514 static int
515 enable_cpu_spin(uint64_t cpu, vm_paddr_t entry, vm_paddr_t release_paddr)
516 {
517 	vm_paddr_t *release_addr;
518 
519 	release_addr = pmap_mapdev(release_paddr, sizeof(*release_addr));
520 	if (release_addr == NULL)
521 		return (ENOMEM);
522 
523 	*release_addr = entry;
524 	pmap_unmapdev(release_addr, sizeof(*release_addr));
525 
526 	__asm __volatile(
527 	    "dsb sy	\n"
528 	    "sev	\n"
529 	    ::: "memory");
530 
531 	return (0);
532 }
533 
534 /*
535  * Starts a given CPU. If the CPU is already running, i.e. it is the boot CPU,
536  * do nothing. Returns true if the CPU is present and running.
537  */
538 static bool
539 start_cpu(u_int cpuid, uint64_t target_cpu, int domain, vm_paddr_t release_addr)
540 {
541 	struct pcpu *pcpup;
542 	vm_size_t size;
543 	vm_paddr_t pa;
544 	int err, naps;
545 
546 	/* Check we are able to start this cpu */
547 	if (cpuid > mp_maxid)
548 		return (false);
549 
550 	/* Skip boot CPU */
551 	if (is_boot_cpu(target_cpu))
552 		return (true);
553 
554 	KASSERT(cpuid < MAXCPU, ("Too many CPUs"));
555 
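	/*
	 * Allocate the pcpu and dynamic per-CPU data for the new CPU,
	 * preferring memory from its own NUMA domain.
	 */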
556 	size = round_page(sizeof(*pcpup) + DPCPU_SIZE);
557 	pcpup = kmem_malloc_domainset(DOMAINSET_PREF(domain), size,
558 	    M_WAITOK | M_ZERO);
559 	pmap_disable_promotion((vm_offset_t)pcpup, size);
560 	pcpu_init(pcpup, cpuid, sizeof(struct pcpu));
561 	pcpup->pc_mpidr_low = target_cpu & CPU_AFF_MASK;
562 	pcpup->pc_mpidr_high = (target_cpu & CPU_AFF_MASK) >> 32;
563 
564 	dpcpu[cpuid - 1] = (void *)(pcpup + 1);
565 	dpcpu_init(dpcpu[cpuid - 1], cpuid);
566 
567 	bootstacks[cpuid] = kmem_malloc_domainset(DOMAINSET_PREF(domain),
568 	    MP_BOOTSTACK_SIZE, M_WAITOK | M_ZERO);
569 
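	/*
	 * 'bootstack' is a single shared pointer, so APs are brought up one
	 * at a time: note the current AP count and wait below for the new
	 * CPU to switch to its own boot stack and check in before the
	 * pointer is reused.
	 */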
570 	naps = atomic_load_int(&aps_started);
571 	bootstack = (char *)bootstacks[cpuid] + MP_BOOTSTACK_SIZE;
572 
573 	printf("Starting CPU %u (%lx)\n", cpuid, target_cpu);
574 	pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry);
575 
576 	/*
577 	 * A limited set of hardware we support can only do spintables and
578 	 * remain useful, due to lack of EL3.  Thus, we'll usually fall into the
579 	 * PSCI branch here.
580 	 */
581 	MPASS(release_addr == 0 || !psci_present);
582 	if (release_addr != 0)
583 		err = enable_cpu_spin(target_cpu, pa, release_addr);
584 	else
585 		err = enable_cpu_psci(target_cpu, pa, cpuid);
586 
587 	if (err != 0) {
588 		pcpu_destroy(pcpup);
589 		dpcpu[cpuid - 1] = NULL;
590 		kmem_free(bootstacks[cpuid], MP_BOOTSTACK_SIZE);
591 		kmem_free(pcpup, size);
592 		bootstacks[cpuid] = NULL;
593 		mp_ncpus--;
594 		return (false);
595 	}
596 
597 	/* Wait for the AP to switch to its boot stack. */
598 	while (atomic_load_int(&aps_started) < naps + 1)
599 		cpu_spinwait();
600 	CPU_SET(cpuid, &all_cpus);
601 
602 	return (true);
603 }
604 
605 #ifdef DEV_ACPI
606 static void
607 madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
608 {
609 	ACPI_MADT_GENERIC_INTERRUPT *intr;
610 	u_int *cpuid;
611 	u_int id;
612 	int domain;
613 
614 	switch(entry->Type) {
615 	case ACPI_MADT_TYPE_GENERIC_INTERRUPT:
616 		intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry;
617 		cpuid = arg;
618 
619 		if (is_boot_cpu(intr->ArmMpidr))
620 			id = 0;
621 		else
622 			id = *cpuid;
623 
624 		domain = 0;
625 #ifdef NUMA
626 		if (vm_ndomains > 1)
627 			domain = acpi_pxm_get_cpu_locality(intr->Uid);
628 #endif
629 		if (start_cpu(id, intr->ArmMpidr, domain, 0)) {
630 			MPASS(cpuid_to_pcpu[id] != NULL);
631 			cpuid_to_pcpu[id]->pc_acpi_id = intr->Uid;
632 			/*
633 			 * Don't increment for the boot CPU, its CPU ID is
634 			 * reserved.
635 			 */
636 			if (!is_boot_cpu(intr->ArmMpidr))
637 				(*cpuid)++;
638 		}
639 
640 		break;
641 	default:
642 		break;
643 	}
644 }
645 
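/*
 * Walk the MADT's GIC CPU interface (GICC) entries and start each secondary
 * CPU listed there.  The boot CPU always keeps CPU ID 0.
 */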
646 static void
647 cpu_init_acpi(void)
648 {
649 	ACPI_TABLE_MADT *madt;
650 	vm_paddr_t physaddr;
651 	u_int cpuid;
652 
653 	physaddr = acpi_find_table(ACPI_SIG_MADT);
654 	if (physaddr == 0)
655 		return;
656 
657 	madt = acpi_map_table(physaddr, ACPI_SIG_MADT);
658 	if (madt == NULL) {
659 		printf("Unable to map the MADT, not starting APs\n");
660 		return;
661 	}
662 	/* Boot CPU is always 0 */
663 	cpuid = 1;
664 	acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
665 	    madt_handler, &cpuid);
666 
667 	acpi_unmap_table(madt);
668 
669 #if MAXMEMDOM > 1
670 	acpi_pxm_set_cpu_locality();
671 #endif
672 }
673 #endif
674 
675 #ifdef FDT
676 /*
677  * Failure is indicated by failing to populate *release_addr.
678  */
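/*
 * A hypothetical DT node using the spin-table enable method would look
 * something like:
 *
 *	cpu@1 {
 *		device_type = "cpu";
 *		compatible = "arm,cortex-a53";
 *		reg = <0x0 0x1>;
 *		enable-method = "spin-table";
 *		cpu-release-addr = <0x0 0xd8>;
 *	};
 *
 * "cpu-release-addr" is two 32-bit cells (high, low) combined below into a
 * single 64-bit physical address.
 */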
679 static void
680 populate_release_addr(phandle_t node, vm_paddr_t *release_addr)
681 {
682 	pcell_t buf[2];
683 
684 	if (OF_getencprop(node, "cpu-release-addr", buf, sizeof(buf)) !=
685 	    sizeof(buf))
686 		return;
687 
688 	*release_addr = (((uintptr_t)buf[0] << 32) | buf[1]);
689 }
690 
691 static boolean_t
692 start_cpu_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
693 {
694 	uint64_t target_cpu;
695 	vm_paddr_t release_addr;
696 	char *enable_method;
697 	int domain;
698 	int cpuid;
699 
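	/*
	 * The cpu node's "reg" property holds the MPIDR affinity value,
	 * one or two address cells wide.
	 */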
700 	target_cpu = reg[0];
701 	if (addr_size == 2) {
702 		target_cpu <<= 32;
703 		target_cpu |= reg[1];
704 	}
705 
706 	if (is_boot_cpu(target_cpu))
707 		cpuid = 0;
708 	else
709 		cpuid = fdt_cpuid;
710 
711 	/*
712 	 * If PSCI is present, we'll always use that -- the cpu_on method is
713 	 * mandated in both v0.1 and v0.2.  We'll check the enable-method if
714 	 * we don't have PSCI and use spin table if it's provided.
715 	 */
716 	release_addr = 0;
717 	if (!psci_present && cpuid != 0) {
718 		if (OF_getprop_alloc(node, "enable-method",
719 		    (void **)&enable_method) <= 0)
720 			return (FALSE);
721 
722 		if (strcmp(enable_method, "spin-table") != 0) {
723 			OF_prop_free(enable_method);
724 			return (FALSE);
725 		}
726 
727 		OF_prop_free(enable_method);
728 		populate_release_addr(node, &release_addr);
729 		if (release_addr == 0) {
730 			printf("Failed to fetch release address for CPU %u\n",
731 			    cpuid);
732 			return (FALSE);
733 		}
734 	}
735 
736 	if (!start_cpu(cpuid, target_cpu, 0, release_addr))
737 		return (FALSE);
738 
739 	/*
740 	 * Don't increment for the boot CPU, its CPU ID is reserved.
741 	 */
742 	if (!is_boot_cpu(target_cpu))
743 		fdt_cpuid++;
744 
745 	/* Try to read the NUMA node of this CPU */
746 	if (vm_ndomains == 1 ||
747 	    OF_getencprop(node, "numa-node-id", &domain, sizeof(domain)) <= 0)
748 		domain = 0;
749 	cpuid_to_pcpu[cpuid]->pc_domain = domain;
750 	if (domain < MAXMEMDOM)
751 		CPU_SET(cpuid, &cpuset_domain[domain]);
752 	return (TRUE);
753 }

754 static void
755 cpu_init_fdt(void)
756 {
757 	phandle_t node;
758 	int i;
759 
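	/* Check the root node's compatible strings for boards with quirks. */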
760 	node = OF_peer(0);
761 	for (i = 0; fdt_quirks[i].compat != NULL; i++) {
762 		if (ofw_bus_node_is_compatible(node,
763 		    fdt_quirks[i].compat) != 0) {
764 			mp_quirks = fdt_quirks[i].quirks;
765 		}
766 	}
767 	fdt_cpuid = 1;
768 	ofw_cpu_early_foreach(start_cpu_fdt, true);
769 }
770 #endif
771 
772 /* Initialize and fire up non-boot processors */
773 void
774 cpu_mp_start(void)
775 {
776 	uint64_t mpidr;
777 
778 	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
779 
780 	/* CPU 0 is always boot CPU. */
781 	CPU_SET(0, &all_cpus);
782 	mpidr = READ_SPECIALREG(mpidr_el1) & CPU_AFF_MASK;
783 	cpuid_to_pcpu[0]->pc_mpidr_low = mpidr;
784 	cpuid_to_pcpu[0]->pc_mpidr_high = mpidr >> 32;
785 
786 	switch(arm64_bus_method) {
787 #ifdef DEV_ACPI
788 	case ARM64_BUS_ACPI:
789 		mp_quirks = MP_QUIRK_CPULIST;
790 		cpu_init_acpi();
791 		break;
792 #endif
793 #ifdef FDT
794 	case ARM64_BUS_FDT:
795 		cpu_init_fdt();
796 		break;
797 #endif
798 	default:
799 		break;
800 	}
801 }
802 
803 /* Introduce the rest of the cores to the world */
804 void
805 cpu_mp_announce(void)
806 {
807 }
808 
809 #ifdef DEV_ACPI
810 static void
811 cpu_count_acpi_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
812 {
813 	u_int *cores = arg;
814 
815 	switch(entry->Type) {
816 	case ACPI_MADT_TYPE_GENERIC_INTERRUPT:
817 		(*cores)++;
818 		break;
819 	default:
820 		break;
821 	}
822 }
823 
824 static u_int
825 cpu_count_acpi(void)
826 {
827 	ACPI_TABLE_MADT *madt;
828 	vm_paddr_t physaddr;
829 	u_int cores;
830 
831 	physaddr = acpi_find_table(ACPI_SIG_MADT);
832 	if (physaddr == 0)
833 		return (0);
834 
835 	madt = acpi_map_table(physaddr, ACPI_SIG_MADT);
836 	if (madt == NULL) {
837 		printf("Unable to map the MADT, not starting APs\n");
838 		return (0);
839 	}
840 
841 	cores = 0;
842 	acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
843 	    cpu_count_acpi_handler, &cores);
844 
845 	acpi_unmap_table(madt);
846 
847 	return (cores);
848 }
849 #endif
850 
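/*
 * Set mp_ncpus and mp_maxid from the firmware-provided CPU count, clamped
 * to MAXCPU and optionally reduced by the hw.ncpu tunable.
 */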
851 void
852 cpu_mp_setmaxid(void)
853 {
854 	int cores;
855 
856 	mp_ncpus = 1;
857 	mp_maxid = 0;
858 
859 	switch(arm64_bus_method) {
860 #ifdef DEV_ACPI
861 	case ARM64_BUS_ACPI:
862 		cores = cpu_count_acpi();
863 		if (cores > 0) {
864 			cores = MIN(cores, MAXCPU);
865 			if (bootverbose)
866 				printf("Found %d CPUs in the ACPI tables\n",
867 				    cores);
868 			mp_ncpus = cores;
869 			mp_maxid = cores - 1;
870 		}
871 		break;
872 #endif
873 #ifdef FDT
874 	case ARM64_BUS_FDT:
875 		cores = ofw_cpu_early_foreach(NULL, false);
876 		if (cores > 0) {
877 			cores = MIN(cores, MAXCPU);
878 			if (bootverbose)
879 				printf("Found %d CPUs in the device tree\n",
880 				    cores);
881 			mp_ncpus = cores;
882 			mp_maxid = cores - 1;
883 		}
884 		break;
885 #endif
886 	default:
887 		if (bootverbose)
888 			printf("No CPU data, limiting to 1 core\n");
889 		break;
890 	}
891 
892 	if (TUNABLE_INT_FETCH("hw.ncpu", &cores)) {
893 		if (cores > 0 && cores < mp_ncpus) {
894 			mp_ncpus = cores;
895 			mp_maxid = cores - 1;
896 		}
897 	}
898 }
899 
900 /*
901  *  Look up an IPI source.
902  */
903 static struct intr_ipi *
904 intr_ipi_lookup(u_int ipi)
905 {
906 
907 	if (ipi >= INTR_IPI_COUNT)
908 		panic("%s: no such IPI %u", __func__, ipi);
909 
910 	return (&ipi_sources[ipi]);
911 }
912 
913 /*
914  *  Interrupt controller dispatch function for IPIs.  It should
915  *  be called straight from the interrupt controller, when the associated
916  *  interrupt source is learned, or from anybody who has the interrupt
917  *  source mapped.
918  */
919 void
920 intr_ipi_dispatch(u_int ipi, struct trapframe *tf)
921 {
922 	void *arg;
923 	struct intr_ipi *ii;
924 
925 	ii = intr_ipi_lookup(ipi);
926 	if (ii->ii_count == NULL)
927 		panic("%s: not setup IPI %u", __func__, ipi);
928 
929 	intr_ipi_increment_count(ii->ii_count, PCPU_GET(cpuid));
930 
931 	/*
932 	 * Supply the IPI handler with the trapframe as its argument
933 	 * if no handler argument is registered.
934 	 */
935 	arg = ii->ii_handler_arg != NULL ? ii->ii_handler_arg : tf;
936 	ii->ii_handler(arg);
937 }
938 
939 #ifdef notyet
940 /*
941  *  Map IPI into interrupt controller.
942  *
943  *  Not SMP coherent.
944  */
945 static int
946 ipi_map(struct intr_irqsrc *isrc, u_int ipi)
947 {
948 	boolean_t is_percpu;
949 	int error;
950 
951 	if (ipi >= INTR_IPI_COUNT)
952 		panic("%s: no such IPI %u", __func__, ipi);
953 
954 	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
955 
956 	isrc->isrc_type = INTR_ISRCT_NAMESPACE;
957 	isrc->isrc_nspc_type = INTR_IRQ_NSPC_IPI;
958 	isrc->isrc_nspc_num = ipi_next_num;
959 
960 	error = PIC_REGISTER(intr_irq_root_dev, isrc, &is_percpu);
961 	if (error == 0) {
962 		isrc->isrc_dev = intr_irq_root_dev;
963 		ipi_next_num++;
964 	}
965 	return (error);
966 }
967 
968 /*
969  *  Set up an IPI handler on an interrupt source.
970  *
971  *  Note that there could be more ways to send and receive IPIs
972  *  on a platform, fast interrupts for example. In that case,
973  *  one can call this function with the AISHF_NOALLOC flag set and then
974  *  call intr_ipi_dispatch() when appropriate.
975  *
976  *  Not SMP coherent.
977  */
978 int
979 intr_ipi_set_handler(u_int ipi, const char *name, intr_ipi_filter_t *filter,
980     void *arg, u_int flags)
981 {
982 	struct intr_irqsrc *isrc;
983 	int error;
984 
985 	if (filter == NULL)
986 		return (EINVAL);
987 
988 	isrc = intr_ipi_lookup(ipi);
989 	if (isrc->isrc_ipifilter != NULL)
990 		return (EEXIST);
991 
992 	if ((flags & AISHF_NOALLOC) == 0) {
993 		error = ipi_map(isrc, ipi);
994 		if (error != 0)
995 			return (error);
996 	}
997 
998 	isrc->isrc_ipifilter = filter;
999 	isrc->isrc_arg = arg;
1000 	isrc->isrc_handlers = 1;
1001 	isrc->isrc_count = intr_ipi_setup_counters(name);
1002 	isrc->isrc_index = 0; /* it should not be used in IPI case */
1003 
1004 	if (isrc->isrc_dev != NULL) {
1005 		PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
1006 		PIC_ENABLE_SOURCE(isrc->isrc_dev, isrc);
1007 	}
1008 	return (0);
1009 }
1010 #endif
1011 
1012 /* Sending IPI */
1013 void
1014 ipi_all_but_self(u_int ipi)
1015 {
1016 	cpuset_t cpus;
1017 
1018 	cpus = all_cpus;
1019 	CPU_CLR(PCPU_GET(cpuid), &cpus);
1020 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
1021 	intr_ipi_send(cpus, ipi);
1022 }
1023 
1024 void
1025 ipi_cpu(int cpu, u_int ipi)
1026 {
1027 	cpuset_t cpus;
1028 
1029 	CPU_ZERO(&cpus);
1030 	CPU_SET(cpu, &cpus);
1031 
1032 	CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x", __func__, cpu, ipi);
1033 	intr_ipi_send(cpus, ipi);
1034 }
1035 
1036 void
1037 ipi_selected(cpuset_t cpus, u_int ipi)
1038 {
1039 
1040 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
1041 	intr_ipi_send(cpus, ipi);
1042 }
1043