xref: /freebsd/sys/riscv/riscv/mp_machdep.c (revision 1d386b48)
/*-
 * Copyright (c) 2015 The FreeBSD Foundation
 * Copyright (c) 2016 Ruslan Bukin <br@bsdpad.com>
 * All rights reserved.
 *
 * Portions of this software were developed by Andrew Turner under
 * sponsorship from the FreeBSD Foundation.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_kstack_pages.h"
#include "opt_platform.h"

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>

#include <machine/intr.h>
#include <machine/smp.h>
#include <machine/sbi.h>

#ifdef FDT
#include <dev/ofw/openfirm.h>
#include <dev/ofw/ofw_cpu.h>
#endif

#define	MP_BOOTSTACK_SIZE	(kstack_pages * PAGE_SIZE)

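/*
 * Per-hart startup flags; cpu_init_fdt() sets an entry to release the
 * corresponding AP from its early spin loop (see locore.S).
 */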
uint32_t __riscv_boot_ap[MAXCPU];

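/* How the CPUs were enumerated; only FDT enumeration is supported so far. */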
static enum {
	CPUS_UNKNOWN,
#ifdef FDT
	CPUS_FDT,
#endif
} cpu_enum_method;

static device_identify_t riscv64_cpu_identify;
static device_probe_t riscv64_cpu_probe;
static device_attach_t riscv64_cpu_attach;

static int ipi_handler(void *);

extern uint32_t boot_hart;
extern cpuset_t all_harts;

#ifdef INVARIANTS
static uint32_t cpu_reg[MAXCPU][2];
#endif
static device_t cpu_list[MAXCPU];

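/* AP entry points: mpentry (locore.S) hands off to init_secondary(). */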
void mpentry(u_long hartid);
void init_secondary(uint64_t);

static struct mtx ap_boot_mtx;

/* Stacks for AP initialization, discarded once idle threads are started. */
void *bootstack;
static void *bootstacks[MAXCPU];

/* Count of started APs, used to synchronize access to bootstack. */
static volatile int aps_started;

/* Set to 1 once we're ready to let the APs out of the pen. */
static volatile int aps_ready;

/* Temporary variables for init_secondary() */
void *dpcpu[MAXCPU - 1];

static device_method_t riscv64_cpu_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	riscv64_cpu_identify),
	DEVMETHOD(device_probe,		riscv64_cpu_probe),
	DEVMETHOD(device_attach,	riscv64_cpu_attach),

	DEVMETHOD_END
};

static driver_t riscv64_cpu_driver = {
	"riscv64_cpu",
	riscv64_cpu_methods,
	0
};

DRIVER_MODULE(riscv64_cpu, cpu, riscv64_cpu_driver, 0, 0);

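/* Add a riscv64_cpu child below each cpu device, if not already present. */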
static void
riscv64_cpu_identify(driver_t *driver, device_t parent)
{

	if (device_find_child(parent, "riscv64_cpu", -1) != NULL)
		return;
	if (BUS_ADD_CHILD(parent, 0, "riscv64_cpu", -1) == NULL)
		device_printf(parent, "add child failed\n");
}

static int
riscv64_cpu_probe(device_t dev)
{
	u_int cpuid;

	cpuid = device_get_unit(dev);
	if (cpuid >= MAXCPU || cpuid > mp_maxid)
		return (EINVAL);

	device_quiet(dev);
	return (0);
}

static int
riscv64_cpu_attach(device_t dev)
{
	const uint32_t *reg;
	size_t reg_size;
	u_int cpuid;
	int i;

	cpuid = device_get_unit(dev);

	if (cpuid >= MAXCPU || cpuid > mp_maxid)
		return (EINVAL);
	KASSERT(cpu_list[cpuid] == NULL, ("Already have cpu %u", cpuid));

	reg = cpu_get_cpuid(dev, &reg_size);
	if (reg == NULL)
		return (EINVAL);

	if (bootverbose) {
		device_printf(dev, "register <");
		for (i = 0; i < reg_size; i++)
			printf("%s%x", (i == 0) ? "" : " ", reg[i]);
		printf(">\n");
	}

	/* Save the device so we can start the CPU later. */
	cpu_list[cpuid] = dev;

	return (0);
}

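/*
 * Release the APs: install the IPI handler, mark aps_ready, and send a
 * wakeup IPI to every non-boot hart.
 */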
static void
release_aps(void *dummy __unused)
{
	cpuset_t mask;
	int i;

	if (mp_ncpus == 1)
		return;

	/* Setup the IPI handler */
	riscv_setup_ipihandler(ipi_handler);

	atomic_store_rel_int(&aps_ready, 1);

	/* Wake up the other CPUs */
	mask = all_harts;
	CPU_CLR(boot_hart, &mask);

	printf("Release APs\n");

	sbi_send_ipi(mask.__bits);

	for (i = 0; i < 2000; i++) {
		if (atomic_load_acq_int(&smp_started))
			return;
		DELAY(1000);
	}

	printf("APs not started\n");
}
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);

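/*
 * C entry point for an AP, called with its hart ID.  Sets up the per-CPU
 * area, waits for the BSP to release the APs, and enters the scheduler.
 */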
void
init_secondary(uint64_t hart)
{
	struct pcpu *pcpup;
	u_int cpuid;

	/* Renumber this cpu */
	cpuid = hart;
	if (cpuid < boot_hart)
		cpuid += mp_maxid + 1;
	cpuid -= boot_hart;

	/* Setup the pcpu pointer */
	pcpup = &__pcpu[cpuid];
	__asm __volatile("mv tp, %0" :: "r"(pcpup));

	/* Workaround: make sure wfi doesn't halt the hart */
	csr_set(sie, SIE_SSIE);
	csr_set(sip, SIE_SSIE);

	/* Signal the BSP and spin until it has released all APs. */
	atomic_add_int(&aps_started, 1);
	while (!atomic_load_int(&aps_ready))
		__asm __volatile("wfi");

	/* Initialize curthread */
	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
	pcpup->pc_curthread = pcpup->pc_idlethread;
	schedinit_ap();

	/* Enable software interrupts */
	riscv_unmask_ipi();

#ifndef EARLY_AP_STARTUP
	/* Start per-CPU event timers. */
	cpu_initclocks_ap();
#endif

	/* Enable external (PLIC) interrupts */
	csr_set(sie, SIE_SEIE);

	/* Activate this hart in the kernel pmap. */
	CPU_SET_ATOMIC(hart, &kernel_pmap->pm_active);

	/* Activate process 0's pmap. */
	pmap_activate_boot(vmspace_pmap(proc0.p_vmspace));

	mtx_lock_spin(&ap_boot_mtx);

	atomic_add_rel_32(&smp_cpus, 1);

	if (smp_cpus == mp_ncpus) {
		/* Enable IPIs, TLB shootdowns, freezes, etc. */
		atomic_store_rel_int(&smp_started, 1);
	}

	mtx_unlock_spin(&ap_boot_mtx);

	if (bootverbose)
		printf("Secondary CPU %u fully online\n", cpuid);

	/* Enter the scheduler */
	sched_ap_entry();

	panic("scheduler returned us to init_secondary");
	/* NOTREACHED */
}

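/* Free the AP boot stacks once every AP has entered the scheduler. */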
static void
smp_after_idle_runnable(void *arg __unused)
{
	int cpu;

	if (mp_ncpus == 1)
		return;

	KASSERT(smp_started != 0, ("%s: SMP not started yet", __func__));

	/*
	 * Wait for all APs to handle an interrupt.  After that, we know that
	 * the APs have entered the scheduler at least once, so the boot stacks
	 * are safe to free.
	 */
	smp_rendezvous(smp_no_rendezvous_barrier, NULL,
	    smp_no_rendezvous_barrier, NULL);

	for (cpu = 1; cpu <= mp_maxid; cpu++) {
		if (bootstacks[cpu] != NULL)
			kmem_free(bootstacks[cpu], MP_BOOTSTACK_SIZE);
	}
}
SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY,
    smp_after_idle_runnable, NULL);

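/*
 * Filter handler for supervisor software interrupts: acknowledge the
 * interrupt, then decode and dispatch each IPI pending on this CPU.
 */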
static int
ipi_handler(void *arg)
{
	u_int ipi_bitmap;
	u_int cpu, ipi;
	int bit;

	csr_clear(sip, SIP_SSIP);

	cpu = PCPU_GET(cpuid);

	mb();

	ipi_bitmap = atomic_readandclear_int(PCPU_PTR(pending_ipis));
	if (ipi_bitmap == 0)
		return (FILTER_HANDLED);

	while ((bit = ffs(ipi_bitmap))) {
		bit = (bit - 1);
		ipi = (1 << bit);
		ipi_bitmap &= ~ipi;

		mb();

		switch (ipi) {
		case IPI_AST:
			CTR0(KTR_SMP, "IPI_AST");
			break;
		case IPI_PREEMPT:
			CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__);
			sched_preempt(curthread);
			break;
		case IPI_RENDEZVOUS:
			CTR0(KTR_SMP, "IPI_RENDEZVOUS");
			smp_rendezvous_action();
			break;
		case IPI_STOP:
		case IPI_STOP_HARD:
			CTR0(KTR_SMP, (ipi == IPI_STOP) ? "IPI_STOP" : "IPI_STOP_HARD");
			savectx(&stoppcbs[cpu]);

			/* Indicate we are stopped */
			CPU_SET_ATOMIC(cpu, &stopped_cpus);

			/* Wait for restart */
			while (!CPU_ISSET(cpu, &started_cpus))
				cpu_spinwait();

			CPU_CLR_ATOMIC(cpu, &started_cpus);
			CPU_CLR_ATOMIC(cpu, &stopped_cpus);
			CTR0(KTR_SMP, "IPI_STOP (restart)");

			/*
			 * The kernel debugger might have set a breakpoint,
			 * so flush the instruction cache.
			 */
			fence_i();
			break;
		case IPI_HARDCLOCK:
			CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
			hardclockintr();
			break;
		default:
			panic("Unknown IPI %#0x on cpu %d", ipi, curcpu);
		}
	}

	return (FILTER_HANDLED);
}

struct cpu_group *
cpu_topo(void)
{

	return (smp_topo_none());
}

/* Determine if we are running on an MP machine. */
int
cpu_mp_probe(void)
{

	return (mp_ncpus > 1);
}

#ifdef FDT
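/* FDT enumeration callback: skip any hart that does not implement an MMU. */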
static bool
cpu_check_mmu(u_int id __unused, phandle_t node, u_int addr_size __unused,
    pcell_t *reg __unused)
{
	char type[32];

	/* Check if this hart supports MMU. */
	if (OF_getprop(node, "mmu-type", (void *)type, sizeof(type)) == -1 ||
	    strncmp(type, "riscv,none", 10) == 0)
		return (false);

	return (true);
}

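/*
 * FDT enumeration callback for each CPU node: derive the logical CPU ID
 * from the hart ID, allocate per-CPU data and a boot stack, and start the
 * hart (via the SBI HSM extension when it is available).
 */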
static bool
cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
{
	struct pcpu *pcpup;
	vm_paddr_t start_addr;
	uint64_t hart;
	u_int cpuid;
	int naps;
	int error;

	if (!cpu_check_mmu(id, node, addr_size, reg))
		return (false);

	KASSERT(id < MAXCPU, ("Too many CPUs"));

	KASSERT(addr_size == 1 || addr_size == 2, ("Invalid register size"));
#ifdef INVARIANTS
	cpu_reg[id][0] = reg[0];
	if (addr_size == 2)
		cpu_reg[id][1] = reg[1];
#endif

	hart = reg[0];
	if (addr_size == 2) {
		hart <<= 32;
		hart |= reg[1];
	}

	KASSERT(hart < MAXCPU, ("Too many harts."));

	/* We are already running on this cpu */
	if (hart == boot_hart)
		return (true);

	/*
	 * Rotate the CPU IDs to put the boot CPU as CPU 0.
	 * We keep the other CPUs ordered.
	 */
	cpuid = hart;
	if (cpuid < boot_hart)
		cpuid += mp_maxid + 1;
	cpuid -= boot_hart;

	/* Check if we are able to start this cpu */
	if (cpuid > mp_maxid)
		return (false);

	/*
	 * Depending on the SBI implementation, APs are waiting either in
	 * locore.S or to be activated explicitly, via SBI call.
	 */
	if (sbi_probe_extension(SBI_EXT_ID_HSM) != 0) {
		start_addr = pmap_kextract((vm_offset_t)mpentry);
		error = sbi_hsm_hart_start(hart, start_addr, 0);
		if (error != 0) {
			mp_ncpus--;

			/* Send a warning to the user and continue. */
			printf("AP %u (hart %lu) failed to start, error %d\n",
			    cpuid, hart, error);
			return (false);
		}
	}

	pcpup = &__pcpu[cpuid];
	pcpu_init(pcpup, cpuid, sizeof(struct pcpu));
	pcpup->pc_hart = hart;

	dpcpu[cpuid - 1] = kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO);
	dpcpu_init(dpcpu[cpuid - 1], cpuid);

	bootstacks[cpuid] = kmem_malloc(MP_BOOTSTACK_SIZE, M_WAITOK | M_ZERO);

	naps = atomic_load_int(&aps_started);
	bootstack = (char *)bootstacks[cpuid] + MP_BOOTSTACK_SIZE;

	if (bootverbose)
		printf("Starting CPU %u (hart %lx)\n", cpuid, hart);
	atomic_store_32(&__riscv_boot_ap[hart], 1);

	/* Wait for the AP to switch to its boot stack. */
	while (atomic_load_int(&aps_started) < naps + 1)
		cpu_spinwait();

	CPU_SET(cpuid, &all_cpus);
	CPU_SET(hart, &all_harts);

	return (true);
}
#endif

/* Initialize and fire up non-boot processors */
void
cpu_mp_start(void)
{
	u_int cpu;

	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

	CPU_SET(0, &all_cpus);
	CPU_SET(boot_hart, &all_harts);

	switch (cpu_enum_method) {
#ifdef FDT
	case CPUS_FDT:
		ofw_cpu_early_foreach(cpu_init_fdt, true);
		break;
#endif
	case CPUS_UNKNOWN:
		break;
	}

	CPU_FOREACH(cpu) {
		/* Already identified. */
		if (cpu == 0)
			continue;

		identify_cpu(cpu);
	}
}

/* Introduce rest of cores to the world */
void
cpu_mp_announce(void)
{
	u_int cpu;

	CPU_FOREACH(cpu) {
		/* Already announced. */
		if (cpu == 0)
			continue;

		printcpuinfo(cpu);
	}
}

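/*
 * Derive mp_ncpus and mp_maxid from the device tree, clamped by MAXCPU and
 * the hw.ncpu tunable.
 */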
void
cpu_mp_setmaxid(void)
{
	int cores;

#ifdef FDT
	cores = ofw_cpu_early_foreach(cpu_check_mmu, true);
	if (cores > 0) {
		cores = MIN(cores, MAXCPU);
		if (bootverbose)
			printf("Found %d CPUs in the device tree\n", cores);
		mp_ncpus = cores;
		mp_maxid = cores - 1;
		cpu_enum_method = CPUS_FDT;
	} else
#endif
	{
		if (bootverbose)
			printf("No CPU data, limiting to 1 core\n");
		mp_ncpus = 1;
		mp_maxid = 0;
	}

	if (TUNABLE_INT_FETCH("hw.ncpu", &cores)) {
		if (cores > 0 && cores < mp_ncpus) {
			mp_ncpus = cores;
			mp_maxid = cores - 1;
		}
	}
}