xref: /netbsd/sys/arch/xen/xen/xen_machdep.c (revision e8cbb42e)
1 /*	$NetBSD: xen_machdep.c,v 1.27 2022/08/20 23:48:51 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  */
27 
28 /*
29  *
30  * Copyright (c) 2004 Christian Limpach.
31  * All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
43  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
44  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
45  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
46  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
47  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
51  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52  */
53 
54 
55 #include <sys/cdefs.h>
56 __KERNEL_RCSID(0, "$NetBSD: xen_machdep.c,v 1.27 2022/08/20 23:48:51 riastradh Exp $");
57 
58 #include "opt_xen.h"
59 
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/boot_flag.h>
63 #include <sys/conf.h>
64 #include <sys/disk.h>
65 #include <sys/device.h>
66 #include <sys/mount.h>
67 #include <sys/reboot.h>
68 #include <sys/timetc.h>
69 #include <sys/sysctl.h>
70 #include <sys/pmf.h>
71 #include <sys/xcall.h>
72 
73 #include <xen/intr.h>
74 #include <xen/hypervisor.h>
75 #include <xen/shutdown_xenbus.h>
76 #include <xen/include/public/version.h>
77 
78 #include <machine/pmap_private.h>
79 
80 #define DPRINTK(x) printk x
81 #if 0
82 #define DPRINTK(x)
83 #endif
84 
85 #ifdef DEBUG_GEOM
86 #define DPRINTF(a) printf a
87 #else
88 #define DPRINTF(a)
89 #endif
90 
91 
92 bool xen_suspend_allow;
93 
94 void
xen_parse_cmdline(int what,union xen_cmdline_parseinfo * xcp)95 xen_parse_cmdline(int what, union xen_cmdline_parseinfo *xcp)
96 {
97 	char _cmd_line[256], *cmd_line, *opt, *s;
98 	int b, i, ipidx = 0;
99 	uint32_t xi_ip[5];
100 	size_t len;
101 
102 	len = strlcpy(_cmd_line, xen_start_info.cmd_line, sizeof(_cmd_line));
103 	if (len > sizeof(_cmd_line)) {
104 		printf("command line exceeded limit of 255 chars. Truncated.\n");
105 	}
106 	cmd_line = _cmd_line;
107 
108 	switch (what) {
109 	case XEN_PARSE_BOOTDEV:
110 		xcp->xcp_bootdev[0] = 0;
111 		break;
112 	case XEN_PARSE_CONSOLE:
113 		xcp->xcp_console[0] = 0;
114 		break;
115 	}
116 
117 	while (cmd_line && *cmd_line) {
118 		opt = cmd_line;
119 		cmd_line = strchr(opt, ' ');
120 		if (cmd_line)
121 			*cmd_line = 0;
122 
123 		switch (what) {
124 		case XEN_PARSE_BOOTDEV:
125 			if (strncasecmp(opt, "bootdev=", 8) == 0) {
126 				strncpy(xcp->xcp_bootdev, opt + 8,
127 				    sizeof(xcp->xcp_bootdev));
128 				break;
129 			}
130 			if (strncasecmp(opt, "root=", 5) == 0) {
131 				strncpy(xcp->xcp_bootdev, opt + 5,
132 				    sizeof(xcp->xcp_bootdev));
133 				break;
134 			}
135 			break;
136 
137 		case XEN_PARSE_NETINFO:
138 			if (xcp->xcp_netinfo.xi_root &&
139 			    strncasecmp(opt, "nfsroot=", 8) == 0)
140 				strncpy(xcp->xcp_netinfo.xi_root, opt + 8,
141 				    MNAMELEN);
142 
143 			if (strncasecmp(opt, "ip=", 3) == 0) {
144 				memset(xi_ip, 0, sizeof(xi_ip));
145 				opt += 3;
146 				ipidx = 0;
147 				while (opt && *opt) {
148 					s = opt;
149 					opt = strchr(opt, ':');
150 					if (opt)
151 						*opt = 0;
152 
153 					switch (ipidx) {
154 					case 0:	/* ip */
155 					case 1:	/* nfs server */
156 					case 2:	/* gw */
157 					case 3:	/* mask */
158 					case 4:	/* host */
159 						if (*s == 0)
160 							break;
161 						for (i = 0; i < 4; i++) {
162 							b = strtoul(s, &s, 10);
163 							xi_ip[ipidx] = b + 256
164 								* xi_ip[ipidx];
165 							if (*s != '.')
166 								break;
167 							s++;
168 						}
169 						if (i < 3)
170 							xi_ip[ipidx] = 0;
171 						break;
172 					case 5:	/* interface */
173 						if (!strncmp(s, "xennet", 6))
174 							s += 6;
175 						else if (!strncmp(s, "eth", 3))
176 							s += 3;
177 						else
178 							break;
179 						if (xcp->xcp_netinfo.xi_ifno
180 						    == strtoul(s, NULL, 10))
181 							memcpy(xcp->
182 							    xcp_netinfo.xi_ip,
183 							    xi_ip,
184 							    sizeof(xi_ip));
185 						break;
186 					}
187 					ipidx++;
188 
189 					if (opt)
190 						*opt++ = ':';
191 				}
192 			}
193 			break;
194 
195 		case XEN_PARSE_CONSOLE:
196 			if (strncasecmp(opt, "console=", 8) == 0)
197 				strncpy(xcp->xcp_console, opt + 8,
198 				    sizeof(xcp->xcp_console));
199 			break;
200 
201 		case XEN_PARSE_BOOTFLAGS:
202 			if (*opt == '-') {
203 				opt++;
204 				while(*opt != '\0') {
205 					BOOT_FLAG(*opt, boothowto);
206 					opt++;
207 				}
208 			}
209 			break;
210 		case XEN_PARSE_PCIBACK:
211 			if (strncasecmp(opt, "pciback.hide=", 13) == 0)
212 				strncpy(xcp->xcp_pcidevs, opt + 13,
213 				    sizeof(xcp->xcp_pcidevs));
214 			break;
215 		}
216 
217 		if (cmd_line)
218 			*cmd_line++ = ' ';
219 	}
220 }
221 
222 #ifdef XENPV
223 
224 static int sysctl_xen_suspend(SYSCTLFN_ARGS);
225 static void xen_suspend_domain(void);
226 static void xen_prepare_suspend(void);
227 static void xen_prepare_resume(void);
228 
229 /*
230  * this function sets up the machdep.xen.suspend sysctl(7) that
231  * controls domain suspend/save.
232  */
233 void
sysctl_xen_suspend_setup(void)234 sysctl_xen_suspend_setup(void)
235 {
236 	const struct sysctlnode *node = NULL;
237 
238 	/*
239 	 * dom0 implements sleep support through ACPI. It should not call
240 	 * this function to register a suspend interface.
241 	 */
242 	KASSERT(!(xendomain_is_dom0()));
243 
244 	sysctl_createv(NULL, 0, NULL, &node,
245 	    CTLFLAG_PERMANENT,
246 	    CTLTYPE_NODE, "machdep", NULL,
247 	    NULL, 0, NULL, 0,
248 	    CTL_MACHDEP, CTL_EOL);
249 
250 	sysctl_createv(NULL, 0, &node, &node,
251 	    CTLFLAG_PERMANENT,
252 	    CTLTYPE_NODE, "xen",
253 	    SYSCTL_DESCR("Xen top level node"),
254 	    NULL, 0, NULL, 0,
255 	    CTL_CREATE, CTL_EOL);
256 
257 	sysctl_createv(NULL, 0, &node, &node,
258 	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE | CTLFLAG_IMMEDIATE,
259 	    CTLTYPE_INT, "suspend",
260 	    SYSCTL_DESCR("Suspend/save current Xen domain"),
261 	    sysctl_xen_suspend, 0, NULL, 0,
262 	    CTL_CREATE, CTL_EOL);
263 }
264 
265 static int
sysctl_xen_suspend(SYSCTLFN_ARGS)266 sysctl_xen_suspend(SYSCTLFN_ARGS)
267 {
268 	int error;
269 	struct sysctlnode node;
270 
271 	node = *rnode;
272 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
273 
274 	if (error || newp == NULL)
275 		return error;
276 
277 	/* only allow domain to suspend when dom0 instructed to do so */
278 	if (xen_suspend_allow == false)
279 		return EAGAIN;
280 
281 	xen_suspend_domain();
282 
283 	return 0;
284 
285 }
286 
287 static void xen_suspendclocks_xc(void *, void*);
288 static void xen_resumeclocks_xc(void *, void*);
289 
290 /*
291  * Last operations before suspending domain
292  */
293 static void
xen_prepare_suspend(void)294 xen_prepare_suspend(void)
295 {
296 
297 	kpreempt_disable();
298 
299 	pmap_xen_suspend();
300 	xc_wait(xc_broadcast(0, &xen_suspendclocks_xc, NULL, NULL));
301 
302 	/*
303 	 * save/restore code does not translate these MFNs to their
304 	 * associated PFNs, so we must do it
305 	 */
306 	xen_start_info.store_mfn =
307 	    atop(xpmap_mtop(ptoa(xen_start_info.store_mfn)));
308 	xen_start_info.console_mfn =
309 	    atop(xpmap_mtop(ptoa(xen_start_info.console_mfn)));
310 
311 	DPRINTK(("suspending domain\n"));
312 	aprint_verbose("suspending domain\n");
313 
314 	/* invalidate the shared_info page */
315 	if (HYPERVISOR_update_va_mapping((vaddr_t)HYPERVISOR_shared_info,
316 	    0, UVMF_INVLPG)) {
317 		DPRINTK(("HYPERVISOR_shared_info page invalidation failed"));
318 		HYPERVISOR_crash();
319 	}
320 
321 }
322 
/*
 * Cross-call handler (see xen_prepare_suspend()): suspend the clocks of
 * the CPU this runs on.  Both arguments are unused.
 */
static void
xen_suspendclocks_xc(void *a, void *b)
{
	struct cpu_info *self;

	/* stay on this CPU while its clocks are being suspended */
	kpreempt_disable();
	self = curcpu();
	xen_suspendclocks(self);
	kpreempt_enable();
}
331 
332 /*
333  * First operations before restoring domain context
334  */
335 static void
xen_prepare_resume(void)336 xen_prepare_resume(void)
337 {
338 	/* map the new shared_info page */
339 	if (HYPERVISOR_update_va_mapping((vaddr_t)HYPERVISOR_shared_info,
340 	    xen_start_info.shared_info | PTE_W | PTE_P,
341 	    UVMF_INVLPG)) {
342 		DPRINTK(("could not map new shared info page"));
343 		HYPERVISOR_crash();
344 	}
345 
346 	pmap_xen_resume();
347 
348 	if (xen_start_info.nr_pages != physmem) {
349 		/*
350 		 * XXX JYM for now, we crash - fix it with memory
351 		 * hotplug when supported
352 		 */
353 		DPRINTK(("xen_start_info.nr_pages != physmem"));
354 		HYPERVISOR_crash();
355 	}
356 
357 	DPRINTK(("preparing domain resume\n"));
358 	aprint_verbose("preparing domain resume\n");
359 
360 	xen_suspend_allow = false;
361 
362 	xc_wait(xc_broadcast(0, xen_resumeclocks_xc, NULL, NULL));
363 
364 	kpreempt_enable();
365 
366 }
367 
/*
 * Cross-call handler (see xen_prepare_resume()): resume the clocks of
 * the CPU this runs on.  Both arguments are unused.
 */
static void
xen_resumeclocks_xc(void *a, void *b)
{
	struct cpu_info *self;

	/* stay on this CPU while its clocks are being resumed */
	kpreempt_disable();
	self = curcpu();
	xen_resumeclocks(self);
	kpreempt_enable();
}
376 
377 static void
xen_suspend_domain(void)378 xen_suspend_domain(void)
379 {
380 	paddr_t mfn;
381 	int s = splvm(); /* XXXSMP */
382 
383 	/*
384 	 * console becomes unavailable when suspended, so
385 	 * direct communications to domain are hampered from there on.
386 	 * We can only rely on low level primitives like printk(), until
387 	 * console is fully restored
388 	 */
389 	if (!pmf_system_suspend(PMF_Q_NONE)) {
390 		DPRINTK(("devices suspend failed"));
391 		HYPERVISOR_crash();
392 	}
393 
394 	/*
395 	 * obtain the MFN of the start_info page now, as we will not be
396 	 * able to do it once pmap is locked
397 	 */
398 	pmap_extract_ma(pmap_kernel(), (vaddr_t)&xen_start_info, &mfn);
399 	mfn >>= PAGE_SHIFT;
400 
401 	xen_prepare_suspend();
402 
403 	DPRINTK(("calling HYPERVISOR_suspend()\n"));
404 	if (HYPERVISOR_suspend(mfn) != 0) {
405 	/* XXX JYM: implement checkpoint/snapshot (ret == 1) */
406 		DPRINTK(("HYPERVISOR_suspend() failed"));
407 		HYPERVISOR_crash();
408 	}
409 
410 	DPRINTK(("left HYPERVISOR_suspend()\n"));
411 
412 	xen_prepare_resume();
413 
414 	DPRINTK(("resuming devices\n"));
415 	if (!pmf_system_resume(PMF_Q_NONE)) {
416 		DPRINTK(("devices resume failed\n"));
417 		HYPERVISOR_crash();
418 	}
419 
420 	splx(s);
421 
422 	/* xencons is back online, we can print to console */
423 	aprint_verbose("domain resumed\n");
424 
425 }
426 #endif /* XENPV */
427 
428 #define PRINTK_BUFSIZE 1024
429 void
printk(const char * fmt,...)430 printk(const char *fmt, ...)
431 {
432 	va_list ap;
433 	int ret;
434 	static char buf[PRINTK_BUFSIZE];
435 
436 	va_start(ap, fmt);
437 	ret = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap);
438 	va_end(ap);
439 	buf[ret] = 0;
440 	(void)HYPERVISOR_console_io(CONSOLEIO_write, ret, buf);
441 }
442 
443 bool xen_feature_tables[XENFEAT_NR_SUBMAPS * 32];
444 
445 void
xen_init_features(void)446 xen_init_features(void)
447 {
448 	xen_feature_info_t features;
449 
450 	for (int sm = 0; sm < XENFEAT_NR_SUBMAPS; sm++) {
451 		features.submap_idx = sm;
452 		if (HYPERVISOR_xen_version(XENVER_get_features, &features) < 0)
453 			break;
454 		for (int f = 0; f < 32; f++) {
455 			xen_feature_tables[sm * 32 + f] =
456 			    (features.submap & (1 << f)) ? 1 : 0;
457 		}
458 	}
459 }
460 
461 /*
462  * Attempt to find the device from which we were booted.
463  */
464 
465 static int
is_valid_disk(device_t dv)466 is_valid_disk(device_t dv)
467 {
468 	if (device_class(dv) != DV_DISK)
469 		return (0);
470 
471 	return (device_is_a(dv, "dk") ||
472 		device_is_a(dv, "sd") ||
473 		device_is_a(dv, "wd") ||
474 		device_is_a(dv, "ld") ||
475 		device_is_a(dv, "ed") ||
476 		device_is_a(dv, "xbd"));
477 }
478 
479 void
xen_bootconf(void)480 xen_bootconf(void)
481 {
482 	device_t dv;
483 	deviter_t di;
484 	union xen_cmdline_parseinfo xcp;
485 	static char bootspecbuf[sizeof(xcp.xcp_bootdev)];
486 
487 	if (booted_device) {
488 		DPRINTF(("%s: preset booted_device: %s\n", __func__, device_xname(booted_device)));
489 		return;
490 	}
491 
492 	xen_parse_cmdline(XEN_PARSE_BOOTDEV, &xcp);
493 
494 	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST);
495 	     dv != NULL;
496 	     dv = deviter_next(&di)) {
497 		bool is_ifnet, is_disk;
498 		const char *devname;
499 
500 		is_ifnet = (device_class(dv) == DV_IFNET);
501 		is_disk = is_valid_disk(dv);
502 		devname = device_xname(dv);
503 
504 		if (!is_ifnet && !is_disk)
505 			continue;
506 
507 		if (is_disk && xcp.xcp_bootdev[0] == 0) {
508 			booted_device = dv;
509 			break;
510 		}
511 
512 		if (strncmp(xcp.xcp_bootdev, devname, strlen(devname)))
513 			continue;
514 
515 		if (is_disk && strlen(xcp.xcp_bootdev) > strlen(devname)) {
516 			/* XXX check device_cfdata as in x86_autoconf.c? */
517 			booted_partition = toupper(
518 				xcp.xcp_bootdev[strlen(devname)]) - 'A';
519 			DPRINTF(("%s: booted_partition: %d\n", __func__, booted_partition));
520 		}
521 
522 		booted_device = dv;
523 		booted_method = "bootinfo/bootdev";
524 		break;
525 	}
526 	deviter_release(&di);
527 
528 	if (booted_device) {
529 		DPRINTF(("%s: booted_device: %s\n", __func__, device_xname(booted_device)));
530 		return;
531 	}
532 
533 	/*
534 	 * not a boot device name, pass through to MI code
535 	 */
536 	if (xcp.xcp_bootdev[0] != '\0') {
537 		strlcpy(bootspecbuf, xcp.xcp_bootdev, sizeof(bootspecbuf));
538 		bootspec = bootspecbuf;
539 		booted_method = "bootinfo/bootspec";
540 		DPRINTF(("%s: bootspec: %s\n", __func__, bootspec));
541 		return;
542 	}
543 }
544