1 /* $NetBSD: xen_machdep.c,v 1.27 2022/08/20 23:48:51 riastradh Exp $ */
2
3 /*
4 * Copyright (c) 2006 Manuel Bouyer.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 */
27
28 /*
29 *
30 * Copyright (c) 2004 Christian Limpach.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
43 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
44 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
45 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
46 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
47 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
51 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54
55 #include <sys/cdefs.h>
56 __KERNEL_RCSID(0, "$NetBSD: xen_machdep.c,v 1.27 2022/08/20 23:48:51 riastradh Exp $");
57
58 #include "opt_xen.h"
59
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/boot_flag.h>
63 #include <sys/conf.h>
64 #include <sys/disk.h>
65 #include <sys/device.h>
66 #include <sys/mount.h>
67 #include <sys/reboot.h>
68 #include <sys/timetc.h>
69 #include <sys/sysctl.h>
70 #include <sys/pmf.h>
71 #include <sys/xcall.h>
72
73 #include <xen/intr.h>
74 #include <xen/hypervisor.h>
75 #include <xen/shutdown_xenbus.h>
76 #include <xen/include/public/version.h>
77
78 #include <machine/pmap_private.h>
79
/*
 * DPRINTK: debug output routed through printk().  It is currently always
 * compiled in; flip the condition below to turn the messages into no-ops.
 */
#if 1
#define DPRINTK(x)	printk x
#else
#define DPRINTK(x)
#endif

/* DPRINTF: boot device lookup tracing, only built with DEBUG_GEOM. */
#ifndef DEBUG_GEOM
#define DPRINTF(a)
#else
#define DPRINTF(a)	printf a
#endif


/*
 * Gate for the suspend sysctl handler, which answers EAGAIN while this is
 * false.  It is cleared again by xen_prepare_resume(); nothing in this
 * file sets it to true (presumably done by the dom0-driven shutdown
 * path -- confirm against the xenbus shutdown code).
 */
bool xen_suspend_allow;
93
94 void
xen_parse_cmdline(int what,union xen_cmdline_parseinfo * xcp)95 xen_parse_cmdline(int what, union xen_cmdline_parseinfo *xcp)
96 {
97 char _cmd_line[256], *cmd_line, *opt, *s;
98 int b, i, ipidx = 0;
99 uint32_t xi_ip[5];
100 size_t len;
101
102 len = strlcpy(_cmd_line, xen_start_info.cmd_line, sizeof(_cmd_line));
103 if (len > sizeof(_cmd_line)) {
104 printf("command line exceeded limit of 255 chars. Truncated.\n");
105 }
106 cmd_line = _cmd_line;
107
108 switch (what) {
109 case XEN_PARSE_BOOTDEV:
110 xcp->xcp_bootdev[0] = 0;
111 break;
112 case XEN_PARSE_CONSOLE:
113 xcp->xcp_console[0] = 0;
114 break;
115 }
116
117 while (cmd_line && *cmd_line) {
118 opt = cmd_line;
119 cmd_line = strchr(opt, ' ');
120 if (cmd_line)
121 *cmd_line = 0;
122
123 switch (what) {
124 case XEN_PARSE_BOOTDEV:
125 if (strncasecmp(opt, "bootdev=", 8) == 0) {
126 strncpy(xcp->xcp_bootdev, opt + 8,
127 sizeof(xcp->xcp_bootdev));
128 break;
129 }
130 if (strncasecmp(opt, "root=", 5) == 0) {
131 strncpy(xcp->xcp_bootdev, opt + 5,
132 sizeof(xcp->xcp_bootdev));
133 break;
134 }
135 break;
136
137 case XEN_PARSE_NETINFO:
138 if (xcp->xcp_netinfo.xi_root &&
139 strncasecmp(opt, "nfsroot=", 8) == 0)
140 strncpy(xcp->xcp_netinfo.xi_root, opt + 8,
141 MNAMELEN);
142
143 if (strncasecmp(opt, "ip=", 3) == 0) {
144 memset(xi_ip, 0, sizeof(xi_ip));
145 opt += 3;
146 ipidx = 0;
147 while (opt && *opt) {
148 s = opt;
149 opt = strchr(opt, ':');
150 if (opt)
151 *opt = 0;
152
153 switch (ipidx) {
154 case 0: /* ip */
155 case 1: /* nfs server */
156 case 2: /* gw */
157 case 3: /* mask */
158 case 4: /* host */
159 if (*s == 0)
160 break;
161 for (i = 0; i < 4; i++) {
162 b = strtoul(s, &s, 10);
163 xi_ip[ipidx] = b + 256
164 * xi_ip[ipidx];
165 if (*s != '.')
166 break;
167 s++;
168 }
169 if (i < 3)
170 xi_ip[ipidx] = 0;
171 break;
172 case 5: /* interface */
173 if (!strncmp(s, "xennet", 6))
174 s += 6;
175 else if (!strncmp(s, "eth", 3))
176 s += 3;
177 else
178 break;
179 if (xcp->xcp_netinfo.xi_ifno
180 == strtoul(s, NULL, 10))
181 memcpy(xcp->
182 xcp_netinfo.xi_ip,
183 xi_ip,
184 sizeof(xi_ip));
185 break;
186 }
187 ipidx++;
188
189 if (opt)
190 *opt++ = ':';
191 }
192 }
193 break;
194
195 case XEN_PARSE_CONSOLE:
196 if (strncasecmp(opt, "console=", 8) == 0)
197 strncpy(xcp->xcp_console, opt + 8,
198 sizeof(xcp->xcp_console));
199 break;
200
201 case XEN_PARSE_BOOTFLAGS:
202 if (*opt == '-') {
203 opt++;
204 while(*opt != '\0') {
205 BOOT_FLAG(*opt, boothowto);
206 opt++;
207 }
208 }
209 break;
210 case XEN_PARSE_PCIBACK:
211 if (strncasecmp(opt, "pciback.hide=", 13) == 0)
212 strncpy(xcp->xcp_pcidevs, opt + 13,
213 sizeof(xcp->xcp_pcidevs));
214 break;
215 }
216
217 if (cmd_line)
218 *cmd_line++ = ' ';
219 }
220 }
221
222 #ifdef XENPV
223
/* File-local pieces of the domain suspend/resume sequence. */
static void xen_prepare_suspend(void);
static void xen_suspend_domain(void);
static void xen_prepare_resume(void);
static int sysctl_xen_suspend(SYSCTLFN_ARGS);
228
229 /*
230 * this function sets up the machdep.xen.suspend sysctl(7) that
231 * controls domain suspend/save.
232 */
233 void
sysctl_xen_suspend_setup(void)234 sysctl_xen_suspend_setup(void)
235 {
236 const struct sysctlnode *node = NULL;
237
238 /*
239 * dom0 implements sleep support through ACPI. It should not call
240 * this function to register a suspend interface.
241 */
242 KASSERT(!(xendomain_is_dom0()));
243
244 sysctl_createv(NULL, 0, NULL, &node,
245 CTLFLAG_PERMANENT,
246 CTLTYPE_NODE, "machdep", NULL,
247 NULL, 0, NULL, 0,
248 CTL_MACHDEP, CTL_EOL);
249
250 sysctl_createv(NULL, 0, &node, &node,
251 CTLFLAG_PERMANENT,
252 CTLTYPE_NODE, "xen",
253 SYSCTL_DESCR("Xen top level node"),
254 NULL, 0, NULL, 0,
255 CTL_CREATE, CTL_EOL);
256
257 sysctl_createv(NULL, 0, &node, &node,
258 CTLFLAG_PERMANENT | CTLFLAG_READWRITE | CTLFLAG_IMMEDIATE,
259 CTLTYPE_INT, "suspend",
260 SYSCTL_DESCR("Suspend/save current Xen domain"),
261 sysctl_xen_suspend, 0, NULL, 0,
262 CTL_CREATE, CTL_EOL);
263 }
264
265 static int
sysctl_xen_suspend(SYSCTLFN_ARGS)266 sysctl_xen_suspend(SYSCTLFN_ARGS)
267 {
268 int error;
269 struct sysctlnode node;
270
271 node = *rnode;
272 error = sysctl_lookup(SYSCTLFN_CALL(&node));
273
274 if (error || newp == NULL)
275 return error;
276
277 /* only allow domain to suspend when dom0 instructed to do so */
278 if (xen_suspend_allow == false)
279 return EAGAIN;
280
281 xen_suspend_domain();
282
283 return 0;
284
285 }
286
/* Handlers dispatched through xc_broadcast() around a suspend. */
static void xen_suspendclocks_xc(void *a, void *b);
static void xen_resumeclocks_xc(void *a, void *b);
289
290 /*
291 * Last operations before suspending domain
292 */
293 static void
xen_prepare_suspend(void)294 xen_prepare_suspend(void)
295 {
296
297 kpreempt_disable();
298
299 pmap_xen_suspend();
300 xc_wait(xc_broadcast(0, &xen_suspendclocks_xc, NULL, NULL));
301
302 /*
303 * save/restore code does not translate these MFNs to their
304 * associated PFNs, so we must do it
305 */
306 xen_start_info.store_mfn =
307 atop(xpmap_mtop(ptoa(xen_start_info.store_mfn)));
308 xen_start_info.console_mfn =
309 atop(xpmap_mtop(ptoa(xen_start_info.console_mfn)));
310
311 DPRINTK(("suspending domain\n"));
312 aprint_verbose("suspending domain\n");
313
314 /* invalidate the shared_info page */
315 if (HYPERVISOR_update_va_mapping((vaddr_t)HYPERVISOR_shared_info,
316 0, UVMF_INVLPG)) {
317 DPRINTK(("HYPERVISOR_shared_info page invalidation failed"));
318 HYPERVISOR_crash();
319 }
320
321 }
322
/*
 * Cross-call handler: suspend the clocks of the CPU it runs on.
 * Neither argument is used.
 */
static void
xen_suspendclocks_xc(void *a, void *b)
{
	struct cpu_info *ci;

	/* Stay on this CPU while its clocks are being suspended. */
	kpreempt_disable();
	ci = curcpu();
	xen_suspendclocks(ci);
	kpreempt_enable();
}
331
332 /*
333 * First operations before restoring domain context
334 */
335 static void
xen_prepare_resume(void)336 xen_prepare_resume(void)
337 {
338 /* map the new shared_info page */
339 if (HYPERVISOR_update_va_mapping((vaddr_t)HYPERVISOR_shared_info,
340 xen_start_info.shared_info | PTE_W | PTE_P,
341 UVMF_INVLPG)) {
342 DPRINTK(("could not map new shared info page"));
343 HYPERVISOR_crash();
344 }
345
346 pmap_xen_resume();
347
348 if (xen_start_info.nr_pages != physmem) {
349 /*
350 * XXX JYM for now, we crash - fix it with memory
351 * hotplug when supported
352 */
353 DPRINTK(("xen_start_info.nr_pages != physmem"));
354 HYPERVISOR_crash();
355 }
356
357 DPRINTK(("preparing domain resume\n"));
358 aprint_verbose("preparing domain resume\n");
359
360 xen_suspend_allow = false;
361
362 xc_wait(xc_broadcast(0, xen_resumeclocks_xc, NULL, NULL));
363
364 kpreempt_enable();
365
366 }
367
/*
 * Cross-call handler: resume the clocks of the CPU it runs on.
 * Neither argument is used.
 */
static void
xen_resumeclocks_xc(void *a, void *b)
{
	struct cpu_info *ci;

	/* Stay on this CPU while its clocks are being resumed. */
	kpreempt_disable();
	ci = curcpu();
	xen_resumeclocks(ci);
	kpreempt_enable();
}
376
377 static void
xen_suspend_domain(void)378 xen_suspend_domain(void)
379 {
380 paddr_t mfn;
381 int s = splvm(); /* XXXSMP */
382
383 /*
384 * console becomes unavailable when suspended, so
385 * direct communications to domain are hampered from there on.
386 * We can only rely on low level primitives like printk(), until
387 * console is fully restored
388 */
389 if (!pmf_system_suspend(PMF_Q_NONE)) {
390 DPRINTK(("devices suspend failed"));
391 HYPERVISOR_crash();
392 }
393
394 /*
395 * obtain the MFN of the start_info page now, as we will not be
396 * able to do it once pmap is locked
397 */
398 pmap_extract_ma(pmap_kernel(), (vaddr_t)&xen_start_info, &mfn);
399 mfn >>= PAGE_SHIFT;
400
401 xen_prepare_suspend();
402
403 DPRINTK(("calling HYPERVISOR_suspend()\n"));
404 if (HYPERVISOR_suspend(mfn) != 0) {
405 /* XXX JYM: implement checkpoint/snapshot (ret == 1) */
406 DPRINTK(("HYPERVISOR_suspend() failed"));
407 HYPERVISOR_crash();
408 }
409
410 DPRINTK(("left HYPERVISOR_suspend()\n"));
411
412 xen_prepare_resume();
413
414 DPRINTK(("resuming devices\n"));
415 if (!pmf_system_resume(PMF_Q_NONE)) {
416 DPRINTK(("devices resume failed\n"));
417 HYPERVISOR_crash();
418 }
419
420 splx(s);
421
422 /* xencons is back online, we can print to console */
423 aprint_verbose("domain resumed\n");
424
425 }
426 #endif /* XENPV */
427
428 #define PRINTK_BUFSIZE 1024
429 void
printk(const char * fmt,...)430 printk(const char *fmt, ...)
431 {
432 va_list ap;
433 int ret;
434 static char buf[PRINTK_BUFSIZE];
435
436 va_start(ap, fmt);
437 ret = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap);
438 va_end(ap);
439 buf[ret] = 0;
440 (void)HYPERVISOR_console_io(CONSOLEIO_write, ret, buf);
441 }
442
443 bool xen_feature_tables[XENFEAT_NR_SUBMAPS * 32];
444
445 void
xen_init_features(void)446 xen_init_features(void)
447 {
448 xen_feature_info_t features;
449
450 for (int sm = 0; sm < XENFEAT_NR_SUBMAPS; sm++) {
451 features.submap_idx = sm;
452 if (HYPERVISOR_xen_version(XENVER_get_features, &features) < 0)
453 break;
454 for (int f = 0; f < 32; f++) {
455 xen_feature_tables[sm * 32 + f] =
456 (features.submap & (1 << f)) ? 1 : 0;
457 }
458 }
459 }
460
461 /*
462 * Attempt to find the device from which we were booted.
463 */
464
465 static int
is_valid_disk(device_t dv)466 is_valid_disk(device_t dv)
467 {
468 if (device_class(dv) != DV_DISK)
469 return (0);
470
471 return (device_is_a(dv, "dk") ||
472 device_is_a(dv, "sd") ||
473 device_is_a(dv, "wd") ||
474 device_is_a(dv, "ld") ||
475 device_is_a(dv, "ed") ||
476 device_is_a(dv, "xbd"));
477 }
478
479 void
xen_bootconf(void)480 xen_bootconf(void)
481 {
482 device_t dv;
483 deviter_t di;
484 union xen_cmdline_parseinfo xcp;
485 static char bootspecbuf[sizeof(xcp.xcp_bootdev)];
486
487 if (booted_device) {
488 DPRINTF(("%s: preset booted_device: %s\n", __func__, device_xname(booted_device)));
489 return;
490 }
491
492 xen_parse_cmdline(XEN_PARSE_BOOTDEV, &xcp);
493
494 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST);
495 dv != NULL;
496 dv = deviter_next(&di)) {
497 bool is_ifnet, is_disk;
498 const char *devname;
499
500 is_ifnet = (device_class(dv) == DV_IFNET);
501 is_disk = is_valid_disk(dv);
502 devname = device_xname(dv);
503
504 if (!is_ifnet && !is_disk)
505 continue;
506
507 if (is_disk && xcp.xcp_bootdev[0] == 0) {
508 booted_device = dv;
509 break;
510 }
511
512 if (strncmp(xcp.xcp_bootdev, devname, strlen(devname)))
513 continue;
514
515 if (is_disk && strlen(xcp.xcp_bootdev) > strlen(devname)) {
516 /* XXX check device_cfdata as in x86_autoconf.c? */
517 booted_partition = toupper(
518 xcp.xcp_bootdev[strlen(devname)]) - 'A';
519 DPRINTF(("%s: booted_partition: %d\n", __func__, booted_partition));
520 }
521
522 booted_device = dv;
523 booted_method = "bootinfo/bootdev";
524 break;
525 }
526 deviter_release(&di);
527
528 if (booted_device) {
529 DPRINTF(("%s: booted_device: %s\n", __func__, device_xname(booted_device)));
530 return;
531 }
532
533 /*
534 * not a boot device name, pass through to MI code
535 */
536 if (xcp.xcp_bootdev[0] != '\0') {
537 strlcpy(bootspecbuf, xcp.xcp_bootdev, sizeof(bootspecbuf));
538 bootspec = bootspecbuf;
539 booted_method = "bootinfo/bootspec";
540 DPRINTF(("%s: bootspec: %s\n", __func__, bootspec));
541 return;
542 }
543 }
544