1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2008, 2013 Citrix Systems, Inc.
5 * Copyright (c) 2012 Spectra Logic Corporation
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/param.h>
31 #include <sys/bus.h>
32 #include <sys/kernel.h>
33 #include <sys/linker.h>
34 #include <sys/malloc.h>
35 #include <sys/proc.h>
36 #include <sys/smp.h>
37 #include <sys/systm.h>
38
39 #include <vm/vm.h>
40 #include <vm/pmap.h>
41 #include <vm/vm_param.h>
42
43 #include <dev/pci/pcivar.h>
44
45 #include <machine/_inttypes.h>
46 #include <machine/cpufunc.h>
47 #include <machine/cpu.h>
48 #include <machine/md_var.h>
49 #include <machine/metadata.h>
50 #include <machine/smp.h>
51
52 #include <x86/apicreg.h>
53
54 #include <xen/xen-os.h>
55 #include <xen/error.h>
56 #include <xen/features.h>
57 #include <xen/gnttab.h>
58 #include <xen/hypervisor.h>
59 #include <xen/hvm.h>
60 #include <xen/xen_intr.h>
61
62 #include <contrib/xen/arch-x86/cpuid.h>
63 #include <contrib/xen/hvm/params.h>
64 #include <contrib/xen/vcpu.h>
65
/*--------------------------- Forward Declarations ---------------------------*/
static void xen_hvm_cpu_init(void);

/*-------------------------------- Global Data -------------------------------*/
#ifdef SMP
/*
 * CPU hooks copied into cpu_ops by xen_hvm_init() on the
 * XEN_HVM_INIT_LATE path.  Both the init and the resume hook run
 * xen_hvm_cpu_init().
 */
struct cpu_ops xen_hvm_cpu_ops = {
	.cpu_init = xen_hvm_cpu_init,
	.cpu_resume = xen_hvm_cpu_init
};
#endif

static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support");

/**
 * If non-zero, the hypervisor has been configured to use a direct
 * IDT event callback for interrupt injection.
 *
 * Set to 1 by xen_hvm_set_callback() once HVMOP_set_param succeeds for the
 * vector callback, and reset to 0 by xen_hvm_init() before each attempt.
 */
int xen_vector_callback_enabled;

/**
 * Signals whether the vector injected for the event channel upcall needs to
 * be EOI'ed on the local APIC.
 *
 * Set to true by xen_hvm_set_callback() when set_percpu_callback() succeeds,
 * and reset to false by xen_hvm_init().
 */
bool xen_evtchn_needs_ack;

/*------------------------------- Per-CPU Data -------------------------------*/
/*
 * Per-CPU vcpu_info pointer.  xen_hvm_init() clears it for every CPU on
 * resume, and xen_hvm_cpu_init() returns early when it is already non-NULL.
 */
DPCPU_DECLARE(struct vcpu_info *, vcpu_info);

/*------------------------------ Sysctl tunables -----------------------------*/
/*
 * Both default to 0.  While a tunable is 0, xen_hvm_disable_emulated_devices()
 * requests the unplug of the corresponding emulated devices (IDE disks or
 * NICs); a non-zero value leaves those emulated devices in place.
 */
int xen_disable_pv_disks = 0;
int xen_disable_pv_nics = 0;
TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks);
TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics);
99
100 /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/
101
/*
 * Write `size` bytes starting at `str` to the XEN_HVM_DEBUGCONS_IOPORT I/O
 * port with outsb().  No hypercall is involved.
 */
void xen_emergency_print(const char *str, size_t size)
{
	outsb(XEN_HVM_DEBUGCONS_IOPORT, str, size);
}
106
/*
 * Apply workarounds that depend on the hypervisor version.
 *
 * major, minor: hypervisor version as reported by hypervisor_version().
 *
 * Only SMP kernels carry a quirk at present; on other kernels this is a
 * no-op.
 */
static void
hypervisor_quirks(unsigned int major, unsigned int minor)
{
#ifdef SMP
	bool older_than_4_6;

	older_than_4_6 = major < 4 || (major == 4 && minor <= 5);

	/*
	 * Leave the setting alone on fixed hypervisors, and whenever
	 * msix_disable_migration holds anything other than -1.
	 * NOTE(review): -1 presumably means "not set by the administrator";
	 * confirm against the definition of msix_disable_migration.
	 */
	if (!older_than_4_6 || msix_disable_migration != -1)
		return;

	/*
	 * Xen hypervisors prior to 4.6.0 do not properly handle updates to
	 * enabled MSI-X table entries, so disable MSI-X interrupt migration
	 * in that case.
	 */
	if (bootverbose) {
		printf(
"Disabling MSI-X interrupt migration due to Xen hypervisor bug.\n"
"Set machdep.msix_disable_migration=0 to forcefully enable it.\n");
	}
	msix_disable_migration = 1;
#endif
}
127
128 static void
hypervisor_version(void)129 hypervisor_version(void)
130 {
131 uint32_t regs[4];
132 int major, minor;
133
134 do_cpuid(hv_base + 1, regs);
135
136 major = regs[0] >> 16;
137 minor = regs[0] & 0xffff;
138 printf("XEN: Hypervisor version %d.%d detected.\n", major, minor);
139
140 hypervisor_quirks(major, minor);
141 }
142
143 /*
144 * Translate linear to physical address when still running on the bootloader
145 * created page-tables.
146 */
147 static vm_paddr_t
early_init_vtop(void * addr)148 early_init_vtop(void *addr)
149 {
150
151 /*
152 * Using a KASSERT won't print anything, as this is before console
153 * initialization.
154 */
155 if (__predict_false((uintptr_t)addr < KERNBASE)) {
156 xc_printf("invalid linear address: %p\n", addr);
157 halt();
158 }
159
160 return ((uintptr_t)addr - KERNBASE
161 #ifdef __amd64__
162 + kernphys - KERNLOAD
163 #endif
164 );
165 }
166
167 static int
map_shared_info(void)168 map_shared_info(void)
169 {
170 /*
171 * TODO shared info page should be mapped in an unpopulated (IOW:
172 * non-RAM) address. But finding one at this point in boot is
173 * complicated, hence re-use a RAM address for the time being. This
174 * sadly causes super-page shattering in the second stage translation
175 * page tables.
176 */
177 static union {
178 shared_info_t shared_info;
179 uint8_t raw[PAGE_SIZE];
180 } shared_page __attribute__((aligned(PAGE_SIZE)));
181 static struct xen_add_to_physmap xatp = {
182 .domid = DOMID_SELF,
183 .space = XENMAPSPACE_shared_info,
184 };
185 int rc;
186
187 _Static_assert(sizeof(shared_page) == PAGE_SIZE,
188 "invalid Xen shared_info struct size");
189
190 if (xatp.gpfn == 0)
191 xatp.gpfn = atop(early_init_vtop(&shared_page.shared_info));
192
193 rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
194 if (rc != 0) {
195 xc_printf("cannot map shared info page: %d\n", rc);
196 HYPERVISOR_shared_info = NULL;
197 } else if (HYPERVISOR_shared_info == NULL)
198 HYPERVISOR_shared_info = &shared_page.shared_info;
199
200 return (rc);
201 }
202
203 static void
fixup_console(void)204 fixup_console(void)
205 {
206 struct xen_platform_op op = {
207 .cmd = XENPF_get_dom0_console,
208 };
209 xenpf_dom0_console_t *console = &op.u.dom0_console;
210 union {
211 struct efi_fb efi;
212 struct vbe_fb vbe;
213 } *fb = NULL;
214 int size;
215 caddr_t kmdp;
216
217 kmdp = preload_search_by_type("elf kernel");
218 if (kmdp == NULL)
219 kmdp = preload_search_by_type("elf64 kernel");
220 if (kmdp == NULL) {
221 xc_printf("Unable to find kernel metadata\n");
222 return;
223 }
224
225 size = HYPERVISOR_platform_op(&op);
226 if (size < 0) {
227 xc_printf("Failed to get video console info: %d\n", size);
228 return;
229 }
230
231 switch (console->video_type) {
232 case XEN_VGATYPE_VESA_LFB:
233 fb = (__typeof__ (fb))preload_search_info(kmdp,
234 MODINFO_METADATA | MODINFOMD_VBE_FB);
235
236 if (fb == NULL) {
237 xc_printf("No VBE FB in kernel metadata\n");
238 return;
239 }
240
241 _Static_assert(offsetof(struct vbe_fb, fb_bpp) ==
242 offsetof(struct efi_fb, fb_mask_reserved) +
243 sizeof(fb->efi.fb_mask_reserved),
244 "Bad structure overlay\n");
245 fb->vbe.fb_bpp = console->u.vesa_lfb.bits_per_pixel;
246 /* FALLTHROUGH */
247 case XEN_VGATYPE_EFI_LFB:
248 if (fb == NULL) {
249 fb = (__typeof__ (fb))preload_search_info(kmdp,
250 MODINFO_METADATA | MODINFOMD_EFI_FB);
251 if (fb == NULL) {
252 xc_printf("No EFI FB in kernel metadata\n");
253 return;
254 }
255 }
256
257 fb->efi.fb_addr = console->u.vesa_lfb.lfb_base;
258 if (size >
259 offsetof(xenpf_dom0_console_t, u.vesa_lfb.ext_lfb_base))
260 fb->efi.fb_addr |=
261 (uint64_t)console->u.vesa_lfb.ext_lfb_base << 32;
262 fb->efi.fb_size = console->u.vesa_lfb.lfb_size << 16;
263 fb->efi.fb_height = console->u.vesa_lfb.height;
264 fb->efi.fb_width = console->u.vesa_lfb.width;
265 fb->efi.fb_stride = (console->u.vesa_lfb.bytes_per_line << 3) /
266 console->u.vesa_lfb.bits_per_pixel;
267 #define FBMASK(c) \
268 ((~0u << console->u.vesa_lfb.c ## _pos) & \
269 (~0u >> (32 - console->u.vesa_lfb.c ## _pos - \
270 console->u.vesa_lfb.c ## _size)))
271 fb->efi.fb_mask_red = FBMASK(red);
272 fb->efi.fb_mask_green = FBMASK(green);
273 fb->efi.fb_mask_blue = FBMASK(blue);
274 fb->efi.fb_mask_reserved = FBMASK(rsvd);
275 #undef FBMASK
276 break;
277
278 default:
279 xc_printf("Video console type unsupported\n");
280 return;
281 }
282 }
283
284 /* Early initialization when running as a Xen guest. */
285 void
xen_early_init(void)286 xen_early_init(void)
287 {
288 uint32_t regs[4];
289 int rc;
290
291 if (hv_high < hv_base + 2) {
292 xc_printf("Invalid maximum leaves for hv_base\n");
293 vm_guest = VM_GUEST_VM;
294 return;
295 }
296
297 /* Find the hypercall pages. */
298 do_cpuid(hv_base + 2, regs);
299 if (regs[0] != 1) {
300 xc_printf("Invalid number of hypercall pages %u\n",
301 regs[0]);
302 vm_guest = VM_GUEST_VM;
303 return;
304 }
305
306 wrmsr(regs[1], early_init_vtop(&hypercall_page));
307
308 rc = map_shared_info();
309 if (rc != 0) {
310 vm_guest = VM_GUEST_VM;
311 return;
312 }
313
314 if (xen_initial_domain())
315 /* Fixup video console information in case Xen changed the mode. */
316 fixup_console();
317 }
318
319 static int
set_percpu_callback(unsigned int vcpu)320 set_percpu_callback(unsigned int vcpu)
321 {
322 struct xen_hvm_evtchn_upcall_vector vec;
323 int error;
324
325 vec.vcpu = vcpu;
326 vec.vector = IDT_EVTCHN;
327 error = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &vec);
328
329 return (error != 0 ? xen_translate_error(error) : 0);
330 }
331
332 /*
333 * Tell the hypervisor how to contact us for event channel callbacks.
334 */
335 void
xen_hvm_set_callback(device_t dev)336 xen_hvm_set_callback(device_t dev)
337 {
338 struct xen_hvm_param xhp;
339 int irq;
340
341 if (xen_vector_callback_enabled)
342 return;
343
344 xhp.domid = DOMID_SELF;
345 xhp.index = HVM_PARAM_CALLBACK_IRQ;
346 if (xen_feature(XENFEAT_hvm_callback_vector) != 0) {
347 int error;
348
349 error = set_percpu_callback(0);
350 if (error == 0) {
351 xen_evtchn_needs_ack = true;
352 /* Trick toolstack to think we are enlightened */
353 xhp.value = 1;
354 } else
355 xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN);
356 error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp);
357 if (error == 0) {
358 xen_vector_callback_enabled = 1;
359 return;
360 } else if (xen_evtchn_needs_ack)
361 panic("Unable to setup fake HVM param: %d", error);
362
363 printf("Xen HVM callback vector registration failed (%d). "
364 "Falling back to emulated device interrupt\n", error);
365 }
366 xen_vector_callback_enabled = 0;
367 if (dev == NULL) {
368 /*
369 * Called from early boot or resume.
370 * xenpci will invoke us again later.
371 */
372 return;
373 }
374
375 irq = pci_get_irq(dev);
376 if (irq < 16) {
377 xhp.value = HVM_CALLBACK_GSI(irq);
378 } else {
379 u_int slot;
380 u_int pin;
381
382 slot = pci_get_slot(dev);
383 pin = pci_get_intpin(dev) - 1;
384 xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin);
385 }
386
387 if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0)
388 panic("Can't set evtchn callback");
389 }
390
/*
 * I/O port used by xen_hvm_disable_emulated_devices(): a 16-bit read is
 * compared against XMI_MAGIC, and a 16-bit write of XMI_UNPLUG_* bits
 * requests that the selected emulated devices be unplugged.
 */
#define XEN_MAGIC_IOPORT 0x10
enum {
	/* Value expected when reading XEN_MAGIC_IOPORT. */
	XMI_MAGIC = 0x49d2,
	/* Unplug request bits; these may be OR'ed together. */
	XMI_UNPLUG_IDE_DISKS = 0x01,
	XMI_UNPLUG_NICS = 0x02,
	XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04
};
398
399 static void
xen_hvm_disable_emulated_devices(void)400 xen_hvm_disable_emulated_devices(void)
401 {
402 u_short disable_devs = 0;
403
404 if (xen_pv_domain()) {
405 /*
406 * No emulated devices in the PV case, so no need to unplug
407 * anything.
408 */
409 if (xen_disable_pv_disks != 0 || xen_disable_pv_nics != 0)
410 printf("PV devices cannot be disabled in PV guests\n");
411 return;
412 }
413
414 if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC)
415 return;
416
417 if (xen_disable_pv_disks == 0) {
418 if (bootverbose)
419 printf("XEN: disabling emulated disks\n");
420 disable_devs |= XMI_UNPLUG_IDE_DISKS;
421 }
422 if (xen_disable_pv_nics == 0) {
423 if (bootverbose)
424 printf("XEN: disabling emulated nics\n");
425 disable_devs |= XMI_UNPLUG_NICS;
426 }
427
428 if (disable_devs != 0)
429 outw(XEN_MAGIC_IOPORT, disable_devs);
430 }
431
432 static void
xen_hvm_init(enum xen_hvm_init_type init_type)433 xen_hvm_init(enum xen_hvm_init_type init_type)
434 {
435 unsigned int i;
436
437 if (!xen_domain() ||
438 init_type == XEN_HVM_INIT_CANCELLED_SUSPEND)
439 return;
440
441 hypervisor_version();
442
443 switch (init_type) {
444 case XEN_HVM_INIT_LATE:
445 setup_xen_features();
446 #ifdef SMP
447 cpu_ops = xen_hvm_cpu_ops;
448 #endif
449 break;
450 case XEN_HVM_INIT_RESUME:
451 /* Clear stale vcpu_info. */
452 CPU_FOREACH(i)
453 DPCPU_ID_SET(i, vcpu_info, NULL);
454
455 if (map_shared_info() != 0)
456 panic("cannot map Xen shared info page");
457
458 break;
459 default:
460 panic("Unsupported HVM initialization type");
461 }
462
463 xen_vector_callback_enabled = 0;
464 xen_evtchn_needs_ack = false;
465 xen_hvm_set_callback(NULL);
466
467 xen_hvm_disable_emulated_devices();
468 }
469
/*
 * Suspend hook.  Currently a no-op: the body is empty, and the state set up
 * by this file is re-established by xen_hvm_resume().
 */
void
xen_hvm_suspend(void)
{
}
474
475 void
xen_hvm_resume(bool suspend_cancelled)476 xen_hvm_resume(bool suspend_cancelled)
477 {
478
479 xen_hvm_init(suspend_cancelled ?
480 XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME);
481
482 /* Register vcpu_info area for CPU#0. */
483 xen_hvm_cpu_init();
484 }
485
/*
 * SYSINIT hook: performs the boot-time (XEN_HVM_INIT_LATE) Xen HVM
 * initialization.  The argument is unused.
 */
static void
xen_hvm_sysinit(void *arg __unused)
{
	xen_hvm_init(XEN_HVM_INIT_LATE);
}
/* Registered at SI_SUB_HYPERVISOR with SI_ORDER_FIRST. */
SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL);
492
493 static void
xen_hvm_cpu_init(void)494 xen_hvm_cpu_init(void)
495 {
496 uint32_t regs[4];
497 int rc;
498
499 if (!xen_domain())
500 return;
501
502 if (DPCPU_GET(vcpu_info) != NULL) {
503 /*
504 * vcpu_info is already set. We're resuming
505 * from a failed migration and our pre-suspend
506 * configuration is still valid.
507 */
508 return;
509 }
510
511 /*
512 * Set vCPU ID. If available fetch the ID from CPUID, if not just use
513 * the ACPI ID.
514 */
515 KASSERT(hv_base != 0, ("Invalid base Xen CPUID leaf"));
516 cpuid_count(hv_base + 4, 0, regs);
517 KASSERT((regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ||
518 !xen_pv_domain(),
519 ("Xen PV domain without vcpu_id in cpuid"));
520 PCPU_SET(vcpu_id, (regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ?
521 regs[1] : PCPU_GET(acpi_id));
522
523 if (xen_evtchn_needs_ack && !IS_BSP()) {
524 /*
525 * Setup the per-vpcu event channel upcall vector. This is only
526 * required when using the new HVMOP_set_evtchn_upcall_vector
527 * hypercall, which allows using a different vector for each
528 * vCPU. Note that FreeBSD uses the same vector for all vCPUs
529 * because it's not dynamically allocated.
530 */
531 rc = set_percpu_callback(PCPU_GET(vcpu_id));
532 if (rc != 0)
533 panic("Event channel upcall vector setup failed: %d",
534 rc);
535 }
536
537 xen_setup_vcpu_info();
538 }
539 SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL);
540
541 bool
xen_has_iommu_maps(void)542 xen_has_iommu_maps(void)
543 {
544 uint32_t regs[4];
545
546 KASSERT(hv_base != 0, ("Invalid base Xen CPUID leaf"));
547 cpuid_count(hv_base + 4, 0, regs);
548
549 return (regs[0] & XEN_HVM_CPUID_IOMMU_MAPPINGS);
550 }
551