xref: /qemu/hw/i386/kvm/xen_evtchn.c (revision 95a36455)
1 /*
2  * QEMU Xen emulation: Event channel support
3  *
4  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5  *
6  * Authors: David Woodhouse <dwmw2@infradead.org>
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
9  * See the COPYING file in the top-level directory.
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/host-utils.h"
14 #include "qemu/module.h"
15 #include "qemu/lockable.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/log.h"
18 #include "qemu/error-report.h"
19 #include "monitor/monitor.h"
20 #include "monitor/hmp.h"
21 #include "qapi/error.h"
22 #include "qapi/qapi-commands-misc-target.h"
23 #include "qapi/qmp/qdict.h"
24 #include "qom/object.h"
25 #include "exec/target_page.h"
26 #include "exec/address-spaces.h"
27 #include "migration/vmstate.h"
28 #include "trace.h"
29 
30 #include "hw/sysbus.h"
31 #include "hw/xen/xen.h"
32 #include "hw/i386/x86.h"
33 #include "hw/i386/pc.h"
34 #include "hw/pci/pci.h"
35 #include "hw/pci/msi.h"
36 #include "hw/pci/msix.h"
37 #include "hw/irq.h"
38 #include "hw/xen/xen_backend_ops.h"
39 
40 #include "xen_evtchn.h"
41 #include "xen_overlay.h"
42 #include "xen_xenstore.h"
43 
44 #include "sysemu/kvm.h"
45 #include "sysemu/kvm_xen.h"
46 #include <linux/kvm.h>
47 #include <sys/eventfd.h>
48 
49 #include "hw/xen/interface/memory.h"
50 #include "hw/xen/interface/hvm/params.h"
51 
52 /* XX: For kvm_update_msi_routes_all() */
53 #include "target/i386/kvm/kvm_i386.h"
54 
55 #define TYPE_XEN_EVTCHN "xen-evtchn"
56 OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN)
57 
/*
 * One entry of the event channel port table.
 *
 * vcpu, type and u.val are what gets migrated (see xen_evtchn_port_vmstate);
 * the remaining union members are typed views of the same 16 bits as u.val.
 */
typedef struct XenEvtchnPort {
    uint32_t vcpu;      /* Xen/ACPI vcpu_id */
    uint16_t type;      /* EVTCHNSTAT_xxxx */
    union {
        uint16_t val;  /* raw value for serialization etc. */
        uint16_t pirq; /* reported for EVTCHNSTAT_pirq ports */
        uint16_t virq; /* reported for EVTCHNSTAT_virq ports */
        struct {
            /* Remote port; indexes be_handles[] when to_qemu is set */
            uint16_t port:15;
            uint16_t to_qemu:1; /* Only two targets; qemu or loopback */
        } interdomain;
    } u;
} XenEvtchnPort;
71 
72 /* 32-bit compatibility definitions, also used natively in 32-bit build */
/* Embedded as the 'arch' member of struct compat_vcpu_info. */
struct compat_arch_vcpu_info {
    unsigned int cr2;
    unsigned int pad[5];
};
77 
/*
 * Per-vCPU info area as laid out for a guest that is not in long mode.
 * The event channel code in this file only touches evtchn_upcall_pending
 * and evtchn_pending_sel.
 */
struct compat_vcpu_info {
    uint8_t evtchn_upcall_pending;  /* set to 1 when an upcall is due */
    uint8_t evtchn_upcall_mask;
    uint16_t pad;
    uint32_t evtchn_pending_sel;    /* one bit per evtchn_pending[] word */
    struct compat_arch_vcpu_info arch;
    struct vcpu_time_info time;
}; /* 64 bytes (x86) */
86 
/* Embedded as the 'arch' member of struct compat_shared_info. */
struct compat_arch_shared_info {
    unsigned int max_pfn;
    unsigned int pfn_to_mfn_frame_list_list;
    unsigned int nmi_reason;
    unsigned int p2m_cr3;
    unsigned int p2m_vaddr;
    unsigned int p2m_generation;
    uint32_t wc_sec_hi;
};
96 
/*
 * Shared info page as laid out for a guest that is not in long mode.
 * evtchn_pending[] and evtchn_mask[] carry one bit per port: 32 words of
 * 32 bits each.
 */
struct compat_shared_info {
    struct compat_vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
    uint32_t evtchn_pending[32];
    uint32_t evtchn_mask[32];
    uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
    uint32_t wc_sec;
    uint32_t wc_nsec;
    struct compat_arch_shared_info arch;
};
106 
/*
 * Port limit applied by valid_port() when the guest is not in long mode;
 * matches the 32 x 32 bits of compat_shared_info.evtchn_pending[].
 */
#define COMPAT_EVTCHN_2L_NR_CHANNELS            1024
108 
/*
 * Local private implementation of struct xenevtchn_handle.
 *
 * Handles are kept in XenEvtchnState.be_handles[], indexed by backend port.
 */
struct xenevtchn_handle {
    evtchn_port_t be_port;
    evtchn_port_t guest_port; /* Or zero for unbound */
    /* NOTE(review): presumably the fd returned by the .get_fd op - confirm */
    int fd;
};
115 
/*
 * Special (negative) 'emuirq' values. Xen uses these numbers in its live
 * migration stream, and keeping them identical leaves the door open for
 * guest-transparent live migration from real Xen to QEMU, where that stream
 * would have to be converted/consumed.
 */
#define IRQ_UNBOUND     (-1)
#define IRQ_PT          (-2)
#define IRQ_MSI_EMU     (-3)
124 
125 
/*
 * Per-PIRQ bookkeeping, one element of XenEvtchnState.pirq[].
 * Not migrated directly; gsi and port are rebuilt by xen_evtchn_post_load().
 */
struct pirq_info {
    int gsi;            /* GSI number, or one of the negative IRQ_xxx values */
    uint16_t port;      /* event channel port bound to this PIRQ */
    /*
     * NOTE(review): the fields below are not used in the part of the file
     * reviewed here; they look like the description of an MSI/MSI-X source
     * for the PIRQ - confirm against their users.
     */
    PCIDevice *dev;
    int vector;
    bool is_msix;
    bool is_masked;
    bool is_translated;
};
135 
/* Device state of the emulated Xen event channel controller. */
struct XenEvtchnState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    /* Value last accepted by xen_evtchn_set_callback_param() (serialized) */
    uint64_t callback_param;
    /* When set, set_port_pending() sends via KVM_XEN_HVM_EVTCHN_SEND */
    bool evtchn_in_kernel;
    /* GSI driven by xen_evtchn_set_callback_level(); 0 means none */
    uint32_t callback_gsi;

    /* Re-evaluates the callback GSI level when the BQL is not held */
    QEMUBH *gsi_bh;

    QemuMutex port_lock;
    /* Number of leading port_table[] entries in use (serialized) */
    uint32_t nr_ports;
    XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS];

    /* Connected to the system GSIs for raising callback as GSI / INTx */
    unsigned int nr_callback_gsis;
    qemu_irq *callback_gsis;

    /* Backend handles, indexed by backend port number */
    struct xenevtchn_handle *be_handles[EVTCHN_2L_NR_CHANNELS];

    /* Size of pirq[]; taken from kvm_xen_get_evtchn_max_pirq() */
    uint32_t nr_pirqs;

    /* Bitmap of allocated PIRQs (serialized) */
    uint16_t nr_pirq_inuse_words;
    uint64_t *pirq_inuse_bitmap;

    /* GSI → PIRQ mapping (serialized) */
    uint16_t gsi_pirq[IOAPIC_NUM_PINS];

    /* Per-GSI assertion state (serialized) */
    uint32_t pirq_gsi_set;

    /* Per-PIRQ information (rebuilt on migration, protected by BQL) */
    struct pirq_info *pirq;
};
172 
/*
 * Helpers for the PIRQ allocation bitmap (64 PIRQs per word).
 *
 *  pirq_inuse_word(s, pirq) - lvalue of the bitmap word holding @pirq
 *  pirq_inuse_bit(pirq)     - mask of @pirq's bit within that word
 *  pirq_inuse(s, pirq)      - non-zero if @pirq is marked allocated
 *
 * @s may be any expression yielding a pointer to a structure with a
 * pirq_inuse_bitmap member; it is parenthesised so that arguments such
 * as '&state' expand correctly.
 */
#define pirq_inuse_word(s, pirq) ((s)->pirq_inuse_bitmap[((pirq) / 64)])
#define pirq_inuse_bit(pirq) (1ULL << ((pirq) & 63))

#define pirq_inuse(s, pirq) (pirq_inuse_word(s, pirq) & pirq_inuse_bit(pirq))
177 
/* The one event channel device instance; set by xen_evtchn_create(). */
struct XenEvtchnState *xen_evtchn_singleton;

/* Top bits of callback_param are the type (HVM_PARAM_CALLBACK_TYPE_xxx) */
#define CALLBACK_VIA_TYPE_SHIFT 56

/* Defined further down; declared here because xen_evtchn_pre_load() uses it */
static void unbind_backend_ports(XenEvtchnState *s);
184 
xen_evtchn_pre_load(void * opaque)185 static int xen_evtchn_pre_load(void *opaque)
186 {
187     XenEvtchnState *s = opaque;
188 
189     /* Unbind all the backend-side ports; they need to rebind */
190     unbind_backend_ports(s);
191 
192     /* It'll be leaked otherwise. */
193     g_free(s->pirq_inuse_bitmap);
194     s->pirq_inuse_bitmap = NULL;
195 
196     return 0;
197 }
198 
xen_evtchn_post_load(void * opaque,int version_id)199 static int xen_evtchn_post_load(void *opaque, int version_id)
200 {
201     XenEvtchnState *s = opaque;
202     uint32_t i;
203 
204     if (s->callback_param) {
205         xen_evtchn_set_callback_param(s->callback_param);
206     }
207 
208     /* Rebuild s->pirq[].port mapping */
209     for (i = 0; i < s->nr_ports; i++) {
210         XenEvtchnPort *p = &s->port_table[i];
211 
212         if (p->type == EVTCHNSTAT_pirq) {
213             assert(p->u.pirq);
214             assert(p->u.pirq < s->nr_pirqs);
215 
216             /*
217              * Set the gsi to IRQ_UNBOUND; it may be changed to an actual
218              * GSI# below, or to IRQ_MSI_EMU when the MSI table snooping
219              * catches up with it.
220              */
221             s->pirq[p->u.pirq].gsi = IRQ_UNBOUND;
222             s->pirq[p->u.pirq].port = i;
223         }
224     }
225     /* Rebuild s->pirq[].gsi mapping */
226     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
227         if (s->gsi_pirq[i]) {
228             s->pirq[s->gsi_pirq[i]].gsi = i;
229         }
230     }
231     return 0;
232 }
233 
xen_evtchn_is_needed(void * opaque)234 static bool xen_evtchn_is_needed(void *opaque)
235 {
236     return xen_mode == XEN_EMULATE;
237 }
238 
/*
 * Migration format of one XenEvtchnPort: the vCPU, the type and the raw
 * 16-bit union value, in that order.
 */
static const VMStateDescription xen_evtchn_port_vmstate = {
    .name = "xen_evtchn_port",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(vcpu, XenEvtchnPort),
        VMSTATE_UINT16(type, XenEvtchnPort),
        VMSTATE_UINT16(u.val, XenEvtchnPort),
        VMSTATE_END_OF_LIST()
    }
};
250 
/*
 * Migration format of the event channel device. The section is only
 * present when xen_evtchn_is_needed() says so; xen_evtchn_pre_load() and
 * xen_evtchn_post_load() bracket the loading of the fields below.
 */
static const VMStateDescription xen_evtchn_vmstate = {
    .name = "xen_evtchn",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_evtchn_is_needed,
    .pre_load = xen_evtchn_pre_load,
    .post_load = xen_evtchn_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT64(callback_param, XenEvtchnState),
        VMSTATE_UINT32(nr_ports, XenEvtchnState),
        /*
         * NOTE(review): the element count is the nr_ports value loaded just
         * above; nothing in this table checks it against the size of
         * port_table[] - confirm whether it is bounded elsewhere.
         */
        VMSTATE_STRUCT_VARRAY_UINT32(port_table, XenEvtchnState, nr_ports, 1,
                                     xen_evtchn_port_vmstate, XenEvtchnPort),
        VMSTATE_UINT16_ARRAY(gsi_pirq, XenEvtchnState, IOAPIC_NUM_PINS),
        /*
         * nr_pirq_inuse_words is not itself listed as a field, so the
         * length used is the one computed locally in xen_evtchn_create().
         */
        VMSTATE_VARRAY_UINT16_ALLOC(pirq_inuse_bitmap, XenEvtchnState,
                                    nr_pirq_inuse_words, 0,
                                    vmstate_info_uint64, uint64_t),
        VMSTATE_UINT32(pirq_gsi_set, XenEvtchnState),
        VMSTATE_END_OF_LIST()
    }
};
271 
/* QOM class initialiser: attach the migration description to the class. */
static void xen_evtchn_class_init(ObjectClass *klass, void *data)
{
    DEVICE_CLASS(klass)->vmsd = &xen_evtchn_vmstate;
}
278 
/*
 * QOM type description: a sysbus device named TYPE_XEN_EVTCHN whose
 * instances are XenEvtchnState structures.
 */
static const TypeInfo xen_evtchn_info = {
    .name          = TYPE_XEN_EVTCHN,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenEvtchnState),
    .class_init    = xen_evtchn_class_init,
};
285 
/*
 * Event channel operations offered to backend drivers; installed as
 * xen_evtchn_ops by xen_evtchn_create(). The xen_be_evtchn_*() functions
 * are not defined in the part of the file shown here.
 */
static struct evtchn_backend_ops emu_evtchn_backend_ops = {
    .open = xen_be_evtchn_open,
    .bind_interdomain = xen_be_evtchn_bind_interdomain,
    .unbind = xen_be_evtchn_unbind,
    .close = xen_be_evtchn_close,
    .get_fd = xen_be_evtchn_fd,
    .notify = xen_be_evtchn_notify,
    .unmask = xen_be_evtchn_unmask,
    .pending = xen_be_evtchn_pending,
};
296 
gsi_assert_bh(void * opaque)297 static void gsi_assert_bh(void *opaque)
298 {
299     struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
300     if (vi) {
301         xen_evtchn_set_callback_level(!!vi->evtchn_upcall_pending);
302     }
303 }
304 
/*
 * Create the event channel device, publish it as xen_evtchn_singleton and
 * install the emulated backend operations.
 *
 * @nr_gsis:     number of entries in @system_gsis
 * @system_gsis: the platform's GSI inputs, one per GSI number
 */
void xen_evtchn_create(unsigned int nr_gsis, qemu_irq *system_gsis)
{
    DeviceState *dev = sysbus_create_simple(TYPE_XEN_EVTCHN, -1, NULL);
    XenEvtchnState *s = XEN_EVTCHN(dev);
    SysBusDevice *sbd = SYS_BUS_DEVICE(s);
    int gsi;

    xen_evtchn_singleton = s;

    qemu_mutex_init(&s->port_lock);
    s->gsi_bh = aio_bh_new(qemu_get_aio_context(), gsi_assert_bh, s);

    /*
     * Output side of the event channel support: CPU0's events can be
     * signalled through a GSI or a PCI INTx line rather than the per-CPU
     * vector. One outgoing irq is created per system GSI handed in by the
     * platform code and wired straight to it;
     * xen_evtchn_set_callback_level() then simply raises or lowers
     * whichever one is currently selected.
     */
    s->nr_callback_gsis = nr_gsis;
    s->callback_gsis = g_new0(qemu_irq, nr_gsis);
    for (gsi = 0; gsi < nr_gsis; gsi++) {
        sysbus_init_irq(sbd, &s->callback_gsis[gsi]);
        sysbus_connect_irq(sbd, gsi, system_gsis[gsi]);
    }

    /*
     * Xen encodes the PIRQ# into an MSI message in a way that does not
     * work with 32-bit MSI: the high bits of the PIRQ# land in the high
     * bits of the MSI message address, rather than in the Extended
     * Destination ID in address bits 4-11 (arguably the better choice).
     *
     * To keep life simple, kvm_accel_instance_init() initialises the
     * default to 256, which conveniently needs nothing outside the low
     * 32 bits of the address. The xen-evtchn-max-pirq property can be
     * used to raise it.
     */
    s->nr_pirqs = kvm_xen_get_evtchn_max_pirq();

    s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
    s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
    s->pirq = g_new0(struct pirq_info, s->nr_pirqs);

    /* From now on backend drivers use the emulated implementation */
    xen_evtchn_ops = &emu_evtchn_backend_ops;
}
352 
/* Register the TYPE_XEN_EVTCHN device type described by xen_evtchn_info. */
static void xen_evtchn_register_types(void)
{
    type_register_static(&xen_evtchn_info);
}

/* Hook the registration function into QEMU's type_init() mechanism. */
type_init(xen_evtchn_register_types)
359 
360 static int set_callback_pci_intx(XenEvtchnState *s, uint64_t param)
361 {
362     PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
363     uint8_t pin = param & 3;
364     uint8_t devfn = (param >> 8) & 0xff;
365     uint16_t bus = (param >> 16) & 0xffff;
366     uint16_t domain = (param >> 32) & 0xffff;
367     PCIDevice *pdev;
368     PCIINTxRoute r;
369 
370     if (domain || !pcms) {
371         return 0;
372     }
373 
374     pdev = pci_find_device(pcms->pcibus, bus, devfn);
375     if (!pdev) {
376         return 0;
377     }
378 
379     r = pci_device_route_intx_to_irq(pdev, pin);
380     if (r.mode != PCI_INTX_ENABLED) {
381         return 0;
382     }
383 
384     /*
385      * Hm, can we be notified of INTX routing changes? Not without
386      * *owning* the device and being allowed to overwrite its own
387      * ->intx_routing_notifier, AFAICT. So let's not.
388      */
389     return r.irq;
390 }
391 
xen_evtchn_set_callback_level(int level)392 void xen_evtchn_set_callback_level(int level)
393 {
394     XenEvtchnState *s = xen_evtchn_singleton;
395     if (!s) {
396         return;
397     }
398 
399     /*
400      * We get to this function in a number of ways:
401      *
402      *  • From I/O context, via PV backend drivers sending a notification to
403      *    the guest.
404      *
405      *  • From guest vCPU context, via loopback interdomain event channels
406      *    (or theoretically even IPIs but guests don't use those with GSI
407      *    delivery because that's pointless. We don't want a malicious guest
408      *    to be able to trigger a deadlock though, so we can't rule it out.)
409      *
410      *  • From guest vCPU context when the HVM_PARAM_CALLBACK_IRQ is being
411      *    configured.
412      *
413      *  • From guest vCPU context in the KVM exit handler, if the upcall
414      *    pending flag has been cleared and the GSI needs to be deasserted.
415      *
416      *  • Maybe in future, in an interrupt ack/eoi notifier when the GSI has
417      *    been acked in the irqchip.
418      *
419      * Whichever context we come from if we aren't already holding the BQL
420      * then e can't take it now, as we may already hold s->port_lock. So
421      * trigger the BH to set the IRQ for us instead of doing it immediately.
422      *
423      * In the HVM_PARAM_CALLBACK_IRQ and KVM exit handler cases, the caller
424      * will deliberately take the BQL because they want the change to take
425      * effect immediately. That just leaves interdomain loopback as the case
426      * which uses the BH.
427      */
428     if (!bql_locked()) {
429         qemu_bh_schedule(s->gsi_bh);
430         return;
431     }
432 
433     if (s->callback_gsi && s->callback_gsi < s->nr_callback_gsis) {
434         qemu_set_irq(s->callback_gsis[s->callback_gsi], level);
435         if (level) {
436             /* Ensure the vCPU polls for deassertion */
437             kvm_xen_set_callback_asserted();
438         }
439     }
440 }
441 
xen_evtchn_set_callback_param(uint64_t param)442 int xen_evtchn_set_callback_param(uint64_t param)
443 {
444     XenEvtchnState *s = xen_evtchn_singleton;
445     struct kvm_xen_hvm_attr xa = {
446         .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
447         .u.vector = 0,
448     };
449     bool in_kernel = false;
450     uint32_t gsi = 0;
451     int type = param >> CALLBACK_VIA_TYPE_SHIFT;
452     int ret;
453 
454     if (!s) {
455         return -ENOTSUP;
456     }
457 
458     /*
459      * We need the BQL because set_callback_pci_intx() may call into PCI code,
460      * and because we may need to manipulate the old and new GSI levels.
461      */
462     assert(bql_locked());
463     qemu_mutex_lock(&s->port_lock);
464 
465     switch (type) {
466     case HVM_PARAM_CALLBACK_TYPE_VECTOR: {
467         xa.u.vector = (uint8_t)param,
468 
469         ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
470         if (!ret && kvm_xen_has_cap(EVTCHN_SEND)) {
471             in_kernel = true;
472         }
473         gsi = 0;
474         break;
475     }
476 
477     case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
478         gsi = set_callback_pci_intx(s, param);
479         ret = gsi ? 0 : -EINVAL;
480         break;
481 
482     case HVM_PARAM_CALLBACK_TYPE_GSI:
483         gsi = (uint32_t)param;
484         ret = 0;
485         break;
486 
487     default:
488         /* Xen doesn't return error even if you set something bogus */
489         ret = 0;
490         break;
491     }
492 
493     /* If the guest has set a per-vCPU callback vector, prefer that. */
494     if (gsi && kvm_xen_has_vcpu_callback_vector()) {
495         in_kernel = kvm_xen_has_cap(EVTCHN_SEND);
496         gsi = 0;
497     }
498 
499     if (!ret) {
500         /* If vector delivery was turned *off* then tell the kernel */
501         if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) ==
502             HVM_PARAM_CALLBACK_TYPE_VECTOR && !xa.u.vector) {
503             kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
504         }
505         s->callback_param = param;
506         s->evtchn_in_kernel = in_kernel;
507 
508         if (gsi != s->callback_gsi) {
509             struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
510 
511             xen_evtchn_set_callback_level(0);
512             s->callback_gsi = gsi;
513 
514             if (gsi && vi && vi->evtchn_upcall_pending) {
515                 kvm_xen_inject_vcpu_callback_vector(0, type);
516             }
517         }
518     }
519 
520     qemu_mutex_unlock(&s->port_lock);
521 
522     return ret;
523 }
524 
/*
 * Deliver the event channel upcall to @vcpu, passing along the delivery
 * type taken from the top bits of the stored callback parameter.
 */
static void inject_callback(XenEvtchnState *s, uint32_t vcpu)
{
    kvm_xen_inject_vcpu_callback_vector(
        vcpu, (int)(s->callback_param >> CALLBACK_VIA_TYPE_SHIFT));
}
531 
/*
 * Ask the kernel to stop handling sends on @port itself.
 *
 * Failure is only logged. kvm_vm_ioctl() reports errors as a negative
 * errno value, so the sign is flipped before it is handed to strerror().
 */
static void deassign_kernel_port(evtchn_port_t port)
{
    /* Fully initialised so that no indeterminate bytes reach the kernel */
    struct kvm_xen_hvm_attr ha = {
        .type = KVM_XEN_ATTR_TYPE_EVTCHN,
        .u.evtchn.send_port = port,
        .u.evtchn.flags = KVM_XEN_EVTCHN_DEASSIGN,
    };
    int ret;

    ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
    if (ret) {
        qemu_log_mask(LOG_GUEST_ERROR, "Failed to unbind kernel port %u: %s\n",
                      port, strerror(-ret));
    }
}
547 
/*
 * Ask the kernel to handle sends on @port itself by raising the same port
 * number on the vCPU identified by @vcpu_id, using 2-level priority.
 *
 * @type is passed through as the kernel's event channel type field.
 *
 * Returns -ENOENT if @vcpu_id does not name an existing CPU, otherwise
 * the result of the KVM_XEN_HVM_SET_ATTR ioctl.
 */
static int assign_kernel_port(uint16_t type, evtchn_port_t port,
                              uint32_t vcpu_id)
{
    CPUState *cpu = qemu_get_cpu(vcpu_id);
    /* Fully initialised so that no indeterminate bytes reach the kernel */
    struct kvm_xen_hvm_attr ha = {
        .type = KVM_XEN_ATTR_TYPE_EVTCHN,
        .u.evtchn = {
            .send_port = port,
            .type = type,
            .flags = 0,
            .deliver.port = {
                .port = port,
                .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
            },
        },
    };

    if (!cpu) {
        return -ENOENT;
    }

    /* The kernel wants the KVM vCPU id, which needs the CPUState */
    ha.u.evtchn.deliver.port.vcpu = kvm_arch_vcpu_id(cpu);

    return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
}
568 
/*
 * Ask the kernel to signal the eventfd @fd whenever the guest sends on
 * @port. The delivery port is left as zero and @type is passed through
 * as the kernel's event channel type field.
 *
 * Returns the result of the KVM_XEN_HVM_SET_ATTR ioctl.
 */
static int assign_kernel_eventfd(uint16_t type, evtchn_port_t port, int fd)
{
    /* Fully initialised so that no indeterminate bytes reach the kernel */
    struct kvm_xen_hvm_attr ha = {
        .type = KVM_XEN_ATTR_TYPE_EVTCHN,
        .u.evtchn = {
            .send_port = port,
            .type = type,
            .flags = 0,
            .deliver.eventfd = {
                .port = 0,
                .fd = fd,
            },
        },
    };

    return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
}
582 
/*
 * A port number is usable if it is non-zero and below the channel limit
 * for the guest's current mode (long mode or compat).
 */
static bool valid_port(evtchn_port_t port)
{
    evtchn_port_t limit;

    if (port == 0) {
        return false;
    }

    limit = xen_is_long_mode() ? EVTCHN_2L_NR_CHANNELS
                               : COMPAT_EVTCHN_2L_NR_CHANNELS;

    return port < limit;
}
595 
/* True if @vcpu names a CPU that qemu_get_cpu() can find. */
static bool valid_vcpu(uint32_t vcpu)
{
    return qemu_get_cpu(vcpu) != NULL;
}
600 
/*
 * Detach every guest port that is bound to a QEMU backend handle.
 *
 * The guest port becomes unbound, its kernel assignment is removed when
 * the kernel handles sends, and the backend handle is left open with no
 * guest port attached.
 */
static void unbind_backend_ports(XenEvtchnState *s)
{
    int guest_port;

    for (guest_port = 1; guest_port < s->nr_ports; guest_port++) {
        XenEvtchnPort *entry = &s->port_table[guest_port];
        struct xenevtchn_handle *handle;

        if (entry->type != EVTCHNSTAT_interdomain ||
            !entry->u.interdomain.to_qemu) {
            continue;
        }

        handle = s->be_handles[entry->u.interdomain.port];
        if (!handle) {
            continue;
        }

        /* This part will be overwritten on the load anyway. */
        entry->type = EVTCHNSTAT_unbound;
        entry->u.interdomain.port = 0;

        /* Leave the backend port open and unbound too. */
        if (kvm_xen_has_cap(EVTCHN_SEND)) {
            deassign_kernel_port(guest_port);
        }
        handle->guest_port = 0;
    }
}
625 
xen_evtchn_status_op(struct evtchn_status * status)626 int xen_evtchn_status_op(struct evtchn_status *status)
627 {
628     XenEvtchnState *s = xen_evtchn_singleton;
629     XenEvtchnPort *p;
630 
631     if (!s) {
632         return -ENOTSUP;
633     }
634 
635     if (status->dom != DOMID_SELF && status->dom != xen_domid) {
636         return -ESRCH;
637     }
638 
639     if (!valid_port(status->port)) {
640         return -EINVAL;
641     }
642 
643     qemu_mutex_lock(&s->port_lock);
644 
645     p = &s->port_table[status->port];
646 
647     status->status = p->type;
648     status->vcpu = p->vcpu;
649 
650     switch (p->type) {
651     case EVTCHNSTAT_unbound:
652         status->u.unbound.dom = p->u.interdomain.to_qemu ? DOMID_QEMU
653                                                          : xen_domid;
654         break;
655 
656     case EVTCHNSTAT_interdomain:
657         status->u.interdomain.dom = p->u.interdomain.to_qemu ? DOMID_QEMU
658                                                              : xen_domid;
659         status->u.interdomain.port = p->u.interdomain.port;
660         break;
661 
662     case EVTCHNSTAT_pirq:
663         status->u.pirq = p->u.pirq;
664         break;
665 
666     case EVTCHNSTAT_virq:
667         status->u.virq = p->u.virq;
668         break;
669     }
670 
671     qemu_mutex_unlock(&s->port_lock);
672     return 0;
673 }
674 
675 /*
676  * Never thought I'd hear myself say this, but C++ templates would be
677  * kind of nice here.
678  *
679  * template<class T> static int do_unmask_port(T *shinfo, ...);
680  */
do_unmask_port_lm(XenEvtchnState * s,evtchn_port_t port,bool do_unmask,struct shared_info * shinfo,struct vcpu_info * vcpu_info)681 static int do_unmask_port_lm(XenEvtchnState *s, evtchn_port_t port,
682                              bool do_unmask, struct shared_info *shinfo,
683                              struct vcpu_info *vcpu_info)
684 {
685     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
686     typeof(shinfo->evtchn_pending[0]) mask;
687     int idx = port / bits_per_word;
688     int offset = port % bits_per_word;
689 
690     mask = 1UL << offset;
691 
692     if (idx >= bits_per_word) {
693         return -EINVAL;
694     }
695 
696     if (do_unmask) {
697         /*
698          * If this is a true unmask operation, clear the mask bit. If
699          * it was already unmasked, we have nothing further to do.
700          */
701         if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
702             return 0;
703         }
704     } else {
705         /*
706          * This is a pseudo-unmask for affinity changes. We don't
707          * change the mask bit, and if it's *masked* we have nothing
708          * else to do.
709          */
710         if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
711             return 0;
712         }
713     }
714 
715     /* If the event was not pending, we're done. */
716     if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
717         return 0;
718     }
719 
720     /* Now on to the vcpu_info evtchn_pending_sel index... */
721     mask = 1UL << idx;
722 
723     /* If a port in this word was already pending for this vCPU, all done. */
724     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
725         return 0;
726     }
727 
728     /* Set evtchn_upcall_pending for this vCPU */
729     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
730         return 0;
731     }
732 
733     inject_callback(s, s->port_table[port].vcpu);
734 
735     return 0;
736 }
737 
/*
 * Compat (non long mode) variant of the unmask operation, working on the
 * 32-bit layouts of the shared info and vcpu info structures.
 *
 * With @do_unmask set the port's mask bit is cleared; otherwise the mask
 * is only inspected. If the port ends up unmasked and pending, the event
 * is propagated to the vCPU's selector word and upcall flag, and the
 * callback is injected if the upcall flag was not already set.
 *
 * Returns -EINVAL if @port lies beyond the bitmap, 0 otherwise.
 */
static int do_unmask_port_compat(XenEvtchnState *s, evtchn_port_t port,
                                 bool do_unmask,
                                 struct compat_shared_info *shinfo,
                                 struct compat_vcpu_info *vcpu_info)
{
    const int word_bits = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
    const int word = port / word_bits;
    const int bit = port % word_bits;
    typeof(shinfo->evtchn_pending[0]) port_mask, sel_mask, old;

    /* The bitmap has as many words as each word has bits */
    if (word >= word_bits) {
        return -EINVAL;
    }

    port_mask = 1UL << bit;
    sel_mask = 1UL << word;

    if (do_unmask) {
        /*
         * A real unmask: clear the mask bit. Nothing more to do if the
         * port was not masked in the first place.
         */
        old = qatomic_fetch_and(&shinfo->evtchn_mask[word], ~port_mask);
        if (!(old & port_mask)) {
            return 0;
        }
    } else {
        /*
         * A pseudo-unmask for affinity changes: the mask bit is left
         * alone, and a port which *is* masked needs nothing further.
         */
        old = qatomic_fetch_or(&shinfo->evtchn_mask[word], 0);
        if (old & port_mask) {
            return 0;
        }
    }

    /* Only a pending event has to be delivered */
    old = qatomic_fetch_or(&shinfo->evtchn_pending[word], 0);
    if (!(old & port_mask)) {
        return 0;
    }

    /* Flag the word in evtchn_pending_sel; done if it was flagged already */
    old = qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, sel_mask);
    if (old & sel_mask) {
        return 0;
    }

    /* Raise evtchn_upcall_pending; only the 0 -> 1 edge needs an upcall */
    if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
        return 0;
    }

    inject_callback(s, s->port_table[port].vcpu);

    return 0;
}
795 
/*
 * Unmask @port (or, with @do_unmask false, just re-evaluate delivery)
 * using the shared info layout that matches the guest's current mode.
 *
 * Returns -EINVAL for a closed port or when the port's vCPU has no
 * vcpu_info, -ENOTSUP when there is no shared info page, and otherwise
 * the result of the mode-specific helper.
 */
static int unmask_port(XenEvtchnState *s, evtchn_port_t port, bool do_unmask)
{
    const XenEvtchnPort *entry = &s->port_table[port];
    void *shared, *vi;

    if (entry->type == EVTCHNSTAT_closed) {
        return -EINVAL;
    }

    shared = xen_overlay_get_shinfo_ptr();
    if (shared == NULL) {
        return -ENOTSUP;
    }

    vi = kvm_xen_get_vcpu_info_hva(entry->vcpu);
    if (vi == NULL) {
        return -EINVAL;
    }

    return xen_is_long_mode()
        ? do_unmask_port_lm(s, port, do_unmask, shared, vi)
        : do_unmask_port_compat(s, port, do_unmask, shared, vi);
}
820 
/*
 * Long mode variant of raising an event on @port.
 *
 * Sets the pending bit and, unless the port was already pending or is
 * masked, propagates the event to the vCPU's selector word and upcall
 * flag, injecting the callback if the upcall flag was not already set.
 *
 * Returns -EINVAL if @port lies beyond the bitmap, 0 otherwise.
 */
static int do_set_port_lm(XenEvtchnState *s, evtchn_port_t port,
                          struct shared_info *shinfo,
                          struct vcpu_info *vcpu_info)
{
    const int word_bits = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
    const int word = port / word_bits;
    const int bit = port % word_bits;
    typeof(shinfo->evtchn_pending[0]) port_mask, sel_mask, old;

    /* The bitmap has as many words as each word has bits */
    if (word >= word_bits) {
        return -EINVAL;
    }

    port_mask = 1UL << bit;
    sel_mask = 1UL << word;

    /* Mark the port pending; an already pending port needs nothing more */
    old = qatomic_fetch_or(&shinfo->evtchn_pending[word], port_mask);
    if (old & port_mask) {
        return 0;
    }

    /* A masked port stays pending but is not delivered now */
    old = qatomic_fetch_or(&shinfo->evtchn_mask[word], 0);
    if (old & port_mask) {
        return 0;
    }

    /* Flag the word in evtchn_pending_sel; done if it was flagged already */
    old = qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, sel_mask);
    if (old & sel_mask) {
        return 0;
    }

    /* Raise evtchn_upcall_pending; only the 0 -> 1 edge needs an upcall */
    if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
        return 0;
    }

    inject_callback(s, s->port_table[port].vcpu);

    return 0;
}
863 
/*
 * Compat (non long mode) variant of raising an event on @port, working
 * on the 32-bit layouts of the shared info and vcpu info structures.
 *
 * Sets the pending bit and, unless the port was already pending or is
 * masked, propagates the event to the vCPU's selector word and upcall
 * flag, injecting the callback if the upcall flag was not already set.
 *
 * Returns -EINVAL if @port lies beyond the bitmap, 0 otherwise.
 */
static int do_set_port_compat(XenEvtchnState *s, evtchn_port_t port,
                              struct compat_shared_info *shinfo,
                              struct compat_vcpu_info *vcpu_info)
{
    const int word_bits = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
    const int word = port / word_bits;
    const int bit = port % word_bits;
    typeof(shinfo->evtchn_pending[0]) port_mask, sel_mask, old;

    /* The bitmap has as many words as each word has bits */
    if (word >= word_bits) {
        return -EINVAL;
    }

    port_mask = 1UL << bit;
    sel_mask = 1UL << word;

    /* Mark the port pending; an already pending port needs nothing more */
    old = qatomic_fetch_or(&shinfo->evtchn_pending[word], port_mask);
    if (old & port_mask) {
        return 0;
    }

    /* A masked port stays pending but is not delivered now */
    old = qatomic_fetch_or(&shinfo->evtchn_mask[word], 0);
    if (old & port_mask) {
        return 0;
    }

    /* Flag the word in evtchn_pending_sel; done if it was flagged already */
    old = qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, sel_mask);
    if (old & sel_mask) {
        return 0;
    }

    /* Raise evtchn_upcall_pending; only the 0 -> 1 edge needs an upcall */
    if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
        return 0;
    }

    inject_callback(s, s->port_table[port].vcpu);

    return 0;
}
906 
/*
 * Raise an event on @port.
 *
 * When event delivery is handled in the kernel this is a
 * KVM_XEN_HVM_EVTCHN_SEND ioctl; otherwise the shared info page is
 * updated directly using the layout for the guest's current mode.
 *
 * Returns -EINVAL for a closed port or when the port's vCPU has no
 * vcpu_info, -ENOTSUP when there is no shared info page, 0 when the
 * in-kernel path finds no CPU for the port, and otherwise the result of
 * the ioctl or of the mode-specific helper.
 */
static int set_port_pending(XenEvtchnState *s, evtchn_port_t port)
{
    XenEvtchnPort *entry = &s->port_table[port];
    void *shared, *vi;

    if (entry->type == EVTCHNSTAT_closed) {
        return -EINVAL;
    }

    if (s->evtchn_in_kernel) {
        CPUState *cpu = qemu_get_cpu(entry->vcpu);
        struct kvm_irq_routing_xen_evtchn evt;

        if (cpu == NULL) {
            return 0;
        }

        evt = (struct kvm_irq_routing_xen_evtchn) {
            .port = port,
            .vcpu = kvm_arch_vcpu_id(cpu),
            .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
        };

        return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_EVTCHN_SEND, &evt);
    }

    shared = xen_overlay_get_shinfo_ptr();
    if (shared == NULL) {
        return -ENOTSUP;
    }

    vi = kvm_xen_get_vcpu_info_hva(entry->vcpu);
    if (vi == NULL) {
        return -EINVAL;
    }

    return xen_is_long_mode() ? do_set_port_lm(s, port, shared, vi)
                              : do_set_port_compat(s, port, shared, vi);
}
947 
/*
 * Atomically clear the bit for @port in the evtchn_pending bitmap of the
 * shared info page, using the long mode or compat layout as appropriate.
 *
 * Returns 0 on success, or -ENOTSUP if no shared info page is available.
 */
static int clear_port_pending(XenEvtchnState *s, evtchn_port_t port)
{
    void *base = xen_overlay_get_shinfo_ptr();

    if (base == NULL) {
        return -ENOTSUP;
    }

    if (!xen_is_long_mode()) {
        struct compat_shared_info *csi = base;
        const int width = BITS_PER_BYTE * sizeof(csi->evtchn_pending[0]);
        int word = port / width;
        int bit = port % width;
        typeof(csi->evtchn_pending[0]) bitmask = 1UL << bit;

        qatomic_fetch_and(&csi->evtchn_pending[word], ~bitmask);
        return 0;
    }

    {
        struct shared_info *si = base;
        const int width = BITS_PER_BYTE * sizeof(si->evtchn_pending[0]);
        int word = port / width;
        int bit = port % width;
        typeof(si->evtchn_pending[0]) bitmask = 1UL << bit;

        qatomic_fetch_and(&si->evtchn_pending[word], ~bitmask);
    }
    return 0;
}
979 
/*
 * Return @port to the closed state and wipe its table entry.
 *
 * If @port was the highest port covered by nr_ports, nr_ports is shrunk
 * past it and past any closed ports directly below it. The guest-visible
 * pending bit is cleared last.
 */
static void free_port(XenEvtchnState *s, evtchn_port_t port)
{
    XenEvtchnPort *slot = &s->port_table[port];

    slot->vcpu = 0;
    slot->u.val = 0;
    slot->type = EVTCHNSTAT_closed;

    if (port + 1 == s->nr_ports) {
        s->nr_ports--;
        while (s->nr_ports != 0 &&
               s->port_table[s->nr_ports - 1].type == EVTCHNSTAT_closed) {
            s->nr_ports--;
        }
    }

    /* A stale pending bit must not survive into a later re-bind. */
    clear_port_pending(s, port);
}
996 
/*
 * Claim the lowest-numbered closed port (searching from port 1 upwards)
 * and initialise it with @vcpu, @type and @val.
 *
 * On success the chosen port is stored in *@port, nr_ports is grown to
 * cover it if necessary, and 0 is returned. Returns -ENOSPC when no valid
 * port is closed.
 */
static int allocate_port(XenEvtchnState *s, uint32_t vcpu, uint16_t type,
                         uint16_t val, evtchn_port_t *port)
{
    evtchn_port_t candidate;

    for (candidate = 1; valid_port(candidate); candidate++) {
        XenEvtchnPort *slot = &s->port_table[candidate];

        if (slot->type != EVTCHNSTAT_closed) {
            continue;
        }

        slot->vcpu = vcpu;
        slot->type = type;
        slot->u.val = val;

        *port = candidate;

        if (candidate + 1 > s->nr_ports) {
            s->nr_ports = candidate + 1;
        }
        return 0;
    }

    return -ENOSPC;
}
1019 
virq_is_global(uint32_t virq)1020 static bool virq_is_global(uint32_t virq)
1021 {
1022     switch (virq) {
1023     case VIRQ_TIMER:
1024     case VIRQ_DEBUG:
1025     case VIRQ_XENOPROF:
1026     case VIRQ_XENPMU:
1027         return false;
1028 
1029     default:
1030         return true;
1031     }
1032 }
1033 
close_port(XenEvtchnState * s,evtchn_port_t port,bool * flush_kvm_routes)1034 static int close_port(XenEvtchnState *s, evtchn_port_t port,
1035                       bool *flush_kvm_routes)
1036 {
1037     XenEvtchnPort *p = &s->port_table[port];
1038 
1039     /* Because it *might* be a PIRQ port */
1040     assert(bql_locked());
1041 
1042     switch (p->type) {
1043     case EVTCHNSTAT_closed:
1044         return -ENOENT;
1045 
1046     case EVTCHNSTAT_pirq:
1047         s->pirq[p->u.pirq].port = 0;
1048         if (s->pirq[p->u.pirq].is_translated) {
1049             *flush_kvm_routes = true;
1050         }
1051         break;
1052 
1053     case EVTCHNSTAT_virq:
1054         kvm_xen_set_vcpu_virq(virq_is_global(p->u.virq) ? 0 : p->vcpu,
1055                               p->u.virq, 0);
1056         break;
1057 
1058     case EVTCHNSTAT_ipi:
1059         if (s->evtchn_in_kernel) {
1060             deassign_kernel_port(port);
1061         }
1062         break;
1063 
1064     case EVTCHNSTAT_interdomain:
1065         if (p->u.interdomain.to_qemu) {
1066             uint16_t be_port = p->u.interdomain.port;
1067             struct xenevtchn_handle *xc = s->be_handles[be_port];
1068             if (xc) {
1069                 if (kvm_xen_has_cap(EVTCHN_SEND)) {
1070                     deassign_kernel_port(port);
1071                 }
1072                 xc->guest_port = 0;
1073             }
1074         } else {
1075             /* Loopback interdomain */
1076             XenEvtchnPort *rp = &s->port_table[p->u.interdomain.port];
1077             if (!valid_port(p->u.interdomain.port) ||
1078                 rp->u.interdomain.port != port ||
1079                 rp->type != EVTCHNSTAT_interdomain) {
1080                 error_report("Inconsistent state for interdomain unbind");
1081             } else {
1082                 /* Set the other end back to unbound */
1083                 rp->type = EVTCHNSTAT_unbound;
1084                 rp->u.interdomain.port = 0;
1085             }
1086         }
1087         break;
1088 
1089     default:
1090         break;
1091     }
1092 
1093     free_port(s, port);
1094     return 0;
1095 }
1096 
xen_evtchn_soft_reset(void)1097 int xen_evtchn_soft_reset(void)
1098 {
1099     XenEvtchnState *s = xen_evtchn_singleton;
1100     bool flush_kvm_routes = false;
1101     int i;
1102 
1103     if (!s) {
1104         return -ENOTSUP;
1105     }
1106 
1107     assert(bql_locked());
1108 
1109     qemu_mutex_lock(&s->port_lock);
1110 
1111     for (i = 0; i < s->nr_ports; i++) {
1112         close_port(s, i, &flush_kvm_routes);
1113     }
1114 
1115     qemu_mutex_unlock(&s->port_lock);
1116 
1117     if (flush_kvm_routes) {
1118         kvm_update_msi_routes_all(NULL, true, 0, 0);
1119     }
1120 
1121     return 0;
1122 }
1123 
xen_evtchn_reset_op(struct evtchn_reset * reset)1124 int xen_evtchn_reset_op(struct evtchn_reset *reset)
1125 {
1126     if (reset->dom != DOMID_SELF && reset->dom != xen_domid) {
1127         return -ESRCH;
1128     }
1129 
1130     BQL_LOCK_GUARD();
1131     return xen_evtchn_soft_reset();
1132 }
1133 
xen_evtchn_close_op(struct evtchn_close * close)1134 int xen_evtchn_close_op(struct evtchn_close *close)
1135 {
1136     XenEvtchnState *s = xen_evtchn_singleton;
1137     bool flush_kvm_routes = false;
1138     int ret;
1139 
1140     if (!s) {
1141         return -ENOTSUP;
1142     }
1143 
1144     if (!valid_port(close->port)) {
1145         return -EINVAL;
1146     }
1147 
1148     BQL_LOCK_GUARD();
1149     qemu_mutex_lock(&s->port_lock);
1150 
1151     ret = close_port(s, close->port, &flush_kvm_routes);
1152 
1153     qemu_mutex_unlock(&s->port_lock);
1154 
1155     if (flush_kvm_routes) {
1156         kvm_update_msi_routes_all(NULL, true, 0, 0);
1157     }
1158 
1159     return ret;
1160 }
1161 
xen_evtchn_unmask_op(struct evtchn_unmask * unmask)1162 int xen_evtchn_unmask_op(struct evtchn_unmask *unmask)
1163 {
1164     XenEvtchnState *s = xen_evtchn_singleton;
1165     int ret;
1166 
1167     if (!s) {
1168         return -ENOTSUP;
1169     }
1170 
1171     if (!valid_port(unmask->port)) {
1172         return -EINVAL;
1173     }
1174 
1175     qemu_mutex_lock(&s->port_lock);
1176 
1177     ret = unmask_port(s, unmask->port, true);
1178 
1179     qemu_mutex_unlock(&s->port_lock);
1180 
1181     return ret;
1182 }
1183 
xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu * vcpu)1184 int xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu *vcpu)
1185 {
1186     XenEvtchnState *s = xen_evtchn_singleton;
1187     XenEvtchnPort *p;
1188     int ret = -EINVAL;
1189 
1190     if (!s) {
1191         return -ENOTSUP;
1192     }
1193 
1194     if (!valid_port(vcpu->port)) {
1195         return -EINVAL;
1196     }
1197 
1198     if (!valid_vcpu(vcpu->vcpu)) {
1199         return -ENOENT;
1200     }
1201 
1202     qemu_mutex_lock(&s->port_lock);
1203 
1204     p = &s->port_table[vcpu->port];
1205 
1206     if (p->type == EVTCHNSTAT_interdomain ||
1207         p->type == EVTCHNSTAT_unbound ||
1208         p->type == EVTCHNSTAT_pirq ||
1209         (p->type == EVTCHNSTAT_virq && virq_is_global(p->u.virq))) {
1210         /*
1211          * unmask_port() with do_unmask==false will just raise the event
1212          * on the new vCPU if the port was already pending.
1213          */
1214         p->vcpu = vcpu->vcpu;
1215         unmask_port(s, vcpu->port, false);
1216         ret = 0;
1217     }
1218 
1219     qemu_mutex_unlock(&s->port_lock);
1220 
1221     return ret;
1222 }
1223 
xen_evtchn_bind_virq_op(struct evtchn_bind_virq * virq)1224 int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq)
1225 {
1226     XenEvtchnState *s = xen_evtchn_singleton;
1227     int ret;
1228 
1229     if (!s) {
1230         return -ENOTSUP;
1231     }
1232 
1233     if (virq->virq >= NR_VIRQS) {
1234         return -EINVAL;
1235     }
1236 
1237     /* Global VIRQ must be allocated on vCPU0 first */
1238     if (virq_is_global(virq->virq) && virq->vcpu != 0) {
1239         return -EINVAL;
1240     }
1241 
1242     if (!valid_vcpu(virq->vcpu)) {
1243         return -ENOENT;
1244     }
1245 
1246     qemu_mutex_lock(&s->port_lock);
1247 
1248     ret = allocate_port(s, virq->vcpu, EVTCHNSTAT_virq, virq->virq,
1249                         &virq->port);
1250     if (!ret) {
1251         ret = kvm_xen_set_vcpu_virq(virq->vcpu, virq->virq, virq->port);
1252         if (ret) {
1253             free_port(s, virq->port);
1254         }
1255     }
1256 
1257     qemu_mutex_unlock(&s->port_lock);
1258 
1259     return ret;
1260 }
1261 
xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq * pirq)1262 int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq)
1263 {
1264     XenEvtchnState *s = xen_evtchn_singleton;
1265     int ret;
1266 
1267     if (!s) {
1268         return -ENOTSUP;
1269     }
1270 
1271     if (pirq->pirq >= s->nr_pirqs) {
1272         return -EINVAL;
1273     }
1274 
1275     BQL_LOCK_GUARD();
1276 
1277     if (s->pirq[pirq->pirq].port) {
1278         return -EBUSY;
1279     }
1280 
1281     qemu_mutex_lock(&s->port_lock);
1282 
1283     ret = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq,
1284                         &pirq->port);
1285     if (ret) {
1286         qemu_mutex_unlock(&s->port_lock);
1287         return ret;
1288     }
1289 
1290     s->pirq[pirq->pirq].port = pirq->port;
1291     trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port);
1292 
1293     qemu_mutex_unlock(&s->port_lock);
1294 
1295     /*
1296      * Need to do the unmask outside port_lock because it may call
1297      * back into the MSI translate function.
1298      */
1299     if (s->pirq[pirq->pirq].gsi == IRQ_MSI_EMU) {
1300         if (s->pirq[pirq->pirq].is_masked) {
1301             PCIDevice *dev = s->pirq[pirq->pirq].dev;
1302             int vector = s->pirq[pirq->pirq].vector;
1303             char *dev_path = qdev_get_dev_path(DEVICE(dev));
1304 
1305             trace_kvm_xen_unmask_pirq(pirq->pirq, dev_path, vector);
1306             g_free(dev_path);
1307 
1308             if (s->pirq[pirq->pirq].is_msix) {
1309                 msix_set_mask(dev, vector, false);
1310             } else {
1311                 msi_set_mask(dev, vector, false, NULL);
1312             }
1313         } else if (s->pirq[pirq->pirq].is_translated) {
1314             /*
1315              * If KVM had attempted to translate this one before, make it try
1316              * again. If we unmasked, then the notifier on the MSI(-X) vector
1317              * will already have had the same effect.
1318              */
1319             kvm_update_msi_routes_all(NULL, true, 0, 0);
1320         }
1321     }
1322 
1323     return ret;
1324 }
1325 
xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi * ipi)1326 int xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi *ipi)
1327 {
1328     XenEvtchnState *s = xen_evtchn_singleton;
1329     int ret;
1330 
1331     if (!s) {
1332         return -ENOTSUP;
1333     }
1334 
1335     if (!valid_vcpu(ipi->vcpu)) {
1336         return -ENOENT;
1337     }
1338 
1339     qemu_mutex_lock(&s->port_lock);
1340 
1341     ret = allocate_port(s, ipi->vcpu, EVTCHNSTAT_ipi, 0, &ipi->port);
1342     if (!ret && s->evtchn_in_kernel) {
1343         assign_kernel_port(EVTCHNSTAT_ipi, ipi->port, ipi->vcpu);
1344     }
1345 
1346     qemu_mutex_unlock(&s->port_lock);
1347 
1348     return ret;
1349 }
1350 
xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain * interdomain)1351 int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
1352 {
1353     XenEvtchnState *s = xen_evtchn_singleton;
1354     int ret;
1355 
1356     if (!s) {
1357         return -ENOTSUP;
1358     }
1359 
1360     if (interdomain->remote_dom != DOMID_QEMU &&
1361         interdomain->remote_dom != DOMID_SELF &&
1362         interdomain->remote_dom != xen_domid) {
1363         return -ESRCH;
1364     }
1365 
1366     if (!valid_port(interdomain->remote_port)) {
1367         return -EINVAL;
1368     }
1369 
1370     qemu_mutex_lock(&s->port_lock);
1371 
1372     /* The newly allocated port starts out as unbound */
1373     ret = allocate_port(s, 0, EVTCHNSTAT_unbound, 0, &interdomain->local_port);
1374 
1375     if (ret) {
1376         goto out;
1377     }
1378 
1379     if (interdomain->remote_dom == DOMID_QEMU) {
1380         struct xenevtchn_handle *xc = s->be_handles[interdomain->remote_port];
1381         XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1382 
1383         if (!xc) {
1384             ret = -ENOENT;
1385             goto out_free_port;
1386         }
1387 
1388         if (xc->guest_port) {
1389             ret = -EBUSY;
1390             goto out_free_port;
1391         }
1392 
1393         assert(xc->be_port == interdomain->remote_port);
1394         xc->guest_port = interdomain->local_port;
1395         if (kvm_xen_has_cap(EVTCHN_SEND)) {
1396             assign_kernel_eventfd(lp->type, xc->guest_port, xc->fd);
1397         }
1398         lp->type = EVTCHNSTAT_interdomain;
1399         lp->u.interdomain.to_qemu = 1;
1400         lp->u.interdomain.port = interdomain->remote_port;
1401         ret = 0;
1402     } else {
1403         /* Loopback */
1404         XenEvtchnPort *rp = &s->port_table[interdomain->remote_port];
1405         XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1406 
1407         /*
1408          * The 'remote' port for loopback must be an unbound port allocated
1409          * for communication with the local domain, and must *not* be the
1410          * port that was just allocated for the local end.
1411          */
1412         if (interdomain->local_port != interdomain->remote_port &&
1413             rp->type == EVTCHNSTAT_unbound && !rp->u.interdomain.to_qemu) {
1414 
1415             rp->type = EVTCHNSTAT_interdomain;
1416             rp->u.interdomain.port = interdomain->local_port;
1417 
1418             lp->type = EVTCHNSTAT_interdomain;
1419             lp->u.interdomain.port = interdomain->remote_port;
1420         } else {
1421             ret = -EINVAL;
1422         }
1423     }
1424 
1425  out_free_port:
1426     if (ret) {
1427         free_port(s, interdomain->local_port);
1428     }
1429  out:
1430     qemu_mutex_unlock(&s->port_lock);
1431 
1432     return ret;
1433 
1434 }
xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound * alloc)1435 int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc)
1436 {
1437     XenEvtchnState *s = xen_evtchn_singleton;
1438     int ret;
1439 
1440     if (!s) {
1441         return -ENOTSUP;
1442     }
1443 
1444     if (alloc->dom != DOMID_SELF && alloc->dom != xen_domid) {
1445         return -ESRCH;
1446     }
1447 
1448     if (alloc->remote_dom != DOMID_QEMU &&
1449         alloc->remote_dom != DOMID_SELF &&
1450         alloc->remote_dom != xen_domid) {
1451         return -EPERM;
1452     }
1453 
1454     qemu_mutex_lock(&s->port_lock);
1455 
1456     ret = allocate_port(s, 0, EVTCHNSTAT_unbound, 0, &alloc->port);
1457 
1458     if (!ret && alloc->remote_dom == DOMID_QEMU) {
1459         XenEvtchnPort *p = &s->port_table[alloc->port];
1460         p->u.interdomain.to_qemu = 1;
1461     }
1462 
1463     qemu_mutex_unlock(&s->port_lock);
1464 
1465     return ret;
1466 }
1467 
xen_evtchn_send_op(struct evtchn_send * send)1468 int xen_evtchn_send_op(struct evtchn_send *send)
1469 {
1470     XenEvtchnState *s = xen_evtchn_singleton;
1471     XenEvtchnPort *p;
1472     int ret = 0;
1473 
1474     if (!s) {
1475         return -ENOTSUP;
1476     }
1477 
1478     if (!valid_port(send->port)) {
1479         return -EINVAL;
1480     }
1481 
1482     qemu_mutex_lock(&s->port_lock);
1483 
1484     p = &s->port_table[send->port];
1485 
1486     switch (p->type) {
1487     case EVTCHNSTAT_interdomain:
1488         if (p->u.interdomain.to_qemu) {
1489             /*
1490              * This is an event from the guest to qemu itself, which is
1491              * serving as the driver domain.
1492              */
1493             uint16_t be_port = p->u.interdomain.port;
1494             struct xenevtchn_handle *xc = s->be_handles[be_port];
1495             if (xc) {
1496                 eventfd_write(xc->fd, 1);
1497                 ret = 0;
1498             } else {
1499                 ret = -ENOENT;
1500             }
1501         } else {
1502             /* Loopback interdomain ports; just a complex IPI */
1503             set_port_pending(s, p->u.interdomain.port);
1504         }
1505         break;
1506 
1507     case EVTCHNSTAT_ipi:
1508         set_port_pending(s, send->port);
1509         break;
1510 
1511     case EVTCHNSTAT_unbound:
1512         /* Xen will silently drop these */
1513         break;
1514 
1515     default:
1516         ret = -EINVAL;
1517         break;
1518     }
1519 
1520     qemu_mutex_unlock(&s->port_lock);
1521 
1522     return ret;
1523 }
1524 
xen_evtchn_set_port(uint16_t port)1525 int xen_evtchn_set_port(uint16_t port)
1526 {
1527     XenEvtchnState *s = xen_evtchn_singleton;
1528     XenEvtchnPort *p;
1529     int ret = -EINVAL;
1530 
1531     if (!s) {
1532         return -ENOTSUP;
1533     }
1534 
1535     if (!valid_port(port)) {
1536         return -EINVAL;
1537     }
1538 
1539     qemu_mutex_lock(&s->port_lock);
1540 
1541     p = &s->port_table[port];
1542 
1543     /* QEMU has no business sending to anything but these */
1544     if (p->type == EVTCHNSTAT_virq ||
1545         (p->type == EVTCHNSTAT_interdomain && p->u.interdomain.to_qemu)) {
1546         set_port_pending(s, port);
1547         ret = 0;
1548     }
1549 
1550     qemu_mutex_unlock(&s->port_lock);
1551 
1552     return ret;
1553 }
1554 
/*
 * Pick a free PIRQ number, mark it in use and record @gsi for it.
 *
 * For MAP_PIRQ_TYPE_GSI requests, PIRQs 16..IOAPIC_NUM_PINS-1 are tried
 * first, counting up. All other requests, and GSI requests that found
 * nothing in that range, search downwards from nr_pirqs - 1 and stop
 * before reaching IOAPIC_NUM_PINS.
 *
 * When @gsi is non-negative the reverse mapping gsi_pirq[gsi] is set too.
 *
 * Returns the allocated PIRQ number, or -ENOSPC if none is free.
 */
static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
{
    uint16_t pirq;

    /*
     * Preserve the allocation strategy that Xen has. It looks like
     * we *never* give out PIRQ 0-15, we give out 16-nr_irqs_gsi only
     * to GSIs (counting up from 16), and then we count backwards from
     * the top for MSIs or when the GSI space is exhausted.
     */
    if (type == MAP_PIRQ_TYPE_GSI) {
        for (pirq = 16 ; pirq < IOAPIC_NUM_PINS; pirq++) {
            if (pirq_inuse(s, pirq)) {
                continue;
            }

            /* Found it */
            goto found;
        }
    }
    for (pirq = s->nr_pirqs - 1; pirq >= IOAPIC_NUM_PINS; pirq--) {
        /* Skip whole words at a time when they're full */
        if (pirq_inuse_word(s, pirq) == UINT64_MAX) {
            pirq &= ~63ULL;
            /*
             * If the lowest word is full, the masked value is 0 and the
             * loop's decrement would wrap the uint16_t to 65535, which
             * passes the loop condition and indexes the bitmap far out of
             * range. Nothing below IOAPIC_NUM_PINS is handed out by this
             * loop anyway, so the search is over.
             */
            if (pirq < IOAPIC_NUM_PINS) {
                break;
            }
            continue;
        }
        if (pirq_inuse(s, pirq)) {
            continue;
        }

        goto found;
    }
    return -ENOSPC;

 found:
    pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
    if (gsi >= 0) {
        assert(gsi < IOAPIC_NUM_PINS);
        s->gsi_pirq[gsi] = pirq;
    }
    s->pirq[pirq].gsi = gsi;
    return pirq;
}
1598 
xen_evtchn_set_gsi(int gsi,int level)1599 bool xen_evtchn_set_gsi(int gsi, int level)
1600 {
1601     XenEvtchnState *s = xen_evtchn_singleton;
1602     int pirq;
1603 
1604     assert(bql_locked());
1605 
1606     if (!s || gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1607         return false;
1608     }
1609 
1610     /*
1611      * Check that that it *isn't* the event channel GSI, and thus
1612      * that we are not recursing and it's safe to take s->port_lock.
1613      *
1614      * Locking aside, it's perfectly sane to bail out early for that
1615      * special case, as it would make no sense for the event channel
1616      * GSI to be routed back to event channels, when the delivery
1617      * method is to raise the GSI... that recursion wouldn't *just*
1618      * be a locking issue.
1619      */
1620     if (gsi && gsi == s->callback_gsi) {
1621         return false;
1622     }
1623 
1624     QEMU_LOCK_GUARD(&s->port_lock);
1625 
1626     pirq = s->gsi_pirq[gsi];
1627     if (!pirq) {
1628         return false;
1629     }
1630 
1631     if (level) {
1632         int port = s->pirq[pirq].port;
1633 
1634         s->pirq_gsi_set |= (1U << gsi);
1635         if (port) {
1636             set_port_pending(s, port);
1637         }
1638     } else {
1639         s->pirq_gsi_set &= ~(1U << gsi);
1640     }
1641     return true;
1642 }
1643 
/*
 * Decode the PIRQ number carried by an MSI address/data pair.
 *
 * The low 8 bits of the PIRQ come from address bits 12-19; the remaining
 * bits come from bits 8-31 of the upper address dword. A non-zero vector
 * (low 8 bits of @data) means the message does not target a PIRQ, in which
 * case 0 is returned.
 */
static uint32_t msi_pirq_target(uint64_t addr, uint32_t data)
{
    const uint32_t low_bits = (uint32_t)((addr >> 12) & 0xff);
    const uint32_t high_bits = (uint32_t)((addr >> 32) & 0xffffff00);

    /* The vector (in low 8 bits of data) must be zero */
    return (data & 0xff) ? 0 : (high_bits | low_bits);
}
1656 
/*
 * Reset every PIRQ entry that refers to @dev (and to @vector, unless
 * @vector is -1) back to the unbound state.
 *
 * @except_pirq, when non-zero, names one PIRQ that is left untouched.
 */
static void do_remove_pci_vector(XenEvtchnState *s, PCIDevice *dev, int vector,
                                 int except_pirq)
{
    uint32_t pirq;

    for (pirq = 0; pirq < s->nr_pirqs; pirq++) {
        /*
         * We could be cleverer here, but it isn't really a fast path, and
         * this trivial optimisation is enough to let us skip the big gap
         * in the middle a bit quicker (in terms of both loop iterations,
         * and cache lines).
         *
         * Advance by 63 only: the loop increment supplies the last step,
         * so the next iteration starts on the first PIRQ of the following
         * word. Adding 64 here would skip that PIRQ and with it the
         * empty-word test for the whole following word.
         */
        if (!(pirq & 63) && !(pirq_inuse_word(s, pirq))) {
            pirq += 63;
            continue;
        }
        if (except_pirq && pirq == except_pirq) {
            continue;
        }
        if (s->pirq[pirq].dev != dev) {
            continue;
        }
        if (vector != -1 && s->pirq[pirq].vector != vector) {
            continue;
        }

        /* It could theoretically be bound to a port already, but that is OK. */
        /*
         * NOTE(review): this assignment is a no-op because the entry was
         * just checked to match @dev; possibly NULL was intended. Confirm.
         */
        s->pirq[pirq].dev = dev;
        s->pirq[pirq].gsi = IRQ_UNBOUND;
        s->pirq[pirq].is_msix = false;
        s->pirq[pirq].vector = 0;
        s->pirq[pirq].is_masked = false;
        s->pirq[pirq].is_translated = false;
    }
}
1692 
/*
 * Forget every PIRQ association of @dev, for all of its vectors. Does
 * nothing when there is no event channel device.
 */
void xen_evtchn_remove_pci_device(PCIDevice *dev)
{
    XenEvtchnState *s = xen_evtchn_singleton;

    if (s != NULL) {
        QEMU_LOCK_GUARD(&s->port_lock);
        do_remove_pci_vector(s, dev, -1, 0);
    }
}
1704 
/*
 * Record what an MSI(-X) vector of @dev has been programmed with.
 *
 * If the address/data pair names an allocated PIRQ that is currently
 * unbound or already in emulated-MSI state, that PIRQ is associated with
 * {@dev, @vector}. Any other PIRQ still associated with the same
 * {@dev, @vector} is reset. The caller must hold the BQL (asserted).
 */
void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
                          uint64_t addr, uint32_t data, bool is_masked)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    uint32_t target;

    if (s == NULL) {
        return;
    }

    assert(bql_locked());

    target = msi_pirq_target(addr, data);

    /*
     * The PIRQ# must be sane, and there must be an allocated PIRQ in
     * IRQ_UNBOUND or IRQ_MSI_EMU state to match it.
     */
    if (target && target < s->nr_pirqs && pirq_inuse(s, target) &&
        (s->pirq[target].gsi == IRQ_UNBOUND ||
         s->pirq[target].gsi == IRQ_MSI_EMU)) {
        s->pirq[target].dev = dev;
        s->pirq[target].gsi = IRQ_MSI_EMU;
        s->pirq[target].is_msix = is_msix;
        s->pirq[target].vector = vector;
        s->pirq[target].is_masked = is_masked;
    } else {
        target = 0;
    }

    /* Remove any (other) entries for this {device, vector} */
    do_remove_pci_vector(s, dev, vector, target);
}
1740 
xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry * route,uint64_t address,uint32_t data)1741 int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
1742                                   uint64_t address, uint32_t data)
1743 {
1744     XenEvtchnState *s = xen_evtchn_singleton;
1745     uint32_t pirq, port;
1746     CPUState *cpu;
1747 
1748     if (!s) {
1749         return 1; /* Not a PIRQ */
1750     }
1751 
1752     assert(bql_locked());
1753 
1754     pirq = msi_pirq_target(address, data);
1755     if (!pirq || pirq >= s->nr_pirqs) {
1756         return 1; /* Not a PIRQ */
1757     }
1758 
1759     if (!kvm_xen_has_cap(EVTCHN_2LEVEL)) {
1760         return -ENOTSUP;
1761     }
1762 
1763     if (s->pirq[pirq].gsi != IRQ_MSI_EMU) {
1764         return -EINVAL;
1765     }
1766 
1767     /* Remember that KVM tried to translate this. It might need to try again. */
1768     s->pirq[pirq].is_translated = true;
1769 
1770     QEMU_LOCK_GUARD(&s->port_lock);
1771 
1772     port = s->pirq[pirq].port;
1773     if (!valid_port(port)) {
1774         return -EINVAL;
1775     }
1776 
1777     cpu = qemu_get_cpu(s->port_table[port].vcpu);
1778     if (!cpu) {
1779         return -EINVAL;
1780     }
1781 
1782     route->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
1783     route->u.xen_evtchn.port = port;
1784     route->u.xen_evtchn.vcpu = kvm_arch_vcpu_id(cpu);
1785     route->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
1786 
1787     return 0; /* Handled */
1788 }
1789 
xen_evtchn_deliver_pirq_msi(uint64_t address,uint32_t data)1790 bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
1791 {
1792     XenEvtchnState *s = xen_evtchn_singleton;
1793     uint32_t pirq, port;
1794 
1795     if (!s) {
1796         return false;
1797     }
1798 
1799     assert(bql_locked());
1800 
1801     pirq = msi_pirq_target(address, data);
1802     if (!pirq || pirq >= s->nr_pirqs) {
1803         return false;
1804     }
1805 
1806     QEMU_LOCK_GUARD(&s->port_lock);
1807 
1808     port = s->pirq[pirq].port;
1809     if (!valid_port(port)) {
1810         return false;
1811     }
1812 
1813     set_port_pending(s, port);
1814     return true;
1815 }
1816 
xen_physdev_map_pirq(struct physdev_map_pirq * map)1817 int xen_physdev_map_pirq(struct physdev_map_pirq *map)
1818 {
1819     XenEvtchnState *s = xen_evtchn_singleton;
1820     int pirq = map->pirq;
1821     int gsi = map->index;
1822 
1823     if (!s) {
1824         return -ENOTSUP;
1825     }
1826 
1827     BQL_LOCK_GUARD();
1828     QEMU_LOCK_GUARD(&s->port_lock);
1829 
1830     if (map->domid != DOMID_SELF && map->domid != xen_domid) {
1831         return -EPERM;
1832     }
1833     if (map->type != MAP_PIRQ_TYPE_GSI) {
1834         return -EINVAL;
1835     }
1836     if (gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1837         return -EINVAL;
1838     }
1839 
1840     if (pirq < 0) {
1841         pirq = allocate_pirq(s, map->type, gsi);
1842         if (pirq < 0) {
1843             return pirq;
1844         }
1845         map->pirq = pirq;
1846     } else if (pirq > s->nr_pirqs) {
1847         return -EINVAL;
1848     } else {
1849         /*
1850          * User specified a valid-looking PIRQ#. Allow it if it is
1851          * allocated and not yet bound, or if it is unallocated
1852          */
1853         if (pirq_inuse(s, pirq)) {
1854             if (s->pirq[pirq].gsi != IRQ_UNBOUND) {
1855                 return -EBUSY;
1856             }
1857         } else {
1858             /* If it was unused, mark it used now. */
1859             pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1860         }
1861         /* Set the mapping in both directions. */
1862         s->pirq[pirq].gsi = gsi;
1863         s->gsi_pirq[gsi] = pirq;
1864     }
1865 
1866     trace_kvm_xen_map_pirq(pirq, gsi);
1867     return 0;
1868 }
1869 
xen_physdev_unmap_pirq(struct physdev_unmap_pirq * unmap)1870 int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
1871 {
1872     XenEvtchnState *s = xen_evtchn_singleton;
1873     int pirq = unmap->pirq;
1874     int gsi;
1875 
1876     if (!s) {
1877         return -ENOTSUP;
1878     }
1879 
1880     if (unmap->domid != DOMID_SELF && unmap->domid != xen_domid) {
1881         return -EPERM;
1882     }
1883     if (pirq < 0 || pirq >= s->nr_pirqs) {
1884         return -EINVAL;
1885     }
1886 
1887     BQL_LOCK_GUARD();
1888     qemu_mutex_lock(&s->port_lock);
1889 
1890     if (!pirq_inuse(s, pirq)) {
1891         qemu_mutex_unlock(&s->port_lock);
1892         return -ENOENT;
1893     }
1894 
1895     gsi = s->pirq[pirq].gsi;
1896 
1897     /* We can only unmap GSI PIRQs */
1898     if (gsi < 0) {
1899         qemu_mutex_unlock(&s->port_lock);
1900         return -EINVAL;
1901     }
1902 
1903     s->gsi_pirq[gsi] = 0;
1904     s->pirq[pirq].gsi = IRQ_UNBOUND; /* Doesn't actually matter because: */
1905     pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);
1906 
1907     trace_kvm_xen_unmap_pirq(pirq, gsi);
1908     qemu_mutex_unlock(&s->port_lock);
1909 
1910     if (gsi == IRQ_MSI_EMU) {
1911         kvm_update_msi_routes_all(NULL, true, 0, 0);
1912     }
1913 
1914     return 0;
1915 }
1916 
xen_physdev_eoi_pirq(struct physdev_eoi * eoi)1917 int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
1918 {
1919     XenEvtchnState *s = xen_evtchn_singleton;
1920     int pirq = eoi->irq;
1921     int gsi;
1922 
1923     if (!s) {
1924         return -ENOTSUP;
1925     }
1926 
1927     BQL_LOCK_GUARD();
1928     QEMU_LOCK_GUARD(&s->port_lock);
1929 
1930     if (!pirq_inuse(s, pirq)) {
1931         return -ENOENT;
1932     }
1933 
1934     gsi = s->pirq[pirq].gsi;
1935     if (gsi < 0) {
1936         return -EINVAL;
1937     }
1938 
1939     /* Reassert a level IRQ if needed */
1940     if (s->pirq_gsi_set & (1U << gsi)) {
1941         int port = s->pirq[pirq].port;
1942         if (port) {
1943             set_port_pending(s, port);
1944         }
1945     }
1946 
1947     return 0;
1948 }
1949 
xen_physdev_query_pirq(struct physdev_irq_status_query * query)1950 int xen_physdev_query_pirq(struct physdev_irq_status_query *query)
1951 {
1952     XenEvtchnState *s = xen_evtchn_singleton;
1953     int pirq = query->irq;
1954 
1955     if (!s) {
1956         return -ENOTSUP;
1957     }
1958 
1959     BQL_LOCK_GUARD();
1960     QEMU_LOCK_GUARD(&s->port_lock);
1961 
1962     if (!pirq_inuse(s, pirq)) {
1963         return -ENOENT;
1964     }
1965 
1966     if (s->pirq[pirq].gsi >= 0) {
1967         query->flags = XENIRQSTAT_needs_eoi;
1968     } else {
1969         query->flags = 0;
1970     }
1971 
1972     return 0;
1973 }
1974 
xen_physdev_get_free_pirq(struct physdev_get_free_pirq * get)1975 int xen_physdev_get_free_pirq(struct physdev_get_free_pirq *get)
1976 {
1977     XenEvtchnState *s = xen_evtchn_singleton;
1978     int pirq;
1979 
1980     if (!s) {
1981         return -ENOTSUP;
1982     }
1983 
1984     QEMU_LOCK_GUARD(&s->port_lock);
1985 
1986     pirq = allocate_pirq(s, get->type, IRQ_UNBOUND);
1987     if (pirq < 0) {
1988         return pirq;
1989     }
1990 
1991     get->pirq = pirq;
1992     trace_kvm_xen_get_free_pirq(pirq, get->type);
1993     return 0;
1994 }
1995 
xen_be_evtchn_open(void)1996 struct xenevtchn_handle *xen_be_evtchn_open(void)
1997 {
1998     struct xenevtchn_handle *xc = g_new0(struct xenevtchn_handle, 1);
1999 
2000     xc->fd = eventfd(0, EFD_CLOEXEC);
2001     if (xc->fd < 0) {
2002         free(xc);
2003         return NULL;
2004     }
2005 
2006     return xc;
2007 }
2008 
/*
 * Claim the lowest-numbered free slot in s->be_handles[] for @xc.
 *
 * Slot 0 is never handed out. On success the slot is pointed at @xc,
 * xc->be_port is set to the slot number, and that number is returned.
 * Returns 0 if every slot is taken.
 */
static int find_be_port(XenEvtchnState *s, struct xenevtchn_handle *xc)
{
    int slot = 1;

    while (slot < EVTCHN_2L_NR_CHANNELS) {
        if (s->be_handles[slot] == NULL) {
            s->be_handles[slot] = xc;
            xc->be_port = slot;
            return slot;
        }
        slot++;
    }

    return 0;
}
2022 
/*
 * Bind backend handle @xc to the guest's event channel @guest_port.
 *
 * The guest port must be either unbound, or already interdomain (the
 * latter allows rebinding after migration, reusing the recorded backend
 * port number when that slot is still free).
 *
 * Returns the backend port number on success, or a negative errno:
 * -ENOTSUP (emulation not set up), -EFAULT (@xc is NULL), -ESRCH (@domid
 * is not xen_domid), -EINVAL (bad port or port in the wrong state),
 * -ENOSPC (no free backend port).
 */
int xen_be_evtchn_bind_interdomain(struct xenevtchn_handle *xc, uint32_t domid,
                                   evtchn_port_t guest_port)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    XenEvtchnPort *gp;
    uint16_t be_port;

    if (!s) {
        return -ENOTSUP;
    }

    if (!xc) {
        return -EFAULT;
    }

    if (domid != xen_domid) {
        return -ESRCH;
    }

    if (!valid_port(guest_port)) {
        return -EINVAL;
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    /* The guest has to have an unbound port waiting for us to bind */
    gp = &s->port_table[guest_port];

    if (gp->type != EVTCHNSTAT_interdomain &&
        gp->type != EVTCHNSTAT_unbound) {
        return -EINVAL;
    }

    if (gp->type == EVTCHNSTAT_interdomain) {
        /* Allow rebinding after migration, preserve port # if possible */
        be_port = gp->u.interdomain.port;
        assert(be_port != 0);
        if (!s->be_handles[be_port]) {
            s->be_handles[be_port] = xc;
            xc->guest_port = guest_port;
            xc->be_port = be_port;
            if (kvm_xen_has_cap(EVTCHN_SEND)) {
                assign_kernel_eventfd(gp->type, guest_port, xc->fd);
            }
            return be_port;
        }
        /* Recorded slot is taken: allocate a fresh one as for unbound */
    }

    be_port = find_be_port(s, xc);
    if (!be_port) {
        return -ENOSPC;
    }

    gp->type = EVTCHNSTAT_interdomain;
    gp->u.interdomain.to_qemu = 1;
    gp->u.interdomain.port = be_port;
    xc->guest_port = guest_port;
    if (kvm_xen_has_cap(EVTCHN_SEND)) {
        assign_kernel_eventfd(gp->type, guest_port, xc->fd);
    }

    return be_port;
}
2095 
/*
 * Detach backend handle @xc from the guest port it is bound to, if any,
 * and release its slot in s->be_handles[].
 *
 * @port may be 0 to mean "whatever this handle is bound to"; a non-zero
 * @port must match xc->be_port.
 *
 * Returns 0 on success, -ENOTSUP if emulation is not set up, -EFAULT if
 * @xc is NULL, or -EINVAL if @port does not match.
 */
int xen_be_evtchn_unbind(struct xenevtchn_handle *xc, evtchn_port_t port)
{
    XenEvtchnState *s = xen_evtchn_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    if (!xc) {
        return -EFAULT;
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    if (port && port != xc->be_port) {
        return -EINVAL;
    }

    if (xc->guest_port) {
        XenEvtchnPort *gp = &s->port_table[xc->guest_port];

        /* This should never *not* be true */
        if (gp->type == EVTCHNSTAT_interdomain) {
            gp->type = EVTCHNSTAT_unbound;
            gp->u.interdomain.port = 0;
        }

        if (kvm_xen_has_cap(EVTCHN_SEND)) {
            deassign_kernel_port(xc->guest_port);
        }
        xc->guest_port = 0;
    }

    s->be_handles[xc->be_port] = NULL;
    xc->be_port = 0;

    return 0;
}
2138 
xen_be_evtchn_close(struct xenevtchn_handle * xc)2139 int xen_be_evtchn_close(struct xenevtchn_handle *xc)
2140 {
2141     if (!xc) {
2142         return -EFAULT;
2143     }
2144 
2145     xen_be_evtchn_unbind(xc, 0);
2146 
2147     close(xc->fd);
2148     free(xc);
2149     return 0;
2150 }
2151 
xen_be_evtchn_fd(struct xenevtchn_handle * xc)2152 int xen_be_evtchn_fd(struct xenevtchn_handle *xc)
2153 {
2154     if (!xc) {
2155         return -1;
2156     }
2157     return xc->fd;
2158 }
2159 
/*
 * Mark the guest port bound to backend handle @xc as pending.
 *
 * @port is not consulted; the guest port recorded in the handle is used.
 *
 * Returns 0 on success, -ENOTSUP if emulation is not set up, -EFAULT if
 * @xc is NULL, or -ENOTCONN if the handle is not bound to a guest port.
 */
int xen_be_evtchn_notify(struct xenevtchn_handle *xc, evtchn_port_t port)
{
    XenEvtchnState *s = xen_evtchn_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    if (!xc) {
        return -EFAULT;
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    if (!xc->guest_port) {
        return -ENOTCONN;
    }

    set_port_pending(s, xc->guest_port);
    return 0;
}
2186 
xen_be_evtchn_pending(struct xenevtchn_handle * xc)2187 int xen_be_evtchn_pending(struct xenevtchn_handle *xc)
2188 {
2189     uint64_t val;
2190 
2191     if (!xc) {
2192         return -EFAULT;
2193     }
2194 
2195     if (!xc->be_port) {
2196         return 0;
2197     }
2198 
2199     if (eventfd_read(xc->fd, &val)) {
2200         return -errno;
2201     }
2202 
2203     return val ? xc->be_port : 0;
2204 }
2205 
/*
 * Unmask backend port @port on handle @xc.
 *
 * This only validates its arguments: the event was already consumed in
 * xen_be_evtchn_pending(), so there is nothing left to unmask.
 *
 * Returns 0 on success, -EFAULT if @xc is NULL, or -EINVAL if @port is
 * not the handle's backend port.
 */
int xen_be_evtchn_unmask(struct xenevtchn_handle *xc, evtchn_port_t port)
{
    if (!xc) {
        return -EFAULT;
    }

    return (xc->be_port == port) ? 0 : -EINVAL;
}
2222 
xen_be_evtchn_get_guest_port(struct xenevtchn_handle * xc)2223 int xen_be_evtchn_get_guest_port(struct xenevtchn_handle *xc)
2224 {
2225     return xc->guest_port;
2226 }
2227 
qmp_xen_event_list(Error ** errp)2228 EvtchnInfoList *qmp_xen_event_list(Error **errp)
2229 {
2230     XenEvtchnState *s = xen_evtchn_singleton;
2231     EvtchnInfoList *head = NULL, **tail = &head;
2232     void *shinfo, *pending, *mask;
2233     int i;
2234 
2235     if (!s) {
2236         error_setg(errp, "Xen event channel emulation not enabled");
2237         return NULL;
2238     }
2239 
2240     shinfo = xen_overlay_get_shinfo_ptr();
2241     if (!shinfo) {
2242         error_setg(errp, "Xen shared info page not allocated");
2243         return NULL;
2244     }
2245 
2246     if (xen_is_long_mode()) {
2247         pending = shinfo + offsetof(struct shared_info, evtchn_pending);
2248         mask = shinfo + offsetof(struct shared_info, evtchn_mask);
2249     } else {
2250         pending = shinfo + offsetof(struct compat_shared_info, evtchn_pending);
2251         mask = shinfo + offsetof(struct compat_shared_info, evtchn_mask);
2252     }
2253 
2254     QEMU_LOCK_GUARD(&s->port_lock);
2255 
2256     for (i = 0; i < s->nr_ports; i++) {
2257         XenEvtchnPort *p = &s->port_table[i];
2258         EvtchnInfo *info;
2259 
2260         if (p->type == EVTCHNSTAT_closed) {
2261             continue;
2262         }
2263 
2264         info = g_new0(EvtchnInfo, 1);
2265 
2266         info->port = i;
2267         qemu_build_assert(EVTCHN_PORT_TYPE_CLOSED == EVTCHNSTAT_closed);
2268         qemu_build_assert(EVTCHN_PORT_TYPE_UNBOUND == EVTCHNSTAT_unbound);
2269         qemu_build_assert(EVTCHN_PORT_TYPE_INTERDOMAIN == EVTCHNSTAT_interdomain);
2270         qemu_build_assert(EVTCHN_PORT_TYPE_PIRQ == EVTCHNSTAT_pirq);
2271         qemu_build_assert(EVTCHN_PORT_TYPE_VIRQ == EVTCHNSTAT_virq);
2272         qemu_build_assert(EVTCHN_PORT_TYPE_IPI == EVTCHNSTAT_ipi);
2273 
2274         info->type = p->type;
2275         if (p->type == EVTCHNSTAT_interdomain) {
2276             info->remote_domain = g_strdup(p->u.interdomain.to_qemu ?
2277                                            "qemu" : "loopback");
2278             info->target = p->u.interdomain.port;
2279         } else {
2280             info->target = p->u.val; /* pirq# or virq# */
2281         }
2282         info->vcpu = p->vcpu;
2283         info->pending = test_bit(i, pending);
2284         info->masked = test_bit(i, mask);
2285 
2286         QAPI_LIST_APPEND(tail, info);
2287     }
2288 
2289     return head;
2290 }
2291 
qmp_xen_event_inject(uint32_t port,Error ** errp)2292 void qmp_xen_event_inject(uint32_t port, Error **errp)
2293 {
2294     XenEvtchnState *s = xen_evtchn_singleton;
2295 
2296     if (!s) {
2297         error_setg(errp, "Xen event channel emulation not enabled");
2298         return;
2299     }
2300 
2301     if (!valid_port(port)) {
2302         error_setg(errp, "Invalid port %u", port);
2303     }
2304 
2305     QEMU_LOCK_GUARD(&s->port_lock);
2306 
2307     if (set_port_pending(s, port)) {
2308         error_setg(errp, "Failed to set port %u", port);
2309         return;
2310     }
2311 }
2312 
/*
 * HMP command: print one line per event channel port returned by
 * qmp_xen_event_list().
 *
 * Each line shows the port number, vcpu and port type. For every type
 * except IPI it also shows the target in parentheses, prefixed by the
 * remote domain when one is set. PENDING and MASKED are appended when the
 * corresponding flag is set. @qdict is not used.
 */
void hmp_xen_event_list(Monitor *mon, const QDict *qdict)
{
    Error *err = NULL;
    EvtchnInfoList *info_list = qmp_xen_event_list(&err);
    EvtchnInfoList *node;

    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    node = info_list;
    while (node) {
        EvtchnInfo *info = node->value;

        monitor_printf(mon, "port %4u: vcpu: %d %s", info->port, info->vcpu,
                       EvtchnPortType_str(info->type));

        if (info->type != EVTCHN_PORT_TYPE_IPI) {
            monitor_printf(mon,  "(");
            if (info->remote_domain) {
                monitor_printf(mon, "%s:", info->remote_domain);
            }
            monitor_printf(mon, "%d)", info->target);
        }

        if (info->pending) {
            monitor_printf(mon, " PENDING");
        }
        if (info->masked) {
            monitor_printf(mon, " MASKED");
        }
        monitor_printf(mon, "\n");

        node = node->next;
    }

    qapi_free_EvtchnInfoList(info_list);
}
2347 
/*
 * HMP command: inject an event on the port given by the "port" entry of
 * @qdict, via qmp_xen_event_inject(), and report the outcome on @mon.
 */
void hmp_xen_event_inject(Monitor *mon, const QDict *qdict)
{
    Error *err = NULL;
    int port = qdict_get_int(qdict, "port");

    qmp_xen_event_inject(port, &err);
    if (!err) {
        monitor_printf(mon, "Delivered port %d\n", port);
        return;
    }

    hmp_handle_error(mon, err);
}
2360 
2361