xref: /qemu/hw/i386/kvm/xen_evtchn.c (revision e995d5cc)
1 /*
2  * QEMU Xen emulation: Event channel support
3  *
4  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5  *
6  * Authors: David Woodhouse <dwmw2@infradead.org>
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
9  * See the COPYING file in the top-level directory.
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/host-utils.h"
14 #include "qemu/module.h"
15 #include "qemu/lockable.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/log.h"
18 #include "monitor/monitor.h"
19 #include "monitor/hmp.h"
20 #include "qapi/error.h"
21 #include "qapi/qapi-commands-misc-target.h"
22 #include "qapi/qmp/qdict.h"
23 #include "qom/object.h"
24 #include "exec/target_page.h"
25 #include "exec/address-spaces.h"
26 #include "migration/vmstate.h"
27 #include "trace.h"
28 
29 #include "hw/sysbus.h"
30 #include "hw/xen/xen.h"
31 #include "hw/i386/x86.h"
32 #include "hw/i386/pc.h"
33 #include "hw/pci/pci.h"
34 #include "hw/pci/msi.h"
35 #include "hw/pci/msix.h"
36 #include "hw/irq.h"
37 
38 #include "xen_evtchn.h"
39 #include "xen_overlay.h"
40 #include "xen_xenstore.h"
41 
42 #include "sysemu/kvm.h"
43 #include "sysemu/kvm_xen.h"
44 #include <linux/kvm.h>
45 #include <sys/eventfd.h>
46 
47 #include "hw/xen/interface/memory.h"
48 #include "hw/xen/interface/hvm/params.h"
49 
50 /* XX: For kvm_update_msi_routes_all() */
51 #include "target/i386/kvm/kvm_i386.h"
52 
53 #define TYPE_XEN_EVTCHN "xen-evtchn"
54 OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN)
55 
56 typedef struct XenEvtchnPort {
57     uint32_t vcpu;      /* Xen/ACPI vcpu_id */
58     uint16_t type;      /* EVTCHNSTAT_xxxx */
59     uint16_t type_val;  /* pirq# / virq# / remote port according to type */
60 } XenEvtchnPort;
61 
62 /* 32-bit compatibility definitions, also used natively in 32-bit build */
63 struct compat_arch_vcpu_info {
64     unsigned int cr2;
65     unsigned int pad[5];
66 };
67 
68 struct compat_vcpu_info {
69     uint8_t evtchn_upcall_pending;
70     uint8_t evtchn_upcall_mask;
71     uint16_t pad;
72     uint32_t evtchn_pending_sel;
73     struct compat_arch_vcpu_info arch;
74     struct vcpu_time_info time;
75 }; /* 64 bytes (x86) */
76 
77 struct compat_arch_shared_info {
78     unsigned int max_pfn;
79     unsigned int pfn_to_mfn_frame_list_list;
80     unsigned int nmi_reason;
81     unsigned int p2m_cr3;
82     unsigned int p2m_vaddr;
83     unsigned int p2m_generation;
84     uint32_t wc_sec_hi;
85 };
86 
87 struct compat_shared_info {
88     struct compat_vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
89     uint32_t evtchn_pending[32];
90     uint32_t evtchn_mask[32];
91     uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
92     uint32_t wc_sec;
93     uint32_t wc_nsec;
94     struct compat_arch_shared_info arch;
95 };
96 
97 #define COMPAT_EVTCHN_2L_NR_CHANNELS            1024
98 
99 /* Local private implementation of struct xenevtchn_handle */
100 struct xenevtchn_handle {
101     evtchn_port_t be_port;
102     evtchn_port_t guest_port; /* Or zero for unbound */
103     int fd;
104 };
105 
106 /*
107  * For unbound/interdomain ports there are only two possible remote
108  * domains; self and QEMU. Use a single high bit in type_val for that,
109  * and the low bits for the remote port number (or 0 for unbound).
110  */
111 #define PORT_INFO_TYPEVAL_REMOTE_QEMU           0x8000
112 #define PORT_INFO_TYPEVAL_REMOTE_PORT_MASK      0x7FFF
113 
114 /*
115  * These 'emuirq' values are used by Xen in the LM stream... and yes, I am
116  * insane enough to think about guest-transparent live migration from actual
117  * Xen to QEMU, and ensuring that we can convert/consume the stream.
118  */
119 #define IRQ_UNBOUND -1
120 #define IRQ_PT -2
121 #define IRQ_MSI_EMU -3
122 
123 
124 struct pirq_info {
125     int gsi;
126     uint16_t port;
127     PCIDevice *dev;
128     int vector;
129     bool is_msix;
130     bool is_masked;
131     bool is_translated;
132 };
133 
134 struct XenEvtchnState {
135     /*< private >*/
136     SysBusDevice busdev;
137     /*< public >*/
138 
139     uint64_t callback_param;
140     bool evtchn_in_kernel;
141     uint32_t callback_gsi;
142 
143     QEMUBH *gsi_bh;
144 
145     QemuMutex port_lock;
146     uint32_t nr_ports;
147     XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS];
148     qemu_irq gsis[IOAPIC_NUM_PINS];
149 
150     struct xenevtchn_handle *be_handles[EVTCHN_2L_NR_CHANNELS];
151 
152     uint32_t nr_pirqs;
153 
154     /* Bitmap of allocated PIRQs (serialized) */
155     uint16_t nr_pirq_inuse_words;
156     uint64_t *pirq_inuse_bitmap;
157 
158     /* GSI → PIRQ mapping (serialized) */
159     uint16_t gsi_pirq[IOAPIC_NUM_PINS];
160 
161     /* Per-GSI assertion state (serialized) */
162     uint32_t pirq_gsi_set;
163 
164     /* Per-PIRQ information (rebuilt on migration, protected by BQL) */
165     struct pirq_info *pirq;
166 };
167 
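/*
 * The PIRQ in-use bitmap is an array of 64-bit words: pirq_inuse_word()
 * selects the word for a given PIRQ# and pirq_inuse_bit() the bit within
 * it, so e.g. PIRQ 70 lives in word 1, bit 6.
 */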
168 #define pirq_inuse_word(s, pirq) (s->pirq_inuse_bitmap[((pirq) / 64)])
169 #define pirq_inuse_bit(pirq) (1ULL << ((pirq) & 63))
170 
171 #define pirq_inuse(s, pirq) (pirq_inuse_word(s, pirq) & pirq_inuse_bit(pirq))
172 
173 struct XenEvtchnState *xen_evtchn_singleton;
174 
175 /* Top bits of callback_param are the type (HVM_PARAM_CALLBACK_TYPE_xxx) */
176 #define CALLBACK_VIA_TYPE_SHIFT 56
177 
178 static void unbind_backend_ports(XenEvtchnState *s);
179 
180 static int xen_evtchn_pre_load(void *opaque)
181 {
182     XenEvtchnState *s = opaque;
183 
184     /* Unbind all the backend-side ports; they need to rebind */
185     unbind_backend_ports(s);
186 
187     /* It'll be leaked otherwise. */
188     g_free(s->pirq_inuse_bitmap);
189     s->pirq_inuse_bitmap = NULL;
190 
191     return 0;
192 }
193 
194 static int xen_evtchn_post_load(void *opaque, int version_id)
195 {
196     XenEvtchnState *s = opaque;
197     uint32_t i;
198 
199     if (s->callback_param) {
200         xen_evtchn_set_callback_param(s->callback_param);
201     }
202 
203     /* Rebuild s->pirq[].port mapping */
204     for (i = 0; i < s->nr_ports; i++) {
205         XenEvtchnPort *p = &s->port_table[i];
206 
207         if (p->type == EVTCHNSTAT_pirq) {
208             assert(p->type_val);
209             assert(p->type_val < s->nr_pirqs);
210 
211             /*
212              * Set the gsi to IRQ_UNBOUND; it may be changed to an actual
213              * GSI# below, or to IRQ_MSI_EMU when the MSI table snooping
214              * catches up with it.
215              */
216             s->pirq[p->type_val].gsi = IRQ_UNBOUND;
217             s->pirq[p->type_val].port = i;
218         }
219     }
220     /* Rebuild s->pirq[].gsi mapping */
221     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
222         if (s->gsi_pirq[i]) {
223             s->pirq[s->gsi_pirq[i]].gsi = i;
224         }
225     }
226     return 0;
227 }
228 
229 static bool xen_evtchn_is_needed(void *opaque)
230 {
231     return xen_mode == XEN_EMULATE;
232 }
233 
234 static const VMStateDescription xen_evtchn_port_vmstate = {
235     .name = "xen_evtchn_port",
236     .version_id = 1,
237     .minimum_version_id = 1,
238     .fields = (VMStateField[]) {
239         VMSTATE_UINT32(vcpu, XenEvtchnPort),
240         VMSTATE_UINT16(type, XenEvtchnPort),
241         VMSTATE_UINT16(type_val, XenEvtchnPort),
242         VMSTATE_END_OF_LIST()
243     }
244 };
245 
246 static const VMStateDescription xen_evtchn_vmstate = {
247     .name = "xen_evtchn",
248     .version_id = 1,
249     .minimum_version_id = 1,
250     .needed = xen_evtchn_is_needed,
251     .pre_load = xen_evtchn_pre_load,
252     .post_load = xen_evtchn_post_load,
253     .fields = (VMStateField[]) {
254         VMSTATE_UINT64(callback_param, XenEvtchnState),
255         VMSTATE_UINT32(nr_ports, XenEvtchnState),
256         VMSTATE_STRUCT_VARRAY_UINT32(port_table, XenEvtchnState, nr_ports, 1,
257                                      xen_evtchn_port_vmstate, XenEvtchnPort),
258         VMSTATE_UINT16_ARRAY(gsi_pirq, XenEvtchnState, IOAPIC_NUM_PINS),
259         VMSTATE_VARRAY_UINT16_ALLOC(pirq_inuse_bitmap, XenEvtchnState,
260                                     nr_pirq_inuse_words, 0,
261                                     vmstate_info_uint64, uint64_t),
262         VMSTATE_UINT32(pirq_gsi_set, XenEvtchnState),
263         VMSTATE_END_OF_LIST()
264     }
265 };
266 
267 static void xen_evtchn_class_init(ObjectClass *klass, void *data)
268 {
269     DeviceClass *dc = DEVICE_CLASS(klass);
270 
271     dc->vmsd = &xen_evtchn_vmstate;
272 }
273 
274 static const TypeInfo xen_evtchn_info = {
275     .name          = TYPE_XEN_EVTCHN,
276     .parent        = TYPE_SYS_BUS_DEVICE,
277     .instance_size = sizeof(XenEvtchnState),
278     .class_init    = xen_evtchn_class_init,
279 };
280 
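/*
 * Bottom half used when the callback level needs updating from a context
 * which can't take the BQL: re-derive the GSI level from vCPU0's
 * evtchn_upcall_pending flag.
 */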
281 static void gsi_assert_bh(void *opaque)
282 {
283     struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
284     if (vi) {
285         xen_evtchn_set_callback_level(!!vi->evtchn_upcall_pending);
286     }
287 }
288 
289 void xen_evtchn_create(void)
290 {
291     XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN,
292                                                         -1, NULL));
293     int i;
294 
295     xen_evtchn_singleton = s;
296 
297     qemu_mutex_init(&s->port_lock);
298     s->gsi_bh = aio_bh_new(qemu_get_aio_context(), gsi_assert_bh, s);
299 
300     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
301         sysbus_init_irq(SYS_BUS_DEVICE(s), &s->gsis[i]);
302     }
303 
304     /*
305      * The Xen scheme for encoding PIRQ# into an MSI message is not
306      * compatible with 32-bit MSI, as it puts the high bits of the
307      * PIRQ# into the high bits of the MSI message address, instead of
308      * using the Extended Destination ID in address bits 4-11 which
309      * perhaps would have been a better choice.
310      *
311      * To keep life simple, kvm_accel_instance_init() initialises the
312      * default to 256, which conveniently doesn't need to set anything
313      * outside the low 32 bits of the address. It can be increased by
314      * setting the xen-evtchn-max-pirq property.
315      */
316     s->nr_pirqs = kvm_xen_get_evtchn_max_pirq();
317 
318     s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
319     s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
320     s->pirq = g_new0(struct pirq_info, s->nr_pirqs);
321 }
322 
323 void xen_evtchn_connect_gsis(qemu_irq *system_gsis)
324 {
325     XenEvtchnState *s = xen_evtchn_singleton;
326     int i;
327 
328     if (!s) {
329         return;
330     }
331 
332     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
333         sysbus_connect_irq(SYS_BUS_DEVICE(s), i, system_gsis[i]);
334     }
335 }
336 
337 static void xen_evtchn_register_types(void)
338 {
339     type_register_static(&xen_evtchn_info);
340 }
341 
342 type_init(xen_evtchn_register_types)
343 
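/*
 * A HVM_PARAM_CALLBACK_TYPE_PCI_INTX value packs the INTx pin into bits
 * 0-1, the devfn into bits 8-15, the bus into bits 16-31 and the domain
 * into bits 32-47. Resolve it to the GSI which that INTx pin currently
 * routes to, or return 0 if it can't be resolved.
 */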
344 static int set_callback_pci_intx(XenEvtchnState *s, uint64_t param)
345 {
346     PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
347     uint8_t pin = param & 3;
348     uint8_t devfn = (param >> 8) & 0xff;
349     uint16_t bus = (param >> 16) & 0xffff;
350     uint16_t domain = (param >> 32) & 0xffff;
351     PCIDevice *pdev;
352     PCIINTxRoute r;
353 
354     if (domain || !pcms) {
355         return 0;
356     }
357 
358     pdev = pci_find_device(pcms->bus, bus, devfn);
359     if (!pdev) {
360         return 0;
361     }
362 
363     r = pci_device_route_intx_to_irq(pdev, pin);
364     if (r.mode != PCI_INTX_ENABLED) {
365         return 0;
366     }
367 
368     /*
369      * Hm, can we be notified of INTX routing changes? Not without
370      * *owning* the device and being allowed to overwrite its own
371      * ->intx_routing_notifier, AFAICT. So let's not.
372      */
373     return r.irq;
374 }
375 
376 void xen_evtchn_set_callback_level(int level)
377 {
378     XenEvtchnState *s = xen_evtchn_singleton;
379     if (!s) {
380         return;
381     }
382 
383     /*
384      * We get to this function in a number of ways:
385      *
386      *  • From I/O context, via PV backend drivers sending a notification to
387      *    the guest.
388      *
389      *  • From guest vCPU context, via loopback interdomain event channels
390      *    (or theoretically even IPIs but guests don't use those with GSI
391      *    delivery because that's pointless. We don't want a malicious guest
392      *    to be able to trigger a deadlock though, so we can't rule it out.)
393      *
394      *  • From guest vCPU context when the HVM_PARAM_CALLBACK_IRQ is being
395      *    configured.
396      *
397      *  • From guest vCPU context in the KVM exit handler, if the upcall
398      *    pending flag has been cleared and the GSI needs to be deasserted.
399      *
400      *  • Maybe in future, in an interrupt ack/eoi notifier when the GSI has
401      *    been acked in the irqchip.
402      *
403      * Whichever context we come from, if we aren't already holding the BQL
404      * then we can't take it now, as we may already hold s->port_lock. So
405      * trigger the BH to set the IRQ for us instead of doing it immediately.
406      *
407      * In the HVM_PARAM_CALLBACK_IRQ and KVM exit handler cases, the caller
408      * will deliberately take the BQL because they want the change to take
409      * effect immediately. That just leaves interdomain loopback as the case
410      * which uses the BH.
411      */
412     if (!qemu_mutex_iothread_locked()) {
413         qemu_bh_schedule(s->gsi_bh);
414         return;
415     }
416 
417     if (s->callback_gsi && s->callback_gsi < IOAPIC_NUM_PINS) {
418         qemu_set_irq(s->gsis[s->callback_gsi], level);
419         if (level) {
420             /* Ensure the vCPU polls for deassertion */
421             kvm_xen_set_callback_asserted();
422         }
423     }
424 }
425 
426 int xen_evtchn_set_callback_param(uint64_t param)
427 {
428     XenEvtchnState *s = xen_evtchn_singleton;
429     struct kvm_xen_hvm_attr xa = {
430         .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
431         .u.vector = 0,
432     };
433     bool in_kernel = false;
434     uint32_t gsi = 0;
435     int type = param >> CALLBACK_VIA_TYPE_SHIFT;
436     int ret;
437 
438     if (!s) {
439         return -ENOTSUP;
440     }
441 
442     /*
443      * We need the BQL because set_callback_pci_intx() may call into PCI code,
444      * and because we may need to manipulate the old and new GSI levels.
445      */
446     assert(qemu_mutex_iothread_locked());
447     qemu_mutex_lock(&s->port_lock);
448 
449     switch (type) {
450     case HVM_PARAM_CALLBACK_TYPE_VECTOR: {
451         xa.u.vector = (uint8_t)param;
452 
453         ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
454         if (!ret && kvm_xen_has_cap(EVTCHN_SEND)) {
455             in_kernel = true;
456         }
457         gsi = 0;
458         break;
459     }
460 
461     case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
462         gsi = set_callback_pci_intx(s, param);
463         ret = gsi ? 0 : -EINVAL;
464         break;
465 
466     case HVM_PARAM_CALLBACK_TYPE_GSI:
467         gsi = (uint32_t)param;
468         ret = 0;
469         break;
470 
471     default:
472         /* Xen doesn't return an error even if you set something bogus */
473         ret = 0;
474         break;
475     }
476 
477     if (!ret) {
478         /* If vector delivery was turned *off* then tell the kernel */
479         if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) ==
480             HVM_PARAM_CALLBACK_TYPE_VECTOR && !xa.u.vector) {
481             kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
482         }
483         s->callback_param = param;
484         s->evtchn_in_kernel = in_kernel;
485 
486         if (gsi != s->callback_gsi) {
487             struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
488 
489             xen_evtchn_set_callback_level(0);
490             s->callback_gsi = gsi;
491 
492             if (gsi && vi && vi->evtchn_upcall_pending) {
493                 kvm_xen_inject_vcpu_callback_vector(0, type);
494             }
495         }
496     }
497 
498     qemu_mutex_unlock(&s->port_lock);
499 
500     return ret;
501 }
502 
503 static void inject_callback(XenEvtchnState *s, uint32_t vcpu)
504 {
505     int type = s->callback_param >> CALLBACK_VIA_TYPE_SHIFT;
506 
507     kvm_xen_inject_vcpu_callback_vector(vcpu, type);
508 }
509 
510 static void deassign_kernel_port(evtchn_port_t port)
511 {
512     struct kvm_xen_hvm_attr ha;
513     int ret;
514 
515     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
516     ha.u.evtchn.send_port = port;
517     ha.u.evtchn.flags = KVM_XEN_EVTCHN_DEASSIGN;
518 
519     ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
520     if (ret) {
521         qemu_log_mask(LOG_GUEST_ERROR, "Failed to unbind kernel port %d: %s\n",
522                       port, strerror(-ret));
523     }
524 }
525 
526 static int assign_kernel_port(uint16_t type, evtchn_port_t port,
527                               uint32_t vcpu_id)
528 {
529     CPUState *cpu = qemu_get_cpu(vcpu_id);
530     struct kvm_xen_hvm_attr ha;
531 
532     if (!cpu) {
533         return -ENOENT;
534     }
535 
536     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
537     ha.u.evtchn.send_port = port;
538     ha.u.evtchn.type = type;
539     ha.u.evtchn.flags = 0;
540     ha.u.evtchn.deliver.port.port = port;
541     ha.u.evtchn.deliver.port.vcpu = kvm_arch_vcpu_id(cpu);
542     ha.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
543 
544     return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
545 }
546 
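/*
 * Bind a guest port to an eventfd, so that when the guest performs an
 * EVTCHNOP_send on it the kernel signals the backend's eventfd itself
 * rather than bouncing the send out to QEMU.
 */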
547 static int assign_kernel_eventfd(uint16_t type, evtchn_port_t port, int fd)
548 {
549     struct kvm_xen_hvm_attr ha;
550 
551     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
552     ha.u.evtchn.send_port = port;
553     ha.u.evtchn.type = type;
554     ha.u.evtchn.flags = 0;
555     ha.u.evtchn.deliver.eventfd.port = 0;
556     ha.u.evtchn.deliver.eventfd.fd = fd;
557 
558     return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
559 }
560 
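/*
 * Valid port numbers are nonzero and below the 2-level ABI limit, which is
 * smaller for 32-bit guests because their shared_info bitmaps are smaller.
 */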
561 static bool valid_port(evtchn_port_t port)
562 {
563     if (!port) {
564         return false;
565     }
566 
567     if (xen_is_long_mode()) {
568         return port < EVTCHN_2L_NR_CHANNELS;
569     } else {
570         return port < COMPAT_EVTCHN_2L_NR_CHANNELS;
571     }
572 }
573 
574 static bool valid_vcpu(uint32_t vcpu)
575 {
576     return !!qemu_get_cpu(vcpu);
577 }
578 
579 static void unbind_backend_ports(XenEvtchnState *s)
580 {
581     XenEvtchnPort *p;
582     int i;
583 
584     for (i = 1; i < s->nr_ports; i++) {
585         p = &s->port_table[i];
586         if (p->type == EVTCHNSTAT_interdomain &&
587             (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU)) {
588             evtchn_port_t be_port = p->type_val & PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
589 
590             if (s->be_handles[be_port]) {
591                 /* This part will be overwritten on the load anyway. */
592                 p->type = EVTCHNSTAT_unbound;
593                 p->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
594 
595                 /* Leave the backend port open and unbound too. */
596                 if (kvm_xen_has_cap(EVTCHN_SEND)) {
597                     deassign_kernel_port(i);
598                 }
599                 s->be_handles[be_port]->guest_port = 0;
600             }
601         }
602     }
603 }
604 
605 int xen_evtchn_status_op(struct evtchn_status *status)
606 {
607     XenEvtchnState *s = xen_evtchn_singleton;
608     XenEvtchnPort *p;
609 
610     if (!s) {
611         return -ENOTSUP;
612     }
613 
614     if (status->dom != DOMID_SELF && status->dom != xen_domid) {
615         return -ESRCH;
616     }
617 
618     if (!valid_port(status->port)) {
619         return -EINVAL;
620     }
621 
622     qemu_mutex_lock(&s->port_lock);
623 
624     p = &s->port_table[status->port];
625 
626     status->status = p->type;
627     status->vcpu = p->vcpu;
628 
629     switch (p->type) {
630     case EVTCHNSTAT_unbound:
631         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
632             status->u.unbound.dom = DOMID_QEMU;
633         } else {
634             status->u.unbound.dom = xen_domid;
635         }
636         break;
637 
638     case EVTCHNSTAT_interdomain:
639         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
640             status->u.interdomain.dom = DOMID_QEMU;
641         } else {
642             status->u.interdomain.dom = xen_domid;
643         }
644 
645         status->u.interdomain.port = p->type_val &
646             PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
647         break;
648 
649     case EVTCHNSTAT_pirq:
650         status->u.pirq = p->type_val;
651         break;
652 
653     case EVTCHNSTAT_virq:
654         status->u.virq = p->type_val;
655         break;
656     }
657 
658     qemu_mutex_unlock(&s->port_lock);
659     return 0;
660 }
661 
662 /*
663  * Never thought I'd hear myself say this, but C++ templates would be
664  * kind of nice here.
665  *
666  * template<class T> static int do_unmask_port(T *shinfo, ...);
667  */
668 static int do_unmask_port_lm(XenEvtchnState *s, evtchn_port_t port,
669                              bool do_unmask, struct shared_info *shinfo,
670                              struct vcpu_info *vcpu_info)
671 {
672     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
673     typeof(shinfo->evtchn_pending[0]) mask;
674     int idx = port / bits_per_word;
675     int offset = port % bits_per_word;
676 
677     mask = 1UL << offset;
678 
679     if (idx >= bits_per_word) {
680         return -EINVAL;
681     }
682 
683     if (do_unmask) {
684         /*
685          * If this is a true unmask operation, clear the mask bit. If
686          * it was already unmasked, we have nothing further to do.
687          */
688         if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
689             return 0;
690         }
691     } else {
692         /*
693          * This is a pseudo-unmask for affinity changes. We don't
694          * change the mask bit, and if it's *masked* we have nothing
695          * else to do.
696          */
697         if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
698             return 0;
699         }
700     }
701 
702     /* If the event was not pending, we're done. */
703     if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
704         return 0;
705     }
706 
707     /* Now on to the vcpu_info evtchn_pending_sel index... */
708     mask = 1UL << idx;
709 
710     /* If a port in this word was already pending for this vCPU, all done. */
711     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
712         return 0;
713     }
714 
715     /* Set evtchn_upcall_pending for this vCPU */
716     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
717         return 0;
718     }
719 
720     inject_callback(s, s->port_table[port].vcpu);
721 
722     return 0;
723 }
724 
725 static int do_unmask_port_compat(XenEvtchnState *s, evtchn_port_t port,
726                                  bool do_unmask,
727                                  struct compat_shared_info *shinfo,
728                                  struct compat_vcpu_info *vcpu_info)
729 {
730     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
731     typeof(shinfo->evtchn_pending[0]) mask;
732     int idx = port / bits_per_word;
733     int offset = port % bits_per_word;
734 
735     mask = 1UL << offset;
736 
737     if (idx >= bits_per_word) {
738         return -EINVAL;
739     }
740 
741     if (do_unmask) {
742         /*
743          * If this is a true unmask operation, clear the mask bit. If
744          * it was already unmasked, we have nothing further to do.
745          */
746         if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
747             return 0;
748         }
749     } else {
750         /*
751          * This is a pseudo-unmask for affinity changes. We don't
752          * change the mask bit, and if it's *masked* we have nothing
753          * else to do.
754          */
755         if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
756             return 0;
757         }
758     }
759 
760     /* If the event was not pending, we're done. */
761     if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
762         return 0;
763     }
764 
765     /* Now on to the vcpu_info evtchn_pending_sel index... */
766     mask = 1UL << idx;
767 
768     /* If a port in this word was already pending for this vCPU, all done. */
769     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
770         return 0;
771     }
772 
773     /* Set evtchn_upcall_pending for this vCPU */
774     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
775         return 0;
776     }
777 
778     inject_callback(s, s->port_table[port].vcpu);
779 
780     return 0;
781 }
782 
783 static int unmask_port(XenEvtchnState *s, evtchn_port_t port, bool do_unmask)
784 {
785     void *vcpu_info, *shinfo;
786 
787     if (s->port_table[port].type == EVTCHNSTAT_closed) {
788         return -EINVAL;
789     }
790 
791     shinfo = xen_overlay_get_shinfo_ptr();
792     if (!shinfo) {
793         return -ENOTSUP;
794     }
795 
796     vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
797     if (!vcpu_info) {
798         return -EINVAL;
799     }
800 
801     if (xen_is_long_mode()) {
802         return do_unmask_port_lm(s, port, do_unmask, shinfo, vcpu_info);
803     } else {
804         return do_unmask_port_compat(s, port, do_unmask, shinfo, vcpu_info);
805     }
806 }
807 
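/*
 * Set a port pending in the 2-level ABI: latch the per-port pending bit,
 * and if the port isn't masked, propagate up through the vCPU's
 * evtchn_pending_sel word and evtchn_upcall_pending flag. The callback is
 * only injected if this vCPU wasn't already marked pending.
 */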
808 static int do_set_port_lm(XenEvtchnState *s, evtchn_port_t port,
809                           struct shared_info *shinfo,
810                           struct vcpu_info *vcpu_info)
811 {
812     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
813     typeof(shinfo->evtchn_pending[0]) mask;
814     int idx = port / bits_per_word;
815     int offset = port % bits_per_word;
816 
817     mask = 1UL << offset;
818 
819     if (idx >= bits_per_word) {
820         return -EINVAL;
821     }
822 
823     /* Update the pending bit itself. If it was already set, we're done. */
824     if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
825         return 0;
826     }
827 
828     /* Check if it's masked. */
829     if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
830         return 0;
831     }
832 
833     /* Now on to the vcpu_info evtchn_pending_sel index... */
834     mask = 1UL << idx;
835 
836     /* If a port in this word was already pending for this vCPU, all done. */
837     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
838         return 0;
839     }
840 
841     /* Set evtchn_upcall_pending for this vCPU */
842     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
843         return 0;
844     }
845 
846     inject_callback(s, s->port_table[port].vcpu);
847 
848     return 0;
849 }
850 
851 static int do_set_port_compat(XenEvtchnState *s, evtchn_port_t port,
852                               struct compat_shared_info *shinfo,
853                               struct compat_vcpu_info *vcpu_info)
854 {
855     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
856     typeof(shinfo->evtchn_pending[0]) mask;
857     int idx = port / bits_per_word;
858     int offset = port % bits_per_word;
859 
860     mask = 1UL << offset;
861 
862     if (idx >= bits_per_word) {
863         return -EINVAL;
864     }
865 
866     /* Update the pending bit itself. If it was already set, we're done. */
867     if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
868         return 0;
869     }
870 
871     /* Check if it's masked. */
872     if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
873         return 0;
874     }
875 
876     /* Now on to the vcpu_info evtchn_pending_sel index... */
877     mask = 1UL << idx;
878 
879     /* If a port in this word was already pending for this vCPU, all done. */
880     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
881         return 0;
882     }
883 
884     /* Set evtchn_upcall_pending for this vCPU */
885     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
886         return 0;
887     }
888 
889     inject_callback(s, s->port_table[port].vcpu);
890 
891     return 0;
892 }
893 
894 static int set_port_pending(XenEvtchnState *s, evtchn_port_t port)
895 {
896     void *vcpu_info, *shinfo;
897 
898     if (s->port_table[port].type == EVTCHNSTAT_closed) {
899         return -EINVAL;
900     }
901 
902     if (s->evtchn_in_kernel) {
903         XenEvtchnPort *p = &s->port_table[port];
904         CPUState *cpu = qemu_get_cpu(p->vcpu);
905         struct kvm_irq_routing_xen_evtchn evt;
906 
907         if (!cpu) {
908             return 0;
909         }
910 
911         evt.port = port;
912         evt.vcpu = kvm_arch_vcpu_id(cpu);
913         evt.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
914 
915         return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_EVTCHN_SEND, &evt);
916     }
917 
918     shinfo = xen_overlay_get_shinfo_ptr();
919     if (!shinfo) {
920         return -ENOTSUP;
921     }
922 
923     vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
924     if (!vcpu_info) {
925         return -EINVAL;
926     }
927 
928     if (xen_is_long_mode()) {
929         return do_set_port_lm(s, port, shinfo, vcpu_info);
930     } else {
931         return do_set_port_compat(s, port, shinfo, vcpu_info);
932     }
933 }
934 
935 static int clear_port_pending(XenEvtchnState *s, evtchn_port_t port)
936 {
937     void *p = xen_overlay_get_shinfo_ptr();
938 
939     if (!p) {
940         return -ENOTSUP;
941     }
942 
943     if (xen_is_long_mode()) {
944         struct shared_info *shinfo = p;
945         const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
946         typeof(shinfo->evtchn_pending[0]) mask;
947         int idx = port / bits_per_word;
948         int offset = port % bits_per_word;
949 
950         mask = 1UL << offset;
951 
952         qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
953     } else {
954         struct compat_shared_info *shinfo = p;
955         const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
956         typeof(shinfo->evtchn_pending[0]) mask;
957         int idx = port / bits_per_word;
958         int offset = port % bits_per_word;
959 
960         mask = 1UL << offset;
961 
962         qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
963     }
964     return 0;
965 }
966 
967 static void free_port(XenEvtchnState *s, evtchn_port_t port)
968 {
969     s->port_table[port].type = EVTCHNSTAT_closed;
970     s->port_table[port].type_val = 0;
971     s->port_table[port].vcpu = 0;
972 
973     if (s->nr_ports == port + 1) {
974         do {
975             s->nr_ports--;
976         } while (s->nr_ports &&
977                  s->port_table[s->nr_ports - 1].type == EVTCHNSTAT_closed);
978     }
979 
980     /* Clear pending event to avoid unexpected behavior on re-bind. */
981     clear_port_pending(s, port);
982 }
983 
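/*
 * Find the lowest free port, starting from 1 (port 0 is never valid), and
 * bind it. nr_ports is kept as a high-water mark (and trimmed again in
 * free_port()) so that scans and the migration stream only need to cover
 * the ports actually in use.
 */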
984 static int allocate_port(XenEvtchnState *s, uint32_t vcpu, uint16_t type,
985                          uint16_t val, evtchn_port_t *port)
986 {
987     evtchn_port_t p = 1;
988 
989     for (p = 1; valid_port(p); p++) {
990         if (s->port_table[p].type == EVTCHNSTAT_closed) {
991             s->port_table[p].vcpu = vcpu;
992             s->port_table[p].type = type;
993             s->port_table[p].type_val = val;
994 
995             *port = p;
996 
997             if (s->nr_ports < p + 1) {
998                 s->nr_ports = p + 1;
999             }
1000 
1001             return 0;
1002         }
1003     }
1004     return -ENOSPC;
1005 }
1006 
1007 static bool virq_is_global(uint32_t virq)
1008 {
1009     switch (virq) {
1010     case VIRQ_TIMER:
1011     case VIRQ_DEBUG:
1012     case VIRQ_XENOPROF:
1013     case VIRQ_XENPMU:
1014         return false;
1015 
1016     default:
1017         return true;
1018     }
1019 }
1020 
1021 static int close_port(XenEvtchnState *s, evtchn_port_t port,
1022                       bool *flush_kvm_routes)
1023 {
1024     XenEvtchnPort *p = &s->port_table[port];
1025 
1026     /* Because it *might* be a PIRQ port */
1027     assert(qemu_mutex_iothread_locked());
1028 
1029     switch (p->type) {
1030     case EVTCHNSTAT_closed:
1031         return -ENOENT;
1032 
1033     case EVTCHNSTAT_pirq:
1034         s->pirq[p->type_val].port = 0;
1035         if (s->pirq[p->type_val].is_translated) {
1036             *flush_kvm_routes = true;
1037         }
1038         break;
1039 
1040     case EVTCHNSTAT_virq:
1041         kvm_xen_set_vcpu_virq(virq_is_global(p->type_val) ? 0 : p->vcpu,
1042                               p->type_val, 0);
1043         break;
1044 
1045     case EVTCHNSTAT_ipi:
1046         if (s->evtchn_in_kernel) {
1047             deassign_kernel_port(port);
1048         }
1049         break;
1050 
1051     case EVTCHNSTAT_interdomain:
1052         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
1053             uint16_t be_port = p->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
1054             struct xenevtchn_handle *xc = s->be_handles[be_port];
1055             if (xc) {
1056                 if (kvm_xen_has_cap(EVTCHN_SEND)) {
1057                     deassign_kernel_port(port);
1058                 }
1059                 xc->guest_port = 0;
1060             }
1061         } else {
1062             /* Loopback interdomain */
1063             XenEvtchnPort *rp = &s->port_table[p->type_val];
1064             if (!valid_port(p->type_val) || rp->type_val != port ||
1065                 rp->type != EVTCHNSTAT_interdomain) {
1066                 error_report("Inconsistent state for interdomain unbind");
1067             } else {
1068                 /* Set the other end back to unbound */
1069                 rp->type = EVTCHNSTAT_unbound;
1070                 rp->type_val = 0;
1071             }
1072         }
1073         break;
1074 
1075     default:
1076         break;
1077     }
1078 
1079     free_port(s, port);
1080     return 0;
1081 }
1082 
1083 int xen_evtchn_soft_reset(void)
1084 {
1085     XenEvtchnState *s = xen_evtchn_singleton;
1086     bool flush_kvm_routes = false;
1087     int i;
1088 
1089     if (!s) {
1090         return -ENOTSUP;
1091     }
1092 
1093     assert(qemu_mutex_iothread_locked());
1094 
1095     qemu_mutex_lock(&s->port_lock);
1096 
1097     for (i = 0; i < s->nr_ports; i++) {
1098         close_port(s, i, &flush_kvm_routes);
1099     }
1100 
1101     qemu_mutex_unlock(&s->port_lock);
1102 
1103     if (flush_kvm_routes) {
1104         kvm_update_msi_routes_all(NULL, true, 0, 0);
1105     }
1106 
1107     return 0;
1108 }
1109 
1110 int xen_evtchn_reset_op(struct evtchn_reset *reset)
1111 {
1112     if (reset->dom != DOMID_SELF && reset->dom != xen_domid) {
1113         return -ESRCH;
1114     }
1115 
1116     return xen_evtchn_soft_reset();
1117 }
1118 
1119 int xen_evtchn_close_op(struct evtchn_close *close)
1120 {
1121     XenEvtchnState *s = xen_evtchn_singleton;
1122     bool flush_kvm_routes = false;
1123     int ret;
1124 
1125     if (!s) {
1126         return -ENOTSUP;
1127     }
1128 
1129     if (!valid_port(close->port)) {
1130         return -EINVAL;
1131     }
1132 
1133     QEMU_IOTHREAD_LOCK_GUARD();
1134     qemu_mutex_lock(&s->port_lock);
1135 
1136     ret = close_port(s, close->port, &flush_kvm_routes);
1137 
1138     qemu_mutex_unlock(&s->port_lock);
1139 
1140     if (flush_kvm_routes) {
1141         kvm_update_msi_routes_all(NULL, true, 0, 0);
1142     }
1143 
1144     return ret;
1145 }
1146 
1147 int xen_evtchn_unmask_op(struct evtchn_unmask *unmask)
1148 {
1149     XenEvtchnState *s = xen_evtchn_singleton;
1150     int ret;
1151 
1152     if (!s) {
1153         return -ENOTSUP;
1154     }
1155 
1156     if (!valid_port(unmask->port)) {
1157         return -EINVAL;
1158     }
1159 
1160     qemu_mutex_lock(&s->port_lock);
1161 
1162     ret = unmask_port(s, unmask->port, true);
1163 
1164     qemu_mutex_unlock(&s->port_lock);
1165 
1166     return ret;
1167 }
1168 
1169 int xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu *vcpu)
1170 {
1171     XenEvtchnState *s = xen_evtchn_singleton;
1172     XenEvtchnPort *p;
1173     int ret = -EINVAL;
1174 
1175     if (!s) {
1176         return -ENOTSUP;
1177     }
1178 
1179     if (!valid_port(vcpu->port)) {
1180         return -EINVAL;
1181     }
1182 
1183     if (!valid_vcpu(vcpu->vcpu)) {
1184         return -ENOENT;
1185     }
1186 
1187     qemu_mutex_lock(&s->port_lock);
1188 
1189     p = &s->port_table[vcpu->port];
1190 
1191     if (p->type == EVTCHNSTAT_interdomain ||
1192         p->type == EVTCHNSTAT_unbound ||
1193         p->type == EVTCHNSTAT_pirq ||
1194         (p->type == EVTCHNSTAT_virq && virq_is_global(p->type_val))) {
1195         /*
1196          * unmask_port() with do_unmask==false will just raise the event
1197          * on the new vCPU if the port was already pending.
1198          */
1199         p->vcpu = vcpu->vcpu;
1200         unmask_port(s, vcpu->port, false);
1201         ret = 0;
1202     }
1203 
1204     qemu_mutex_unlock(&s->port_lock);
1205 
1206     return ret;
1207 }
1208 
1209 int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq)
1210 {
1211     XenEvtchnState *s = xen_evtchn_singleton;
1212     int ret;
1213 
1214     if (!s) {
1215         return -ENOTSUP;
1216     }
1217 
1218     if (virq->virq >= NR_VIRQS) {
1219         return -EINVAL;
1220     }
1221 
1222     /* Global VIRQ must be allocated on vCPU0 first */
1223     if (virq_is_global(virq->virq) && virq->vcpu != 0) {
1224         return -EINVAL;
1225     }
1226 
1227     if (!valid_vcpu(virq->vcpu)) {
1228         return -ENOENT;
1229     }
1230 
1231     qemu_mutex_lock(&s->port_lock);
1232 
1233     ret = allocate_port(s, virq->vcpu, EVTCHNSTAT_virq, virq->virq,
1234                         &virq->port);
1235     if (!ret) {
1236         ret = kvm_xen_set_vcpu_virq(virq->vcpu, virq->virq, virq->port);
1237         if (ret) {
1238             free_port(s, virq->port);
1239         }
1240     }
1241 
1242     qemu_mutex_unlock(&s->port_lock);
1243 
1244     return ret;
1245 }
1246 
1247 int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq)
1248 {
1249     XenEvtchnState *s = xen_evtchn_singleton;
1250     int ret;
1251 
1252     if (!s) {
1253         return -ENOTSUP;
1254     }
1255 
1256     if (pirq->pirq >= s->nr_pirqs) {
1257         return -EINVAL;
1258     }
1259 
1260     QEMU_IOTHREAD_LOCK_GUARD();
1261 
1262     if (s->pirq[pirq->pirq].port) {
1263         return -EBUSY;
1264     }
1265 
1266     qemu_mutex_lock(&s->port_lock);
1267 
1268     ret = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq,
1269                         &pirq->port);
1270     if (ret) {
1271         qemu_mutex_unlock(&s->port_lock);
1272         return ret;
1273     }
1274 
1275     s->pirq[pirq->pirq].port = pirq->port;
1276     trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port);
1277 
1278     qemu_mutex_unlock(&s->port_lock);
1279 
1280     /*
1281      * Need to do the unmask outside port_lock because it may call
1282      * back into the MSI translate function.
1283      */
1284     if (s->pirq[pirq->pirq].gsi == IRQ_MSI_EMU) {
1285         if (s->pirq[pirq->pirq].is_masked) {
1286             PCIDevice *dev = s->pirq[pirq->pirq].dev;
1287             int vector = s->pirq[pirq->pirq].vector;
1288             char *dev_path = qdev_get_dev_path(DEVICE(dev));
1289 
1290             trace_kvm_xen_unmask_pirq(pirq->pirq, dev_path, vector);
1291             g_free(dev_path);
1292 
1293             if (s->pirq[pirq->pirq].is_msix) {
1294                 msix_set_mask(dev, vector, false);
1295             } else {
1296                 msi_set_mask(dev, vector, false, NULL);
1297             }
1298         } else if (s->pirq[pirq->pirq].is_translated) {
1299             /*
1300              * If KVM had attempted to translate this one before, make it try
1301              * again. If we unmasked, then the notifier on the MSI(-X) vector
1302              * will already have had the same effect.
1303              */
1304             kvm_update_msi_routes_all(NULL, true, 0, 0);
1305         }
1306     }
1307 
1308     return ret;
1309 }
1310 
1311 int xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi *ipi)
1312 {
1313     XenEvtchnState *s = xen_evtchn_singleton;
1314     int ret;
1315 
1316     if (!s) {
1317         return -ENOTSUP;
1318     }
1319 
1320     if (!valid_vcpu(ipi->vcpu)) {
1321         return -ENOENT;
1322     }
1323 
1324     qemu_mutex_lock(&s->port_lock);
1325 
1326     ret = allocate_port(s, ipi->vcpu, EVTCHNSTAT_ipi, 0, &ipi->port);
1327     if (!ret && s->evtchn_in_kernel) {
1328         assign_kernel_port(EVTCHNSTAT_ipi, ipi->port, ipi->vcpu);
1329     }
1330 
1331     qemu_mutex_unlock(&s->port_lock);
1332 
1333     return ret;
1334 }
1335 
1336 int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
1337 {
1338     XenEvtchnState *s = xen_evtchn_singleton;
1339     uint16_t type_val;
1340     int ret;
1341 
1342     if (!s) {
1343         return -ENOTSUP;
1344     }
1345 
1346     if (interdomain->remote_dom == DOMID_QEMU) {
1347         type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
1348     } else if (interdomain->remote_dom == DOMID_SELF ||
1349                interdomain->remote_dom == xen_domid) {
1350         type_val = 0;
1351     } else {
1352         return -ESRCH;
1353     }
1354 
1355     if (!valid_port(interdomain->remote_port)) {
1356         return -EINVAL;
1357     }
1358 
1359     qemu_mutex_lock(&s->port_lock);
1360 
1361     /* The newly allocated port starts out as unbound */
1362     ret = allocate_port(s, 0, EVTCHNSTAT_unbound, type_val,
1363                         &interdomain->local_port);
1364     if (ret) {
1365         goto out;
1366     }
1367 
1368     if (interdomain->remote_dom == DOMID_QEMU) {
1369         struct xenevtchn_handle *xc = s->be_handles[interdomain->remote_port];
1370         XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1371 
1372         if (!xc) {
1373             ret = -ENOENT;
1374             goto out_free_port;
1375         }
1376 
1377         if (xc->guest_port) {
1378             ret = -EBUSY;
1379             goto out_free_port;
1380         }
1381 
1382         assert(xc->be_port == interdomain->remote_port);
1383         xc->guest_port = interdomain->local_port;
1384         if (kvm_xen_has_cap(EVTCHN_SEND)) {
1385             assign_kernel_eventfd(lp->type, xc->guest_port, xc->fd);
1386         }
1387         lp->type = EVTCHNSTAT_interdomain;
1388         lp->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU | interdomain->remote_port;
1389         ret = 0;
1390     } else {
1391         /* Loopback */
1392         XenEvtchnPort *rp = &s->port_table[interdomain->remote_port];
1393         XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1394 
1395         if (rp->type == EVTCHNSTAT_unbound && rp->type_val == 0) {
1396             /* It's a match! */
1397             rp->type = EVTCHNSTAT_interdomain;
1398             rp->type_val = interdomain->local_port;
1399 
1400             lp->type = EVTCHNSTAT_interdomain;
1401             lp->type_val = interdomain->remote_port;
1402         } else {
1403             ret = -EINVAL;
1404         }
1405     }
1406 
1407  out_free_port:
1408     if (ret) {
1409         free_port(s, interdomain->local_port);
1410     }
1411  out:
1412     qemu_mutex_unlock(&s->port_lock);
1413 
1414     return ret;
1415 }
1416 
1417 int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc)
1418 {
1419     XenEvtchnState *s = xen_evtchn_singleton;
1420     uint16_t type_val;
1421     int ret;
1422 
1423     if (!s) {
1424         return -ENOTSUP;
1425     }
1426 
1427     if (alloc->dom != DOMID_SELF && alloc->dom != xen_domid) {
1428         return -ESRCH;
1429     }
1430 
1431     if (alloc->remote_dom == DOMID_QEMU) {
1432         type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
1433     } else if (alloc->remote_dom == DOMID_SELF ||
1434                alloc->remote_dom == xen_domid) {
1435         type_val = 0;
1436     } else {
1437         return -EPERM;
1438     }
1439 
1440     qemu_mutex_lock(&s->port_lock);
1441 
1442     ret = allocate_port(s, 0, EVTCHNSTAT_unbound, type_val, &alloc->port);
1443 
1444     qemu_mutex_unlock(&s->port_lock);
1445 
1446     return ret;
1447 }
1448 
1449 int xen_evtchn_send_op(struct evtchn_send *send)
1450 {
1451     XenEvtchnState *s = xen_evtchn_singleton;
1452     XenEvtchnPort *p;
1453     int ret = 0;
1454 
1455     if (!s) {
1456         return -ENOTSUP;
1457     }
1458 
1459     if (!valid_port(send->port)) {
1460         return -EINVAL;
1461     }
1462 
1463     qemu_mutex_lock(&s->port_lock);
1464 
1465     p = &s->port_table[send->port];
1466 
1467     switch (p->type) {
1468     case EVTCHNSTAT_interdomain:
1469         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
1470             /*
1471              * This is an event from the guest to qemu itself, which is
1472              * serving as the driver domain.
1473              */
1474             uint16_t be_port = p->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
1475             struct xenevtchn_handle *xc = s->be_handles[be_port];
1476             if (xc) {
1477                 eventfd_write(xc->fd, 1);
1478                 ret = 0;
1479             } else {
1480                 ret = -ENOENT;
1481             }
1482         } else {
1483             /* Loopback interdomain ports; just a complex IPI */
1484             set_port_pending(s, p->type_val);
1485         }
1486         break;
1487 
1488     case EVTCHNSTAT_ipi:
1489         set_port_pending(s, send->port);
1490         break;
1491 
1492     case EVTCHNSTAT_unbound:
1493         /* Xen will silently drop these */
1494         break;
1495 
1496     default:
1497         ret = -EINVAL;
1498         break;
1499     }
1500 
1501     qemu_mutex_unlock(&s->port_lock);
1502 
1503     return ret;
1504 }
1505 
1506 int xen_evtchn_set_port(uint16_t port)
1507 {
1508     XenEvtchnState *s = xen_evtchn_singleton;
1509     XenEvtchnPort *p;
1510     int ret = -EINVAL;
1511 
1512     if (!s) {
1513         return -ENOTSUP;
1514     }
1515 
1516     if (!valid_port(port)) {
1517         return -EINVAL;
1518     }
1519 
1520     qemu_mutex_lock(&s->port_lock);
1521 
1522     p = &s->port_table[port];
1523 
1524     /* QEMU has no business sending to anything but these */
1525     if (p->type == EVTCHNSTAT_virq ||
1526         (p->type == EVTCHNSTAT_interdomain &&
1527          (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU))) {
1528         set_port_pending(s, port);
1529         ret = 0;
1530     }
1531 
1532     qemu_mutex_unlock(&s->port_lock);
1533 
1534     return ret;
1535 }
1536 
1537 static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
1538 {
1539     uint16_t pirq;
1540 
1541     /*
1542      * Preserve the allocation strategy that Xen has. It looks like
1543      * we *never* give out PIRQs 0-15; we give out 16-nr_irqs_gsi only
1544      * to GSIs (counting up from 16), and then we count backwards from
1545      * the top for MSIs or when the GSI space is exhausted.
1546      */
1547     if (type == MAP_PIRQ_TYPE_GSI) {
1548         for (pirq = 16 ; pirq < IOAPIC_NUM_PINS; pirq++) {
1549             if (pirq_inuse(s, pirq)) {
1550                 continue;
1551             }
1552 
1553             /* Found it */
1554             goto found;
1555         }
1556     }
1557     for (pirq = s->nr_pirqs - 1; pirq >= IOAPIC_NUM_PINS; pirq--) {
1558         /* Skip whole words at a time when they're full */
1559         if (pirq_inuse_word(s, pirq) == UINT64_MAX) {
1560             pirq &= ~63ULL;
1561             continue;
1562         }
1563         if (pirq_inuse(s, pirq)) {
1564             continue;
1565         }
1566 
1567         goto found;
1568     }
1569     return -ENOSPC;
1570 
1571  found:
1572     pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1573     if (gsi >= 0) {
1574         assert(gsi < IOAPIC_NUM_PINS);
1575         s->gsi_pirq[gsi] = pirq;
1576     }
1577     s->pirq[pirq].gsi = gsi;
1578     return pirq;
1579 }
1580 
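/*
 * Intercept a GSI changing level. If the guest has mapped that GSI to a
 * PIRQ, latch the level in pirq_gsi_set (so it can be re-asserted on EOI)
 * and set the bound event channel pending instead; returning true tells
 * the caller not to deliver the GSI through the normal irqchip path.
 */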
1581 bool xen_evtchn_set_gsi(int gsi, int level)
1582 {
1583     XenEvtchnState *s = xen_evtchn_singleton;
1584     int pirq;
1585 
1586     assert(qemu_mutex_iothread_locked());
1587 
1588     if (!s || gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1589         return false;
1590     }
1591 
1592     /*
1593      * Check that it *isn't* the event channel GSI, and thus
1594      * that we are not recursing and it's safe to take s->port_lock.
1595      *
1596      * Locking aside, it's perfectly sane to bail out early for that
1597      * special case, as it would make no sense for the event channel
1598      * GSI to be routed back to event channels, when the delivery
1599      * method is to raise the GSI... that recursion wouldn't *just*
1600      * be a locking issue.
1601      */
1602     if (gsi && gsi == s->callback_gsi) {
1603         return false;
1604     }
1605 
1606     QEMU_LOCK_GUARD(&s->port_lock);
1607 
1608     pirq = s->gsi_pirq[gsi];
1609     if (!pirq) {
1610         return false;
1611     }
1612 
1613     if (level) {
1614         int port = s->pirq[pirq].port;
1615 
1616         s->pirq_gsi_set |= (1U << gsi);
1617         if (port) {
1618             set_port_pending(s, port);
1619         }
1620     } else {
1621         s->pirq_gsi_set &= ~(1U << gsi);
1622     }
1623     return true;
1624 }
1625 
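/*
 * Recover the PIRQ# from an MSI message in Xen's encoding: the vector (low
 * byte of the data) must be zero, address bits 12-19 carry the low 8 bits
 * of the PIRQ#, and the upper address dword carries the remaining bits in
 * their corresponding positions. For example, PIRQ 0x123 would be encoded
 * as address 0x0000010000023000. Returns 0 if the message isn't in that
 * format.
 */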
1626 static uint32_t msi_pirq_target(uint64_t addr, uint32_t data)
1627 {
1628     /* The vector (in low 8 bits of data) must be zero */
1629     if (data & 0xff) {
1630         return 0;
1631     }
1632 
1633     uint32_t pirq = (addr & 0xff000) >> 12;
1634     pirq |= (addr >> 32) & 0xffffff00;
1635 
1636     return pirq;
1637 }
1638 
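/*
 * Drop the recorded MSI association for every PIRQ that points at this
 * device (optionally only those for a given vector), except except_pirq.
 * Used both when a vector is reprogrammed and when the device goes away.
 */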
1639 static void do_remove_pci_vector(XenEvtchnState *s, PCIDevice *dev, int vector,
1640                                  int except_pirq)
1641 {
1642     uint32_t pirq;
1643 
1644     for (pirq = 0; pirq < s->nr_pirqs; pirq++) {
1645         /*
1646          * We could be cleverer here, but it isn't really a fast path, and
1647          * this trivial optimisation is enough to let us skip the big gap
1648          * in the middle a bit quicker (in terms of both loop iterations,
1649          * and cache lines).
1650          */
1651         if (!(pirq & 63) && !(pirq_inuse_word(s, pirq))) {
1652             pirq += 63; /* the loop increment takes us to the next word */
1653             continue;
1654         }
1655         if (except_pirq && pirq == except_pirq) {
1656             continue;
1657         }
1658         if (s->pirq[pirq].dev != dev) {
1659             continue;
1660         }
1661         if (vector != -1 && s->pirq[pirq].vector != vector) {
1662             continue;
1663         }
1664 
1665         /* It could theoretically be bound to a port already, but that is OK. */
1666         s->pirq[pirq].dev = dev;
1667         s->pirq[pirq].gsi = IRQ_UNBOUND;
1668         s->pirq[pirq].is_msix = false;
1669         s->pirq[pirq].vector = 0;
1670         s->pirq[pirq].is_masked = false;
1671         s->pirq[pirq].is_translated = false;
1672     }
1673 }
1674 
1675 void xen_evtchn_remove_pci_device(PCIDevice *dev)
1676 {
1677     XenEvtchnState *s = xen_evtchn_singleton;
1678 
1679     if (!s) {
1680         return;
1681     }
1682 
1683     QEMU_LOCK_GUARD(&s->port_lock);
1684     do_remove_pci_vector(s, dev, -1, 0);
1685 }
1686 
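/*
 * Called from the PCI code when an MSI or MSI-X vector is programmed. If
 * the message is in Xen's PIRQ format and names an allocated PIRQ that is
 * unbound or already in IRQ_MSI_EMU state, remember the device/vector so
 * that the vector can later be translated or delivered as an event channel.
 */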
1687 void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
1688                           uint64_t addr, uint32_t data, bool is_masked)
1689 {
1690     XenEvtchnState *s = xen_evtchn_singleton;
1691     uint32_t pirq;
1692 
1693     if (!s) {
1694         return;
1695     }
1696 
1697     assert(qemu_mutex_iothread_locked());
1698 
1699     pirq = msi_pirq_target(addr, data);
1700 
1701     /*
1702      * The PIRQ# must be sane, and there must be an allocated PIRQ in
1703      * IRQ_UNBOUND or IRQ_MSI_EMU state to match it.
1704      */
1705     if (!pirq || pirq >= s->nr_pirqs || !pirq_inuse(s, pirq) ||
1706         (s->pirq[pirq].gsi != IRQ_UNBOUND &&
1707          s->pirq[pirq].gsi != IRQ_MSI_EMU)) {
1708         pirq = 0;
1709     }
1710 
1711     if (pirq) {
1712         s->pirq[pirq].dev = dev;
1713         s->pirq[pirq].gsi = IRQ_MSI_EMU;
1714         s->pirq[pirq].is_msix = is_msix;
1715         s->pirq[pirq].vector = vector;
1716         s->pirq[pirq].is_masked = is_masked;
1717     }
1718 
1719     /* Remove any (other) entries for this {device, vector} */
1720     do_remove_pci_vector(s, dev, vector, pirq);
1721 }
1722 
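/*
 * Used when KVM MSI routes are set up: if this MSI message is really a Xen
 * PIRQ bound to an event channel, rewrite the route as a
 * KVM_IRQ_ROUTING_XEN_EVTCHN entry targeting that port. Returns 0 if
 * handled, 1 if the message isn't a PIRQ at all, or a negative errno.
 */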
1723 int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
1724                                   uint64_t address, uint32_t data)
1725 {
1726     XenEvtchnState *s = xen_evtchn_singleton;
1727     uint32_t pirq, port;
1728     CPUState *cpu;
1729 
1730     if (!s) {
1731         return 1; /* Not a PIRQ */
1732     }
1733 
1734     assert(qemu_mutex_iothread_locked());
1735 
1736     pirq = msi_pirq_target(address, data);
1737     if (!pirq || pirq >= s->nr_pirqs) {
1738         return 1; /* Not a PIRQ */
1739     }
1740 
1741     if (!kvm_xen_has_cap(EVTCHN_2LEVEL)) {
1742         return -ENOTSUP;
1743     }
1744 
1745     if (s->pirq[pirq].gsi != IRQ_MSI_EMU) {
1746         return -EINVAL;
1747     }
1748 
1749     /* Remember that KVM tried to translate this. It might need to try again. */
1750     s->pirq[pirq].is_translated = true;
1751 
1752     QEMU_LOCK_GUARD(&s->port_lock);
1753 
1754     port = s->pirq[pirq].port;
1755     if (!valid_port(port)) {
1756         return -EINVAL;
1757     }
1758 
1759     cpu = qemu_get_cpu(s->port_table[port].vcpu);
1760     if (!cpu) {
1761         return -EINVAL;
1762     }
1763 
1764     route->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
1765     route->u.xen_evtchn.port = port;
1766     route->u.xen_evtchn.vcpu = kvm_arch_vcpu_id(cpu);
1767     route->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
1768 
1769     return 0; /* Handled */
1770 }
1771 
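/*
 * If an MSI write encodes a PIRQ that is bound to an event channel port,
 * set that port pending instead of delivering it as an interrupt. Returns
 * true if the write was consumed as a PIRQ.
 */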
1772 bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
1773 {
1774     XenEvtchnState *s = xen_evtchn_singleton;
1775     uint32_t pirq, port;
1776 
1777     if (!s) {
1778         return false;
1779     }
1780 
1781     assert(qemu_mutex_iothread_locked());
1782 
1783     pirq = msi_pirq_target(address, data);
1784     if (!pirq || pirq >= s->nr_pirqs) {
1785         return false;
1786     }
1787 
1788     QEMU_LOCK_GUARD(&s->port_lock);
1789 
1790     port = s->pirq[pirq].port;
1791     if (!valid_port(port)) {
1792         return false;
1793     }
1794 
1795     set_port_pending(s, port);
1796     return true;
1797 }
1798 
1799 int xen_physdev_map_pirq(struct physdev_map_pirq *map)
1800 {
1801     XenEvtchnState *s = xen_evtchn_singleton;
1802     int pirq = map->pirq;
1803     int gsi = map->index;
1804 
1805     if (!s) {
1806         return -ENOTSUP;
1807     }
1808 
1809     QEMU_IOTHREAD_LOCK_GUARD();
1810     QEMU_LOCK_GUARD(&s->port_lock);
1811 
1812     if (map->domid != DOMID_SELF && map->domid != xen_domid) {
1813         return -EPERM;
1814     }
1815     if (map->type != MAP_PIRQ_TYPE_GSI) {
1816         return -EINVAL;
1817     }
1818     if (gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1819         return -EINVAL;
1820     }
1821 
1822     if (pirq < 0) {
1823         pirq = allocate_pirq(s, map->type, gsi);
1824         if (pirq < 0) {
1825             return pirq;
1826         }
1827         map->pirq = pirq;
1828     } else if (pirq >= s->nr_pirqs) {
1829         return -EINVAL;
1830     } else {
1831         /*
1832          * User specified a valid-looking PIRQ#. Allow it if it is
1833          * allocated and not yet bound, or if it is unallocated
1834          */
1835         if (pirq_inuse(s, pirq)) {
1836             if (s->pirq[pirq].gsi != IRQ_UNBOUND) {
1837                 return -EBUSY;
1838             }
1839         } else {
1840             /* If it was unused, mark it used now. */
1841             pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1842         }
1843         /* Set the mapping in both directions. */
1844         s->pirq[pirq].gsi = gsi;
1845         s->gsi_pirq[gsi] = pirq;
1846     }
1847 
1848     trace_kvm_xen_map_pirq(pirq, gsi);
1849     return 0;
1850 }
1851 
1852 int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
1853 {
1854     XenEvtchnState *s = xen_evtchn_singleton;
1855     int pirq = unmap->pirq;
1856     int gsi;
1857 
1858     if (!s) {
1859         return -ENOTSUP;
1860     }
1861 
1862     if (unmap->domid != DOMID_SELF && unmap->domid != xen_domid) {
1863         return -EPERM;
1864     }
1865     if (pirq < 0 || pirq >= s->nr_pirqs) {
1866         return -EINVAL;
1867     }
1868 
1869     QEMU_IOTHREAD_LOCK_GUARD();
1870     qemu_mutex_lock(&s->port_lock);
1871 
1872     if (!pirq_inuse(s, pirq)) {
1873         qemu_mutex_unlock(&s->port_lock);
1874         return -ENOENT;
1875     }
1876 
1877     gsi = s->pirq[pirq].gsi;
1878 
1879     /* We can only unmap GSI PIRQs */
1880     if (gsi < 0) {
1881         qemu_mutex_unlock(&s->port_lock);
1882         return -EINVAL;
1883     }
1884 
1885     s->gsi_pirq[gsi] = 0;
1886     s->pirq[pirq].gsi = IRQ_UNBOUND; /* Doesn't actually matter because: */
1887     pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);
1888 
1889     trace_kvm_xen_unmap_pirq(pirq, gsi);
1890     qemu_mutex_unlock(&s->port_lock);
1891 
1892     if (gsi == IRQ_MSI_EMU) {
1893         kvm_update_msi_routes_all(NULL, true, 0, 0);
1894     }
1895 
1896     return 0;
1897 }
1898 
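     /*
      * PHYSDEVOP_eoi: the guest signals end-of-interrupt for a PIRQ. If the
      * underlying level-triggered GSI is still asserted (tracked in
      * s->pirq_gsi_set), the bound event channel port is set pending again.
      */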
1899 int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
1900 {
1901     XenEvtchnState *s = xen_evtchn_singleton;
1902     int pirq = eoi->irq;
1903     int gsi;
1904 
1905     if (!s) {
1906         return -ENOTSUP;
1907     }
1908 
1909     QEMU_IOTHREAD_LOCK_GUARD();
1910     QEMU_LOCK_GUARD(&s->port_lock);
1911 
         if (pirq < 0 || pirq >= s->nr_pirqs) {
             return -EINVAL;
         }

1912     if (!pirq_inuse(s, pirq)) {
1913         return -ENOENT;
1914     }
1915 
1916     gsi = s->pirq[pirq].gsi;
1917     if (gsi < 0) {
1918         return -EINVAL;
1919     }
1920 
1921     /* Reassert a level IRQ if needed */
1922     if (s->pirq_gsi_set & (1U << gsi)) {
1923         int port = s->pirq[pirq].port;
1924         if (port) {
1925             set_port_pending(s, port);
1926         }
1927     }
1928 
1929     return 0;
1930 }
1931 
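     /*
      * PHYSDEVOP_irq_status_query: report whether a PIRQ needs an explicit
      * EOI. GSI-mapped PIRQs do, so the re-assert logic above can run;
      * anything else reports no flags.
      */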
1932 int xen_physdev_query_pirq(struct physdev_irq_status_query *query)
1933 {
1934     XenEvtchnState *s = xen_evtchn_singleton;
1935     int pirq = query->irq;
1936 
1937     if (!s) {
1938         return -ENOTSUP;
1939     }
1940 
1941     QEMU_IOTHREAD_LOCK_GUARD();
1942     QEMU_LOCK_GUARD(&s->port_lock);
1943 
         if (pirq < 0 || pirq >= s->nr_pirqs) {
             return -EINVAL;
         }

1944     if (!pirq_inuse(s, pirq)) {
1945         return -ENOENT;
1946     }
1947 
1948     if (s->pirq[pirq].gsi >= 0) {
1949         query->flags = XENIRQSTAT_needs_eoi;
1950     } else {
1951         query->flags = 0;
1952     }
1953 
1954     return 0;
1955 }
1956 
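     /*
      * PHYSDEVOP_get_free_pirq: allocate a free PIRQ of the requested type,
      * not yet bound to any GSI, and return its number to the guest.
      */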
1957 int xen_physdev_get_free_pirq(struct physdev_get_free_pirq *get)
1958 {
1959     XenEvtchnState *s = xen_evtchn_singleton;
1960     int pirq;
1961 
1962     if (!s) {
1963         return -ENOTSUP;
1964     }
1965 
1966     QEMU_LOCK_GUARD(&s->port_lock);
1967 
1968     pirq = allocate_pirq(s, get->type, IRQ_UNBOUND);
1969     if (pirq < 0) {
1970         return pirq;
1971     }
1972 
1973     get->pirq = pirq;
1974     trace_kvm_xen_get_free_pirq(pirq, get->type);
1975     return 0;
1976 }
1977 
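     /*
      * Backend ("be") event channel API, mirroring the libxenevtchn calls so
      * that in-QEMU backends (e.g. the emulated xenstore) can bind to guest
      * ports without a real /dev/xen/evtchn. Each handle owns an eventfd
      * which is signalled when the bound guest port fires; callers poll the
      * fd returned by xen_be_evtchn_fd().
      *
      * A minimal usage sketch (hypothetical caller, error handling omitted):
      *
      *   struct xenevtchn_handle *xc = xen_be_evtchn_open();
      *   int be_port = xen_be_evtchn_bind_interdomain(xc, xen_domid, guest_port);
      *   ... wait for xen_be_evtchn_fd(xc) to poll readable, then ...
      *   int port = xen_be_evtchn_pending(xc);
      *   xen_be_evtchn_unmask(xc, port);
      *   xen_be_evtchn_notify(xc, be_port);    (raises the guest's end)
      *   xen_be_evtchn_close(xc);
      */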
1978 struct xenevtchn_handle *xen_be_evtchn_open(void)
1979 {
1980     struct xenevtchn_handle *xc = g_new0(struct xenevtchn_handle, 1);
1981 
1982     xc->fd = eventfd(0, EFD_CLOEXEC);
1983     if (xc->fd < 0) {
1984         g_free(xc);
1985         return NULL;
1986     }
1987 
1988     return xc;
1989 }
1990 
1991 static int find_be_port(XenEvtchnState *s, struct xenevtchn_handle *xc)
1992 {
1993     int i;
1994 
1995     for (i = 1; i < EVTCHN_2L_NR_CHANNELS; i++) {
1996         if (!s->be_handles[i]) {
1997             s->be_handles[i] = xc;
1998             xc->be_port = i;
1999             return i;
2000         }
2001     }
2002     return 0;
2003 }
2004 
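     /*
      * Bind a backend handle to a guest port. The guest must already have an
      * unbound port aimed at QEMU; an interdomain port that was bound to QEMU
      * before migration may be rebound, keeping its backend port number if
      * that is still free. With the EVTCHN_SEND capability the eventfd is
      * also handed to the kernel, presumably so that guest notifications on
      * this port are signalled without bouncing through userspace.
      */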
2005 int xen_be_evtchn_bind_interdomain(struct xenevtchn_handle *xc, uint32_t domid,
2006                                    evtchn_port_t guest_port)
2007 {
2008     XenEvtchnState *s = xen_evtchn_singleton;
2009     XenEvtchnPort *gp;
2010     uint16_t be_port = 0;
2011     int ret;
2012 
2013     if (!s) {
2014         return -ENOTSUP;
2015     }
2016 
2017     if (!xc) {
2018         return -EFAULT;
2019     }
2020 
2021     if (domid != xen_domid) {
2022         return -ESRCH;
2023     }
2024 
2025     if (!valid_port(guest_port)) {
2026         return -EINVAL;
2027     }
2028 
2029     qemu_mutex_lock(&s->port_lock);
2030 
2031     /* The guest has to have an unbound port waiting for us to bind */
2032     gp = &s->port_table[guest_port];
2033 
2034     switch (gp->type) {
2035     case EVTCHNSTAT_interdomain:
2036         /* Allow rebinding after migration, preserve port # if possible */
2037         be_port = gp->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
2038         assert(be_port != 0);
2039         if (!s->be_handles[be_port]) {
2040             s->be_handles[be_port] = xc;
2041             xc->guest_port = guest_port;
2042             ret = xc->be_port = be_port;
2043             if (kvm_xen_has_cap(EVTCHN_SEND)) {
2044                 assign_kernel_eventfd(gp->type, guest_port, xc->fd);
2045             }
2046             break;
2047         }
2048         /* fall through */
2049 
2050     case EVTCHNSTAT_unbound:
2051         be_port = find_be_port(s, xc);
2052         if (!be_port) {
2053             ret = -ENOSPC;
2054             goto out;
2055         }
2056 
2057         gp->type = EVTCHNSTAT_interdomain;
2058         gp->type_val = be_port | PORT_INFO_TYPEVAL_REMOTE_QEMU;
2059         xc->guest_port = guest_port;
2060         if (kvm_xen_has_cap(EVTCHN_SEND)) {
2061             assign_kernel_eventfd(gp->type, guest_port, xc->fd);
2062         }
2063         ret = be_port;
2064         break;
2065 
2066     default:
2067         ret = -EINVAL;
2068         break;
2069     }
2070 
2071  out:
2072     qemu_mutex_unlock(&s->port_lock);
2073 
2074     return ret;
2075 }
2076 
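     /*
      * Undo xen_be_evtchn_bind_interdomain(): the guest port reverts to the
      * unbound state (still aimed at QEMU) and the backend port is released.
      * A port argument of zero means "whatever this handle is bound to".
      */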
2077 int xen_be_evtchn_unbind(struct xenevtchn_handle *xc, evtchn_port_t port)
2078 {
2079     XenEvtchnState *s = xen_evtchn_singleton;
2080     int ret;
2081 
2082     if (!s) {
2083         return -ENOTSUP;
2084     }
2085 
2086     if (!xc) {
2087         return -EFAULT;
2088     }
2089 
2090     qemu_mutex_lock(&s->port_lock);
2091 
2092     if (port && port != xc->be_port) {
2093         ret = -EINVAL;
2094         goto out;
2095     }
2096 
2097     if (xc->guest_port) {
2098         XenEvtchnPort *gp = &s->port_table[xc->guest_port];
2099 
2100         /* This should never *not* be true */
2101         if (gp->type == EVTCHNSTAT_interdomain) {
2102             gp->type = EVTCHNSTAT_unbound;
2103             gp->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
2104         }
2105 
2106         if (kvm_xen_has_cap(EVTCHN_SEND)) {
2107             deassign_kernel_port(xc->guest_port);
2108         }
2109         xc->guest_port = 0;
2110     }
2111 
2112     s->be_handles[xc->be_port] = NULL;
2113     xc->be_port = 0;
2114     ret = 0;
2115  out:
2116     qemu_mutex_unlock(&s->port_lock);
2117     return ret;
2118 }
2119 
2120 int xen_be_evtchn_close(struct xenevtchn_handle *xc)
2121 {
2122     if (!xc) {
2123         return -EFAULT;
2124     }
2125 
2126     xen_be_evtchn_unbind(xc, 0);
2127 
2128     close(xc->fd);
2129     g_free(xc);
2130     return 0;
2131 }
2132 
2133 int xen_be_evtchn_fd(struct xenevtchn_handle *xc)
2134 {
2135     if (!xc) {
2136         return -1;
2137     }
2138     return xc->fd;
2139 }
2140 
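     /*
      * Raise an event on the guest port this handle is bound to; the backend
      * equivalent of the guest's EVTCHNOP_send.
      */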
2141 int xen_be_evtchn_notify(struct xenevtchn_handle *xc, evtchn_port_t port)
2142 {
2143     XenEvtchnState *s = xen_evtchn_singleton;
2144     int ret;
2145 
2146     if (!s) {
2147         return -ENOTSUP;
2148     }
2149 
2150     if (!xc) {
2151         return -EFAULT;
2152     }
2153 
2154     qemu_mutex_lock(&s->port_lock);
2155 
2156     if (xc->guest_port) {
2157         set_port_pending(s, xc->guest_port);
2158         ret = 0;
2159     } else {
2160         ret = -ENOTCONN;
2161     }
2162 
2163     qemu_mutex_unlock(&s->port_lock);
2164 
2165     return ret;
2166 }
2167 
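     /*
      * Report which port (if any) is pending on this handle by draining its
      * eventfd. The eventfd is not opened non-blocking, so this is presumably
      * only called once the fd has polled readable.
      */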
2168 int xen_be_evtchn_pending(struct xenevtchn_handle *xc)
2169 {
2170     uint64_t val;
2171 
2172     if (!xc) {
2173         return -EFAULT;
2174     }
2175 
2176     if (!xc->be_port) {
2177         return 0;
2178     }
2179 
2180     if (eventfd_read(xc->fd, &val)) {
2181         return -errno;
2182     }
2183 
2184     return val ? xc->be_port : 0;
2185 }
2186 
2187 int xen_be_evtchn_unmask(struct xenevtchn_handle *xc, evtchn_port_t port)
2188 {
2189     if (!xc) {
2190         return -EFAULT;
2191     }
2192 
2193     if (xc->be_port != port) {
2194         return -EINVAL;
2195     }
2196 
2197     /*
2198      * We don't actually do anything to unmask it; the event was already
2199      * consumed in xen_be_evtchn_pending().
2200      */
2201     return 0;
2202 }
2203 
2204 int xen_be_evtchn_get_guest_port(struct xenevtchn_handle *xc)
2205 {
2206     return xc->guest_port;
2207 }
2208 
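     /*
      * 'xen-event-list' QMP command: walk the port table and report every
      * open port, reading the pending/masked bits directly from the guest's
      * shared info page (using the 32-bit compat layout when the guest is
      * not in long mode).
      */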
2209 EvtchnInfoList *qmp_xen_event_list(Error **errp)
2210 {
2211     XenEvtchnState *s = xen_evtchn_singleton;
2212     EvtchnInfoList *head = NULL, **tail = &head;
2213     void *shinfo, *pending, *mask;
2214     int i;
2215 
2216     if (!s) {
2217         error_setg(errp, "Xen event channel emulation not enabled");
2218         return NULL;
2219     }
2220 
2221     shinfo = xen_overlay_get_shinfo_ptr();
2222     if (!shinfo) {
2223         error_setg(errp, "Xen shared info page not allocated");
2224         return NULL;
2225     }
2226 
2227     if (xen_is_long_mode()) {
2228         pending = shinfo + offsetof(struct shared_info, evtchn_pending);
2229         mask = shinfo + offsetof(struct shared_info, evtchn_mask);
2230     } else {
2231         pending = shinfo + offsetof(struct compat_shared_info, evtchn_pending);
2232         mask = shinfo + offsetof(struct compat_shared_info, evtchn_mask);
2233     }
2234 
2235     QEMU_LOCK_GUARD(&s->port_lock);
2236 
2237     for (i = 0; i < s->nr_ports; i++) {
2238         XenEvtchnPort *p = &s->port_table[i];
2239         EvtchnInfo *info;
2240 
2241         if (p->type == EVTCHNSTAT_closed) {
2242             continue;
2243         }
2244 
2245         info = g_new0(EvtchnInfo, 1);
2246 
2247         info->port = i;
2248         qemu_build_assert(EVTCHN_PORT_TYPE_CLOSED == EVTCHNSTAT_closed);
2249         qemu_build_assert(EVTCHN_PORT_TYPE_UNBOUND == EVTCHNSTAT_unbound);
2250         qemu_build_assert(EVTCHN_PORT_TYPE_INTERDOMAIN == EVTCHNSTAT_interdomain);
2251         qemu_build_assert(EVTCHN_PORT_TYPE_PIRQ == EVTCHNSTAT_pirq);
2252         qemu_build_assert(EVTCHN_PORT_TYPE_VIRQ == EVTCHNSTAT_virq);
2253         qemu_build_assert(EVTCHN_PORT_TYPE_IPI == EVTCHNSTAT_ipi);
2254 
2255         info->type = p->type;
2256         if (p->type == EVTCHNSTAT_interdomain) {
2257             info->remote_domain = g_strdup((p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) ?
2258                                            "qemu" : "loopback");
2259             info->target = p->type_val & PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
2260         } else {
2261             info->target = p->type_val;
2262         }
2263         info->vcpu = p->vcpu;
2264         info->pending = test_bit(i, pending);
2265         info->masked = test_bit(i, mask);
2266 
2267         QAPI_LIST_APPEND(tail, info);
2268     }
2269 
2270     return head;
2271 }
2272 
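     /*
      * 'xen-event-inject' QMP command: make the given port pending, as if
      * the guest or a backend had sent an event to it.
      */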
2273 void qmp_xen_event_inject(uint32_t port, Error **errp)
2274 {
2275     XenEvtchnState *s = xen_evtchn_singleton;
2276 
2277     if (!s) {
2278         error_setg(errp, "Xen event channel emulation not enabled");
2279         return;
2280     }
2281 
2282     if (!valid_port(port)) {
2283         error_setg(errp, "Invalid port %u", port);
             return;
2284     }
2285 
2286     QEMU_LOCK_GUARD(&s->port_lock);
2287 
2288     if (set_port_pending(s, port)) {
2289         error_setg(errp, "Failed to set port %u", port);
2290         return;
2291     }
2292 }
2293 
2294 void hmp_xen_event_list(Monitor *mon, const QDict *qdict)
2295 {
2296     EvtchnInfoList *iter, *info_list;
2297     Error *err = NULL;
2298 
2299     info_list = qmp_xen_event_list(&err);
2300     if (err) {
2301         hmp_handle_error(mon, err);
2302         return;
2303     }
2304 
2305     for (iter = info_list; iter; iter = iter->next) {
2306         EvtchnInfo *info = iter->value;
2307 
2308         monitor_printf(mon, "port %4u: vcpu: %d %s", info->port, info->vcpu,
2309                        EvtchnPortType_str(info->type));
2310         if (info->type != EVTCHN_PORT_TYPE_IPI) {
2311             monitor_printf(mon, "(");
2312             if (info->remote_domain) {
2313                 monitor_printf(mon, "%s:", info->remote_domain);
2314             }
2315             monitor_printf(mon, "%d)", info->target);
2316         }
2317         if (info->pending) {
2318             monitor_printf(mon, " PENDING");
2319         }
2320         if (info->masked) {
2321             monitor_printf(mon, " MASKED");
2322         }
2323         monitor_printf(mon, "\n");
2324     }
2325 
2326     qapi_free_EvtchnInfoList(info_list);
2327 }
2328 
2329 void hmp_xen_event_inject(Monitor *mon, const QDict *qdict)
2330 {
2331     int port = qdict_get_int(qdict, "port");
2332     Error *err = NULL;
2333 
2334     qmp_xen_event_inject(port, &err);
2335     if (err) {
2336         hmp_handle_error(mon, err);
2337     } else {
2338         monitor_printf(mon, "Delivered port %d\n", port);
2339     }
2340 }
2341 
2342