xref: /qemu/hw/i386/kvm/xen_evtchn.c (revision 95a40c44)
1 /*
2  * QEMU Xen emulation: Event channel support
3  *
4  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5  *
6  * Authors: David Woodhouse <dwmw2@infradead.org>
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
9  * See the COPYING file in the top-level directory.
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/host-utils.h"
14 #include "qemu/module.h"
15 #include "qemu/lockable.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/log.h"
18 #include "qemu/error-report.h"
19 #include "monitor/monitor.h"
20 #include "monitor/hmp.h"
21 #include "qapi/error.h"
22 #include "qapi/qapi-commands-misc-target.h"
23 #include "qapi/qmp/qdict.h"
24 #include "qom/object.h"
25 #include "exec/target_page.h"
26 #include "exec/address-spaces.h"
27 #include "migration/vmstate.h"
28 #include "trace.h"
29 
30 #include "hw/sysbus.h"
31 #include "hw/xen/xen.h"
32 #include "hw/i386/x86.h"
33 #include "hw/i386/pc.h"
34 #include "hw/pci/pci.h"
35 #include "hw/pci/msi.h"
36 #include "hw/pci/msix.h"
37 #include "hw/irq.h"
38 #include "hw/xen/xen_backend_ops.h"
39 
40 #include "xen_evtchn.h"
41 #include "xen_overlay.h"
42 #include "xen_xenstore.h"
43 
44 #include "sysemu/kvm.h"
45 #include "sysemu/kvm_xen.h"
46 #include <linux/kvm.h>
47 #include <sys/eventfd.h>
48 
49 #include "hw/xen/interface/memory.h"
50 #include "hw/xen/interface/hvm/params.h"
51 
52 /* XX: For kvm_update_msi_routes_all() */
53 #include "target/i386/kvm/kvm_i386.h"
54 
55 #define TYPE_XEN_EVTCHN "xen-evtchn"
56 OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN)
57 
58 typedef struct XenEvtchnPort {
59     uint32_t vcpu;      /* Xen/ACPI vcpu_id */
60     uint16_t type;      /* EVTCHNSTAT_xxxx */
61     uint16_t type_val;  /* pirq# / virq# / remote port according to type */
62 } XenEvtchnPort;
63 
64 /* 32-bit compatibility definitions, also used natively in 32-bit build */
65 struct compat_arch_vcpu_info {
66     unsigned int cr2;
67     unsigned int pad[5];
68 };
69 
70 struct compat_vcpu_info {
71     uint8_t evtchn_upcall_pending;
72     uint8_t evtchn_upcall_mask;
73     uint16_t pad;
74     uint32_t evtchn_pending_sel;
75     struct compat_arch_vcpu_info arch;
76     struct vcpu_time_info time;
77 }; /* 64 bytes (x86) */
78 
79 struct compat_arch_shared_info {
80     unsigned int max_pfn;
81     unsigned int pfn_to_mfn_frame_list_list;
82     unsigned int nmi_reason;
83     unsigned int p2m_cr3;
84     unsigned int p2m_vaddr;
85     unsigned int p2m_generation;
86     uint32_t wc_sec_hi;
87 };
88 
89 struct compat_shared_info {
90     struct compat_vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
91     uint32_t evtchn_pending[32];
92     uint32_t evtchn_mask[32];
93     uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
94     uint32_t wc_sec;
95     uint32_t wc_nsec;
96     struct compat_arch_shared_info arch;
97 };
98 
99 #define COMPAT_EVTCHN_2L_NR_CHANNELS            1024
100 
101 /* Local private implementation of struct xenevtchn_handle */
102 struct xenevtchn_handle {
103     evtchn_port_t be_port;
104     evtchn_port_t guest_port; /* Or zero for unbound */
105     int fd;
106 };
107 
108 /*
109  * For unbound/interdomain ports there are only two possible remote
110  * domains: self and QEMU. Use a single high bit in type_val for that,
111  * and the low bits for the remote port number (or 0 for unbound).
112  */
113 #define PORT_INFO_TYPEVAL_REMOTE_QEMU           0x8000
114 #define PORT_INFO_TYPEVAL_REMOTE_PORT_MASK      0x7FFF
115 
116 /*
117  * These 'emuirq' values are used by Xen in the LM stream... and yes, I am
118  * insane enough to think about guest-transparent live migration from actual
119  * Xen to QEMU, and ensuring that we can convert/consume the stream.
120  */
121 #define IRQ_UNBOUND -1
122 #define IRQ_PT -2
123 #define IRQ_MSI_EMU -3
124 
125 
126 struct pirq_info {
127     int gsi;
128     uint16_t port;
129     PCIDevice *dev;
130     int vector;
131     bool is_msix;
132     bool is_masked;
133     bool is_translated;
134 };
135 
136 struct XenEvtchnState {
137     /*< private >*/
138     SysBusDevice busdev;
139     /*< public >*/
140 
141     uint64_t callback_param;
142     bool evtchn_in_kernel;
143     uint32_t callback_gsi;
144 
145     QEMUBH *gsi_bh;
146 
147     QemuMutex port_lock;
148     uint32_t nr_ports;
149     XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS];
150 
151     /* Connected to the system GSIs for raising callback as GSI / INTx */
152     unsigned int nr_callback_gsis;
153     qemu_irq *callback_gsis;
154 
155     struct xenevtchn_handle *be_handles[EVTCHN_2L_NR_CHANNELS];
156 
157     uint32_t nr_pirqs;
158 
159     /* Bitmap of allocated PIRQs (serialized) */
160     uint16_t nr_pirq_inuse_words;
161     uint64_t *pirq_inuse_bitmap;
162 
163     /* GSI → PIRQ mapping (serialized) */
164     uint16_t gsi_pirq[IOAPIC_NUM_PINS];
165 
166     /* Per-GSI assertion state (serialized) */
167     uint32_t pirq_gsi_set;
168 
169     /* Per-PIRQ information (rebuilt on migration, protected by BQL) */
170     struct pirq_info *pirq;
171 };
172 
173 #define pirq_inuse_word(s, pirq) (s->pirq_inuse_bitmap[((pirq) / 64)])
174 #define pirq_inuse_bit(pirq) (1ULL << ((pirq) & 63))
175 
176 #define pirq_inuse(s, pirq) (pirq_inuse_word(s, pirq) & pirq_inuse_bit(pirq))
177 
178 struct XenEvtchnState *xen_evtchn_singleton;
179 
180 /* Top bits of callback_param are the type (HVM_PARAM_CALLBACK_TYPE_xxx) */
181 #define CALLBACK_VIA_TYPE_SHIFT 56
182 
183 static void unbind_backend_ports(XenEvtchnState *s);
184 
185 static int xen_evtchn_pre_load(void *opaque)
186 {
187     XenEvtchnState *s = opaque;
188 
189     /* Unbind all the backend-side ports; they need to rebind */
190     unbind_backend_ports(s);
191 
192     /* It'll be leaked otherwise. */
193     g_free(s->pirq_inuse_bitmap);
194     s->pirq_inuse_bitmap = NULL;
195 
196     return 0;
197 }
198 
199 static int xen_evtchn_post_load(void *opaque, int version_id)
200 {
201     XenEvtchnState *s = opaque;
202     uint32_t i;
203 
204     if (s->callback_param) {
205         xen_evtchn_set_callback_param(s->callback_param);
206     }
207 
208     /* Rebuild s->pirq[].port mapping */
209     for (i = 0; i < s->nr_ports; i++) {
210         XenEvtchnPort *p = &s->port_table[i];
211 
212         if (p->type == EVTCHNSTAT_pirq) {
213             assert(p->type_val);
214             assert(p->type_val < s->nr_pirqs);
215 
216             /*
217              * Set the gsi to IRQ_UNBOUND; it may be changed to an actual
218              * GSI# below, or to IRQ_MSI_EMU when the MSI table snooping
219              * catches up with it.
220              */
221             s->pirq[p->type_val].gsi = IRQ_UNBOUND;
222             s->pirq[p->type_val].port = i;
223         }
224     }
225     /* Rebuild s->pirq[].gsi mapping */
226     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
227         if (s->gsi_pirq[i]) {
228             s->pirq[s->gsi_pirq[i]].gsi = i;
229         }
230     }
231     return 0;
232 }
233 
234 static bool xen_evtchn_is_needed(void *opaque)
235 {
236     return xen_mode == XEN_EMULATE;
237 }
238 
239 static const VMStateDescription xen_evtchn_port_vmstate = {
240     .name = "xen_evtchn_port",
241     .version_id = 1,
242     .minimum_version_id = 1,
243     .fields = (VMStateField[]) {
244         VMSTATE_UINT32(vcpu, XenEvtchnPort),
245         VMSTATE_UINT16(type, XenEvtchnPort),
246         VMSTATE_UINT16(type_val, XenEvtchnPort),
247         VMSTATE_END_OF_LIST()
248     }
249 };
250 
251 static const VMStateDescription xen_evtchn_vmstate = {
252     .name = "xen_evtchn",
253     .version_id = 1,
254     .minimum_version_id = 1,
255     .needed = xen_evtchn_is_needed,
256     .pre_load = xen_evtchn_pre_load,
257     .post_load = xen_evtchn_post_load,
258     .fields = (VMStateField[]) {
259         VMSTATE_UINT64(callback_param, XenEvtchnState),
260         VMSTATE_UINT32(nr_ports, XenEvtchnState),
261         VMSTATE_STRUCT_VARRAY_UINT32(port_table, XenEvtchnState, nr_ports, 1,
262                                      xen_evtchn_port_vmstate, XenEvtchnPort),
263         VMSTATE_UINT16_ARRAY(gsi_pirq, XenEvtchnState, IOAPIC_NUM_PINS),
264         VMSTATE_VARRAY_UINT16_ALLOC(pirq_inuse_bitmap, XenEvtchnState,
265                                     nr_pirq_inuse_words, 0,
266                                     vmstate_info_uint64, uint64_t),
267         VMSTATE_UINT32(pirq_gsi_set, XenEvtchnState),
268         VMSTATE_END_OF_LIST()
269     }
270 };
271 
272 static void xen_evtchn_class_init(ObjectClass *klass, void *data)
273 {
274     DeviceClass *dc = DEVICE_CLASS(klass);
275 
276     dc->vmsd = &xen_evtchn_vmstate;
277 }
278 
279 static const TypeInfo xen_evtchn_info = {
280     .name          = TYPE_XEN_EVTCHN,
281     .parent        = TYPE_SYS_BUS_DEVICE,
282     .instance_size = sizeof(XenEvtchnState),
283     .class_init    = xen_evtchn_class_init,
284 };
285 
286 static struct evtchn_backend_ops emu_evtchn_backend_ops = {
287     .open = xen_be_evtchn_open,
288     .bind_interdomain = xen_be_evtchn_bind_interdomain,
289     .unbind = xen_be_evtchn_unbind,
290     .close = xen_be_evtchn_close,
291     .get_fd = xen_be_evtchn_fd,
292     .notify = xen_be_evtchn_notify,
293     .unmask = xen_be_evtchn_unmask,
294     .pending = xen_be_evtchn_pending,
295 };
296 
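/*
 * Bottom-half handler scheduled via s->gsi_bh from contexts which cannot take
 * the BQL directly: re-read vCPU0's evtchn_upcall_pending flag and (de)assert
 * the callback GSI to match it.
 */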
297 static void gsi_assert_bh(void *opaque)
298 {
299     struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
300     if (vi) {
301         xen_evtchn_set_callback_level(!!vi->evtchn_upcall_pending);
302     }
303 }
304 
305 void xen_evtchn_create(unsigned int nr_gsis, qemu_irq *system_gsis)
306 {
307     XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN,
308                                                         -1, NULL));
309     int i;
310 
311     xen_evtchn_singleton = s;
312 
313     qemu_mutex_init(&s->port_lock);
314     s->gsi_bh = aio_bh_new(qemu_get_aio_context(), gsi_assert_bh, s);
315 
316     /*
317      * These are the *output* GSIs from event channel support, for
318      * signalling CPU0's events via GSI or PCI INTx instead of the
319      * per-CPU vector. We create a *set* of irqs and connect one to
320      * each of the system GSIs which were passed in from the platform
321      * code, and then just trigger the right one as appropriate from
322      * xen_evtchn_set_callback_level().
323      */
324     s->nr_callback_gsis = nr_gsis;
325     s->callback_gsis = g_new0(qemu_irq, nr_gsis);
326     for (i = 0; i < nr_gsis; i++) {
327         sysbus_init_irq(SYS_BUS_DEVICE(s), &s->callback_gsis[i]);
328         sysbus_connect_irq(SYS_BUS_DEVICE(s), i, system_gsis[i]);
329     }
330 
331     /*
332      * The Xen scheme for encoding PIRQ# into an MSI message is not
333      * compatible with 32-bit MSI, as it puts the high bits of the
334      * PIRQ# into the high bits of the MSI message address, instead of
335      * using the Extended Destination ID in address bits 4-11 which
336      * perhaps would have been a better choice.
337      *
338      * To keep life simple, kvm_accel_instance_init() initialises the
339      * default to 256, which conveniently doesn't need to set anything
340      * outside the low 32 bits of the address. It can be increased by
341      * setting the xen-evtchn-max-pirq property.
342      */
343     s->nr_pirqs = kvm_xen_get_evtchn_max_pirq();
344 
345     s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
346     s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
347     s->pirq = g_new0(struct pirq_info, s->nr_pirqs);
348 
349     /* Set event channel functions for backend drivers to use */
350     xen_evtchn_ops = &emu_evtchn_backend_ops;
351 }
352 
353 static void xen_evtchn_register_types(void)
354 {
355     type_register_static(&xen_evtchn_info);
356 }
357 
358 type_init(xen_evtchn_register_types)
359 
360 static int set_callback_pci_intx(XenEvtchnState *s, uint64_t param)
361 {
362     PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
363     uint8_t pin = param & 3;
364     uint8_t devfn = (param >> 8) & 0xff;
365     uint16_t bus = (param >> 16) & 0xffff;
366     uint16_t domain = (param >> 32) & 0xffff;
367     PCIDevice *pdev;
368     PCIINTxRoute r;
369 
370     if (domain || !pcms) {
371         return 0;
372     }
373 
374     pdev = pci_find_device(pcms->bus, bus, devfn);
375     if (!pdev) {
376         return 0;
377     }
378 
379     r = pci_device_route_intx_to_irq(pdev, pin);
380     if (r.mode != PCI_INTX_ENABLED) {
381         return 0;
382     }
383 
384     /*
385      * Hm, can we be notified of INTX routing changes? Not without
386      * *owning* the device and being allowed to overwrite its own
387      * ->intx_routing_notifier, AFAICT. So let's not.
388      */
389     return r.irq;
390 }
391 
392 void xen_evtchn_set_callback_level(int level)
393 {
394     XenEvtchnState *s = xen_evtchn_singleton;
395     if (!s) {
396         return;
397     }
398 
399     /*
400      * We get to this function in a number of ways:
401      *
402      *  • From I/O context, via PV backend drivers sending a notification to
403      *    the guest.
404      *
405      *  • From guest vCPU context, via loopback interdomain event channels
406      *    (or theoretically even IPIs but guests don't use those with GSI
407      *    delivery because that's pointless. We don't want a malicious guest
408      *    to be able to trigger a deadlock though, so we can't rule it out.)
409      *
410      *  • From guest vCPU context when the HVM_PARAM_CALLBACK_IRQ is being
411      *    configured.
412      *
413      *  • From guest vCPU context in the KVM exit handler, if the upcall
414      *    pending flag has been cleared and the GSI needs to be deasserted.
415      *
416      *  • Maybe in future, in an interrupt ack/eoi notifier when the GSI has
417      *    been acked in the irqchip.
418      *
419      * Whichever context we come from, if we aren't already holding the BQL
420      * then we can't take it now, as we may already hold s->port_lock. So
421      * trigger the BH to set the IRQ for us instead of doing it immediately.
422      *
423      * In the HVM_PARAM_CALLBACK_IRQ and KVM exit handler cases, the caller
424      * will deliberately take the BQL because they want the change to take
425      * effect immediately. That just leaves interdomain loopback as the case
426      * which uses the BH.
427      */
428     if (!qemu_mutex_iothread_locked()) {
429         qemu_bh_schedule(s->gsi_bh);
430         return;
431     }
432 
433     if (s->callback_gsi && s->callback_gsi < s->nr_callback_gsis) {
434         qemu_set_irq(s->callback_gsis[s->callback_gsi], level);
435         if (level) {
436             /* Ensure the vCPU polls for deassertion */
437             kvm_xen_set_callback_asserted();
438         }
439     }
440 }
441 
442 int xen_evtchn_set_callback_param(uint64_t param)
443 {
444     XenEvtchnState *s = xen_evtchn_singleton;
445     struct kvm_xen_hvm_attr xa = {
446         .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
447         .u.vector = 0,
448     };
449     bool in_kernel = false;
450     uint32_t gsi = 0;
451     int type = param >> CALLBACK_VIA_TYPE_SHIFT;
452     int ret;
453 
454     if (!s) {
455         return -ENOTSUP;
456     }
457 
458     /*
459      * We need the BQL because set_callback_pci_intx() may call into PCI code,
460      * and because we may need to manipulate the old and new GSI levels.
461      */
462     assert(qemu_mutex_iothread_locked());
463     qemu_mutex_lock(&s->port_lock);
464 
465     switch (type) {
466     case HVM_PARAM_CALLBACK_TYPE_VECTOR: {
467         xa.u.vector = (uint8_t)param;
468 
469         ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
470         if (!ret && kvm_xen_has_cap(EVTCHN_SEND)) {
471             in_kernel = true;
472         }
473         gsi = 0;
474         break;
475     }
476 
477     case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
478         gsi = set_callback_pci_intx(s, param);
479         ret = gsi ? 0 : -EINVAL;
480         break;
481 
482     case HVM_PARAM_CALLBACK_TYPE_GSI:
483         gsi = (uint32_t)param;
484         ret = 0;
485         break;
486 
487     default:
488         /* Xen doesn't return an error even if you set something bogus */
489         ret = 0;
490         break;
491     }
492 
493     /* If the guest has set a per-vCPU callback vector, prefer that. */
494     if (gsi && kvm_xen_has_vcpu_callback_vector()) {
495         in_kernel = kvm_xen_has_cap(EVTCHN_SEND);
496         gsi = 0;
497     }
498 
499     if (!ret) {
500         /* If vector delivery was turned *off* then tell the kernel */
501         if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) ==
502             HVM_PARAM_CALLBACK_TYPE_VECTOR && !xa.u.vector) {
503             kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
504         }
505         s->callback_param = param;
506         s->evtchn_in_kernel = in_kernel;
507 
508         if (gsi != s->callback_gsi) {
509             struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
510 
511             xen_evtchn_set_callback_level(0);
512             s->callback_gsi = gsi;
513 
514             if (gsi && vi && vi->evtchn_upcall_pending) {
515                 kvm_xen_inject_vcpu_callback_vector(0, type);
516             }
517         }
518     }
519 
520     qemu_mutex_unlock(&s->port_lock);
521 
522     return ret;
523 }
524 
525 static void inject_callback(XenEvtchnState *s, uint32_t vcpu)
526 {
527     int type = s->callback_param >> CALLBACK_VIA_TYPE_SHIFT;
528 
529     kvm_xen_inject_vcpu_callback_vector(vcpu, type);
530 }
531 
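/*
 * The assign/deassign helpers below program KVM's in-kernel event channel
 * delivery (KVM_XEN_ATTR_TYPE_EVTCHN): a guest EVTCHNOP_send to 'send_port'
 * can then be handled by the kernel, either raised directly on a local
 * port/vCPU or signalled to an eventfd for a QEMU backend handle.
 */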
532 static void deassign_kernel_port(evtchn_port_t port)
533 {
534     struct kvm_xen_hvm_attr ha;
535     int ret;
536 
537     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
538     ha.u.evtchn.send_port = port;
539     ha.u.evtchn.flags = KVM_XEN_EVTCHN_DEASSIGN;
540 
541     ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
542     if (ret) {
543         qemu_log_mask(LOG_GUEST_ERROR, "Failed to unbind kernel port %d: %s\n",
544                       port, strerror(ret));
545     }
546 }
547 
548 static int assign_kernel_port(uint16_t type, evtchn_port_t port,
549                               uint32_t vcpu_id)
550 {
551     CPUState *cpu = qemu_get_cpu(vcpu_id);
552     struct kvm_xen_hvm_attr ha;
553 
554     if (!cpu) {
555         return -ENOENT;
556     }
557 
558     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
559     ha.u.evtchn.send_port = port;
560     ha.u.evtchn.type = type;
561     ha.u.evtchn.flags = 0;
562     ha.u.evtchn.deliver.port.port = port;
563     ha.u.evtchn.deliver.port.vcpu = kvm_arch_vcpu_id(cpu);
564     ha.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
565 
566     return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
567 }
568 
569 static int assign_kernel_eventfd(uint16_t type, evtchn_port_t port, int fd)
570 {
571     struct kvm_xen_hvm_attr ha;
572 
573     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
574     ha.u.evtchn.send_port = port;
575     ha.u.evtchn.type = type;
576     ha.u.evtchn.flags = 0;
577     ha.u.evtchn.deliver.eventfd.port = 0;
578     ha.u.evtchn.deliver.eventfd.fd = fd;
579 
580     return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
581 }
582 
583 static bool valid_port(evtchn_port_t port)
584 {
585     if (!port) {
586         return false;
587     }
588 
589     if (xen_is_long_mode()) {
590         return port < EVTCHN_2L_NR_CHANNELS;
591     } else {
592         return port < COMPAT_EVTCHN_2L_NR_CHANNELS;
593     }
594 }
595 
596 static bool valid_vcpu(uint32_t vcpu)
597 {
598     return !!qemu_get_cpu(vcpu);
599 }
600 
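/*
 * Called from vmstate pre_load: reset the guest-facing halves of interdomain
 * ports connected to QEMU backends (the incoming state overwrites them
 * anyway) and tear down any in-kernel delivery for them, so that backend
 * drivers have to re-bind after the migration stream has been loaded.
 */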
601 static void unbind_backend_ports(XenEvtchnState *s)
602 {
603     XenEvtchnPort *p;
604     int i;
605 
606     for (i = 1; i < s->nr_ports; i++) {
607         p = &s->port_table[i];
608         if (p->type == EVTCHNSTAT_interdomain &&
609             (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU)) {
610             evtchn_port_t be_port = p->type_val & PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
611 
612             if (s->be_handles[be_port]) {
613                 /* This part will be overwritten on the load anyway. */
614                 p->type = EVTCHNSTAT_unbound;
615                 p->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
616 
617                 /* Leave the backend port open and unbound too. */
618                 if (kvm_xen_has_cap(EVTCHN_SEND)) {
619                     deassign_kernel_port(i);
620                 }
621                 s->be_handles[be_port]->guest_port = 0;
622             }
623         }
624     }
625 }
626 
627 int xen_evtchn_status_op(struct evtchn_status *status)
628 {
629     XenEvtchnState *s = xen_evtchn_singleton;
630     XenEvtchnPort *p;
631 
632     if (!s) {
633         return -ENOTSUP;
634     }
635 
636     if (status->dom != DOMID_SELF && status->dom != xen_domid) {
637         return -ESRCH;
638     }
639 
640     if (!valid_port(status->port)) {
641         return -EINVAL;
642     }
643 
644     qemu_mutex_lock(&s->port_lock);
645 
646     p = &s->port_table[status->port];
647 
648     status->status = p->type;
649     status->vcpu = p->vcpu;
650 
651     switch (p->type) {
652     case EVTCHNSTAT_unbound:
653         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
654             status->u.unbound.dom = DOMID_QEMU;
655         } else {
656             status->u.unbound.dom = xen_domid;
657         }
658         break;
659 
660     case EVTCHNSTAT_interdomain:
661         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
662             status->u.interdomain.dom = DOMID_QEMU;
663         } else {
664             status->u.interdomain.dom = xen_domid;
665         }
666 
667         status->u.interdomain.port = p->type_val &
668             PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
669         break;
670 
671     case EVTCHNSTAT_pirq:
672         status->u.pirq = p->type_val;
673         break;
674 
675     case EVTCHNSTAT_virq:
676         status->u.virq = p->type_val;
677         break;
678     }
679 
680     qemu_mutex_unlock(&s->port_lock);
681     return 0;
682 }
683 
684 /*
685  * Never thought I'd hear myself say this, but C++ templates would be
686  * kind of nice here.
687  *
688  * template<class T> static int do_unmask_port(T *shinfo, ...);
689  */
690 static int do_unmask_port_lm(XenEvtchnState *s, evtchn_port_t port,
691                              bool do_unmask, struct shared_info *shinfo,
692                              struct vcpu_info *vcpu_info)
693 {
694     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
695     typeof(shinfo->evtchn_pending[0]) mask;
696     int idx = port / bits_per_word;
697     int offset = port % bits_per_word;
698 
699     mask = 1UL << offset;
700 
701     if (idx >= bits_per_word) {
702         return -EINVAL;
703     }
704 
705     if (do_unmask) {
706         /*
707          * If this is a true unmask operation, clear the mask bit. If
708          * it was already unmasked, we have nothing further to do.
709          */
710         if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
711             return 0;
712         }
713     } else {
714         /*
715          * This is a pseudo-unmask for affinity changes. We don't
716          * change the mask bit, and if it's *masked* we have nothing
717          * else to do.
718          */
719         if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
720             return 0;
721         }
722     }
723 
724     /* If the event was not pending, we're done. */
725     if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
726         return 0;
727     }
728 
729     /* Now on to the vcpu_info evtchn_pending_sel index... */
730     mask = 1UL << idx;
731 
732     /* If a port in this word was already pending for this vCPU, all done. */
733     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
734         return 0;
735     }
736 
737     /* Set evtchn_upcall_pending for this vCPU */
738     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
739         return 0;
740     }
741 
742     inject_callback(s, s->port_table[port].vcpu);
743 
744     return 0;
745 }
746 
747 static int do_unmask_port_compat(XenEvtchnState *s, evtchn_port_t port,
748                                  bool do_unmask,
749                                  struct compat_shared_info *shinfo,
750                                  struct compat_vcpu_info *vcpu_info)
751 {
752     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
753     typeof(shinfo->evtchn_pending[0]) mask;
754     int idx = port / bits_per_word;
755     int offset = port % bits_per_word;
756 
757     mask = 1UL << offset;
758 
759     if (idx >= bits_per_word) {
760         return -EINVAL;
761     }
762 
763     if (do_unmask) {
764         /*
765          * If this is a true unmask operation, clear the mask bit. If
766          * it was already unmasked, we have nothing further to do.
767          */
768         if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
769             return 0;
770         }
771     } else {
772         /*
773          * This is a pseudo-unmask for affinity changes. We don't
774          * change the mask bit, and if it's *masked* we have nothing
775          * else to do.
776          */
777         if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
778             return 0;
779         }
780     }
781 
782     /* If the event was not pending, we're done. */
783     if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
784         return 0;
785     }
786 
787     /* Now on to the vcpu_info evtchn_pending_sel index... */
788     mask = 1UL << idx;
789 
790     /* If a port in this word was already pending for this vCPU, all done. */
791     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
792         return 0;
793     }
794 
795     /* Set evtchn_upcall_pending for this vCPU */
796     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
797         return 0;
798     }
799 
800     inject_callback(s, s->port_table[port].vcpu);
801 
802     return 0;
803 }
804 
805 static int unmask_port(XenEvtchnState *s, evtchn_port_t port, bool do_unmask)
806 {
807     void *vcpu_info, *shinfo;
808 
809     if (s->port_table[port].type == EVTCHNSTAT_closed) {
810         return -EINVAL;
811     }
812 
813     shinfo = xen_overlay_get_shinfo_ptr();
814     if (!shinfo) {
815         return -ENOTSUP;
816     }
817 
818     vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
819     if (!vcpu_info) {
820         return -EINVAL;
821     }
822 
823     if (xen_is_long_mode()) {
824         return do_unmask_port_lm(s, port, do_unmask, shinfo, vcpu_info);
825     } else {
826         return do_unmask_port_compat(s, port, do_unmask, shinfo, vcpu_info);
827     }
828 }
829 
830 static int do_set_port_lm(XenEvtchnState *s, evtchn_port_t port,
831                           struct shared_info *shinfo,
832                           struct vcpu_info *vcpu_info)
833 {
834     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
835     typeof(shinfo->evtchn_pending[0]) mask;
836     int idx = port / bits_per_word;
837     int offset = port % bits_per_word;
838 
839     mask = 1UL << offset;
840 
841     if (idx >= bits_per_word) {
842         return -EINVAL;
843     }
844 
845     /* Update the pending bit itself. If it was already set, we're done. */
846     if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
847         return 0;
848     }
849 
850     /* Check if it's masked. */
851     if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
852         return 0;
853     }
854 
855     /* Now on to the vcpu_info evtchn_pending_sel index... */
856     mask = 1UL << idx;
857 
858     /* If a port in this word was already pending for this vCPU, all done. */
859     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
860         return 0;
861     }
862 
863     /* Set evtchn_upcall_pending for this vCPU */
864     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
865         return 0;
866     }
867 
868     inject_callback(s, s->port_table[port].vcpu);
869 
870     return 0;
871 }
872 
873 static int do_set_port_compat(XenEvtchnState *s, evtchn_port_t port,
874                               struct compat_shared_info *shinfo,
875                               struct compat_vcpu_info *vcpu_info)
876 {
877     const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
878     typeof(shinfo->evtchn_pending[0]) mask;
879     int idx = port / bits_per_word;
880     int offset = port % bits_per_word;
881 
882     mask = 1UL << offset;
883 
884     if (idx >= bits_per_word) {
885         return -EINVAL;
886     }
887 
888     /* Update the pending bit itself. If it was already set, we're done. */
889     if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
890         return 0;
891     }
892 
893     /* Check if it's masked. */
894     if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
895         return 0;
896     }
897 
898     /* Now on to the vcpu_info evtchn_pending_sel index... */
899     mask = 1UL << idx;
900 
901     /* If a port in this word was already pending for this vCPU, all done. */
902     if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
903         return 0;
904     }
905 
906     /* Set evtchn_upcall_pending for this vCPU */
907     if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
908         return 0;
909     }
910 
911     inject_callback(s, s->port_table[port].vcpu);
912 
913     return 0;
914 }
915 
916 static int set_port_pending(XenEvtchnState *s, evtchn_port_t port)
917 {
918     void *vcpu_info, *shinfo;
919 
920     if (s->port_table[port].type == EVTCHNSTAT_closed) {
921         return -EINVAL;
922     }
923 
924     if (s->evtchn_in_kernel) {
925         XenEvtchnPort *p = &s->port_table[port];
926         CPUState *cpu = qemu_get_cpu(p->vcpu);
927         struct kvm_irq_routing_xen_evtchn evt;
928 
929         if (!cpu) {
930             return 0;
931         }
932 
933         evt.port = port;
934         evt.vcpu = kvm_arch_vcpu_id(cpu);
935         evt.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
936 
937         return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_EVTCHN_SEND, &evt);
938     }
939 
940     shinfo = xen_overlay_get_shinfo_ptr();
941     if (!shinfo) {
942         return -ENOTSUP;
943     }
944 
945     vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
946     if (!vcpu_info) {
947         return -EINVAL;
948     }
949 
950     if (xen_is_long_mode()) {
951         return do_set_port_lm(s, port, shinfo, vcpu_info);
952     } else {
953         return do_set_port_compat(s, port, shinfo, vcpu_info);
954     }
955 }
956 
957 static int clear_port_pending(XenEvtchnState *s, evtchn_port_t port)
958 {
959     void *p = xen_overlay_get_shinfo_ptr();
960 
961     if (!p) {
962         return -ENOTSUP;
963     }
964 
965     if (xen_is_long_mode()) {
966         struct shared_info *shinfo = p;
967         const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
968         typeof(shinfo->evtchn_pending[0]) mask;
969         int idx = port / bits_per_word;
970         int offset = port % bits_per_word;
971 
972         mask = 1UL << offset;
973 
974         qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
975     } else {
976         struct compat_shared_info *shinfo = p;
977         const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
978         typeof(shinfo->evtchn_pending[0]) mask;
979         int idx = port / bits_per_word;
980         int offset = port % bits_per_word;
981 
982         mask = 1UL << offset;
983 
984         qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
985     }
986     return 0;
987 }
988 
989 static void free_port(XenEvtchnState *s, evtchn_port_t port)
990 {
991     s->port_table[port].type = EVTCHNSTAT_closed;
992     s->port_table[port].type_val = 0;
993     s->port_table[port].vcpu = 0;
994 
995     if (s->nr_ports == port + 1) {
996         do {
997             s->nr_ports--;
998         } while (s->nr_ports &&
999                  s->port_table[s->nr_ports - 1].type == EVTCHNSTAT_closed);
1000     }
1001 
1002     /* Clear pending event to avoid unexpected behavior on re-bind. */
1003     clear_port_pending(s, port);
1004 }
1005 
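/*
 * Allocate the lowest-numbered closed port (port 0 is never valid), record
 * its binding, and extend nr_ports if the new port lies beyond it.
 */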
1006 static int allocate_port(XenEvtchnState *s, uint32_t vcpu, uint16_t type,
1007                          uint16_t val, evtchn_port_t *port)
1008 {
1009     evtchn_port_t p = 1;
1010 
1011     for (p = 1; valid_port(p); p++) {
1012         if (s->port_table[p].type == EVTCHNSTAT_closed) {
1013             s->port_table[p].vcpu = vcpu;
1014             s->port_table[p].type = type;
1015             s->port_table[p].type_val = val;
1016 
1017             *port = p;
1018 
1019             if (s->nr_ports < p + 1) {
1020                 s->nr_ports = p + 1;
1021             }
1022 
1023             return 0;
1024         }
1025     }
1026     return -ENOSPC;
1027 }
1028 
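/*
 * Per-vCPU VIRQs (timer, debug, xenoprof, xenpmu) may be bound on any vCPU;
 * all others are global VIRQs, which EVTCHNOP_bind_virq only accepts on
 * vCPU0 (they can be re-targeted later with EVTCHNOP_bind_vcpu).
 */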
1029 static bool virq_is_global(uint32_t virq)
1030 {
1031     switch (virq) {
1032     case VIRQ_TIMER:
1033     case VIRQ_DEBUG:
1034     case VIRQ_XENOPROF:
1035     case VIRQ_XENPMU:
1036         return false;
1037 
1038     default:
1039         return true;
1040     }
1041 }
1042 
1043 static int close_port(XenEvtchnState *s, evtchn_port_t port,
1044                       bool *flush_kvm_routes)
1045 {
1046     XenEvtchnPort *p = &s->port_table[port];
1047 
1048     /* Because it *might* be a PIRQ port */
1049     assert(qemu_mutex_iothread_locked());
1050 
1051     switch (p->type) {
1052     case EVTCHNSTAT_closed:
1053         return -ENOENT;
1054 
1055     case EVTCHNSTAT_pirq:
1056         s->pirq[p->type_val].port = 0;
1057         if (s->pirq[p->type_val].is_translated) {
1058             *flush_kvm_routes = true;
1059         }
1060         break;
1061 
1062     case EVTCHNSTAT_virq:
1063         kvm_xen_set_vcpu_virq(virq_is_global(p->type_val) ? 0 : p->vcpu,
1064                               p->type_val, 0);
1065         break;
1066 
1067     case EVTCHNSTAT_ipi:
1068         if (s->evtchn_in_kernel) {
1069             deassign_kernel_port(port);
1070         }
1071         break;
1072 
1073     case EVTCHNSTAT_interdomain:
1074         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
1075             uint16_t be_port = p->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
1076             struct xenevtchn_handle *xc = s->be_handles[be_port];
1077             if (xc) {
1078                 if (kvm_xen_has_cap(EVTCHN_SEND)) {
1079                     deassign_kernel_port(port);
1080                 }
1081                 xc->guest_port = 0;
1082             }
1083         } else {
1084             /* Loopback interdomain */
1085             XenEvtchnPort *rp = &s->port_table[p->type_val];
1086             if (!valid_port(p->type_val) || rp->type_val != port ||
1087                 rp->type != EVTCHNSTAT_interdomain) {
1088                 error_report("Inconsistent state for interdomain unbind");
1089             } else {
1090                 /* Set the other end back to unbound */
1091                 rp->type = EVTCHNSTAT_unbound;
1092                 rp->type_val = 0;
1093             }
1094         }
1095         break;
1096 
1097     default:
1098         break;
1099     }
1100 
1101     free_port(s, port);
1102     return 0;
1103 }
1104 
1105 int xen_evtchn_soft_reset(void)
1106 {
1107     XenEvtchnState *s = xen_evtchn_singleton;
1108     bool flush_kvm_routes = false;
1109     int i;
1110 
1111     if (!s) {
1112         return -ENOTSUP;
1113     }
1114 
1115     assert(qemu_mutex_iothread_locked());
1116 
1117     qemu_mutex_lock(&s->port_lock);
1118 
1119     for (i = 0; i < s->nr_ports; i++) {
1120         close_port(s, i, &flush_kvm_routes);
1121     }
1122 
1123     qemu_mutex_unlock(&s->port_lock);
1124 
1125     if (flush_kvm_routes) {
1126         kvm_update_msi_routes_all(NULL, true, 0, 0);
1127     }
1128 
1129     return 0;
1130 }
1131 
1132 int xen_evtchn_reset_op(struct evtchn_reset *reset)
1133 {
1134     if (reset->dom != DOMID_SELF && reset->dom != xen_domid) {
1135         return -ESRCH;
1136     }
1137 
1138     QEMU_IOTHREAD_LOCK_GUARD();
1139     return xen_evtchn_soft_reset();
1140 }
1141 
1142 int xen_evtchn_close_op(struct evtchn_close *close)
1143 {
1144     XenEvtchnState *s = xen_evtchn_singleton;
1145     bool flush_kvm_routes = false;
1146     int ret;
1147 
1148     if (!s) {
1149         return -ENOTSUP;
1150     }
1151 
1152     if (!valid_port(close->port)) {
1153         return -EINVAL;
1154     }
1155 
1156     QEMU_IOTHREAD_LOCK_GUARD();
1157     qemu_mutex_lock(&s->port_lock);
1158 
1159     ret = close_port(s, close->port, &flush_kvm_routes);
1160 
1161     qemu_mutex_unlock(&s->port_lock);
1162 
1163     if (flush_kvm_routes) {
1164         kvm_update_msi_routes_all(NULL, true, 0, 0);
1165     }
1166 
1167     return ret;
1168 }
1169 
1170 int xen_evtchn_unmask_op(struct evtchn_unmask *unmask)
1171 {
1172     XenEvtchnState *s = xen_evtchn_singleton;
1173     int ret;
1174 
1175     if (!s) {
1176         return -ENOTSUP;
1177     }
1178 
1179     if (!valid_port(unmask->port)) {
1180         return -EINVAL;
1181     }
1182 
1183     qemu_mutex_lock(&s->port_lock);
1184 
1185     ret = unmask_port(s, unmask->port, true);
1186 
1187     qemu_mutex_unlock(&s->port_lock);
1188 
1189     return ret;
1190 }
1191 
1192 int xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu *vcpu)
1193 {
1194     XenEvtchnState *s = xen_evtchn_singleton;
1195     XenEvtchnPort *p;
1196     int ret = -EINVAL;
1197 
1198     if (!s) {
1199         return -ENOTSUP;
1200     }
1201 
1202     if (!valid_port(vcpu->port)) {
1203         return -EINVAL;
1204     }
1205 
1206     if (!valid_vcpu(vcpu->vcpu)) {
1207         return -ENOENT;
1208     }
1209 
1210     qemu_mutex_lock(&s->port_lock);
1211 
1212     p = &s->port_table[vcpu->port];
1213 
1214     if (p->type == EVTCHNSTAT_interdomain ||
1215         p->type == EVTCHNSTAT_unbound ||
1216         p->type == EVTCHNSTAT_pirq ||
1217         (p->type == EVTCHNSTAT_virq && virq_is_global(p->type_val))) {
1218         /*
1219          * unmask_port() with do_unmask==false will just raise the event
1220          * on the new vCPU if the port was already pending.
1221          */
1222         p->vcpu = vcpu->vcpu;
1223         unmask_port(s, vcpu->port, false);
1224         ret = 0;
1225     }
1226 
1227     qemu_mutex_unlock(&s->port_lock);
1228 
1229     return ret;
1230 }
1231 
1232 int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq)
1233 {
1234     XenEvtchnState *s = xen_evtchn_singleton;
1235     int ret;
1236 
1237     if (!s) {
1238         return -ENOTSUP;
1239     }
1240 
1241     if (virq->virq >= NR_VIRQS) {
1242         return -EINVAL;
1243     }
1244 
1245     /* Global VIRQ must be allocated on vCPU0 first */
1246     if (virq_is_global(virq->virq) && virq->vcpu != 0) {
1247         return -EINVAL;
1248     }
1249 
1250     if (!valid_vcpu(virq->vcpu)) {
1251         return -ENOENT;
1252     }
1253 
1254     qemu_mutex_lock(&s->port_lock);
1255 
1256     ret = allocate_port(s, virq->vcpu, EVTCHNSTAT_virq, virq->virq,
1257                         &virq->port);
1258     if (!ret) {
1259         ret = kvm_xen_set_vcpu_virq(virq->vcpu, virq->virq, virq->port);
1260         if (ret) {
1261             free_port(s, virq->port);
1262         }
1263     }
1264 
1265     qemu_mutex_unlock(&s->port_lock);
1266 
1267     return ret;
1268 }
1269 
1270 int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq)
1271 {
1272     XenEvtchnState *s = xen_evtchn_singleton;
1273     int ret;
1274 
1275     if (!s) {
1276         return -ENOTSUP;
1277     }
1278 
1279     if (pirq->pirq >= s->nr_pirqs) {
1280         return -EINVAL;
1281     }
1282 
1283     QEMU_IOTHREAD_LOCK_GUARD();
1284 
1285     if (s->pirq[pirq->pirq].port) {
1286         return -EBUSY;
1287     }
1288 
1289     qemu_mutex_lock(&s->port_lock);
1290 
1291     ret = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq,
1292                         &pirq->port);
1293     if (ret) {
1294         qemu_mutex_unlock(&s->port_lock);
1295         return ret;
1296     }
1297 
1298     s->pirq[pirq->pirq].port = pirq->port;
1299     trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port);
1300 
1301     qemu_mutex_unlock(&s->port_lock);
1302 
1303     /*
1304      * Need to do the unmask outside port_lock because it may call
1305      * back into the MSI translate function.
1306      */
1307     if (s->pirq[pirq->pirq].gsi == IRQ_MSI_EMU) {
1308         if (s->pirq[pirq->pirq].is_masked) {
1309             PCIDevice *dev = s->pirq[pirq->pirq].dev;
1310             int vector = s->pirq[pirq->pirq].vector;
1311             char *dev_path = qdev_get_dev_path(DEVICE(dev));
1312 
1313             trace_kvm_xen_unmask_pirq(pirq->pirq, dev_path, vector);
1314             g_free(dev_path);
1315 
1316             if (s->pirq[pirq->pirq].is_msix) {
1317                 msix_set_mask(dev, vector, false);
1318             } else {
1319                 msi_set_mask(dev, vector, false, NULL);
1320             }
1321         } else if (s->pirq[pirq->pirq].is_translated) {
1322             /*
1323              * If KVM had attempted to translate this one before, make it try
1324              * again. If we unmasked, then the notifier on the MSI(-X) vector
1325              * will already have had the same effect.
1326              */
1327             kvm_update_msi_routes_all(NULL, true, 0, 0);
1328         }
1329     }
1330 
1331     return ret;
1332 }
1333 
1334 int xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi *ipi)
1335 {
1336     XenEvtchnState *s = xen_evtchn_singleton;
1337     int ret;
1338 
1339     if (!s) {
1340         return -ENOTSUP;
1341     }
1342 
1343     if (!valid_vcpu(ipi->vcpu)) {
1344         return -ENOENT;
1345     }
1346 
1347     qemu_mutex_lock(&s->port_lock);
1348 
1349     ret = allocate_port(s, ipi->vcpu, EVTCHNSTAT_ipi, 0, &ipi->port);
1350     if (!ret && s->evtchn_in_kernel) {
1351         assign_kernel_port(EVTCHNSTAT_ipi, ipi->port, ipi->vcpu);
1352     }
1353 
1354     qemu_mutex_unlock(&s->port_lock);
1355 
1356     return ret;
1357 }
1358 
1359 int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
1360 {
1361     XenEvtchnState *s = xen_evtchn_singleton;
1362     uint16_t type_val;
1363     int ret;
1364 
1365     if (!s) {
1366         return -ENOTSUP;
1367     }
1368 
1369     if (interdomain->remote_dom == DOMID_QEMU) {
1370         type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
1371     } else if (interdomain->remote_dom == DOMID_SELF ||
1372                interdomain->remote_dom == xen_domid) {
1373         type_val = 0;
1374     } else {
1375         return -ESRCH;
1376     }
1377 
1378     if (!valid_port(interdomain->remote_port)) {
1379         return -EINVAL;
1380     }
1381 
1382     qemu_mutex_lock(&s->port_lock);
1383 
1384     /* The newly allocated port starts out as unbound */
1385     ret = allocate_port(s, 0, EVTCHNSTAT_unbound, type_val,
1386                         &interdomain->local_port);
1387     if (ret) {
1388         goto out;
1389     }
1390 
1391     if (interdomain->remote_dom == DOMID_QEMU) {
1392         struct xenevtchn_handle *xc = s->be_handles[interdomain->remote_port];
1393         XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1394 
1395         if (!xc) {
1396             ret = -ENOENT;
1397             goto out_free_port;
1398         }
1399 
1400         if (xc->guest_port) {
1401             ret = -EBUSY;
1402             goto out_free_port;
1403         }
1404 
1405         assert(xc->be_port == interdomain->remote_port);
1406         xc->guest_port = interdomain->local_port;
1407         if (kvm_xen_has_cap(EVTCHN_SEND)) {
1408             assign_kernel_eventfd(lp->type, xc->guest_port, xc->fd);
1409         }
1410         lp->type = EVTCHNSTAT_interdomain;
1411         lp->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU | interdomain->remote_port;
1412         ret = 0;
1413     } else {
1414         /* Loopback */
1415         XenEvtchnPort *rp = &s->port_table[interdomain->remote_port];
1416         XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
1417 
1418         /*
1419          * The 'remote' port for loopback must be an unbound port allocated for
1420          * communication with the local domain (as indicated by rp->type_val
1421          * being zero, not PORT_INFO_TYPEVAL_REMOTE_QEMU), and must *not* be
1422          * the port that was just allocated for the local end.
1423          */
1424         if (interdomain->local_port != interdomain->remote_port &&
1425             rp->type == EVTCHNSTAT_unbound && rp->type_val == 0) {
1426 
1427             rp->type = EVTCHNSTAT_interdomain;
1428             rp->type_val = interdomain->local_port;
1429 
1430             lp->type = EVTCHNSTAT_interdomain;
1431             lp->type_val = interdomain->remote_port;
1432         } else {
1433             ret = -EINVAL;
1434         }
1435     }
1436 
1437  out_free_port:
1438     if (ret) {
1439         free_port(s, interdomain->local_port);
1440     }
1441  out:
1442     qemu_mutex_unlock(&s->port_lock);
1443 
1444     return ret;
1445 }
1446 
1447 int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc)
1448 {
1449     XenEvtchnState *s = xen_evtchn_singleton;
1450     uint16_t type_val;
1451     int ret;
1452 
1453     if (!s) {
1454         return -ENOTSUP;
1455     }
1456 
1457     if (alloc->dom != DOMID_SELF && alloc->dom != xen_domid) {
1458         return -ESRCH;
1459     }
1460 
1461     if (alloc->remote_dom == DOMID_QEMU) {
1462         type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
1463     } else if (alloc->remote_dom == DOMID_SELF ||
1464                alloc->remote_dom == xen_domid) {
1465         type_val = 0;
1466     } else {
1467         return -EPERM;
1468     }
1469 
1470     qemu_mutex_lock(&s->port_lock);
1471 
1472     ret = allocate_port(s, 0, EVTCHNSTAT_unbound, type_val, &alloc->port);
1473 
1474     qemu_mutex_unlock(&s->port_lock);
1475 
1476     return ret;
1477 }
1478 
1479 int xen_evtchn_send_op(struct evtchn_send *send)
1480 {
1481     XenEvtchnState *s = xen_evtchn_singleton;
1482     XenEvtchnPort *p;
1483     int ret = 0;
1484 
1485     if (!s) {
1486         return -ENOTSUP;
1487     }
1488 
1489     if (!valid_port(send->port)) {
1490         return -EINVAL;
1491     }
1492 
1493     qemu_mutex_lock(&s->port_lock);
1494 
1495     p = &s->port_table[send->port];
1496 
1497     switch (p->type) {
1498     case EVTCHNSTAT_interdomain:
1499         if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
1500             /*
1501              * This is an event from the guest to qemu itself, which is
1502              * serving as the driver domain.
1503              */
1504             uint16_t be_port = p->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
1505             struct xenevtchn_handle *xc = s->be_handles[be_port];
1506             if (xc) {
1507                 eventfd_write(xc->fd, 1);
1508                 ret = 0;
1509             } else {
1510                 ret = -ENOENT;
1511             }
1512         } else {
1513             /* Loopback interdomain ports; just a complex IPI */
1514             set_port_pending(s, p->type_val);
1515         }
1516         break;
1517 
1518     case EVTCHNSTAT_ipi:
1519         set_port_pending(s, send->port);
1520         break;
1521 
1522     case EVTCHNSTAT_unbound:
1523         /* Xen will silently drop these */
1524         break;
1525 
1526     default:
1527         ret = -EINVAL;
1528         break;
1529     }
1530 
1531     qemu_mutex_unlock(&s->port_lock);
1532 
1533     return ret;
1534 }
1535 
1536 int xen_evtchn_set_port(uint16_t port)
1537 {
1538     XenEvtchnState *s = xen_evtchn_singleton;
1539     XenEvtchnPort *p;
1540     int ret = -EINVAL;
1541 
1542     if (!s) {
1543         return -ENOTSUP;
1544     }
1545 
1546     if (!valid_port(port)) {
1547         return -EINVAL;
1548     }
1549 
1550     qemu_mutex_lock(&s->port_lock);
1551 
1552     p = &s->port_table[port];
1553 
1554     /* QEMU has no business sending to anything but these */
1555     if (p->type == EVTCHNSTAT_virq ||
1556         (p->type == EVTCHNSTAT_interdomain &&
1557          (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU))) {
1558         set_port_pending(s, port);
1559         ret = 0;
1560     }
1561 
1562     qemu_mutex_unlock(&s->port_lock);
1563 
1564     return ret;
1565 }
1566 
1567 static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
1568 {
1569     uint16_t pirq;
1570 
1571     /*
1572      * Preserve the allocation strategy that Xen has. It looks like
1573      * we *never* give out PIRQ 0-15, we give out 16-nr_irqs_gsi only
1574      * to GSIs (counting up from 16), and then we count backwards from
1575      * the top for MSIs or when the GSI space is exhausted.
1576      */
1577     if (type == MAP_PIRQ_TYPE_GSI) {
1578         for (pirq = 16; pirq < IOAPIC_NUM_PINS; pirq++) {
1579             if (pirq_inuse(s, pirq)) {
1580                 continue;
1581             }
1582 
1583             /* Found it */
1584             goto found;
1585         }
1586     }
1587     for (pirq = s->nr_pirqs - 1; pirq >= IOAPIC_NUM_PINS; pirq--) {
1588         /* Skip whole words at a time when they're full */
1589         if (pirq_inuse_word(s, pirq) == UINT64_MAX) {
1590             pirq &= ~63ULL;
1591             continue;
1592         }
1593         if (pirq_inuse(s, pirq)) {
1594             continue;
1595         }
1596 
1597         goto found;
1598     }
1599     return -ENOSPC;
1600 
1601  found:
1602     pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1603     if (gsi >= 0) {
1604         assert(gsi < IOAPIC_NUM_PINS);
1605         s->gsi_pirq[gsi] = pirq;
1606     }
1607     s->pirq[pirq].gsi = gsi;
1608     return pirq;
1609 }
1610 
1611 bool xen_evtchn_set_gsi(int gsi, int level)
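/*
 * Called when a GSI is asserted or deasserted. If the GSI has been mapped to
 * a PIRQ, record the level in pirq_gsi_set (for re-assertion on EOI) and set
 * the bound event channel port pending; returns true if the GSI was consumed
 * as a PIRQ event here.
 */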
1612 {
1613     XenEvtchnState *s = xen_evtchn_singleton;
1614     int pirq;
1615 
1616     assert(qemu_mutex_iothread_locked());
1617 
1618     if (!s || gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1619         return false;
1620     }
1621 
1622     /*
1623      * Check that it *isn't* the event channel GSI, and thus
1624      * that we are not recursing and it's safe to take s->port_lock.
1625      *
1626      * Locking aside, it's perfectly sane to bail out early for that
1627      * special case, as it would make no sense for the event channel
1628      * GSI to be routed back to event channels, when the delivery
1629      * method is to raise the GSI... that recursion wouldn't *just*
1630      * be a locking issue.
1631      */
1632     if (gsi && gsi == s->callback_gsi) {
1633         return false;
1634     }
1635 
1636     QEMU_LOCK_GUARD(&s->port_lock);
1637 
1638     pirq = s->gsi_pirq[gsi];
1639     if (!pirq) {
1640         return false;
1641     }
1642 
1643     if (level) {
1644         int port = s->pirq[pirq].port;
1645 
1646         s->pirq_gsi_set |= (1U << gsi);
1647         if (port) {
1648             set_port_pending(s, port);
1649         }
1650     } else {
1651         s->pirq_gsi_set &= ~(1U << gsi);
1652     }
1653     return true;
1654 }
1655 
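/*
 * Decode the PIRQ# that Xen encodes into an MSI message: PIRQ bits 0-7 come
 * from address bits 12-19 (the normal destination ID field) and PIRQ bits
 * 8-31 from bits 40-63 of the 64-bit address; the vector in the low 8 bits
 * of the data must be zero. Returns 0 if the message is not a PIRQ binding.
 */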
1656 static uint32_t msi_pirq_target(uint64_t addr, uint32_t data)
1657 {
1658     /* The vector (in low 8 bits of data) must be zero */
1659     if (data & 0xff) {
1660         return 0;
1661     }
1662 
1663     uint32_t pirq = (addr & 0xff000) >> 12;
1664     pirq |= (addr >> 32) & 0xffffff00;
1665 
1666     return pirq;
1667 }
1668 
1669 static void do_remove_pci_vector(XenEvtchnState *s, PCIDevice *dev, int vector,
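/*
 * Reset any PIRQ entries that currently point at this {device, vector} back
 * to IRQ_UNBOUND (vector == -1 matches all vectors of the device, and
 * except_pirq, if non-zero, is left untouched).
 */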
1670                                  int except_pirq)
1671 {
1672     uint32_t pirq;
1673 
1674     for (pirq = 0; pirq < s->nr_pirqs; pirq++) {
1675         /*
1676          * We could be cleverer here, but it isn't really a fast path, and
1677          * this trivial optimisation is enough to let us skip the big gap
1678          * in the middle a bit quicker (in terms of both loop iterations,
1679          * and cache lines).
1680          */
1681         if (!(pirq & 63) && !(pirq_inuse_word(s, pirq))) {
1682             pirq += 63; /* the loop's pirq++ then lands on the next word */
1683             continue;
1684         }
1685         if (except_pirq && pirq == except_pirq) {
1686             continue;
1687         }
1688         if (s->pirq[pirq].dev != dev) {
1689             continue;
1690         }
1691         if (vector != -1 && s->pirq[pirq].vector != vector) {
1692             continue;
1693         }
1694 
1695         /* It could theoretically be bound to a port already, but that is OK. */
1696         s->pirq[pirq].dev = dev;
1697         s->pirq[pirq].gsi = IRQ_UNBOUND;
1698         s->pirq[pirq].is_msix = false;
1699         s->pirq[pirq].vector = 0;
1700         s->pirq[pirq].is_masked = false;
1701         s->pirq[pirq].is_translated = false;
1702     }
1703 }
1704 
1705 void xen_evtchn_remove_pci_device(PCIDevice *dev)
1706 {
1707     XenEvtchnState *s = xen_evtchn_singleton;
1708 
1709     if (!s) {
1710         return;
1711     }
1712 
1713     QEMU_LOCK_GUARD(&s->port_lock);
1714     do_remove_pci_vector(s, dev, -1, 0);
1715 }
1716 
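/*
 * Snoop an MSI or MSI-X message being configured for a device. If it encodes
 * a PIRQ# (see msi_pirq_target() above) which is allocated but still unbound
 * or already in MSI-emulation state, remember the {device, vector, masked}
 * details against that PIRQ; any other PIRQ entries for the same
 * {device, vector} are reset.
 */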
1717 void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
1718                           uint64_t addr, uint32_t data, bool is_masked)
1719 {
1720     XenEvtchnState *s = xen_evtchn_singleton;
1721     uint32_t pirq;
1722 
1723     if (!s) {
1724         return;
1725     }
1726 
1727     assert(qemu_mutex_iothread_locked());
1728 
1729     pirq = msi_pirq_target(addr, data);
1730 
1731     /*
1732      * The PIRQ# must be sane, and there must be an allocated PIRQ in
1733      * IRQ_UNBOUND or IRQ_MSI_EMU state to match it.
1734      */
1735     if (!pirq || pirq >= s->nr_pirqs || !pirq_inuse(s, pirq) ||
1736         (s->pirq[pirq].gsi != IRQ_UNBOUND &&
1737          s->pirq[pirq].gsi != IRQ_MSI_EMU)) {
1738         pirq = 0;
1739     }
1740 
1741     if (pirq) {
1742         s->pirq[pirq].dev = dev;
1743         s->pirq[pirq].gsi = IRQ_MSI_EMU;
1744         s->pirq[pirq].is_msix = is_msix;
1745         s->pirq[pirq].vector = vector;
1746         s->pirq[pirq].is_masked = is_masked;
1747     }
1748 
1749     /* Remove any (other) entries for this {device, vector} */
1750     do_remove_pci_vector(s, dev, vector, pirq);
1751 }
1752 
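/*
 * Called when an MSI route is set up for KVM. If the message targets a PIRQ
 * that is bound to an event channel port, replace the route with a
 * KVM_IRQ_ROUTING_XEN_EVTCHN entry so the kernel raises the event directly.
 * Returns 1 if the message is not a PIRQ at all, 0 on success, -errno on
 * failure.
 */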
1753 int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
1754                                   uint64_t address, uint32_t data)
1755 {
1756     XenEvtchnState *s = xen_evtchn_singleton;
1757     uint32_t pirq, port;
1758     CPUState *cpu;
1759 
1760     if (!s) {
1761         return 1; /* Not a PIRQ */
1762     }
1763 
1764     assert(qemu_mutex_iothread_locked());
1765 
1766     pirq = msi_pirq_target(address, data);
1767     if (!pirq || pirq >= s->nr_pirqs) {
1768         return 1; /* Not a PIRQ */
1769     }
1770 
1771     if (!kvm_xen_has_cap(EVTCHN_2LEVEL)) {
1772         return -ENOTSUP;
1773     }
1774 
1775     if (s->pirq[pirq].gsi != IRQ_MSI_EMU) {
1776         return -EINVAL;
1777     }
1778 
1779     /* Remember that KVM tried to translate this. It might need to try again. */
1780     s->pirq[pirq].is_translated = true;
1781 
1782     QEMU_LOCK_GUARD(&s->port_lock);
1783 
1784     port = s->pirq[pirq].port;
1785     if (!valid_port(port)) {
1786         return -EINVAL;
1787     }
1788 
1789     cpu = qemu_get_cpu(s->port_table[port].vcpu);
1790     if (!cpu) {
1791         return -EINVAL;
1792     }
1793 
1794     route->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
1795     route->u.xen_evtchn.port = port;
1796     route->u.xen_evtchn.vcpu = kvm_arch_vcpu_id(cpu);
1797     route->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
1798 
1799     return 0; /* Handled */
1800 }
1801 
1802 bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
1803 {
1804     XenEvtchnState *s = xen_evtchn_singleton;
1805     uint32_t pirq, port;
1806 
1807     if (!s) {
1808         return false;
1809     }
1810 
1811     assert(qemu_mutex_iothread_locked());
1812 
1813     pirq = msi_pirq_target(address, data);
1814     if (!pirq || pirq >= s->nr_pirqs) {
1815         return false;
1816     }
1817 
1818     QEMU_LOCK_GUARD(&s->port_lock);
1819 
1820     port = s->pirq[pirq].port;
1821     if (!valid_port(port)) {
1822         return false;
1823     }
1824 
1825     set_port_pending(s, port);
1826     return true;
1827 }
1828 
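     /*
      * PHYSDEVOP_map_pirq: map a GSI to a PIRQ for the guest. Only
      * MAP_PIRQ_TYPE_GSI is supported; a negative PIRQ# requests that a
      * free one be allocated, otherwise the named PIRQ must be either
      * unused or allocated but not yet bound.
      */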
1829 int xen_physdev_map_pirq(struct physdev_map_pirq *map)
1830 {
1831     XenEvtchnState *s = xen_evtchn_singleton;
1832     int pirq = map->pirq;
1833     int gsi = map->index;
1834 
1835     if (!s) {
1836         return -ENOTSUP;
1837     }
1838 
1839     QEMU_IOTHREAD_LOCK_GUARD();
1840     QEMU_LOCK_GUARD(&s->port_lock);
1841 
1842     if (map->domid != DOMID_SELF && map->domid != xen_domid) {
1843         return -EPERM;
1844     }
1845     if (map->type != MAP_PIRQ_TYPE_GSI) {
1846         return -EINVAL;
1847     }
1848     if (gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1849         return -EINVAL;
1850     }
1851 
1852     if (pirq < 0) {
1853         pirq = allocate_pirq(s, map->type, gsi);
1854         if (pirq < 0) {
1855             return pirq;
1856         }
1857         map->pirq = pirq;
1858     } else if (pirq >= s->nr_pirqs) {
1859         return -EINVAL;
1860     } else {
1861         /*
1862          * User specified a valid-looking PIRQ#. Allow it if it is
1863          * allocated and not yet bound, or if it is unallocated
1864          */
1865         if (pirq_inuse(s, pirq)) {
1866             if (s->pirq[pirq].gsi != IRQ_UNBOUND) {
1867                 return -EBUSY;
1868             }
1869         } else {
1870             /* If it was unused, mark it used now. */
1871             pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1872         }
1873         /* Set the mapping in both directions. */
1874         s->pirq[pirq].gsi = gsi;
1875         s->gsi_pirq[gsi] = pirq;
1876     }
1877 
1878     trace_kvm_xen_map_pirq(pirq, gsi);
1879     return 0;
1880 }
1881 
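     /*
      * PHYSDEVOP_unmap_pirq: tear down a GSI<->PIRQ mapping and release
      * the PIRQ. The port lock is taken and released by hand rather than
      * with a lock guard, presumably because the MSI route update at the
      * end needs to run outside it.
      */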
1882 int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
1883 {
1884     XenEvtchnState *s = xen_evtchn_singleton;
1885     int pirq = unmap->pirq;
1886     int gsi;
1887 
1888     if (!s) {
1889         return -ENOTSUP;
1890     }
1891 
1892     if (unmap->domid != DOMID_SELF && unmap->domid != xen_domid) {
1893         return -EPERM;
1894     }
1895     if (pirq < 0 || pirq >= s->nr_pirqs) {
1896         return -EINVAL;
1897     }
1898 
1899     QEMU_IOTHREAD_LOCK_GUARD();
1900     qemu_mutex_lock(&s->port_lock);
1901 
1902     if (!pirq_inuse(s, pirq)) {
1903         qemu_mutex_unlock(&s->port_lock);
1904         return -ENOENT;
1905     }
1906 
1907     gsi = s->pirq[pirq].gsi;
1908 
1909     /* We can only unmap GSI PIRQs */
1910     if (gsi < 0) {
1911         qemu_mutex_unlock(&s->port_lock);
1912         return -EINVAL;
1913     }
1914 
1915     s->gsi_pirq[gsi] = 0;
1916     s->pirq[pirq].gsi = IRQ_UNBOUND; /* Doesn't actually matter because: */
1917     pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);
1918 
1919     trace_kvm_xen_unmap_pirq(pirq, gsi);
1920     qemu_mutex_unlock(&s->port_lock);
1921 
1922     if (gsi == IRQ_MSI_EMU) {
1923         kvm_update_msi_routes_all(NULL, true, 0, 0);
1924     }
1925 
1926     return 0;
1927 }
1928 
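     /*
      * PHYSDEVOP_eoi: end-of-interrupt for a PIRQ. If the underlying GSI
      * is still asserted (the level-triggered case tracked in
      * pirq_gsi_set), the bound event channel port is set pending again
      * straight away.
      */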
1929 int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
1930 {
1931     XenEvtchnState *s = xen_evtchn_singleton;
1932     int pirq = eoi->irq;
1933     int gsi;
1934 
1935     if (!s) {
1936         return -ENOTSUP;
1937     }
1938 
1939     QEMU_IOTHREAD_LOCK_GUARD();
1940     QEMU_LOCK_GUARD(&s->port_lock);
1941 
         /* Bounds-check the guest-supplied PIRQ# before it is used to
          * index the in-use bitmap, as xen_physdev_unmap_pirq() does. */
         if (pirq < 0 || pirq >= s->nr_pirqs) {
             return -EINVAL;
         }

1942     if (!pirq_inuse(s, pirq)) {
1943         return -ENOENT;
1944     }
1945 
1946     gsi = s->pirq[pirq].gsi;
1947     if (gsi < 0) {
1948         return -EINVAL;
1949     }
1950 
1951     /* Reassert a level IRQ if needed */
1952     if (s->pirq_gsi_set & (1U << gsi)) {
1953         int port = s->pirq[pirq].port;
1954         if (port) {
1955             set_port_pending(s, port);
1956         }
1957     }
1958 
1959     return 0;
1960 }
1961 
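     /*
      * PHYSDEVOP_irq_status_query: report whether a PIRQ needs an
      * explicit EOI from the guest. Any PIRQ currently mapped to a GSI
      * is reported as needing one.
      */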
1962 int xen_physdev_query_pirq(struct physdev_irq_status_query *query)
1963 {
1964     XenEvtchnState *s = xen_evtchn_singleton;
1965     int pirq = query->irq;
1966 
1967     if (!s) {
1968         return -ENOTSUP;
1969     }
1970 
1971     QEMU_IOTHREAD_LOCK_GUARD();
1972     QEMU_LOCK_GUARD(&s->port_lock);
1973 
         /* As above, keep the guest-supplied PIRQ# within the table. */
         if (pirq < 0 || pirq >= s->nr_pirqs) {
             return -EINVAL;
         }

1974     if (!pirq_inuse(s, pirq)) {
1975         return -ENOENT;
1976     }
1977 
1978     if (s->pirq[pirq].gsi >= 0) {
1979         query->flags = XENIRQSTAT_needs_eoi;
1980     } else {
1981         query->flags = 0;
1982     }
1983 
1984     return 0;
1985 }
1986 
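     /* PHYSDEVOP_get_free_pirq: allocate an unbound PIRQ# for the guest. */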
1987 int xen_physdev_get_free_pirq(struct physdev_get_free_pirq *get)
1988 {
1989     XenEvtchnState *s = xen_evtchn_singleton;
1990     int pirq;
1991 
1992     if (!s) {
1993         return -ENOTSUP;
1994     }
1995 
1996     QEMU_LOCK_GUARD(&s->port_lock);
1997 
1998     pirq = allocate_pirq(s, get->type, IRQ_UNBOUND);
1999     if (pirq < 0) {
2000         return pirq;
2001     }
2002 
2003     get->pirq = pirq;
2004     trace_kvm_xen_get_free_pirq(pirq, get->type);
2005     return 0;
2006 }
2007 
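     /*
      * The xen_be_evtchn_*() functions below provide the QEMU-internal
      * backend side of event channels, modelled on the handle-based
      * libxenevtchn API. Each handle owns an eventfd which is signalled
      * when the guest notifies the bound port, so a backend can watch it
      * from the main loop. A minimal usage sketch (hypothetical backend
      * code, not taken from this file; my_read_handler is made up):
      *
      *   struct xenevtchn_handle *xc = xen_be_evtchn_open();
      *   int be_port = xen_be_evtchn_bind_interdomain(xc, xen_domid,
      *                                                guest_port);
      *   qemu_set_fd_handler(xen_be_evtchn_fd(xc), my_read_handler,
      *                       NULL, xc);
      *   ...
      *   xen_be_evtchn_notify(xc, be_port);   (kick the guest side)
      */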
2008 struct xenevtchn_handle *xen_be_evtchn_open(void)
2009 {
2010     struct xenevtchn_handle *xc = g_new0(struct xenevtchn_handle, 1);
2011 
2012     xc->fd = eventfd(0, EFD_CLOEXEC);
2013     if (xc->fd < 0) {
2014         g_free(xc);
2015         return NULL;
2016     }
2017 
2018     return xc;
2019 }
2020 
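     /* Claim the lowest-numbered free backend port slot for this handle. */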
2021 static int find_be_port(XenEvtchnState *s, struct xenevtchn_handle *xc)
2022 {
2023     int i;
2024 
2025     for (i = 1; i < EVTCHN_2L_NR_CHANNELS; i++) {
2026         if (!s->be_handles[i]) {
2027             s->be_handles[i] = xc;
2028             xc->be_port = i;
2029             return i;
2030         }
2031     }
2032     return 0;
2033 }
2034 
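     /*
      * Bind a backend handle to a guest port. The guest port must be
      * waiting in the unbound state, except after migration where it may
      * already be marked interdomain-to-QEMU; in that case the previous
      * backend port number is reused if it is still free. With the
      * EVTCHN_SEND capability the handle's eventfd is also handed to the
      * kernel so the guest's notifications can be delivered to it
      * directly.
      */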
2035 int xen_be_evtchn_bind_interdomain(struct xenevtchn_handle *xc, uint32_t domid,
2036                                    evtchn_port_t guest_port)
2037 {
2038     XenEvtchnState *s = xen_evtchn_singleton;
2039     XenEvtchnPort *gp;
2040     uint16_t be_port = 0;
2041     int ret;
2042 
2043     if (!s) {
2044         return -ENOTSUP;
2045     }
2046 
2047     if (!xc) {
2048         return -EFAULT;
2049     }
2050 
2051     if (domid != xen_domid) {
2052         return -ESRCH;
2053     }
2054 
2055     if (!valid_port(guest_port)) {
2056         return -EINVAL;
2057     }
2058 
2059     qemu_mutex_lock(&s->port_lock);
2060 
2061     /* The guest has to have an unbound port waiting for us to bind */
2062     gp = &s->port_table[guest_port];
2063 
2064     switch (gp->type) {
2065     case EVTCHNSTAT_interdomain:
2066         /* Allow rebinding after migration, preserve port # if possible */
2067         be_port = gp->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
2068         assert(be_port != 0);
2069         if (!s->be_handles[be_port]) {
2070             s->be_handles[be_port] = xc;
2071             xc->guest_port = guest_port;
2072             ret = xc->be_port = be_port;
2073             if (kvm_xen_has_cap(EVTCHN_SEND)) {
2074                 assign_kernel_eventfd(gp->type, guest_port, xc->fd);
2075             }
2076             break;
2077         }
2078         /* fall through */
2079 
2080     case EVTCHNSTAT_unbound:
2081         be_port = find_be_port(s, xc);
2082         if (!be_port) {
2083             ret = -ENOSPC;
2084             goto out;
2085         }
2086 
2087         gp->type = EVTCHNSTAT_interdomain;
2088         gp->type_val = be_port | PORT_INFO_TYPEVAL_REMOTE_QEMU;
2089         xc->guest_port = guest_port;
2090         if (kvm_xen_has_cap(EVTCHN_SEND)) {
2091             assign_kernel_eventfd(gp->type, guest_port, xc->fd);
2092         }
2093         ret = be_port;
2094         break;
2095 
2096     default:
2097         ret = -EINVAL;
2098         break;
2099     }
2100 
2101  out:
2102     qemu_mutex_unlock(&s->port_lock);
2103 
2104     return ret;
2105 }
2106 
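     /*
      * Disconnect a backend handle from its guest port. The guest port
      * (if any) reverts to unbound-to-QEMU so it can be bound again
      * later, and any kernel eventfd assignment is removed.
      */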
2107 int xen_be_evtchn_unbind(struct xenevtchn_handle *xc, evtchn_port_t port)
2108 {
2109     XenEvtchnState *s = xen_evtchn_singleton;
2110     int ret;
2111 
2112     if (!s) {
2113         return -ENOTSUP;
2114     }
2115 
2116     if (!xc) {
2117         return -EFAULT;
2118     }
2119 
2120     qemu_mutex_lock(&s->port_lock);
2121 
2122     if (port && port != xc->be_port) {
2123         ret = -EINVAL;
2124         goto out;
2125     }
2126 
2127     if (xc->guest_port) {
2128         XenEvtchnPort *gp = &s->port_table[xc->guest_port];
2129 
2130         /* This should never *not* be true */
2131         if (gp->type == EVTCHNSTAT_interdomain) {
2132             gp->type = EVTCHNSTAT_unbound;
2133             gp->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
2134         }
2135 
2136         if (kvm_xen_has_cap(EVTCHN_SEND)) {
2137             deassign_kernel_port(xc->guest_port);
2138         }
2139         xc->guest_port = 0;
2140     }
2141 
2142     s->be_handles[xc->be_port] = NULL;
2143     xc->be_port = 0;
2144     ret = 0;
2145  out:
2146     qemu_mutex_unlock(&s->port_lock);
2147     return ret;
2148 }
2149 
2150 int xen_be_evtchn_close(struct xenevtchn_handle *xc)
2151 {
2152     if (!xc) {
2153         return -EFAULT;
2154     }
2155 
2156     xen_be_evtchn_unbind(xc, 0);
2157 
2158     close(xc->fd);
2159     g_free(xc);
2160     return 0;
2161 }
2162 
2163 int xen_be_evtchn_fd(struct xenevtchn_handle *xc)
2164 {
2165     if (!xc) {
2166         return -1;
2167     }
2168     return xc->fd;
2169 }
2170 
2171 int xen_be_evtchn_notify(struct xenevtchn_handle *xc, evtchn_port_t port)
2172 {
2173     XenEvtchnState *s = xen_evtchn_singleton;
2174     int ret;
2175 
2176     if (!s) {
2177         return -ENOTSUP;
2178     }
2179 
2180     if (!xc) {
2181         return -EFAULT;
2182     }
2183 
2184     qemu_mutex_lock(&s->port_lock);
2185 
2186     if (xc->guest_port) {
2187         set_port_pending(s, xc->guest_port);
2188         ret = 0;
2189     } else {
2190         ret = -ENOTCONN;
2191     }
2192 
2193     qemu_mutex_unlock(&s->port_lock);
2194 
2195     return ret;
2196 }
2197 
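     /*
      * Consume a pending notification: returns the backend port number
      * if the handle's eventfd had been signalled (the read clears it),
      * or zero for an unbound handle. Since the eventfd is not opened
      * non-blocking, this is presumably only called once the fd has
      * polled readable.
      */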
2198 int xen_be_evtchn_pending(struct xenevtchn_handle *xc)
2199 {
2200     uint64_t val;
2201 
2202     if (!xc) {
2203         return -EFAULT;
2204     }
2205 
2206     if (!xc->be_port) {
2207         return 0;
2208     }
2209 
2210     if (eventfd_read(xc->fd, &val)) {
2211         return -errno;
2212     }
2213 
2214     return val ? xc->be_port : 0;
2215 }
2216 
2217 int xen_be_evtchn_unmask(struct xenevtchn_handle *xc, evtchn_port_t port)
2218 {
2219     if (!xc) {
2220         return -EFAULT;
2221     }
2222 
2223     if (xc->be_port != port) {
2224         return -EINVAL;
2225     }
2226 
2227     /*
2228      * We don't actually do anything to unmask it; the event was already
2229      * consumed in xen_be_evtchn_pending().
2230      */
2231     return 0;
2232 }
2233 
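     /*
      * Note that unlike the other backend calls this one does not accept
      * a NULL handle; callers are expected to pass one they have already
      * opened.
      */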
2234 int xen_be_evtchn_get_guest_port(struct xenevtchn_handle *xc)
2235 {
2236     return xc->guest_port;
2237 }
2238 
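     /*
      * QMP 'xen-event-list': report every open event channel port,
      * reading the pending and masked bits straight out of the guest's
      * shared info page using the 64-bit or 32-bit compat layout as
      * appropriate.
      */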
2239 EvtchnInfoList *qmp_xen_event_list(Error **errp)
2240 {
2241     XenEvtchnState *s = xen_evtchn_singleton;
2242     EvtchnInfoList *head = NULL, **tail = &head;
2243     void *shinfo, *pending, *mask;
2244     int i;
2245 
2246     if (!s) {
2247         error_setg(errp, "Xen event channel emulation not enabled");
2248         return NULL;
2249     }
2250 
2251     shinfo = xen_overlay_get_shinfo_ptr();
2252     if (!shinfo) {
2253         error_setg(errp, "Xen shared info page not allocated");
2254         return NULL;
2255     }
2256 
2257     if (xen_is_long_mode()) {
2258         pending = shinfo + offsetof(struct shared_info, evtchn_pending);
2259         mask = shinfo + offsetof(struct shared_info, evtchn_mask);
2260     } else {
2261         pending = shinfo + offsetof(struct compat_shared_info, evtchn_pending);
2262         mask = shinfo + offsetof(struct compat_shared_info, evtchn_mask);
2263     }
2264 
2265     QEMU_LOCK_GUARD(&s->port_lock);
2266 
2267     for (i = 0; i < s->nr_ports; i++) {
2268         XenEvtchnPort *p = &s->port_table[i];
2269         EvtchnInfo *info;
2270 
2271         if (p->type == EVTCHNSTAT_closed) {
2272             continue;
2273         }
2274 
2275         info = g_new0(EvtchnInfo, 1);
2276 
2277         info->port = i;
2278         qemu_build_assert(EVTCHN_PORT_TYPE_CLOSED == EVTCHNSTAT_closed);
2279         qemu_build_assert(EVTCHN_PORT_TYPE_UNBOUND == EVTCHNSTAT_unbound);
2280         qemu_build_assert(EVTCHN_PORT_TYPE_INTERDOMAIN == EVTCHNSTAT_interdomain);
2281         qemu_build_assert(EVTCHN_PORT_TYPE_PIRQ == EVTCHNSTAT_pirq);
2282         qemu_build_assert(EVTCHN_PORT_TYPE_VIRQ == EVTCHNSTAT_virq);
2283         qemu_build_assert(EVTCHN_PORT_TYPE_IPI == EVTCHNSTAT_ipi);
2284 
2285         info->type = p->type;
2286         if (p->type == EVTCHNSTAT_interdomain) {
2287             info->remote_domain = g_strdup((p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) ?
2288                                            "qemu" : "loopback");
2289             info->target = p->type_val & PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
2290         } else {
2291             info->target = p->type_val;
2292         }
2293         info->vcpu = p->vcpu;
2294         info->pending = test_bit(i, pending);
2295         info->masked = test_bit(i, mask);
2296 
2297         QAPI_LIST_APPEND(tail, info);
2298     }
2299 
2300     return head;
2301 }
2302 
2303 void qmp_xen_event_inject(uint32_t port, Error **errp)
2304 {
2305     XenEvtchnState *s = xen_evtchn_singleton;
2306 
2307     if (!s) {
2308         error_setg(errp, "Xen event channel emulation not enabled");
2309         return;
2310     }
2311 
2312     if (!valid_port(port)) {
2313         error_setg(errp, "Invalid port %u", port);
             return;
2314     }
2315 
2316     QEMU_LOCK_GUARD(&s->port_lock);
2317 
2318     if (set_port_pending(s, port)) {
2319         error_setg(errp, "Failed to set port %u", port);
2320         return;
2321     }
2322 }
2323 
2324 void hmp_xen_event_list(Monitor *mon, const QDict *qdict)
2325 {
2326     EvtchnInfoList *iter, *info_list;
2327     Error *err = NULL;
2328 
2329     info_list = qmp_xen_event_list(&err);
2330     if (err) {
2331         hmp_handle_error(mon, err);
2332         return;
2333     }
2334 
2335     for (iter = info_list; iter; iter = iter->next) {
2336         EvtchnInfo *info = iter->value;
2337 
2338         monitor_printf(mon, "port %4u: vcpu: %d %s", info->port, info->vcpu,
2339                        EvtchnPortType_str(info->type));
2340         if (info->type != EVTCHN_PORT_TYPE_IPI) {
2341             monitor_printf(mon, "(");
2342             if (info->remote_domain) {
2343                 monitor_printf(mon, "%s:", info->remote_domain);
2344             }
2345             monitor_printf(mon, "%d)", info->target);
2346         }
2347         if (info->pending) {
2348             monitor_printf(mon, " PENDING");
2349         }
2350         if (info->masked) {
2351             monitor_printf(mon, " MASKED");
2352         }
2353         monitor_printf(mon, "\n");
2354     }
2355 
2356     qapi_free_EvtchnInfoList(info_list);
2357 }
2358 
2359 void hmp_xen_event_inject(Monitor *mon, const QDict *qdict)
2360 {
2361     int port = qdict_get_int(qdict, "port");
2362     Error *err = NULL;
2363 
2364     qmp_xen_event_inject(port, &err);
2365     if (err) {
2366         hmp_handle_error(mon, err);
2367     } else {
2368         monitor_printf(mon, "Delivered port %d\n", port);
2369     }
2370 }
2371 
2372