xref: /qemu/hw/i386/kvm/xen_evtchn.c (revision 8b7b9c5c)
1 /*
2  * QEMU Xen emulation: Event channel support
3  *
4  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5  *
6  * Authors: David Woodhouse <dwmw2@infradead.org>
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
9  * See the COPYING file in the top-level directory.
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/host-utils.h"
14 #include "qemu/module.h"
15 #include "qemu/lockable.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/log.h"
18 #include "qemu/error-report.h"
19 #include "monitor/monitor.h"
20 #include "monitor/hmp.h"
21 #include "qapi/error.h"
22 #include "qapi/qapi-commands-misc-target.h"
23 #include "qapi/qmp/qdict.h"
24 #include "qom/object.h"
25 #include "exec/target_page.h"
26 #include "exec/address-spaces.h"
27 #include "migration/vmstate.h"
28 #include "trace.h"
29 
30 #include "hw/sysbus.h"
31 #include "hw/xen/xen.h"
32 #include "hw/i386/x86.h"
33 #include "hw/i386/pc.h"
34 #include "hw/pci/pci.h"
35 #include "hw/pci/msi.h"
36 #include "hw/pci/msix.h"
37 #include "hw/irq.h"
38 #include "hw/xen/xen_backend_ops.h"
39 
40 #include "xen_evtchn.h"
41 #include "xen_overlay.h"
42 #include "xen_xenstore.h"
43 
44 #include "sysemu/kvm.h"
45 #include "sysemu/kvm_xen.h"
46 #include <linux/kvm.h>
47 #include <sys/eventfd.h>
48 
49 #include "hw/xen/interface/memory.h"
50 #include "hw/xen/interface/hvm/params.h"
51 
52 /* XX: For kvm_update_msi_routes_all() */
53 #include "target/i386/kvm/kvm_i386.h"
54 
/* QOM type name and boilerplate for the singleton event-channel device */
#define TYPE_XEN_EVTCHN "xen-evtchn"
OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN)
57 
/* State tracked for each guest event channel port. */
typedef struct XenEvtchnPort {
    uint32_t vcpu;      /* Xen/ACPI vcpu_id */
    uint16_t type;      /* EVTCHNSTAT_xxxx */
    uint16_t type_val;  /* pirq# / virq# / remote port according to type */
} XenEvtchnPort;
63 
/* 32-bit compatibility definitions, also used natively in 32-bit build */
struct compat_arch_vcpu_info {
    unsigned int cr2;
    unsigned int pad[5];
};
69 
/* 32-bit guest layout of vcpu_info (time area is layout-shared with 64-bit) */
struct compat_vcpu_info {
    uint8_t evtchn_upcall_pending;
    uint8_t evtchn_upcall_mask;
    uint16_t pad;
    uint32_t evtchn_pending_sel;  /* one bit per evtchn_pending[] word */
    struct compat_arch_vcpu_info arch;
    struct vcpu_time_info time;
}; /* 64 bytes (x86) */
78 
/* 32-bit guest layout of arch_shared_info; addresses/PFNs are 32-bit */
struct compat_arch_shared_info {
    unsigned int max_pfn;
    unsigned int pfn_to_mfn_frame_list_list;
    unsigned int nmi_reason;
    unsigned int p2m_cr3;
    unsigned int p2m_vaddr;
    unsigned int p2m_generation;
    uint32_t wc_sec_hi;
};
88 
/*
 * 32-bit guest layout of shared_info: 32 words of 32 bits each for the
 * pending/mask bitmaps, i.e. 1024 channels (COMPAT_EVTCHN_2L_NR_CHANNELS).
 */
struct compat_shared_info {
    struct compat_vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
    uint32_t evtchn_pending[32];
    uint32_t evtchn_mask[32];
    uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
    uint32_t wc_sec;
    uint32_t wc_nsec;
    struct compat_arch_shared_info arch;
};
98 
#define COMPAT_EVTCHN_2L_NR_CHANNELS            1024

/* Local private implementation of struct xenevtchn_handle */
struct xenevtchn_handle {
    evtchn_port_t be_port;    /* Backend (QEMU-side) port number */
    evtchn_port_t guest_port; /* Or zero for unbound */
    int fd;                   /* Eventfd used to signal the backend */
};
107 
/*
 * For unbound/interdomain ports there are only two possible remote
 * domains; self and QEMU. Use a single high bit in type_val for that,
 * and the low bits for the remote port number (or 0 for unbound).
 */
#define PORT_INFO_TYPEVAL_REMOTE_QEMU           0x8000
#define PORT_INFO_TYPEVAL_REMOTE_PORT_MASK      0x7FFF

/*
 * These 'emuirq' values are used by Xen in the LM stream... and yes, I am
 * insane enough to think about guest-transparent live migration from actual
 * Xen to QEMU, and ensuring that we can convert/consume the stream.
 *
 * Negative sentinels stored in pirq_info.gsi instead of a real GSI number.
 */
#define IRQ_UNBOUND -1
#define IRQ_PT -2
#define IRQ_MSI_EMU -3
124 
125 
/* Per-PIRQ state; rebuilt on migration (see xen_evtchn_post_load()) */
struct pirq_info {
    int gsi;            /* GSI number, or a negative IRQ_* sentinel */
    uint16_t port;      /* Bound event channel port, or 0 */
    PCIDevice *dev;     /* Source PCI device for MSI/MSI-X, if any */
    int vector;
    bool is_msix;
    bool is_masked;
    bool is_translated;
};
135 
/* Device state for the emulated Xen event-channel subsystem (singleton) */
struct XenEvtchnState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    uint64_t callback_param;    /* HVM_PARAM_CALLBACK_IRQ value (serialized) */
    bool evtchn_in_kernel;      /* True when KVM delivers events itself */
    uint32_t callback_gsi;      /* GSI currently used for callback delivery */

    QEMUBH *gsi_bh;             /* Defers GSI updates when BQL is not held */

    QemuMutex port_lock;        /* Protects port_table and nr_ports */
    uint32_t nr_ports;          /* One past the highest in-use port */
    XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS];

    /* Connected to the system GSIs for raising callback as GSI / INTx */
    unsigned int nr_callback_gsis;
    qemu_irq *callback_gsis;

    /* Backend handles, indexed by backend port number */
    struct xenevtchn_handle *be_handles[EVTCHN_2L_NR_CHANNELS];

    uint32_t nr_pirqs;          /* Set from xen-evtchn-max-pirq property */

    /* Bitmap of allocated PIRQs (serialized) */
    uint16_t nr_pirq_inuse_words;
    uint64_t *pirq_inuse_bitmap;

    /* GSI → PIRQ mapping (serialized) */
    uint16_t gsi_pirq[IOAPIC_NUM_PINS];

    /* Per-GSI assertion state (serialized) */
    uint32_t pirq_gsi_set;

    /* Per-PIRQ information (rebuilt on migration, protected by BQL) */
    struct pirq_info *pirq;
};
172 
/* Accessors for the PIRQ in-use bitmap: 64 PIRQs per uint64_t word */
#define pirq_inuse_word(s, pirq) (s->pirq_inuse_bitmap[((pirq) / 64)])
#define pirq_inuse_bit(pirq) (1ULL << ((pirq) & 63))

#define pirq_inuse(s, pirq) (pirq_inuse_word(s, pirq) & pirq_inuse_bit(pirq))

/* Singleton instance, set by xen_evtchn_create() */
struct XenEvtchnState *xen_evtchn_singleton;

/* Top bits of callback_param are the type (HVM_PARAM_CALLBACK_TYPE_xxx) */
#define CALLBACK_VIA_TYPE_SHIFT 56

/* Forward declaration: needed by xen_evtchn_pre_load() below */
static void unbind_backend_ports(XenEvtchnState *s);
184 
/*
 * Runs before an incoming migration stream is parsed: discard state the
 * stream is about to recreate.
 */
static int xen_evtchn_pre_load(void *opaque)
{
    XenEvtchnState *s = opaque;

    /* Unbind all the backend-side ports; they need to rebind */
    unbind_backend_ports(s);

    /*
     * The stream allocates a fresh bitmap (VMSTATE_VARRAY_UINT16_ALLOC);
     * free the current one or it'll be leaked.
     */
    g_free(s->pirq_inuse_bitmap);
    s->pirq_inuse_bitmap = NULL;

    return 0;
}
198 
/*
 * Runs after the migration stream is loaded: re-arm the callback
 * parameter in KVM and rebuild the non-serialized s->pirq[] array from
 * the serialized port table and GSI→PIRQ map.
 */
static int xen_evtchn_post_load(void *opaque, int version_id)
{
    XenEvtchnState *s = opaque;
    uint32_t i;

    if (s->callback_param) {
        xen_evtchn_set_callback_param(s->callback_param);
    }

    /* Rebuild s->pirq[].port mapping */
    for (i = 0; i < s->nr_ports; i++) {
        XenEvtchnPort *p = &s->port_table[i];

        if (p->type == EVTCHNSTAT_pirq) {
            assert(p->type_val);
            assert(p->type_val < s->nr_pirqs);

            /*
             * Set the gsi to IRQ_UNBOUND; it may be changed to an actual
             * GSI# below, or to IRQ_MSI_EMU when the MSI table snooping
             * catches up with it.
             */
            s->pirq[p->type_val].gsi = IRQ_UNBOUND;
            s->pirq[p->type_val].port = i;
        }
    }
    /* Rebuild s->pirq[].gsi mapping */
    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
        if (s->gsi_pirq[i]) {
            s->pirq[s->gsi_pirq[i]].gsi = i;
        }
    }
    return 0;
}
233 
/* Migration state is only carried when running in Xen-emulation mode. */
static bool xen_evtchn_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}
238 
/* Wire format for one XenEvtchnPort entry in the migration stream */
static const VMStateDescription xen_evtchn_port_vmstate = {
    .name = "xen_evtchn_port",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(vcpu, XenEvtchnPort),
        VMSTATE_UINT16(type, XenEvtchnPort),
        VMSTATE_UINT16(type_val, XenEvtchnPort),
        VMSTATE_END_OF_LIST()
    }
};
250 
/*
 * Top-level migration description. Only nr_ports entries of the port
 * table and nr_pirq_inuse_words bitmap words go on the wire; s->pirq[]
 * is rebuilt in post_load rather than serialized.
 */
static const VMStateDescription xen_evtchn_vmstate = {
    .name = "xen_evtchn",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_evtchn_is_needed,
    .pre_load = xen_evtchn_pre_load,
    .post_load = xen_evtchn_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(callback_param, XenEvtchnState),
        VMSTATE_UINT32(nr_ports, XenEvtchnState),
        VMSTATE_STRUCT_VARRAY_UINT32(port_table, XenEvtchnState, nr_ports, 1,
                                     xen_evtchn_port_vmstate, XenEvtchnPort),
        VMSTATE_UINT16_ARRAY(gsi_pirq, XenEvtchnState, IOAPIC_NUM_PINS),
        VMSTATE_VARRAY_UINT16_ALLOC(pirq_inuse_bitmap, XenEvtchnState,
                                    nr_pirq_inuse_words, 0,
                                    vmstate_info_uint64, uint64_t),
        VMSTATE_UINT32(pirq_gsi_set, XenEvtchnState),
        VMSTATE_END_OF_LIST()
    }
};
271 
/* QOM class init: the device only hooks up its migration description. */
static void xen_evtchn_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->vmsd = &xen_evtchn_vmstate;
}
278 
/* QOM type registration record for the xen-evtchn sysbus device */
static const TypeInfo xen_evtchn_info = {
    .name          = TYPE_XEN_EVTCHN,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenEvtchnState),
    .class_init    = xen_evtchn_class_init,
};
285 
/*
 * Dispatch table for PV backend drivers; installed into xen_evtchn_ops
 * by xen_evtchn_create().
 */
static struct evtchn_backend_ops emu_evtchn_backend_ops = {
    .open = xen_be_evtchn_open,
    .bind_interdomain = xen_be_evtchn_bind_interdomain,
    .unbind = xen_be_evtchn_unbind,
    .close = xen_be_evtchn_close,
    .get_fd = xen_be_evtchn_fd,
    .notify = xen_be_evtchn_notify,
    .unmask = xen_be_evtchn_unmask,
    .pending = xen_be_evtchn_pending,
};
296 
297 static void gsi_assert_bh(void *opaque)
298 {
299     struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
300     if (vi) {
301         xen_evtchn_set_callback_level(!!vi->evtchn_upcall_pending);
302     }
303 }
304 
/*
 * Instantiate the singleton event-channel device, wire its output IRQs
 * to the platform GSIs, and size the PIRQ tables.
 */
void xen_evtchn_create(unsigned int nr_gsis, qemu_irq *system_gsis)
{
    XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN,
                                                        -1, NULL));
    int i;

    xen_evtchn_singleton = s;

    qemu_mutex_init(&s->port_lock);
    s->gsi_bh = aio_bh_new(qemu_get_aio_context(), gsi_assert_bh, s);

    /*
     * These are the *output* GSI from event channel support, for
     * signalling CPU0's events via GSI or PCI INTx instead of the
     * per-CPU vector. We create a *set* of irqs and connect one to
     * each of the system GSIs which were passed in from the platform
     * code, and then just trigger the right one as appropriate from
     * xen_evtchn_set_callback_level().
     */
    s->nr_callback_gsis = nr_gsis;
    s->callback_gsis = g_new0(qemu_irq, nr_gsis);
    for (i = 0; i < nr_gsis; i++) {
        sysbus_init_irq(SYS_BUS_DEVICE(s), &s->callback_gsis[i]);
        sysbus_connect_irq(SYS_BUS_DEVICE(s), i, system_gsis[i]);
    }

    /*
     * The Xen scheme for encoding PIRQ# into an MSI message is not
     * compatible with 32-bit MSI, as it puts the high bits of the
     * PIRQ# into the high bits of the MSI message address, instead of
     * using the Extended Destination ID in address bits 4-11 which
     * perhaps would have been a better choice.
     *
     * To keep life simple, kvm_accel_instance_init() initialises the
     * default to 256. which conveniently doesn't need to set anything
     * outside the low 32 bits of the address. It can be increased by
     * setting the xen-evtchn-max-pirq property.
     */
    s->nr_pirqs = kvm_xen_get_evtchn_max_pirq();

    s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
    s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
    s->pirq = g_new0(struct pirq_info, s->nr_pirqs);

    /* Set event channel functions for backend drivers to use */
    xen_evtchn_ops = &emu_evtchn_backend_ops;
}
352 
/* Register the QOM type at module-init time */
static void xen_evtchn_register_types(void)
{
    type_register_static(&xen_evtchn_info);
}

type_init(xen_evtchn_register_types)
359 
/*
 * Resolve an HVM_PARAM_CALLBACK_TYPE_PCI_INTX parameter to the GSI that
 * the named device/INTx pin currently routes to.
 *
 * Bit layout of @param as decoded below:
 *   bits  0-1  INTx pin
 *   bits  8-15 PCI devfn
 *   bits 16-31 PCI bus
 *   bits 32-47 PCI domain (only domain 0 is handled)
 *
 * Returns the GSI number, or 0 on any failure.
 */
static int set_callback_pci_intx(XenEvtchnState *s, uint64_t param)
{
    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
    uint8_t pin = param & 3;
    uint8_t devfn = (param >> 8) & 0xff;
    uint16_t bus = (param >> 16) & 0xffff;
    uint16_t domain = (param >> 32) & 0xffff;
    PCIDevice *pdev;
    PCIINTxRoute r;

    if (domain || !pcms) {
        return 0;
    }

    pdev = pci_find_device(pcms->bus, bus, devfn);
    if (!pdev) {
        return 0;
    }

    r = pci_device_route_intx_to_irq(pdev, pin);
    if (r.mode != PCI_INTX_ENABLED) {
        return 0;
    }

    /*
     * Hm, can we be notified of INTX routing changes? Not without
     * *owning* the device and being allowed to overwrite its own
     * ->intx_routing_notifier, AFAICT. So let's not.
     */
    return r.irq;
}
391 
/*
 * Assert (level != 0) or deassert the callback GSI/INTx line used to
 * signal CPU0's events when vector delivery is not in use.
 */
void xen_evtchn_set_callback_level(int level)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    if (!s) {
        return;
    }

    /*
     * We get to this function in a number of ways:
     *
     *  • From I/O context, via PV backend drivers sending a notification to
     *    the guest.
     *
     *  • From guest vCPU context, via loopback interdomain event channels
     *    (or theoretically even IPIs but guests don't use those with GSI
     *    delivery because that's pointless. We don't want a malicious guest
     *    to be able to trigger a deadlock though, so we can't rule it out.)
     *
     *  • From guest vCPU context when the HVM_PARAM_CALLBACK_IRQ is being
     *    configured.
     *
     *  • From guest vCPU context in the KVM exit handler, if the upcall
     *    pending flag has been cleared and the GSI needs to be deasserted.
     *
     *  • Maybe in future, in an interrupt ack/eoi notifier when the GSI has
     *    been acked in the irqchip.
     *
     * Whichever context we come from if we aren't already holding the BQL
     * then we can't take it now, as we may already hold s->port_lock. So
     * trigger the BH to set the IRQ for us instead of doing it immediately.
     *
     * In the HVM_PARAM_CALLBACK_IRQ and KVM exit handler cases, the caller
     * will deliberately take the BQL because they want the change to take
     * effect immediately. That just leaves interdomain loopback as the case
     * which uses the BH.
     */
    if (!qemu_mutex_iothread_locked()) {
        qemu_bh_schedule(s->gsi_bh);
        return;
    }

    if (s->callback_gsi && s->callback_gsi < s->nr_callback_gsis) {
        qemu_set_irq(s->callback_gsis[s->callback_gsi], level);
        if (level) {
            /* Ensure the vCPU polls for deassertion */
            kvm_xen_set_callback_asserted();
        }
    }
}
441 
442 int xen_evtchn_set_callback_param(uint64_t param)
443 {
444     XenEvtchnState *s = xen_evtchn_singleton;
445     struct kvm_xen_hvm_attr xa = {
446         .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
447         .u.vector = 0,
448     };
449     bool in_kernel = false;
450     uint32_t gsi = 0;
451     int type = param >> CALLBACK_VIA_TYPE_SHIFT;
452     int ret;
453 
454     if (!s) {
455         return -ENOTSUP;
456     }
457 
458     /*
459      * We need the BQL because set_callback_pci_intx() may call into PCI code,
460      * and because we may need to manipulate the old and new GSI levels.
461      */
462     assert(qemu_mutex_iothread_locked());
463     qemu_mutex_lock(&s->port_lock);
464 
465     switch (type) {
466     case HVM_PARAM_CALLBACK_TYPE_VECTOR: {
467         xa.u.vector = (uint8_t)param,
468 
469         ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
470         if (!ret && kvm_xen_has_cap(EVTCHN_SEND)) {
471             in_kernel = true;
472         }
473         gsi = 0;
474         break;
475     }
476 
477     case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
478         gsi = set_callback_pci_intx(s, param);
479         ret = gsi ? 0 : -EINVAL;
480         break;
481 
482     case HVM_PARAM_CALLBACK_TYPE_GSI:
483         gsi = (uint32_t)param;
484         ret = 0;
485         break;
486 
487     default:
488         /* Xen doesn't return error even if you set something bogus */
489         ret = 0;
490         break;
491     }
492 
493     if (!ret) {
494         /* If vector delivery was turned *off* then tell the kernel */
495         if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) ==
496             HVM_PARAM_CALLBACK_TYPE_VECTOR && !xa.u.vector) {
497             kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
498         }
499         s->callback_param = param;
500         s->evtchn_in_kernel = in_kernel;
501 
502         if (gsi != s->callback_gsi) {
503             struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
504 
505             xen_evtchn_set_callback_level(0);
506             s->callback_gsi = gsi;
507 
508             if (gsi && vi && vi->evtchn_upcall_pending) {
509                 kvm_xen_inject_vcpu_callback_vector(0, type);
510             }
511         }
512     }
513 
514     qemu_mutex_unlock(&s->port_lock);
515 
516     return ret;
517 }
518 
519 static void inject_callback(XenEvtchnState *s, uint32_t vcpu)
520 {
521     int type = s->callback_param >> CALLBACK_VIA_TYPE_SHIFT;
522 
523     kvm_xen_inject_vcpu_callback_vector(vcpu, type);
524 }
525 
526 static void deassign_kernel_port(evtchn_port_t port)
527 {
528     struct kvm_xen_hvm_attr ha;
529     int ret;
530 
531     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
532     ha.u.evtchn.send_port = port;
533     ha.u.evtchn.flags = KVM_XEN_EVTCHN_DEASSIGN;
534 
535     ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
536     if (ret) {
537         qemu_log_mask(LOG_GUEST_ERROR, "Failed to unbind kernel port %d: %s\n",
538                       port, strerror(ret));
539     }
540 }
541 
542 static int assign_kernel_port(uint16_t type, evtchn_port_t port,
543                               uint32_t vcpu_id)
544 {
545     CPUState *cpu = qemu_get_cpu(vcpu_id);
546     struct kvm_xen_hvm_attr ha;
547 
548     if (!cpu) {
549         return -ENOENT;
550     }
551 
552     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
553     ha.u.evtchn.send_port = port;
554     ha.u.evtchn.type = type;
555     ha.u.evtchn.flags = 0;
556     ha.u.evtchn.deliver.port.port = port;
557     ha.u.evtchn.deliver.port.vcpu = kvm_arch_vcpu_id(cpu);
558     ha.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
559 
560     return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
561 }
562 
563 static int assign_kernel_eventfd(uint16_t type, evtchn_port_t port, int fd)
564 {
565     struct kvm_xen_hvm_attr ha;
566 
567     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
568     ha.u.evtchn.send_port = port;
569     ha.u.evtchn.type = type;
570     ha.u.evtchn.flags = 0;
571     ha.u.evtchn.deliver.eventfd.port = 0;
572     ha.u.evtchn.deliver.eventfd.fd = fd;
573 
574     return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
575 }
576 
577 static bool valid_port(evtchn_port_t port)
578 {
579     if (!port) {
580         return false;
581     }
582 
583     if (xen_is_long_mode()) {
584         return port < EVTCHN_2L_NR_CHANNELS;
585     } else {
586         return port < COMPAT_EVTCHN_2L_NR_CHANNELS;
587     }
588 }
589 
590 static bool valid_vcpu(uint32_t vcpu)
591 {
592     return !!qemu_get_cpu(vcpu);
593 }
594 
/*
 * Detach every interdomain port bound to a QEMU backend, reverting each
 * guest port to 'unbound' and clearing the handle's guest_port. Used
 * before migration load, since backends must rebind afterwards.
 */
static void unbind_backend_ports(XenEvtchnState *s)
{
    XenEvtchnPort *p;
    int i;

    for (i = 1; i < s->nr_ports; i++) {
        p = &s->port_table[i];
        if (p->type == EVTCHNSTAT_interdomain &&
            (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU)) {
            evtchn_port_t be_port = p->type_val & PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;

            if (s->be_handles[be_port]) {
                /* This part will be overwritten on the load anyway. */
                p->type = EVTCHNSTAT_unbound;
                p->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;

                /* Leave the backend port open and unbound too. */
                if (kvm_xen_has_cap(EVTCHN_SEND)) {
                    deassign_kernel_port(i);
                }
                s->be_handles[be_port]->guest_port = 0;
            }
        }
    }
}
620 
/*
 * EVTCHNOP_status hypercall: report the state of one port back to the
 * guest. Returns 0, -ENOTSUP, -ESRCH for a foreign domain, or -EINVAL
 * for an out-of-range port.
 */
int xen_evtchn_status_op(struct evtchn_status *status)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    XenEvtchnPort *p;

    if (!s) {
        return -ENOTSUP;
    }

    /* Only the guest's own domain may be queried */
    if (status->dom != DOMID_SELF && status->dom != xen_domid) {
        return -ESRCH;
    }

    if (!valid_port(status->port)) {
        return -EINVAL;
    }

    qemu_mutex_lock(&s->port_lock);

    p = &s->port_table[status->port];

    status->status = p->type;
    status->vcpu = p->vcpu;

    /* The REMOTE_QEMU bit in type_val distinguishes QEMU from loopback */
    switch (p->type) {
    case EVTCHNSTAT_unbound:
        if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
            status->u.unbound.dom = DOMID_QEMU;
        } else {
            status->u.unbound.dom = xen_domid;
        }
        break;

    case EVTCHNSTAT_interdomain:
        if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
            status->u.interdomain.dom = DOMID_QEMU;
        } else {
            status->u.interdomain.dom = xen_domid;
        }

        status->u.interdomain.port = p->type_val &
            PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
        break;

    case EVTCHNSTAT_pirq:
        status->u.pirq = p->type_val;
        break;

    case EVTCHNSTAT_virq:
        status->u.virq = p->type_val;
        break;
    }

    qemu_mutex_unlock(&s->port_lock);
    return 0;
}
677 
678 /*
679  * Never thought I'd hear myself say this, but C++ templates would be
680  * kind of nice here.
681  *
682  * template<class T> static int do_unmask_port(T *shinfo, ...);
683  */
/*
 * 64-bit guest view: unmask @port and, if it was pending, propagate the
 * pending state up through vcpu_info and deliver the upcall. When
 * do_unmask is false, only re-evaluate delivery (used after affinity
 * changes) without touching the mask bit. Each qatomic_fetch_* short-
 * circuits as soon as no state transition happened, so the upcall fires
 * at most once.
 */
static int do_unmask_port_lm(XenEvtchnState *s, evtchn_port_t port,
                             bool do_unmask, struct shared_info *shinfo,
                             struct vcpu_info *vcpu_info)
{
    const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
    typeof(shinfo->evtchn_pending[0]) mask;
    int idx = port / bits_per_word;
    int offset = port % bits_per_word;

    mask = 1UL << offset;

    /*
     * NOTE(review): bits_per_word doubles as the word count here; the
     * shared_info bitmap has as many words as bits per word, so this is
     * equivalent to an ARRAY_SIZE bound — confirm against interface headers.
     */
    if (idx >= bits_per_word) {
        return -EINVAL;
    }

    if (do_unmask) {
        /*
         * If this is a true unmask operation, clear the mask bit. If
         * it was already unmasked, we have nothing further to do.
         */
        if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
            return 0;
        }
    } else {
        /*
         * This is a pseudo-unmask for affinity changes. We don't
         * change the mask bit, and if it's *masked* we have nothing
         * else to do.
         */
        if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
            return 0;
        }
    }

    /* If the event was not pending, we're done. */
    if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
        return 0;
    }

    /* Now on to the vcpu_info evtchn_pending_sel index... */
    mask = 1UL << idx;

    /* If a port in this word was already pending for this vCPU, all done. */
    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
        return 0;
    }

    /* Set evtchn_upcall_pending for this vCPU */
    if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
        return 0;
    }

    inject_callback(s, s->port_table[port].vcpu);

    return 0;
}
740 
/*
 * 32-bit guest variant of do_unmask_port_lm(); identical logic over the
 * compat_shared_info / compat_vcpu_info layouts (32-bit bitmap words).
 */
static int do_unmask_port_compat(XenEvtchnState *s, evtchn_port_t port,
                                 bool do_unmask,
                                 struct compat_shared_info *shinfo,
                                 struct compat_vcpu_info *vcpu_info)
{
    const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
    typeof(shinfo->evtchn_pending[0]) mask;
    int idx = port / bits_per_word;
    int offset = port % bits_per_word;

    mask = 1UL << offset;

    /* 32 words of 32 bits: bits_per_word doubles as the word count */
    if (idx >= bits_per_word) {
        return -EINVAL;
    }

    if (do_unmask) {
        /*
         * If this is a true unmask operation, clear the mask bit. If
         * it was already unmasked, we have nothing further to do.
         */
        if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
            return 0;
        }
    } else {
        /*
         * This is a pseudo-unmask for affinity changes. We don't
         * change the mask bit, and if it's *masked* we have nothing
         * else to do.
         */
        if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
            return 0;
        }
    }

    /* If the event was not pending, we're done. */
    if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
        return 0;
    }

    /* Now on to the vcpu_info evtchn_pending_sel index... */
    mask = 1UL << idx;

    /* If a port in this word was already pending for this vCPU, all done. */
    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
        return 0;
    }

    /* Set evtchn_upcall_pending for this vCPU */
    if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
        return 0;
    }

    inject_callback(s, s->port_table[port].vcpu);

    return 0;
}
798 
/*
 * Unmask @port (or just re-evaluate delivery when do_unmask is false),
 * dispatching to the 64-bit or 32-bit layout helper as appropriate.
 * Returns -EINVAL for a closed port or missing vcpu_info, -ENOTSUP if
 * the shared info page is not mapped.
 */
static int unmask_port(XenEvtchnState *s, evtchn_port_t port, bool do_unmask)
{
    void *vcpu_info, *shinfo;

    if (s->port_table[port].type == EVTCHNSTAT_closed) {
        return -EINVAL;
    }

    shinfo = xen_overlay_get_shinfo_ptr();
    if (!shinfo) {
        return -ENOTSUP;
    }

    vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
    if (!vcpu_info) {
        return -EINVAL;
    }

    if (xen_is_long_mode()) {
        return do_unmask_port_lm(s, port, do_unmask, shinfo, vcpu_info);
    } else {
        return do_unmask_port_compat(s, port, do_unmask, shinfo, vcpu_info);
    }
}
823 
/*
 * 64-bit guest view: mark @port pending and, if it is unmasked and was
 * not already pending at any level, raise the upcall for its vCPU.
 * Each qatomic_fetch_or short-circuits when no state changed, so the
 * callback is injected at most once.
 */
static int do_set_port_lm(XenEvtchnState *s, evtchn_port_t port,
                          struct shared_info *shinfo,
                          struct vcpu_info *vcpu_info)
{
    const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
    typeof(shinfo->evtchn_pending[0]) mask;
    int idx = port / bits_per_word;
    int offset = port % bits_per_word;

    mask = 1UL << offset;

    /* 64 words of 64 bits: bits_per_word doubles as the word count */
    if (idx >= bits_per_word) {
        return -EINVAL;
    }

    /* Update the pending bit itself. If it was already set, we're done. */
    if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
        return 0;
    }

    /* Check if it's masked. */
    if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
        return 0;
    }

    /* Now on to the vcpu_info evtchn_pending_sel index... */
    mask = 1UL << idx;

    /* If a port in this word was already pending for this vCPU, all done. */
    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
        return 0;
    }

    /* Set evtchn_upcall_pending for this vCPU */
    if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
        return 0;
    }

    inject_callback(s, s->port_table[port].vcpu);

    return 0;
}
866 
/*
 * 32-bit guest variant of do_set_port_lm(); identical logic over the
 * compat_shared_info / compat_vcpu_info layouts (32-bit bitmap words).
 */
static int do_set_port_compat(XenEvtchnState *s, evtchn_port_t port,
                              struct compat_shared_info *shinfo,
                              struct compat_vcpu_info *vcpu_info)
{
    const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
    typeof(shinfo->evtchn_pending[0]) mask;
    int idx = port / bits_per_word;
    int offset = port % bits_per_word;

    mask = 1UL << offset;

    /* 32 words of 32 bits: bits_per_word doubles as the word count */
    if (idx >= bits_per_word) {
        return -EINVAL;
    }

    /* Update the pending bit itself. If it was already set, we're done. */
    if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
        return 0;
    }

    /* Check if it's masked. */
    if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
        return 0;
    }

    /* Now on to the vcpu_info evtchn_pending_sel index... */
    mask = 1UL << idx;

    /* If a port in this word was already pending for this vCPU, all done. */
    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
        return 0;
    }

    /* Set evtchn_upcall_pending for this vCPU */
    if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
        return 0;
    }

    inject_callback(s, s->port_table[port].vcpu);

    return 0;
}
909 
/*
 * Raise an event on @port. If KVM handles event delivery in-kernel,
 * hand the whole job to KVM_XEN_HVM_EVTCHN_SEND; otherwise manipulate
 * the shared info page directly via the layout-specific helpers.
 */
static int set_port_pending(XenEvtchnState *s, evtchn_port_t port)
{
    void *vcpu_info, *shinfo;

    if (s->port_table[port].type == EVTCHNSTAT_closed) {
        return -EINVAL;
    }

    if (s->evtchn_in_kernel) {
        XenEvtchnPort *p = &s->port_table[port];
        CPUState *cpu = qemu_get_cpu(p->vcpu);
        struct kvm_irq_routing_xen_evtchn evt;

        /* Silently drop events for a vanished vCPU */
        if (!cpu) {
            return 0;
        }

        evt.port = port;
        evt.vcpu = kvm_arch_vcpu_id(cpu);
        evt.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

        return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_EVTCHN_SEND, &evt);
    }

    shinfo = xen_overlay_get_shinfo_ptr();
    if (!shinfo) {
        return -ENOTSUP;
    }

    vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
    if (!vcpu_info) {
        return -EINVAL;
    }

    if (xen_is_long_mode()) {
        return do_set_port_lm(s, port, shinfo, vcpu_info);
    } else {
        return do_set_port_compat(s, port, shinfo, vcpu_info);
    }
}
950 
/*
 * Clear the pending bit for @port in the shared info page, using the
 * bitmap word size appropriate to the guest's mode. Only the pending
 * bit is touched; mask and vcpu_info state are left alone.
 */
static int clear_port_pending(XenEvtchnState *s, evtchn_port_t port)
{
    void *p = xen_overlay_get_shinfo_ptr();

    if (!p) {
        return -ENOTSUP;
    }

    if (xen_is_long_mode()) {
        struct shared_info *shinfo = p;
        const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
        typeof(shinfo->evtchn_pending[0]) mask;
        int idx = port / bits_per_word;
        int offset = port % bits_per_word;

        mask = 1UL << offset;

        qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
    } else {
        struct compat_shared_info *shinfo = p;
        const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
        typeof(shinfo->evtchn_pending[0]) mask;
        int idx = port / bits_per_word;
        int offset = port % bits_per_word;

        mask = 1UL << offset;

        qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
    }
    return 0;
}
982 
983 static void free_port(XenEvtchnState *s, evtchn_port_t port)
984 {
985     s->port_table[port].type = EVTCHNSTAT_closed;
986     s->port_table[port].type_val = 0;
987     s->port_table[port].vcpu = 0;
988 
989     if (s->nr_ports == port + 1) {
990         do {
991             s->nr_ports--;
992         } while (s->nr_ports &&
993                  s->port_table[s->nr_ports - 1].type == EVTCHNSTAT_closed);
994     }
995 
996     /* Clear pending event to avoid unexpected behavior on re-bind. */
997     clear_port_pending(s, port);
998 }
999 
1000 static int allocate_port(XenEvtchnState *s, uint32_t vcpu, uint16_t type,
1001                          uint16_t val, evtchn_port_t *port)
1002 {
1003     evtchn_port_t p = 1;
1004 
1005     for (p = 1; valid_port(p); p++) {
1006         if (s->port_table[p].type == EVTCHNSTAT_closed) {
1007             s->port_table[p].vcpu = vcpu;
1008             s->port_table[p].type = type;
1009             s->port_table[p].type_val = val;
1010 
1011             *port = p;
1012 
1013             if (s->nr_ports < p + 1) {
1014                 s->nr_ports = p + 1;
1015             }
1016 
1017             return 0;
1018         }
1019     }
1020     return -ENOSPC;
1021 }
1022 
1023 static bool virq_is_global(uint32_t virq)
1024 {
1025     switch (virq) {
1026     case VIRQ_TIMER:
1027     case VIRQ_DEBUG:
1028     case VIRQ_XENOPROF:
1029     case VIRQ_XENPMU:
1030         return false;
1031 
1032     default:
1033         return true;
1034     }
1035 }
1036 
/*
 * Tear down the binding-specific state of one event channel port and
 * release it. Returns 0 on success, -ENOENT if the port was already
 * closed. Caller must hold s->port_lock (and the iothread lock, for
 * the PIRQ case).
 *
 * *flush_kvm_routes is only ever set to true here, never cleared; the
 * caller is responsible for initialising it and for flushing the KVM
 * MSI routes afterwards if it was set.
 */
static int close_port(XenEvtchnState *s, evtchn_port_t port,
                      bool *flush_kvm_routes)
{
    XenEvtchnPort *p = &s->port_table[port];

    /* Because it *might* be a PIRQ port */
    assert(qemu_mutex_iothread_locked());

    switch (p->type) {
    case EVTCHNSTAT_closed:
        return -ENOENT;

    case EVTCHNSTAT_pirq:
        /* Unlink the PIRQ from this port. */
        s->pirq[p->type_val].port = 0;
        if (s->pirq[p->type_val].is_translated) {
            /* KVM had translated an MSI to this port; its routes are stale. */
            *flush_kvm_routes = true;
        }
        break;

    case EVTCHNSTAT_virq:
        /* Unbind the VIRQ; global VIRQs are bound on vCPU 0. */
        kvm_xen_set_vcpu_virq(virq_is_global(p->type_val) ? 0 : p->vcpu,
                              p->type_val, 0);
        break;

    case EVTCHNSTAT_ipi:
        if (s->evtchn_in_kernel) {
            deassign_kernel_port(port);
        }
        break;

    case EVTCHNSTAT_interdomain:
        if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
            /* Remote end is QEMU itself acting as the driver domain. */
            uint16_t be_port = p->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
            struct xenevtchn_handle *xc = s->be_handles[be_port];
            if (xc) {
                if (kvm_xen_has_cap(EVTCHN_SEND)) {
                    deassign_kernel_port(port);
                }
                xc->guest_port = 0;
            }
        } else {
            /* Loopback interdomain */
            XenEvtchnPort *rp = &s->port_table[p->type_val];
            if (!valid_port(p->type_val) || rp->type_val != port ||
                rp->type != EVTCHNSTAT_interdomain) {
                error_report("Inconsistent state for interdomain unbind");
            } else {
                /* Set the other end back to unbound */
                rp->type = EVTCHNSTAT_unbound;
                rp->type_val = 0;
            }
        }
        break;

    default:
        break;
    }

    free_port(s, port);
    return 0;
}
1098 
1099 int xen_evtchn_soft_reset(void)
1100 {
1101     XenEvtchnState *s = xen_evtchn_singleton;
1102     bool flush_kvm_routes;
1103     int i;
1104 
1105     if (!s) {
1106         return -ENOTSUP;
1107     }
1108 
1109     assert(qemu_mutex_iothread_locked());
1110 
1111     qemu_mutex_lock(&s->port_lock);
1112 
1113     for (i = 0; i < s->nr_ports; i++) {
1114         close_port(s, i, &flush_kvm_routes);
1115     }
1116 
1117     qemu_mutex_unlock(&s->port_lock);
1118 
1119     if (flush_kvm_routes) {
1120         kvm_update_msi_routes_all(NULL, true, 0, 0);
1121     }
1122 
1123     return 0;
1124 }
1125 
1126 int xen_evtchn_reset_op(struct evtchn_reset *reset)
1127 {
1128     if (reset->dom != DOMID_SELF && reset->dom != xen_domid) {
1129         return -ESRCH;
1130     }
1131 
1132     return xen_evtchn_soft_reset();
1133 }
1134 
1135 int xen_evtchn_close_op(struct evtchn_close *close)
1136 {
1137     XenEvtchnState *s = xen_evtchn_singleton;
1138     bool flush_kvm_routes = false;
1139     int ret;
1140 
1141     if (!s) {
1142         return -ENOTSUP;
1143     }
1144 
1145     if (!valid_port(close->port)) {
1146         return -EINVAL;
1147     }
1148 
1149     QEMU_IOTHREAD_LOCK_GUARD();
1150     qemu_mutex_lock(&s->port_lock);
1151 
1152     ret = close_port(s, close->port, &flush_kvm_routes);
1153 
1154     qemu_mutex_unlock(&s->port_lock);
1155 
1156     if (flush_kvm_routes) {
1157         kvm_update_msi_routes_all(NULL, true, 0, 0);
1158     }
1159 
1160     return ret;
1161 }
1162 
1163 int xen_evtchn_unmask_op(struct evtchn_unmask *unmask)
1164 {
1165     XenEvtchnState *s = xen_evtchn_singleton;
1166     int ret;
1167 
1168     if (!s) {
1169         return -ENOTSUP;
1170     }
1171 
1172     if (!valid_port(unmask->port)) {
1173         return -EINVAL;
1174     }
1175 
1176     qemu_mutex_lock(&s->port_lock);
1177 
1178     ret = unmask_port(s, unmask->port, true);
1179 
1180     qemu_mutex_unlock(&s->port_lock);
1181 
1182     return ret;
1183 }
1184 
1185 int xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu *vcpu)
1186 {
1187     XenEvtchnState *s = xen_evtchn_singleton;
1188     XenEvtchnPort *p;
1189     int ret = -EINVAL;
1190 
1191     if (!s) {
1192         return -ENOTSUP;
1193     }
1194 
1195     if (!valid_port(vcpu->port)) {
1196         return -EINVAL;
1197     }
1198 
1199     if (!valid_vcpu(vcpu->vcpu)) {
1200         return -ENOENT;
1201     }
1202 
1203     qemu_mutex_lock(&s->port_lock);
1204 
1205     p = &s->port_table[vcpu->port];
1206 
1207     if (p->type == EVTCHNSTAT_interdomain ||
1208         p->type == EVTCHNSTAT_unbound ||
1209         p->type == EVTCHNSTAT_pirq ||
1210         (p->type == EVTCHNSTAT_virq && virq_is_global(p->type_val))) {
1211         /*
1212          * unmask_port() with do_unmask==false will just raise the event
1213          * on the new vCPU if the port was already pending.
1214          */
1215         p->vcpu = vcpu->vcpu;
1216         unmask_port(s, vcpu->port, false);
1217         ret = 0;
1218     }
1219 
1220     qemu_mutex_unlock(&s->port_lock);
1221 
1222     return ret;
1223 }
1224 
1225 int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq)
1226 {
1227     XenEvtchnState *s = xen_evtchn_singleton;
1228     int ret;
1229 
1230     if (!s) {
1231         return -ENOTSUP;
1232     }
1233 
1234     if (virq->virq >= NR_VIRQS) {
1235         return -EINVAL;
1236     }
1237 
1238     /* Global VIRQ must be allocated on vCPU0 first */
1239     if (virq_is_global(virq->virq) && virq->vcpu != 0) {
1240         return -EINVAL;
1241     }
1242 
1243     if (!valid_vcpu(virq->vcpu)) {
1244         return -ENOENT;
1245     }
1246 
1247     qemu_mutex_lock(&s->port_lock);
1248 
1249     ret = allocate_port(s, virq->vcpu, EVTCHNSTAT_virq, virq->virq,
1250                         &virq->port);
1251     if (!ret) {
1252         ret = kvm_xen_set_vcpu_virq(virq->vcpu, virq->virq, virq->port);
1253         if (ret) {
1254             free_port(s, virq->port);
1255         }
1256     }
1257 
1258     qemu_mutex_unlock(&s->port_lock);
1259 
1260     return ret;
1261 }
1262 
/*
 * EVTCHNOP_bind_pirq: allocate a port and bind an existing PIRQ to it.
 * For an emulated MSI PIRQ, a pending masked MSI is unmasked after the
 * binding so that delivery can proceed via the event channel.
 */
int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (pirq->pirq >= s->nr_pirqs) {
        return -EINVAL;
    }

    /* Iothread lock first: the MSI unmask path below needs it. */
    QEMU_IOTHREAD_LOCK_GUARD();

    if (s->pirq[pirq->pirq].port) {
        /* Already bound to a port. */
        return -EBUSY;
    }

    qemu_mutex_lock(&s->port_lock);

    ret = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq,
                        &pirq->port);
    if (ret) {
        qemu_mutex_unlock(&s->port_lock);
        return ret;
    }

    s->pirq[pirq->pirq].port = pirq->port;
    trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port);

    qemu_mutex_unlock(&s->port_lock);

    /*
     * Need to do the unmask outside port_lock because it may call
     * back into the MSI translate function.
     */
    if (s->pirq[pirq->pirq].gsi == IRQ_MSI_EMU) {
        if (s->pirq[pirq->pirq].is_masked) {
            PCIDevice *dev = s->pirq[pirq->pirq].dev;
            int vector = s->pirq[pirq->pirq].vector;
            char *dev_path = qdev_get_dev_path(DEVICE(dev));

            trace_kvm_xen_unmask_pirq(pirq->pirq, dev_path, vector);
            g_free(dev_path);

            if (s->pirq[pirq->pirq].is_msix) {
                msix_set_mask(dev, vector, false);
            } else {
                msi_set_mask(dev, vector, false, NULL);
            }
        } else if (s->pirq[pirq->pirq].is_translated) {
            /*
             * If KVM had attempted to translate this one before, make it try
             * again. If we unmasked, then the notifier on the MSI(-X) vector
             * will already have had the same effect.
             */
            kvm_update_msi_routes_all(NULL, true, 0, 0);
        }
    }

    return ret;
}
1326 
1327 int xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi *ipi)
1328 {
1329     XenEvtchnState *s = xen_evtchn_singleton;
1330     int ret;
1331 
1332     if (!s) {
1333         return -ENOTSUP;
1334     }
1335 
1336     if (!valid_vcpu(ipi->vcpu)) {
1337         return -ENOENT;
1338     }
1339 
1340     qemu_mutex_lock(&s->port_lock);
1341 
1342     ret = allocate_port(s, ipi->vcpu, EVTCHNSTAT_ipi, 0, &ipi->port);
1343     if (!ret && s->evtchn_in_kernel) {
1344         assign_kernel_port(EVTCHNSTAT_ipi, ipi->port, ipi->vcpu);
1345     }
1346 
1347     qemu_mutex_unlock(&s->port_lock);
1348 
1349     return ret;
1350 }
1351 
/*
 * EVTCHNOP_bind_interdomain: bind a new local port to a remote port.
 * Two cases are supported: the remote end is QEMU itself (acting as
 * the backend / driver domain, remote_dom == DOMID_QEMU), or loopback
 * within the local domain. The newly allocated local port is freed
 * again on any failure.
 */
int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    uint16_t type_val;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (interdomain->remote_dom == DOMID_QEMU) {
        type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
    } else if (interdomain->remote_dom == DOMID_SELF ||
               interdomain->remote_dom == xen_domid) {
        type_val = 0;
    } else {
        return -ESRCH;
    }

    if (!valid_port(interdomain->remote_port)) {
        return -EINVAL;
    }

    qemu_mutex_lock(&s->port_lock);

    /* The newly allocated port starts out as unbound */
    ret = allocate_port(s, 0, EVTCHNSTAT_unbound, type_val,
                        &interdomain->local_port);
    if (ret) {
        goto out;
    }

    if (interdomain->remote_dom == DOMID_QEMU) {
        /* remote_port here is a backend port number, not a guest port. */
        struct xenevtchn_handle *xc = s->be_handles[interdomain->remote_port];
        XenEvtchnPort *lp = &s->port_table[interdomain->local_port];

        if (!xc) {
            ret = -ENOENT;
            goto out_free_port;
        }

        if (xc->guest_port) {
            /* Backend handle is already bound to another guest port. */
            ret = -EBUSY;
            goto out_free_port;
        }

        assert(xc->be_port == interdomain->remote_port);
        xc->guest_port = interdomain->local_port;
        if (kvm_xen_has_cap(EVTCHN_SEND)) {
            assign_kernel_eventfd(lp->type, xc->guest_port, xc->fd);
        }
        lp->type = EVTCHNSTAT_interdomain;
        lp->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU | interdomain->remote_port;
        ret = 0;
    } else {
        /* Loopback */
        XenEvtchnPort *rp = &s->port_table[interdomain->remote_port];
        XenEvtchnPort *lp = &s->port_table[interdomain->local_port];

        /*
         * The 'remote' port for loopback must be an unbound port allocated for
         * communication with the local domain (as indicated by rp->type_val
         * being zero, not PORT_INFO_TYPEVAL_REMOTE_QEMU), and must *not* be
         * the port that was just allocated for the local end.
         */
        if (interdomain->local_port != interdomain->remote_port &&
            rp->type == EVTCHNSTAT_unbound && rp->type_val == 0) {

            /* Link the two ports to each other. */
            rp->type = EVTCHNSTAT_interdomain;
            rp->type_val = interdomain->local_port;

            lp->type = EVTCHNSTAT_interdomain;
            lp->type_val = interdomain->remote_port;
        } else {
            ret = -EINVAL;
        }
    }

 out_free_port:
    if (ret) {
        free_port(s, interdomain->local_port);
    }
 out:
    qemu_mutex_unlock(&s->port_lock);

    return ret;

}
1440 int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc)
1441 {
1442     XenEvtchnState *s = xen_evtchn_singleton;
1443     uint16_t type_val;
1444     int ret;
1445 
1446     if (!s) {
1447         return -ENOTSUP;
1448     }
1449 
1450     if (alloc->dom != DOMID_SELF && alloc->dom != xen_domid) {
1451         return -ESRCH;
1452     }
1453 
1454     if (alloc->remote_dom == DOMID_QEMU) {
1455         type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
1456     } else if (alloc->remote_dom == DOMID_SELF ||
1457                alloc->remote_dom == xen_domid) {
1458         type_val = 0;
1459     } else {
1460         return -EPERM;
1461     }
1462 
1463     qemu_mutex_lock(&s->port_lock);
1464 
1465     ret = allocate_port(s, 0, EVTCHNSTAT_unbound, type_val, &alloc->port);
1466 
1467     qemu_mutex_unlock(&s->port_lock);
1468 
1469     return ret;
1470 }
1471 
/*
 * EVTCHNOP_send: raise an event on a port from the guest side. Only
 * interdomain, IPI and unbound ports may be sent to; sends to unbound
 * ports are silently dropped, matching Xen's behaviour.
 */
int xen_evtchn_send_op(struct evtchn_send *send)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    XenEvtchnPort *p;
    int ret = 0;

    if (!s) {
        return -ENOTSUP;
    }

    if (!valid_port(send->port)) {
        return -EINVAL;
    }

    qemu_mutex_lock(&s->port_lock);

    p = &s->port_table[send->port];

    switch (p->type) {
    case EVTCHNSTAT_interdomain:
        if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
            /*
             * This is an event from the guest to qemu itself, which is
             * serving as the driver domain.
             */
            uint16_t be_port = p->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
            struct xenevtchn_handle *xc = s->be_handles[be_port];
            if (xc) {
                /* Kick the backend's eventfd. */
                eventfd_write(xc->fd, 1);
                ret = 0;
            } else {
                ret = -ENOENT;
            }
        } else {
            /* Loopback interdomain ports; just a complex IPI */
            set_port_pending(s, p->type_val);
        }
        break;

    case EVTCHNSTAT_ipi:
        set_port_pending(s, send->port);
        break;

    case EVTCHNSTAT_unbound:
        /* Xen will silently drop these */
        break;

    default:
        ret = -EINVAL;
        break;
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
1528 
1529 int xen_evtchn_set_port(uint16_t port)
1530 {
1531     XenEvtchnState *s = xen_evtchn_singleton;
1532     XenEvtchnPort *p;
1533     int ret = -EINVAL;
1534 
1535     if (!s) {
1536         return -ENOTSUP;
1537     }
1538 
1539     if (!valid_port(port)) {
1540         return -EINVAL;
1541     }
1542 
1543     qemu_mutex_lock(&s->port_lock);
1544 
1545     p = &s->port_table[port];
1546 
1547     /* QEMU has no business sending to anything but these */
1548     if (p->type == EVTCHNSTAT_virq ||
1549         (p->type == EVTCHNSTAT_interdomain &&
1550          (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU))) {
1551         set_port_pending(s, port);
1552         ret = 0;
1553     }
1554 
1555     qemu_mutex_unlock(&s->port_lock);
1556 
1557     return ret;
1558 }
1559 
/*
 * Allocate a free PIRQ number, mark it in use, and (for gsi >= 0)
 * record the GSI<->PIRQ mapping in both directions.
 *
 * Returns the PIRQ number, or -ENOSPC if none is free.
 * Caller must hold s->port_lock.
 */
static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
{
    uint16_t pirq;

    /*
     * Preserve the allocation strategy that Xen has. It looks like
     * we *never* give out PIRQ 0-15, we give out 16-nr_irqs_gsi only
     * to GSIs (counting up from 16), and then we count backwards from
     * the top for MSIs or when the GSI space is exhausted.
     */
    if (type == MAP_PIRQ_TYPE_GSI) {
        for (pirq = 16 ; pirq < IOAPIC_NUM_PINS; pirq++) {
            if (pirq_inuse(s, pirq)) {
                continue;
            }

            /* Found it */
            goto found;
        }
    }
    for (pirq = s->nr_pirqs - 1; pirq >= IOAPIC_NUM_PINS; pirq--) {
        /* Skip whole words at a time when they're full */
        if (pirq_inuse_word(s, pirq) == UINT64_MAX) {
            /*
             * Round down to the first bit of this 64-bit bitmap word;
             * the loop decrement then lands on the previous word.
             */
            pirq &= ~63ULL;
            continue;
        }
        if (pirq_inuse(s, pirq)) {
            continue;
        }

        goto found;
    }
    return -ENOSPC;

 found:
    pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
    if (gsi >= 0) {
        assert(gsi < IOAPIC_NUM_PINS);
        s->gsi_pirq[gsi] = pirq;
    }
    s->pirq[pirq].gsi = gsi;
    return pirq;
}
1603 
/*
 * Deliver a GSI as an event channel if the GSI has been mapped to a
 * PIRQ with a bound port. Returns true if the GSI was consumed here,
 * false if it should be delivered by the normal interrupt path.
 */
bool xen_evtchn_set_gsi(int gsi, int level)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int pirq;

    assert(qemu_mutex_iothread_locked());

    if (!s || gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
        return false;
    }

    /*
     * Check that it *isn't* the event channel GSI, and thus
     * that we are not recursing and it's safe to take s->port_lock.
     *
     * Locking aside, it's perfectly sane to bail out early for that
     * special case, as it would make no sense for the event channel
     * GSI to be routed back to event channels, when the delivery
     * method is to raise the GSI... that recursion wouldn't *just*
     * be a locking issue.
     */
    if (gsi && gsi == s->callback_gsi) {
        return false;
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    pirq = s->gsi_pirq[gsi];
    if (!pirq) {
        return false;
    }

    if (level) {
        int port = s->pirq[pirq].port;

        /* Remember the level so EOI can re-assert it. */
        s->pirq_gsi_set |= (1U << gsi);
        if (port) {
            set_port_pending(s, port);
        }
    } else {
        s->pirq_gsi_set &= ~(1U << gsi);
    }
    return true;
}
1648 
/*
 * Extract the target PIRQ# from a PIRQ-encoded MSI message. The PIRQ
 * is spread across the address: bits 12-19 of the low address give
 * the low 8 bits, and bits 8-31 come from the high 32 bits of the
 * address. The vector (low 8 bits of data) must be zero, otherwise
 * this is not a PIRQ MSI and zero is returned.
 */
static uint32_t msi_pirq_target(uint64_t addr, uint32_t data)
{
    uint32_t lo_bits = (uint32_t)(addr >> 12) & 0xff;
    uint32_t hi_bits = (uint32_t)(addr >> 32) & 0xffffff00;

    if (data & 0xff) {
        return 0;
    }

    return lo_bits | hi_bits;
}
1661 
/*
 * Reset the PIRQ state of every PIRQ snooped for the given PCI device
 * (and, when vector != -1, only that vector), except @except_pirq.
 * Used when a device is removed or when its MSI configuration changes.
 * Caller must hold s->port_lock.
 */
static void do_remove_pci_vector(XenEvtchnState *s, PCIDevice *dev, int vector,
                                 int except_pirq)
{
    uint32_t pirq;

    for (pirq = 0; pirq < s->nr_pirqs; pirq++) {
        /*
         * We could be cleverer here, but it isn't really a fast path, and
         * this trivial optimisation is enough to let us skip the big gap
         * in the middle a bit quicker (in terms of both loop iterations,
         * and cache lines).
         */
        if (!(pirq & 63) && !(pirq_inuse_word(s, pirq))) {
            pirq += 64;
            continue;
        }
        if (except_pirq && pirq == except_pirq) {
            continue;
        }
        if (s->pirq[pirq].dev != dev) {
            continue;
        }
        if (vector != -1 && s->pirq[pirq].vector != vector) {
            continue;
        }

        /* It could theoretically be bound to a port already, but that is OK. */
        /*
         * NOTE(review): this assignment is a no-op — we only reach here
         * when s->pirq[pirq].dev == dev already. Confirm whether NULL
         * was intended, or whether keeping the device pointer is
         * deliberate for rebinding.
         */
        s->pirq[pirq].dev = dev;
        s->pirq[pirq].gsi = IRQ_UNBOUND;
        s->pirq[pirq].is_msix = false;
        s->pirq[pirq].vector = 0;
        s->pirq[pirq].is_masked = false;
        s->pirq[pirq].is_translated = false;
    }
}
1697 
1698 void xen_evtchn_remove_pci_device(PCIDevice *dev)
1699 {
1700     XenEvtchnState *s = xen_evtchn_singleton;
1701 
1702     if (!s) {
1703         return;
1704     }
1705 
1706     QEMU_LOCK_GUARD(&s->port_lock);
1707     do_remove_pci_vector(s, dev, -1, 0);
1708 }
1709 
1710 void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
1711                           uint64_t addr, uint32_t data, bool is_masked)
1712 {
1713     XenEvtchnState *s = xen_evtchn_singleton;
1714     uint32_t pirq;
1715 
1716     if (!s) {
1717         return;
1718     }
1719 
1720     assert(qemu_mutex_iothread_locked());
1721 
1722     pirq = msi_pirq_target(addr, data);
1723 
1724     /*
1725      * The PIRQ# must be sane, and there must be an allocated PIRQ in
1726      * IRQ_UNBOUND or IRQ_MSI_EMU state to match it.
1727      */
1728     if (!pirq || pirq >= s->nr_pirqs || !pirq_inuse(s, pirq) ||
1729         (s->pirq[pirq].gsi != IRQ_UNBOUND &&
1730          s->pirq[pirq].gsi != IRQ_MSI_EMU)) {
1731         pirq = 0;
1732     }
1733 
1734     if (pirq) {
1735         s->pirq[pirq].dev = dev;
1736         s->pirq[pirq].gsi = IRQ_MSI_EMU;
1737         s->pirq[pirq].is_msix = is_msix;
1738         s->pirq[pirq].vector = vector;
1739         s->pirq[pirq].is_masked = is_masked;
1740     }
1741 
1742     /* Remove any (other) entries for this {device, vector} */
1743     do_remove_pci_vector(s, dev, vector, pirq);
1744 }
1745 
/*
 * Attempt to translate an MSI message into a KVM Xen event channel
 * route. Returns 0 if the route was filled in, 1 if the MSI is not a
 * PIRQ at all (fall back to normal MSI routing), or a negative errno
 * if it is a PIRQ that cannot currently be routed.
 */
int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
                                  uint64_t address, uint32_t data)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    uint32_t pirq, port;
    CPUState *cpu;

    if (!s) {
        return 1; /* Not a PIRQ */
    }

    assert(qemu_mutex_iothread_locked());

    pirq = msi_pirq_target(address, data);
    if (!pirq || pirq >= s->nr_pirqs) {
        return 1; /* Not a PIRQ */
    }

    if (!kvm_xen_has_cap(EVTCHN_2LEVEL)) {
        return -ENOTSUP;
    }

    if (s->pirq[pirq].gsi != IRQ_MSI_EMU) {
        return -EINVAL;
    }

    /* Remember that KVM tried to translate this. It might need to try again. */
    s->pirq[pirq].is_translated = true;

    QEMU_LOCK_GUARD(&s->port_lock);

    port = s->pirq[pirq].port;
    if (!valid_port(port)) {
        return -EINVAL;
    }

    cpu = qemu_get_cpu(s->port_table[port].vcpu);
    if (!cpu) {
        return -EINVAL;
    }

    /* Fill in a 2-level event channel route for KVM to deliver directly. */
    route->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
    route->u.xen_evtchn.port = port;
    route->u.xen_evtchn.vcpu = kvm_arch_vcpu_id(cpu);
    route->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

    return 0; /* Handled */
}
1794 
1795 bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
1796 {
1797     XenEvtchnState *s = xen_evtchn_singleton;
1798     uint32_t pirq, port;
1799 
1800     if (!s) {
1801         return false;
1802     }
1803 
1804     assert(qemu_mutex_iothread_locked());
1805 
1806     pirq = msi_pirq_target(address, data);
1807     if (!pirq || pirq >= s->nr_pirqs) {
1808         return false;
1809     }
1810 
1811     QEMU_LOCK_GUARD(&s->port_lock);
1812 
1813     port = s->pirq[pirq].port;
1814     if (!valid_port(port)) {
1815         return false;
1816     }
1817 
1818     set_port_pending(s, port);
1819     return true;
1820 }
1821 
1822 int xen_physdev_map_pirq(struct physdev_map_pirq *map)
1823 {
1824     XenEvtchnState *s = xen_evtchn_singleton;
1825     int pirq = map->pirq;
1826     int gsi = map->index;
1827 
1828     if (!s) {
1829         return -ENOTSUP;
1830     }
1831 
1832     QEMU_IOTHREAD_LOCK_GUARD();
1833     QEMU_LOCK_GUARD(&s->port_lock);
1834 
1835     if (map->domid != DOMID_SELF && map->domid != xen_domid) {
1836         return -EPERM;
1837     }
1838     if (map->type != MAP_PIRQ_TYPE_GSI) {
1839         return -EINVAL;
1840     }
1841     if (gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
1842         return -EINVAL;
1843     }
1844 
1845     if (pirq < 0) {
1846         pirq = allocate_pirq(s, map->type, gsi);
1847         if (pirq < 0) {
1848             return pirq;
1849         }
1850         map->pirq = pirq;
1851     } else if (pirq > s->nr_pirqs) {
1852         return -EINVAL;
1853     } else {
1854         /*
1855          * User specified a valid-looking PIRQ#. Allow it if it is
1856          * allocated and not yet bound, or if it is unallocated
1857          */
1858         if (pirq_inuse(s, pirq)) {
1859             if (s->pirq[pirq].gsi != IRQ_UNBOUND) {
1860                 return -EBUSY;
1861             }
1862         } else {
1863             /* If it was unused, mark it used now. */
1864             pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
1865         }
1866         /* Set the mapping in both directions. */
1867         s->pirq[pirq].gsi = gsi;
1868         s->gsi_pirq[gsi] = pirq;
1869     }
1870 
1871     trace_kvm_xen_map_pirq(pirq, gsi);
1872     return 0;
1873 }
1874 
/*
 * PHYSDEVOP_unmap_pirq: release a GSI-mapped PIRQ, clearing both
 * directions of the GSI<->PIRQ mapping and the in-use bit.
 */
int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int pirq = unmap->pirq;
    int gsi;

    if (!s) {
        return -ENOTSUP;
    }

    if (unmap->domid != DOMID_SELF && unmap->domid != xen_domid) {
        return -EPERM;
    }
    if (pirq < 0 || pirq >= s->nr_pirqs) {
        return -EINVAL;
    }

    QEMU_IOTHREAD_LOCK_GUARD();
    qemu_mutex_lock(&s->port_lock);

    if (!pirq_inuse(s, pirq)) {
        qemu_mutex_unlock(&s->port_lock);
        return -ENOENT;
    }

    gsi = s->pirq[pirq].gsi;

    /* We can only unmap GSI PIRQs */
    if (gsi < 0) {
        qemu_mutex_unlock(&s->port_lock);
        return -EINVAL;
    }

    s->gsi_pirq[gsi] = 0;
    s->pirq[pirq].gsi = IRQ_UNBOUND; /* Doesn't actually matter because: */
    pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);

    trace_kvm_xen_unmap_pirq(pirq, gsi);
    qemu_mutex_unlock(&s->port_lock);

    /*
     * NOTE(review): if IRQ_MSI_EMU is a negative sentinel, this branch
     * is unreachable — gsi < 0 already returned -EINVAL above. Confirm
     * against the IRQ_MSI_EMU definition earlier in this file.
     */
    if (gsi == IRQ_MSI_EMU) {
        kvm_update_msi_routes_all(NULL, true, 0, 0);
    }

    return 0;
}
1921 
1922 int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
1923 {
1924     XenEvtchnState *s = xen_evtchn_singleton;
1925     int pirq = eoi->irq;
1926     int gsi;
1927 
1928     if (!s) {
1929         return -ENOTSUP;
1930     }
1931 
1932     QEMU_IOTHREAD_LOCK_GUARD();
1933     QEMU_LOCK_GUARD(&s->port_lock);
1934 
1935     if (!pirq_inuse(s, pirq)) {
1936         return -ENOENT;
1937     }
1938 
1939     gsi = s->pirq[pirq].gsi;
1940     if (gsi < 0) {
1941         return -EINVAL;
1942     }
1943 
1944     /* Reassert a level IRQ if needed */
1945     if (s->pirq_gsi_set & (1U << gsi)) {
1946         int port = s->pirq[pirq].port;
1947         if (port) {
1948             set_port_pending(s, port);
1949         }
1950     }
1951 
1952     return 0;
1953 }
1954 
1955 int xen_physdev_query_pirq(struct physdev_irq_status_query *query)
1956 {
1957     XenEvtchnState *s = xen_evtchn_singleton;
1958     int pirq = query->irq;
1959 
1960     if (!s) {
1961         return -ENOTSUP;
1962     }
1963 
1964     QEMU_IOTHREAD_LOCK_GUARD();
1965     QEMU_LOCK_GUARD(&s->port_lock);
1966 
1967     if (!pirq_inuse(s, pirq)) {
1968         return -ENOENT;
1969     }
1970 
1971     if (s->pirq[pirq].gsi >= 0) {
1972         query->flags = XENIRQSTAT_needs_eoi;
1973     } else {
1974         query->flags = 0;
1975     }
1976 
1977     return 0;
1978 }
1979 
1980 int xen_physdev_get_free_pirq(struct physdev_get_free_pirq *get)
1981 {
1982     XenEvtchnState *s = xen_evtchn_singleton;
1983     int pirq;
1984 
1985     if (!s) {
1986         return -ENOTSUP;
1987     }
1988 
1989     QEMU_LOCK_GUARD(&s->port_lock);
1990 
1991     pirq = allocate_pirq(s, get->type, IRQ_UNBOUND);
1992     if (pirq < 0) {
1993         return pirq;
1994     }
1995 
1996     get->pirq = pirq;
1997     trace_kvm_xen_get_free_pirq(pirq, get->type);
1998     return 0;
1999 }
2000 
2001 struct xenevtchn_handle *xen_be_evtchn_open(void)
2002 {
2003     struct xenevtchn_handle *xc = g_new0(struct xenevtchn_handle, 1);
2004 
2005     xc->fd = eventfd(0, EFD_CLOEXEC);
2006     if (xc->fd < 0) {
2007         free(xc);
2008         return NULL;
2009     }
2010 
2011     return xc;
2012 }
2013 
2014 static int find_be_port(XenEvtchnState *s, struct xenevtchn_handle *xc)
2015 {
2016     int i;
2017 
2018     for (i = 1; i < EVTCHN_2L_NR_CHANNELS; i++) {
2019         if (!s->be_handles[i]) {
2020             s->be_handles[i] = xc;
2021             xc->be_port = i;
2022             return i;
2023         }
2024     }
2025     return 0;
2026 }
2027 
/*
 * Bind a backend handle to a guest port that is waiting in the
 * unbound state (or rebind after migration to an already-interdomain
 * port). Returns the backend port number on success, or a negative
 * errno.
 */
int xen_be_evtchn_bind_interdomain(struct xenevtchn_handle *xc, uint32_t domid,
                                   evtchn_port_t guest_port)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    XenEvtchnPort *gp;
    uint16_t be_port = 0;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (!xc) {
        return -EFAULT;
    }

    if (domid != xen_domid) {
        return -ESRCH;
    }

    if (!valid_port(guest_port)) {
        return -EINVAL;
    }

    qemu_mutex_lock(&s->port_lock);

    /* The guest has to have an unbound port waiting for us to bind */
    gp = &s->port_table[guest_port];

    switch (gp->type) {
    case EVTCHNSTAT_interdomain:
        /* Allow rebinding after migration, preserve port # if possible */
        be_port = gp->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
        assert(be_port != 0);
        if (!s->be_handles[be_port]) {
            /* The original backend slot is free again; take it over. */
            s->be_handles[be_port] = xc;
            xc->guest_port = guest_port;
            ret = xc->be_port = be_port;
            if (kvm_xen_has_cap(EVTCHN_SEND)) {
                assign_kernel_eventfd(gp->type, guest_port, xc->fd);
            }
            break;
        }
        /* fall through */

    case EVTCHNSTAT_unbound:
        /* Original slot taken (or fresh bind): allocate a new one. */
        be_port = find_be_port(s, xc);
        if (!be_port) {
            ret = -ENOSPC;
            goto out;
        }

        gp->type = EVTCHNSTAT_interdomain;
        gp->type_val = be_port | PORT_INFO_TYPEVAL_REMOTE_QEMU;
        xc->guest_port = guest_port;
        if (kvm_xen_has_cap(EVTCHN_SEND)) {
            assign_kernel_eventfd(gp->type, guest_port, xc->fd);
        }
        ret = be_port;
        break;

    default:
        ret = -EINVAL;
        break;
    }

 out:
    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
2099 
2100 int xen_be_evtchn_unbind(struct xenevtchn_handle *xc, evtchn_port_t port)
2101 {
2102     XenEvtchnState *s = xen_evtchn_singleton;
2103     int ret;
2104 
2105     if (!s) {
2106         return -ENOTSUP;
2107     }
2108 
2109     if (!xc) {
2110         return -EFAULT;
2111     }
2112 
2113     qemu_mutex_lock(&s->port_lock);
2114 
2115     if (port && port != xc->be_port) {
2116         ret = -EINVAL;
2117         goto out;
2118     }
2119 
2120     if (xc->guest_port) {
2121         XenEvtchnPort *gp = &s->port_table[xc->guest_port];
2122 
2123         /* This should never *not* be true */
2124         if (gp->type == EVTCHNSTAT_interdomain) {
2125             gp->type = EVTCHNSTAT_unbound;
2126             gp->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
2127         }
2128 
2129         if (kvm_xen_has_cap(EVTCHN_SEND)) {
2130             deassign_kernel_port(xc->guest_port);
2131         }
2132         xc->guest_port = 0;
2133     }
2134 
2135     s->be_handles[xc->be_port] = NULL;
2136     xc->be_port = 0;
2137     ret = 0;
2138  out:
2139     qemu_mutex_unlock(&s->port_lock);
2140     return ret;
2141 }
2142 
2143 int xen_be_evtchn_close(struct xenevtchn_handle *xc)
2144 {
2145     if (!xc) {
2146         return -EFAULT;
2147     }
2148 
2149     xen_be_evtchn_unbind(xc, 0);
2150 
2151     close(xc->fd);
2152     free(xc);
2153     return 0;
2154 }
2155 
2156 int xen_be_evtchn_fd(struct xenevtchn_handle *xc)
2157 {
2158     if (!xc) {
2159         return -1;
2160     }
2161     return xc->fd;
2162 }
2163 
2164 int xen_be_evtchn_notify(struct xenevtchn_handle *xc, evtchn_port_t port)
2165 {
2166     XenEvtchnState *s = xen_evtchn_singleton;
2167     int ret;
2168 
2169     if (!s) {
2170         return -ENOTSUP;
2171     }
2172 
2173     if (!xc) {
2174         return -EFAULT;
2175     }
2176 
2177     qemu_mutex_lock(&s->port_lock);
2178 
2179     if (xc->guest_port) {
2180         set_port_pending(s, xc->guest_port);
2181         ret = 0;
2182     } else {
2183         ret = -ENOTCONN;
2184     }
2185 
2186     qemu_mutex_unlock(&s->port_lock);
2187 
2188     return ret;
2189 }
2190 
2191 int xen_be_evtchn_pending(struct xenevtchn_handle *xc)
2192 {
2193     uint64_t val;
2194 
2195     if (!xc) {
2196         return -EFAULT;
2197     }
2198 
2199     if (!xc->be_port) {
2200         return 0;
2201     }
2202 
2203     if (eventfd_read(xc->fd, &val)) {
2204         return -errno;
2205     }
2206 
2207     return val ? xc->be_port : 0;
2208 }
2209 
2210 int xen_be_evtchn_unmask(struct xenevtchn_handle *xc, evtchn_port_t port)
2211 {
2212     if (!xc) {
2213         return -EFAULT;
2214     }
2215 
2216     if (xc->be_port != port) {
2217         return -EINVAL;
2218     }
2219 
2220     /*
2221      * We don't actually do anything to unmask it; the event was already
2222      * consumed in xen_be_evtchn_pending().
2223      */
2224     return 0;
2225 }
2226 
2227 int xen_be_evtchn_get_guest_port(struct xenevtchn_handle *xc)
2228 {
2229     return xc->guest_port;
2230 }
2231 
/*
 * QMP handler: enumerate every non-closed event channel port, reporting
 * its type, target and vCPU, plus the live pending/masked bits read
 * straight from the guest's shared info page.
 *
 * Returns a newly allocated list (freed by the QMP layer / caller via
 * qapi_free_EvtchnInfoList()), or NULL with @errp set.
 */
EvtchnInfoList *qmp_xen_event_list(Error **errp)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    EvtchnInfoList *head = NULL, **tail = &head;
    void *shinfo, *pending, *mask;
    int i;

    if (!s) {
        error_setg(errp, "Xen event channel emulation not enabled");
        return NULL;
    }

    shinfo = xen_overlay_get_shinfo_ptr();
    if (!shinfo) {
        error_setg(errp, "Xen shared info page not allocated");
        return NULL;
    }

    /*
     * The evtchn_pending/evtchn_mask bitmaps sit at different offsets in
     * the 64-bit and 32-bit (compat) layouts of the shared info page.
     */
    if (xen_is_long_mode()) {
        pending = shinfo + offsetof(struct shared_info, evtchn_pending);
        mask = shinfo + offsetof(struct shared_info, evtchn_mask);
    } else {
        pending = shinfo + offsetof(struct compat_shared_info, evtchn_pending);
        mask = shinfo + offsetof(struct compat_shared_info, evtchn_mask);
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    for (i = 0; i < s->nr_ports; i++) {
        XenEvtchnPort *p = &s->port_table[i];
        EvtchnInfo *info;

        if (p->type == EVTCHNSTAT_closed) {
            continue;
        }

        info = g_new0(EvtchnInfo, 1);

        info->port = i;
        /*
         * The QAPI enum is defined to mirror the Xen EVTCHNSTAT_* values,
         * which lets p->type be assigned to info->type directly below.
         */
        qemu_build_assert(EVTCHN_PORT_TYPE_CLOSED == EVTCHNSTAT_closed);
        qemu_build_assert(EVTCHN_PORT_TYPE_UNBOUND == EVTCHNSTAT_unbound);
        qemu_build_assert(EVTCHN_PORT_TYPE_INTERDOMAIN == EVTCHNSTAT_interdomain);
        qemu_build_assert(EVTCHN_PORT_TYPE_PIRQ == EVTCHNSTAT_pirq);
        qemu_build_assert(EVTCHN_PORT_TYPE_VIRQ == EVTCHNSTAT_virq);
        qemu_build_assert(EVTCHN_PORT_TYPE_IPI == EVTCHNSTAT_ipi);

        info->type = p->type;
        if (p->type == EVTCHNSTAT_interdomain) {
            /* REMOTE_QEMU bit distinguishes a QEMU backend from loopback */
            info->remote_domain = g_strdup((p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) ?
                                           "qemu" : "loopback");
            info->target = p->type_val & PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
        } else {
            info->target = p->type_val;
        }
        info->vcpu = p->vcpu;
        info->pending = test_bit(i, pending);
        info->masked = test_bit(i, mask);

        QAPI_LIST_APPEND(tail, info);
    }

    return head;
}
2295 
2296 void qmp_xen_event_inject(uint32_t port, Error **errp)
2297 {
2298     XenEvtchnState *s = xen_evtchn_singleton;
2299 
2300     if (!s) {
2301         error_setg(errp, "Xen event channel emulation not enabled");
2302         return;
2303     }
2304 
2305     if (!valid_port(port)) {
2306         error_setg(errp, "Invalid port %u", port);
2307     }
2308 
2309     QEMU_LOCK_GUARD(&s->port_lock);
2310 
2311     if (set_port_pending(s, port)) {
2312         error_setg(errp, "Failed to set port %u", port);
2313         return;
2314     }
2315 }
2316 
2317 void hmp_xen_event_list(Monitor *mon, const QDict *qdict)
2318 {
2319     EvtchnInfoList *iter, *info_list;
2320     Error *err = NULL;
2321 
2322     info_list = qmp_xen_event_list(&err);
2323     if (err) {
2324         hmp_handle_error(mon, err);
2325         return;
2326     }
2327 
2328     for (iter = info_list; iter; iter = iter->next) {
2329         EvtchnInfo *info = iter->value;
2330 
2331         monitor_printf(mon, "port %4u: vcpu: %d %s", info->port, info->vcpu,
2332                        EvtchnPortType_str(info->type));
2333         if (info->type != EVTCHN_PORT_TYPE_IPI) {
2334             monitor_printf(mon,  "(");
2335             if (info->remote_domain) {
2336                 monitor_printf(mon, "%s:", info->remote_domain);
2337             }
2338             monitor_printf(mon, "%d)", info->target);
2339         }
2340         if (info->pending) {
2341             monitor_printf(mon, " PENDING");
2342         }
2343         if (info->masked) {
2344             monitor_printf(mon, " MASKED");
2345         }
2346         monitor_printf(mon, "\n");
2347     }
2348 
2349     qapi_free_EvtchnInfoList(info_list);
2350 }
2351 
2352 void hmp_xen_event_inject(Monitor *mon, const QDict *qdict)
2353 {
2354     int port = qdict_get_int(qdict, "port");
2355     Error *err = NULL;
2356 
2357     qmp_xen_event_inject(port, &err);
2358     if (err) {
2359         hmp_handle_error(mon, err);
2360     } else {
2361         monitor_printf(mon, "Delivered port %d\n", port);
2362     }
2363 }
2364 
2365