xref: /qemu/hw/xen/xen_pt.c (revision 7a4e543d)
1 /*
2  * Copyright (c) 2007, Neocleus Corporation.
3  * Copyright (c) 2007, Intel Corporation.
4  *
5  * This work is licensed under the terms of the GNU GPL, version 2.  See
6  * the COPYING file in the top-level directory.
7  *
8  * Alex Novik <alex@neocleus.com>
9  * Allen Kay <allen.m.kay@intel.com>
10  * Guy Zana <guy@neocleus.com>
11  *
12  * This file implements direct PCI assignment to a HVM guest
13  */
14 
15 /*
16  * Interrupt Disable policy:
17  *
18  * INTx interrupt:
19  *   Initialize(register_real_device)
20  *     Map INTx(xc_physdev_map_pirq):
21  *       <fail>
22  *         - Set real Interrupt Disable bit to '1'.
23  *         - Set machine_irq and assigned_device->machine_irq to '0'.
24  *         * Don't bind INTx.
25  *
26  *     Bind INTx(xc_domain_bind_pt_pci_irq):
27  *       <fail>
28  *         - Set real Interrupt Disable bit to '1'.
29  *         - Unmap INTx.
30  *         - Decrement xen_pt_mapped_machine_irq[machine_irq]
31  *         - Set assigned_device->machine_irq to '0'.
32  *
33  *   Write to Interrupt Disable bit by guest software(xen_pt_cmd_reg_write)
34  *     Write '0'
35  *       - Set real bit to '0' if assigned_device->machine_irq isn't '0'.
36  *
37  *     Write '1'
38  *       - Set real bit to '1'.
39  *
40  * MSI interrupt:
41  *   Initialize MSI register(xen_pt_msi_setup, xen_pt_msi_update)
42  *     Bind MSI(xc_domain_update_msi_irq)
43  *       <fail>
44  *         - Unmap MSI.
45  *         - Set dev->msi->pirq to '-1'.
46  *
47  * MSI-X interrupt:
48  *   Initialize MSI-X register(xen_pt_msix_update_one)
49  *     Bind MSI-X(xc_domain_update_msi_irq)
50  *       <fail>
51  *         - Unmap MSI-X.
52  *         - Set entry->pirq to '-1'.
53  */
54 
55 #include "qemu/osdep.h"
56 #include <sys/ioctl.h>
57 
58 #include "hw/pci/pci.h"
59 #include "hw/xen/xen.h"
60 #include "hw/i386/pc.h"
61 #include "hw/xen/xen_backend.h"
62 #include "xen_pt.h"
63 #include "qemu/range.h"
64 #include "exec/address-spaces.h"
65 
/* Number of entries in the machine IRQ usage table below. */
#define XEN_PT_NR_IRQS (256)

/*
 * Usage counter per machine IRQ.  It is incremented when a pirq gets mapped
 * in xen_pt_realize() and decremented when binding fails or the device is
 * destroyed; the pirq is unmapped once its counter is back to zero.
 * Static storage is zero-initialised, so every counter starts at 0.
 */
static uint8_t xen_pt_mapped_machine_irq[XEN_PT_NR_IRQS];
68 
69 void xen_pt_log(const PCIDevice *d, const char *f, ...)
70 {
71     va_list ap;
72 
73     va_start(ap, f);
74     if (d) {
75         fprintf(stderr, "[%02x:%02x.%d] ", pci_bus_num(d->bus),
76                 PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
77     }
78     vfprintf(stderr, f, ap);
79     va_end(ap);
80 }
81 
82 /* Config Space */
83 
84 static int xen_pt_pci_config_access_check(PCIDevice *d, uint32_t addr, int len)
85 {
86     /* check offset range */
87     if (addr >= 0xFF) {
88         XEN_PT_ERR(d, "Failed to access register with offset exceeding 0xFF. "
89                    "(addr: 0x%02x, len: %d)\n", addr, len);
90         return -1;
91     }
92 
93     /* check read size */
94     if ((len != 1) && (len != 2) && (len != 4)) {
95         XEN_PT_ERR(d, "Failed to access register with invalid access length. "
96                    "(addr: 0x%02x, len: %d)\n", addr, len);
97         return -1;
98     }
99 
100     /* check offset alignment */
101     if (addr & (len - 1)) {
102         XEN_PT_ERR(d, "Failed to access register with invalid access size "
103                    "alignment. (addr: 0x%02x, len: %d)\n", addr, len);
104         return -1;
105     }
106 
107     return 0;
108 }
109 
110 int xen_pt_bar_offset_to_index(uint32_t offset)
111 {
112     int index = 0;
113 
114     /* check Exp ROM BAR */
115     if (offset == PCI_ROM_ADDRESS) {
116         return PCI_ROM_SLOT;
117     }
118 
119     /* calculate BAR index */
120     index = (offset - PCI_BASE_ADDRESS_0) >> 2;
121     if (index >= PCI_NUM_REGIONS) {
122         return -1;
123     }
124 
125     return index;
126 }
127 
128 static uint32_t xen_pt_pci_read_config(PCIDevice *d, uint32_t addr, int len)
129 {
130     XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
131     uint32_t val = 0;
132     XenPTRegGroup *reg_grp_entry = NULL;
133     XenPTReg *reg_entry = NULL;
134     int rc = 0;
135     int emul_len = 0;
136     uint32_t find_addr = addr;
137 
138     if (xen_pt_pci_config_access_check(d, addr, len)) {
139         goto exit;
140     }
141 
142     /* find register group entry */
143     reg_grp_entry = xen_pt_find_reg_grp(s, addr);
144     if (reg_grp_entry) {
145         /* check 0-Hardwired register group */
146         if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED) {
147             /* no need to emulate, just return 0 */
148             val = 0;
149             goto exit;
150         }
151     }
152 
153     /* read I/O device register value */
154     rc = xen_host_pci_get_block(&s->real_device, addr, (uint8_t *)&val, len);
155     if (rc < 0) {
156         XEN_PT_ERR(d, "pci_read_block failed. return value: %d.\n", rc);
157         memset(&val, 0xff, len);
158     }
159 
160     /* just return the I/O device register value for
161      * passthrough type register group */
162     if (reg_grp_entry == NULL) {
163         goto exit;
164     }
165 
166     /* adjust the read value to appropriate CFC-CFF window */
167     val <<= (addr & 3) << 3;
168     emul_len = len;
169 
170     /* loop around the guest requested size */
171     while (emul_len > 0) {
172         /* find register entry to be emulated */
173         reg_entry = xen_pt_find_reg(reg_grp_entry, find_addr);
174         if (reg_entry) {
175             XenPTRegInfo *reg = reg_entry->reg;
176             uint32_t real_offset = reg_grp_entry->base_offset + reg->offset;
177             uint32_t valid_mask = 0xFFFFFFFF >> ((4 - emul_len) << 3);
178             uint8_t *ptr_val = NULL;
179 
180             valid_mask <<= (find_addr - real_offset) << 3;
181             ptr_val = (uint8_t *)&val + (real_offset & 3);
182 
183             /* do emulation based on register size */
184             switch (reg->size) {
185             case 1:
186                 if (reg->u.b.read) {
187                     rc = reg->u.b.read(s, reg_entry, ptr_val, valid_mask);
188                 }
189                 break;
190             case 2:
191                 if (reg->u.w.read) {
192                     rc = reg->u.w.read(s, reg_entry,
193                                        (uint16_t *)ptr_val, valid_mask);
194                 }
195                 break;
196             case 4:
197                 if (reg->u.dw.read) {
198                     rc = reg->u.dw.read(s, reg_entry,
199                                         (uint32_t *)ptr_val, valid_mask);
200                 }
201                 break;
202             }
203 
204             if (rc < 0) {
205                 xen_shutdown_fatal_error("Internal error: Invalid read "
206                                          "emulation. (%s, rc: %d)\n",
207                                          __func__, rc);
208                 return 0;
209             }
210 
211             /* calculate next address to find */
212             emul_len -= reg->size;
213             if (emul_len > 0) {
214                 find_addr = real_offset + reg->size;
215             }
216         } else {
217             /* nothing to do with passthrough type register,
218              * continue to find next byte */
219             emul_len--;
220             find_addr++;
221         }
222     }
223 
224     /* need to shift back before returning them to pci bus emulator */
225     val >>= ((addr & 3) << 3);
226 
227 exit:
228     XEN_PT_LOG_CONFIG(d, addr, val, len);
229     return val;
230 }
231 
232 static void xen_pt_pci_write_config(PCIDevice *d, uint32_t addr,
233                                     uint32_t val, int len)
234 {
235     XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
236     int index = 0;
237     XenPTRegGroup *reg_grp_entry = NULL;
238     int rc = 0;
239     uint32_t read_val = 0, wb_mask;
240     int emul_len = 0;
241     XenPTReg *reg_entry = NULL;
242     uint32_t find_addr = addr;
243     XenPTRegInfo *reg = NULL;
244     bool wp_flag = false;
245 
246     if (xen_pt_pci_config_access_check(d, addr, len)) {
247         return;
248     }
249 
250     XEN_PT_LOG_CONFIG(d, addr, val, len);
251 
252     /* check unused BAR register */
253     index = xen_pt_bar_offset_to_index(addr);
254     if ((index >= 0) && (val != 0)) {
255         uint32_t chk = val;
256 
257         if (index == PCI_ROM_SLOT)
258             chk |= (uint32_t)~PCI_ROM_ADDRESS_MASK;
259 
260         if ((chk != XEN_PT_BAR_ALLF) &&
261             (s->bases[index].bar_flag == XEN_PT_BAR_FLAG_UNUSED)) {
262             XEN_PT_WARN(d, "Guest attempt to set address to unused "
263                         "Base Address Register. (addr: 0x%02x, len: %d)\n",
264                         addr, len);
265         }
266     }
267 
268     /* find register group entry */
269     reg_grp_entry = xen_pt_find_reg_grp(s, addr);
270     if (reg_grp_entry) {
271         /* check 0-Hardwired register group */
272         if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED) {
273             /* ignore silently */
274             XEN_PT_WARN(d, "Access to 0-Hardwired register. "
275                         "(addr: 0x%02x, len: %d)\n", addr, len);
276             return;
277         }
278     }
279 
280     rc = xen_host_pci_get_block(&s->real_device, addr,
281                                 (uint8_t *)&read_val, len);
282     if (rc < 0) {
283         XEN_PT_ERR(d, "pci_read_block failed. return value: %d.\n", rc);
284         memset(&read_val, 0xff, len);
285         wb_mask = 0;
286     } else {
287         wb_mask = 0xFFFFFFFF >> ((4 - len) << 3);
288     }
289 
290     /* pass directly to the real device for passthrough type register group */
291     if (reg_grp_entry == NULL) {
292         if (!s->permissive) {
293             wb_mask = 0;
294             wp_flag = true;
295         }
296         goto out;
297     }
298 
299     memory_region_transaction_begin();
300     pci_default_write_config(d, addr, val, len);
301 
302     /* adjust the read and write value to appropriate CFC-CFF window */
303     read_val <<= (addr & 3) << 3;
304     val <<= (addr & 3) << 3;
305     emul_len = len;
306 
307     /* loop around the guest requested size */
308     while (emul_len > 0) {
309         /* find register entry to be emulated */
310         reg_entry = xen_pt_find_reg(reg_grp_entry, find_addr);
311         if (reg_entry) {
312             reg = reg_entry->reg;
313             uint32_t real_offset = reg_grp_entry->base_offset + reg->offset;
314             uint32_t valid_mask = 0xFFFFFFFF >> ((4 - emul_len) << 3);
315             uint8_t *ptr_val = NULL;
316             uint32_t wp_mask = reg->emu_mask | reg->ro_mask;
317 
318             valid_mask <<= (find_addr - real_offset) << 3;
319             ptr_val = (uint8_t *)&val + (real_offset & 3);
320             if (!s->permissive) {
321                 wp_mask |= reg->res_mask;
322             }
323             if (wp_mask == (0xFFFFFFFF >> ((4 - reg->size) << 3))) {
324                 wb_mask &= ~((wp_mask >> ((find_addr - real_offset) << 3))
325                              << ((len - emul_len) << 3));
326             }
327 
328             /* do emulation based on register size */
329             switch (reg->size) {
330             case 1:
331                 if (reg->u.b.write) {
332                     rc = reg->u.b.write(s, reg_entry, ptr_val,
333                                         read_val >> ((real_offset & 3) << 3),
334                                         valid_mask);
335                 }
336                 break;
337             case 2:
338                 if (reg->u.w.write) {
339                     rc = reg->u.w.write(s, reg_entry, (uint16_t *)ptr_val,
340                                         (read_val >> ((real_offset & 3) << 3)),
341                                         valid_mask);
342                 }
343                 break;
344             case 4:
345                 if (reg->u.dw.write) {
346                     rc = reg->u.dw.write(s, reg_entry, (uint32_t *)ptr_val,
347                                          (read_val >> ((real_offset & 3) << 3)),
348                                          valid_mask);
349                 }
350                 break;
351             }
352 
353             if (rc < 0) {
354                 xen_shutdown_fatal_error("Internal error: Invalid write"
355                                          " emulation. (%s, rc: %d)\n",
356                                          __func__, rc);
357                 return;
358             }
359 
360             /* calculate next address to find */
361             emul_len -= reg->size;
362             if (emul_len > 0) {
363                 find_addr = real_offset + reg->size;
364             }
365         } else {
366             /* nothing to do with passthrough type register,
367              * continue to find next byte */
368             if (!s->permissive) {
369                 wb_mask &= ~(0xff << ((len - emul_len) << 3));
370                 /* Unused BARs will make it here, but we don't want to issue
371                  * warnings for writes to them (bogus writes get dealt with
372                  * above).
373                  */
374                 if (index < 0) {
375                     wp_flag = true;
376                 }
377             }
378             emul_len--;
379             find_addr++;
380         }
381     }
382 
383     /* need to shift back before passing them to xen_host_pci_set_block. */
384     val >>= (addr & 3) << 3;
385 
386     memory_region_transaction_commit();
387 
388 out:
389     if (wp_flag && !s->permissive_warned) {
390         s->permissive_warned = true;
391         xen_pt_log(d, "Write-back to unknown field 0x%02x (partially) inhibited (0x%0*x)\n",
392                    addr, len * 2, wb_mask);
393         xen_pt_log(d, "If the device doesn't work, try enabling permissive mode\n");
394         xen_pt_log(d, "(unsafe) and if it helps report the problem to xen-devel\n");
395     }
396     for (index = 0; wb_mask; index += len) {
397         /* unknown regs are passed through */
398         while (!(wb_mask & 0xff)) {
399             index++;
400             wb_mask >>= 8;
401         }
402         len = 0;
403         do {
404             len++;
405             wb_mask >>= 8;
406         } while (wb_mask & 0xff);
407         rc = xen_host_pci_set_block(&s->real_device, addr + index,
408                                     (uint8_t *)&val + index, len);
409 
410         if (rc < 0) {
411             XEN_PT_ERR(d, "xen_host_pci_set_block failed. return value: %d.\n", rc);
412         }
413     }
414 }
415 
416 /* register regions */
417 
418 static uint64_t xen_pt_bar_read(void *o, hwaddr addr,
419                                 unsigned size)
420 {
421     PCIDevice *d = o;
422     /* if this function is called, that probably means that there is a
423      * misconfiguration of the IOMMU. */
424     XEN_PT_ERR(d, "Should not read BAR through QEMU. @0x"TARGET_FMT_plx"\n",
425                addr);
426     return 0;
427 }
428 static void xen_pt_bar_write(void *o, hwaddr addr, uint64_t val,
429                              unsigned size)
430 {
431     PCIDevice *d = o;
432     /* Same comment as xen_pt_bar_read function */
433     XEN_PT_ERR(d, "Should not write BAR through QEMU. @0x"TARGET_FMT_plx"\n",
434                addr);
435 }
436 
437 static const MemoryRegionOps ops = {
438     .endianness = DEVICE_NATIVE_ENDIAN,
439     .read = xen_pt_bar_read,
440     .write = xen_pt_bar_write,
441 };
442 
443 static int xen_pt_register_regions(XenPCIPassthroughState *s, uint16_t *cmd)
444 {
445     int i = 0;
446     XenHostPCIDevice *d = &s->real_device;
447 
448     /* Register PIO/MMIO BARs */
449     for (i = 0; i < PCI_ROM_SLOT; i++) {
450         XenHostPCIIORegion *r = &d->io_regions[i];
451         uint8_t type;
452 
453         if (r->base_addr == 0 || r->size == 0) {
454             continue;
455         }
456 
457         s->bases[i].access.u = r->base_addr;
458 
459         if (r->type & XEN_HOST_PCI_REGION_TYPE_IO) {
460             type = PCI_BASE_ADDRESS_SPACE_IO;
461             *cmd |= PCI_COMMAND_IO;
462         } else {
463             type = PCI_BASE_ADDRESS_SPACE_MEMORY;
464             if (r->type & XEN_HOST_PCI_REGION_TYPE_PREFETCH) {
465                 type |= PCI_BASE_ADDRESS_MEM_PREFETCH;
466             }
467             if (r->type & XEN_HOST_PCI_REGION_TYPE_MEM_64) {
468                 type |= PCI_BASE_ADDRESS_MEM_TYPE_64;
469             }
470             *cmd |= PCI_COMMAND_MEMORY;
471         }
472 
473         memory_region_init_io(&s->bar[i], OBJECT(s), &ops, &s->dev,
474                               "xen-pci-pt-bar", r->size);
475         pci_register_bar(&s->dev, i, type, &s->bar[i]);
476 
477         XEN_PT_LOG(&s->dev, "IO region %i registered (size=0x%08"PRIx64
478                    " base_addr=0x%08"PRIx64" type: %#x)\n",
479                    i, r->size, r->base_addr, type);
480     }
481 
482     /* Register expansion ROM address */
483     if (d->rom.base_addr && d->rom.size) {
484         uint32_t bar_data = 0;
485 
486         /* Re-set BAR reported by OS, otherwise ROM can't be read. */
487         if (xen_host_pci_get_long(d, PCI_ROM_ADDRESS, &bar_data)) {
488             return 0;
489         }
490         if ((bar_data & PCI_ROM_ADDRESS_MASK) == 0) {
491             bar_data |= d->rom.base_addr & PCI_ROM_ADDRESS_MASK;
492             xen_host_pci_set_long(d, PCI_ROM_ADDRESS, bar_data);
493         }
494 
495         s->bases[PCI_ROM_SLOT].access.maddr = d->rom.base_addr;
496 
497         memory_region_init_io(&s->rom, OBJECT(s), &ops, &s->dev,
498                               "xen-pci-pt-rom", d->rom.size);
499         pci_register_bar(&s->dev, PCI_ROM_SLOT, PCI_BASE_ADDRESS_MEM_PREFETCH,
500                          &s->rom);
501 
502         XEN_PT_LOG(&s->dev, "Expansion ROM registered (size=0x%08"PRIx64
503                    " base_addr=0x%08"PRIx64")\n",
504                    d->rom.size, d->rom.base_addr);
505     }
506 
507     xen_pt_register_vga_regions(d);
508     return 0;
509 }
510 
511 /* region mapping */
512 
513 static int xen_pt_bar_from_region(XenPCIPassthroughState *s, MemoryRegion *mr)
514 {
515     int i = 0;
516 
517     for (i = 0; i < PCI_NUM_REGIONS - 1; i++) {
518         if (mr == &s->bar[i]) {
519             return i;
520         }
521     }
522     if (mr == &s->rom) {
523         return PCI_ROM_SLOT;
524     }
525     return -1;
526 }
527 
528 /*
529  * This function checks if an io_region overlaps an io_region from another
530  * device.  The io_region to check is provided with (addr, size and type)
531  * A callback can be provided and will be called for every region that is
532  * overlapped.
533  * The return value indicates if the region is overlappsed */
534 struct CheckBarArgs {
535     XenPCIPassthroughState *s;
536     pcibus_t addr;
537     pcibus_t size;
538     uint8_t type;
539     bool rc;
540 };
541 static void xen_pt_check_bar_overlap(PCIBus *bus, PCIDevice *d, void *opaque)
542 {
543     struct CheckBarArgs *arg = opaque;
544     XenPCIPassthroughState *s = arg->s;
545     uint8_t type = arg->type;
546     int i;
547 
548     if (d->devfn == s->dev.devfn) {
549         return;
550     }
551 
552     /* xxx: This ignores bridges. */
553     for (i = 0; i < PCI_NUM_REGIONS; i++) {
554         const PCIIORegion *r = &d->io_regions[i];
555 
556         if (!r->size) {
557             continue;
558         }
559         if ((type & PCI_BASE_ADDRESS_SPACE_IO)
560             != (r->type & PCI_BASE_ADDRESS_SPACE_IO)) {
561             continue;
562         }
563 
564         if (ranges_overlap(arg->addr, arg->size, r->addr, r->size)) {
565             XEN_PT_WARN(&s->dev,
566                         "Overlapped to device [%02x:%02x.%d] Region: %i"
567                         " (addr: %#"FMT_PCIBUS", len: %#"FMT_PCIBUS")\n",
568                         pci_bus_num(bus), PCI_SLOT(d->devfn),
569                         PCI_FUNC(d->devfn), i, r->addr, r->size);
570             arg->rc = true;
571         }
572     }
573 }
574 
575 static void xen_pt_region_update(XenPCIPassthroughState *s,
576                                  MemoryRegionSection *sec, bool adding)
577 {
578     PCIDevice *d = &s->dev;
579     MemoryRegion *mr = sec->mr;
580     int bar = -1;
581     int rc;
582     int op = adding ? DPCI_ADD_MAPPING : DPCI_REMOVE_MAPPING;
583     struct CheckBarArgs args = {
584         .s = s,
585         .addr = sec->offset_within_address_space,
586         .size = int128_get64(sec->size),
587         .rc = false,
588     };
589 
590     bar = xen_pt_bar_from_region(s, mr);
591     if (bar == -1 && (!s->msix || &s->msix->mmio != mr)) {
592         return;
593     }
594 
595     if (s->msix && &s->msix->mmio == mr) {
596         if (adding) {
597             s->msix->mmio_base_addr = sec->offset_within_address_space;
598             rc = xen_pt_msix_update_remap(s, s->msix->bar_index);
599         }
600         return;
601     }
602 
603     args.type = d->io_regions[bar].type;
604     pci_for_each_device(d->bus, pci_bus_num(d->bus),
605                         xen_pt_check_bar_overlap, &args);
606     if (args.rc) {
607         XEN_PT_WARN(d, "Region: %d (addr: %#"FMT_PCIBUS
608                     ", len: %#"FMT_PCIBUS") is overlapped.\n",
609                     bar, sec->offset_within_address_space,
610                     int128_get64(sec->size));
611     }
612 
613     if (d->io_regions[bar].type & PCI_BASE_ADDRESS_SPACE_IO) {
614         uint32_t guest_port = sec->offset_within_address_space;
615         uint32_t machine_port = s->bases[bar].access.pio_base;
616         uint32_t size = int128_get64(sec->size);
617         rc = xc_domain_ioport_mapping(xen_xc, xen_domid,
618                                       guest_port, machine_port, size,
619                                       op);
620         if (rc) {
621             XEN_PT_ERR(d, "%s ioport mapping failed! (err: %i)\n",
622                        adding ? "create new" : "remove old", errno);
623         }
624     } else {
625         pcibus_t guest_addr = sec->offset_within_address_space;
626         pcibus_t machine_addr = s->bases[bar].access.maddr
627             + sec->offset_within_region;
628         pcibus_t size = int128_get64(sec->size);
629         rc = xc_domain_memory_mapping(xen_xc, xen_domid,
630                                       XEN_PFN(guest_addr + XC_PAGE_SIZE - 1),
631                                       XEN_PFN(machine_addr + XC_PAGE_SIZE - 1),
632                                       XEN_PFN(size + XC_PAGE_SIZE - 1),
633                                       op);
634         if (rc) {
635             XEN_PT_ERR(d, "%s mem mapping failed! (err: %i)\n",
636                        adding ? "create new" : "remove old", errno);
637         }
638     }
639 }
640 
641 static void xen_pt_region_add(MemoryListener *l, MemoryRegionSection *sec)
642 {
643     XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
644                                              memory_listener);
645 
646     memory_region_ref(sec->mr);
647     xen_pt_region_update(s, sec, true);
648 }
649 
650 static void xen_pt_region_del(MemoryListener *l, MemoryRegionSection *sec)
651 {
652     XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
653                                              memory_listener);
654 
655     xen_pt_region_update(s, sec, false);
656     memory_region_unref(sec->mr);
657 }
658 
659 static void xen_pt_io_region_add(MemoryListener *l, MemoryRegionSection *sec)
660 {
661     XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
662                                              io_listener);
663 
664     memory_region_ref(sec->mr);
665     xen_pt_region_update(s, sec, true);
666 }
667 
668 static void xen_pt_io_region_del(MemoryListener *l, MemoryRegionSection *sec)
669 {
670     XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
671                                              io_listener);
672 
673     xen_pt_region_update(s, sec, false);
674     memory_region_unref(sec->mr);
675 }
676 
677 static const MemoryListener xen_pt_memory_listener = {
678     .region_add = xen_pt_region_add,
679     .region_del = xen_pt_region_del,
680     .priority = 10,
681 };
682 
683 static const MemoryListener xen_pt_io_listener = {
684     .region_add = xen_pt_io_region_add,
685     .region_del = xen_pt_io_region_del,
686     .priority = 10,
687 };
688 
689 static void
690 xen_igd_passthrough_isa_bridge_create(XenPCIPassthroughState *s,
691                                       XenHostPCIDevice *dev)
692 {
693     uint16_t gpu_dev_id;
694     PCIDevice *d = &s->dev;
695 
696     gpu_dev_id = dev->device_id;
697     igd_passthrough_isa_bridge_create(d->bus, gpu_dev_id);
698 }
699 
700 /* destroy. */
701 static void xen_pt_destroy(PCIDevice *d) {
702 
703     XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
704     XenHostPCIDevice *host_dev = &s->real_device;
705     uint8_t machine_irq = s->machine_irq;
706     uint8_t intx;
707     int rc;
708 
709     if (machine_irq && !xen_host_pci_device_closed(&s->real_device)) {
710         intx = xen_pt_pci_intx(s);
711         rc = xc_domain_unbind_pt_irq(xen_xc, xen_domid, machine_irq,
712                                      PT_IRQ_TYPE_PCI,
713                                      pci_bus_num(d->bus),
714                                      PCI_SLOT(s->dev.devfn),
715                                      intx,
716                                      0 /* isa_irq */);
717         if (rc < 0) {
718             XEN_PT_ERR(d, "unbinding of interrupt INT%c failed."
719                        " (machine irq: %i, err: %d)"
720                        " But bravely continuing on..\n",
721                        'a' + intx, machine_irq, errno);
722         }
723     }
724 
725     /* N.B. xen_pt_config_delete takes care of freeing them. */
726     if (s->msi) {
727         xen_pt_msi_disable(s);
728     }
729     if (s->msix) {
730         xen_pt_msix_disable(s);
731     }
732 
733     if (machine_irq) {
734         xen_pt_mapped_machine_irq[machine_irq]--;
735 
736         if (xen_pt_mapped_machine_irq[machine_irq] == 0) {
737             rc = xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq);
738 
739             if (rc < 0) {
740                 XEN_PT_ERR(d, "unmapping of interrupt %i failed. (err: %d)"
741                            " But bravely continuing on..\n",
742                            machine_irq, errno);
743             }
744         }
745         s->machine_irq = 0;
746     }
747 
748     /* delete all emulated config registers */
749     xen_pt_config_delete(s);
750 
751     xen_pt_unregister_vga_regions(host_dev);
752 
753     if (s->listener_set) {
754         memory_listener_unregister(&s->memory_listener);
755         memory_listener_unregister(&s->io_listener);
756         s->listener_set = false;
757     }
758     if (!xen_host_pci_device_closed(&s->real_device)) {
759         xen_host_pci_device_put(&s->real_device);
760     }
761 }
762 /* init */
763 
764 static void xen_pt_realize(PCIDevice *d, Error **errp)
765 {
766     XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
767     int i, rc = 0;
768     uint8_t machine_irq = 0, scratch;
769     uint16_t cmd = 0;
770     int pirq = XEN_PT_UNASSIGNED_PIRQ;
771     Error *err = NULL;
772 
773     /* register real device */
774     XEN_PT_LOG(d, "Assigning real physical device %02x:%02x.%d"
775                " to devfn %#x\n",
776                s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function,
777                s->dev.devfn);
778 
779     xen_host_pci_device_get(&s->real_device,
780                             s->hostaddr.domain, s->hostaddr.bus,
781                             s->hostaddr.slot, s->hostaddr.function,
782                             &err);
783     if (err) {
784         error_append_hint(&err, "Failed to \"open\" the real pci device");
785         error_propagate(errp, err);
786         return;
787     }
788 
789     s->is_virtfn = s->real_device.is_virtfn;
790     if (s->is_virtfn) {
791         XEN_PT_LOG(d, "%04x:%02x:%02x.%d is a SR-IOV Virtual Function\n",
792                    s->real_device.domain, s->real_device.bus,
793                    s->real_device.dev, s->real_device.func);
794     }
795 
796     /* Initialize virtualized PCI configuration (Extended 256 Bytes) */
797     memset(d->config, 0, PCI_CONFIG_SPACE_SIZE);
798 
799     s->memory_listener = xen_pt_memory_listener;
800     s->io_listener = xen_pt_io_listener;
801 
802     /* Setup VGA bios for passthrough GFX */
803     if ((s->real_device.domain == 0) && (s->real_device.bus == 0) &&
804         (s->real_device.dev == 2) && (s->real_device.func == 0)) {
805         if (!is_igd_vga_passthrough(&s->real_device)) {
806             error_setg(errp, "Need to enable igd-passthru if you're trying"
807                     " to passthrough IGD GFX");
808             xen_host_pci_device_put(&s->real_device);
809             return;
810         }
811 
812         xen_pt_setup_vga(s, &s->real_device, &err);
813         if (err) {
814             error_append_hint(&err, "Setup VGA BIOS of passthrough"
815                     " GFX failed");
816             error_propagate(errp, err);
817             xen_host_pci_device_put(&s->real_device);
818             return;
819         }
820 
821         /* Register ISA bridge for passthrough GFX. */
822         xen_igd_passthrough_isa_bridge_create(s, &s->real_device);
823     }
824 
825     /* Handle real device's MMIO/PIO BARs */
826     xen_pt_register_regions(s, &cmd);
827 
828     /* reinitialize each config register to be emulated */
829     xen_pt_config_init(s, &err);
830     if (err) {
831         error_append_hint(&err, "PCI Config space initialisation failed");
832         error_report_err(err);
833         rc = -1;
834         goto err_out;
835     }
836 
837     /* Bind interrupt */
838     rc = xen_host_pci_get_byte(&s->real_device, PCI_INTERRUPT_PIN, &scratch);
839     if (rc) {
840         error_setg_errno(errp, errno, "Failed to read PCI_INTERRUPT_PIN");
841         goto err_out;
842     }
843     if (!scratch) {
844         error_setg(errp, "no pin interrupt");
845         goto out;
846     }
847 
848     machine_irq = s->real_device.irq;
849     rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
850     if (rc < 0) {
851         error_setg_errno(errp, errno, "Mapping machine irq %u to"
852                          " pirq %i failed", machine_irq, pirq);
853 
854         /* Disable PCI intx assertion (turn on bit10 of devctl) */
855         cmd |= PCI_COMMAND_INTX_DISABLE;
856         machine_irq = 0;
857         s->machine_irq = 0;
858     } else {
859         machine_irq = pirq;
860         s->machine_irq = pirq;
861         xen_pt_mapped_machine_irq[machine_irq]++;
862     }
863 
864     /* bind machine_irq to device */
865     if (machine_irq != 0) {
866         uint8_t e_intx = xen_pt_pci_intx(s);
867 
868         rc = xc_domain_bind_pt_pci_irq(xen_xc, xen_domid, machine_irq,
869                                        pci_bus_num(d->bus),
870                                        PCI_SLOT(d->devfn),
871                                        e_intx);
872         if (rc < 0) {
873             error_setg_errno(errp, errno, "Binding of interrupt %u failed",
874                              e_intx);
875 
876             /* Disable PCI intx assertion (turn on bit10 of devctl) */
877             cmd |= PCI_COMMAND_INTX_DISABLE;
878             xen_pt_mapped_machine_irq[machine_irq]--;
879 
880             if (xen_pt_mapped_machine_irq[machine_irq] == 0) {
881                 if (xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq)) {
882                     error_setg_errno(errp, errno, "Unmapping of machine"
883                             " interrupt %u failed", machine_irq);
884                 }
885             }
886             s->machine_irq = 0;
887         }
888     }
889 
890 out:
891     if (cmd) {
892         uint16_t val;
893 
894         rc = xen_host_pci_get_word(&s->real_device, PCI_COMMAND, &val);
895         if (rc) {
896             error_setg_errno(errp, errno, "Failed to read PCI_COMMAND");
897             goto err_out;
898         } else {
899             val |= cmd;
900             rc = xen_host_pci_set_word(&s->real_device, PCI_COMMAND, val);
901             if (rc) {
902                 error_setg_errno(errp, errno, "Failed to write PCI_COMMAND"
903                                  " val = 0x%x", val);
904                 goto err_out;
905             }
906         }
907     }
908 
909     memory_listener_register(&s->memory_listener, &s->dev.bus_master_as);
910     memory_listener_register(&s->io_listener, &address_space_io);
911     s->listener_set = true;
912     XEN_PT_LOG(d,
913                "Real physical device %02x:%02x.%d registered successfully\n",
914                s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function);
915 
916     return;
917 
918 err_out:
919     for (i = 0; i < PCI_ROM_SLOT; i++) {
920         object_unparent(OBJECT(&s->bar[i]));
921     }
922     object_unparent(OBJECT(&s->rom));
923 
924     xen_pt_destroy(d);
925     assert(rc);
926 }
927 
928 static void xen_pt_unregister_device(PCIDevice *d)
929 {
930     xen_pt_destroy(d);
931 }
932 
/*
 * qdev properties of the passthrough device:
 *   hostaddr   - host PCI address, stored in XenPCIPassthroughState.hostaddr
 *   permissive - boolean stored in XenPCIPassthroughState.permissive,
 *                false by default
 */
static Property xen_pci_passthrough_properties[] = {
    DEFINE_PROP_PCI_HOST_DEVADDR("hostaddr", XenPCIPassthroughState, hostaddr),
    DEFINE_PROP_BOOL("permissive", XenPCIPassthroughState, permissive, false),
    DEFINE_PROP_END_OF_LIST(),
};
938 
939 static void xen_pci_passthrough_class_init(ObjectClass *klass, void *data)
940 {
941     DeviceClass *dc = DEVICE_CLASS(klass);
942     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
943 
944     k->realize = xen_pt_realize;
945     k->exit = xen_pt_unregister_device;
946     k->config_read = xen_pt_pci_read_config;
947     k->config_write = xen_pt_pci_write_config;
948     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
949     dc->desc = "Assign an host PCI device with Xen";
950     dc->props = xen_pci_passthrough_properties;
951 };
952 
953 static void xen_pci_passthrough_finalize(Object *obj)
954 {
955     XenPCIPassthroughState *s = XEN_PT_DEVICE(obj);
956 
957     xen_pt_msix_delete(s);
958 }
959 
960 static const TypeInfo xen_pci_passthrough_info = {
961     .name = TYPE_XEN_PT_DEVICE,
962     .parent = TYPE_PCI_DEVICE,
963     .instance_size = sizeof(XenPCIPassthroughState),
964     .instance_finalize = xen_pci_passthrough_finalize,
965     .class_init = xen_pci_passthrough_class_init,
966 };
967 
968 static void xen_pci_passthrough_register_types(void)
969 {
970     type_register_static(&xen_pci_passthrough_info);
971 }
972 
973 type_init(xen_pci_passthrough_register_types)
974