1315a1350SMichael S. Tsirkin /* 2315a1350SMichael S. Tsirkin * MSI-X device support 3315a1350SMichael S. Tsirkin * 4315a1350SMichael S. Tsirkin * This module includes support for MSI-X in pci devices. 5315a1350SMichael S. Tsirkin * 6315a1350SMichael S. Tsirkin * Author: Michael S. Tsirkin <mst@redhat.com> 7315a1350SMichael S. Tsirkin * 8315a1350SMichael S. Tsirkin * Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com) 9315a1350SMichael S. Tsirkin * 10315a1350SMichael S. Tsirkin * This work is licensed under the terms of the GNU GPL, version 2. See 11315a1350SMichael S. Tsirkin * the COPYING file in the top-level directory. 12315a1350SMichael S. Tsirkin * 13315a1350SMichael S. Tsirkin * Contributions after 2012-01-13 are licensed under the terms of the 14315a1350SMichael S. Tsirkin * GNU GPL, version 2 or (at your option) any later version. 15315a1350SMichael S. Tsirkin */ 16315a1350SMichael S. Tsirkin 1797d5408fSPeter Maydell #include "qemu/osdep.h" 18c759b24fSMichael S. Tsirkin #include "hw/pci/msi.h" 19c759b24fSMichael S. Tsirkin #include "hw/pci/msix.h" 20c759b24fSMichael S. Tsirkin #include "hw/pci/pci.h" 21428c3eceSStefano Stabellini #include "hw/xen/xen.h" 22da278d58SPhilippe Mathieu-Daudé #include "sysemu/xen.h" 23ca77ee28SMarkus Armbruster #include "migration/qemu-file-types.h" 24d6454270SMarkus Armbruster #include "migration/vmstate.h" 251de7afc9SPaolo Bonzini #include "qemu/range.h" 26ee640c62SCao jin #include "qapi/error.h" 27993b1f4bSPeter Xu #include "trace.h" 28315a1350SMichael S. Tsirkin 29315a1350SMichael S. Tsirkin /* MSI enable bit and maskall bit are in byte 1 in FLAGS register */ 30315a1350SMichael S. Tsirkin #define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1) 31315a1350SMichael S. Tsirkin #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8) 32315a1350SMichael S. Tsirkin #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8) 33315a1350SMichael S. Tsirkin 344c93bfa9SMichael S. 
Tsirkin MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) 35315a1350SMichael S. Tsirkin { 36315a1350SMichael S. Tsirkin uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE; 37315a1350SMichael S. Tsirkin MSIMessage msg; 38315a1350SMichael S. Tsirkin 39315a1350SMichael S. Tsirkin msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); 40315a1350SMichael S. Tsirkin msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); 41315a1350SMichael S. Tsirkin return msg; 42315a1350SMichael S. Tsirkin } 43315a1350SMichael S. Tsirkin 44315a1350SMichael S. Tsirkin /* 45315a1350SMichael S. Tsirkin * Special API for POWER to configure the vectors through 46315a1350SMichael S. Tsirkin * a side channel. Should never be used by devices. 47315a1350SMichael S. Tsirkin */ 48315a1350SMichael S. Tsirkin void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg) 49315a1350SMichael S. Tsirkin { 50315a1350SMichael S. Tsirkin uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE; 51315a1350SMichael S. Tsirkin 52315a1350SMichael S. Tsirkin pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address); 53315a1350SMichael S. Tsirkin pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data); 54315a1350SMichael S. Tsirkin table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; 55315a1350SMichael S. Tsirkin } 56315a1350SMichael S. Tsirkin 57315a1350SMichael S. Tsirkin static uint8_t msix_pending_mask(int vector) 58315a1350SMichael S. Tsirkin { 59315a1350SMichael S. Tsirkin return 1 << (vector % 8); 60315a1350SMichael S. Tsirkin } 61315a1350SMichael S. Tsirkin 62315a1350SMichael S. Tsirkin static uint8_t *msix_pending_byte(PCIDevice *dev, int vector) 63315a1350SMichael S. Tsirkin { 64315a1350SMichael S. Tsirkin return dev->msix_pba + vector / 8; 65315a1350SMichael S. Tsirkin } 66315a1350SMichael S. Tsirkin 67315a1350SMichael S. 
/* Non-zero when the pending bit for @vector is set. */
static int msix_is_pending(PCIDevice *dev, int vector)
{
    uint8_t pba_byte = *msix_pending_byte(dev, vector);

    return pba_byte & msix_pending_mask(vector);
}

/* Set the pending bit for @vector. */
void msix_set_pending(PCIDevice *dev, unsigned int vector)
{
    uint8_t *pba_byte = msix_pending_byte(dev, vector);

    *pba_byte |= msix_pending_mask(vector);
}

/* Clear the pending bit for @vector. */
void msix_clr_pending(PCIDevice *dev, int vector)
{
    uint8_t *pba_byte = msix_pending_byte(dev, vector);

    *pba_byte &= ~msix_pending_mask(vector);
}

/*
 * Report whether @vector counts as masked, given function-level mask
 * state @fmask: true when @fmask is set or the entry's own mask bit is set.
 */
static bool msix_vector_masked(PCIDevice *dev, unsigned int vector, bool fmask)
{
    unsigned offset = vector * PCI_MSIX_ENTRY_SIZE;
    uint8_t *entry = &dev->msix_table[offset];

    /*
     * MSIs on Xen can be remapped into pirqs. In those cases, masking
     * and unmasking go through the PV evtchn path, so such a vector is
     * never reported as masked here.
     */
    if (xen_enabled() &&
        xen_is_pirq_msi(pci_get_long(entry + PCI_MSIX_ENTRY_DATA))) {
        return false;
    }

    if (fmask) {
        return true;
    }

    return entry[PCI_MSIX_ENTRY_VECTOR_CTRL] & PCI_MSIX_ENTRY_CTRL_MASKBIT;
}

/* Is @vector masked under the device's current function mask state? */
bool msix_is_masked(PCIDevice *dev, unsigned int vector)
{
    bool function_masked = dev->msix_function_masked;

    return msix_vector_masked(dev, vector, function_masked);
}

/*
 * Inform the registered vector notifiers about the new mask state of
 * @vector: the release notifier when it is masked, the use notifier
 * (with the vector's current message) otherwise. Does nothing when no
 * use notifier is registered.
 */
static void msix_fire_vector_notifier(PCIDevice *dev,
                                      unsigned int vector, bool is_masked)
{
    int rc;

    if (!dev->msix_vector_use_notifier) {
        return;
    }

    if (is_masked) {
        dev->msix_vector_release_notifier(dev, vector);
        return;
    }

    rc = dev->msix_vector_use_notifier(dev, vector,
                                       msix_get_message(dev, vector));
    assert(rc >= 0);
}

/*
 * React to a possible change of the mask state of @vector. @was_masked is
 * the state before the change; nothing happens if the state is the same.
 * When the vector became unmasked with its pending bit set, the bit is
 * cleared and the message is delivered.
 */
static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
{
    bool now_masked = msix_is_masked(dev, vector);

    if (now_masked == was_masked) {
        return;
    }

    msix_fire_vector_notifier(dev, vector, now_masked);

    if (now_masked) {
        return;
    }

    if (msix_is_pending(dev, vector)) {
        msix_clr_pending(dev, vector);
        msix_notify(dev, vector);
    }
}

/* Is the mask-all bit set in the MSI-X capability of @dev? */
static bool msix_masked(PCIDevice *dev)
{
    uint8_t control = dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET];

    return control & MSIX_MASKALL_MASK;
}

/*
 * Recompute the cached function mask: the function is masked when MSI-X
 * is not enabled or when the mask-all bit is set.
 */
static void msix_update_function_masked(PCIDevice *dev)
{
    bool masked = true;

    if (msix_enabled(dev)) {
        masked = msix_masked(dev);
    }
    dev->msix_function_masked = masked;
}

/* Handle MSI-X capability config write. */
void msix_write_config(PCIDevice *dev, uint32_t addr,
                       uint32_t val, int len)
{
    unsigned control_pos = dev->msix_cap + MSIX_CONTROL_OFFSET;
    bool old_fmask;
    int i;

    if (!msix_present(dev)) {
        return;
    }
    /* Only writes that touch the control byte are of interest. */
    if (!range_covers_byte(addr, len, control_pos)) {
        return;
    }

    trace_msix_write_config(dev->name, msix_enabled(dev), msix_masked(dev));

    old_fmask = dev->msix_function_masked;
    msix_update_function_masked(dev);

    if (!msix_enabled(dev)) {
        return;
    }

    pci_device_deassert_intx(dev);

    if (dev->msix_function_masked != old_fmask) {
        /* Function mask changed: re-evaluate every vector. */
        for (i = 0; i < dev->msix_entries_nr; ++i) {
            bool vec_was_masked = msix_vector_masked(dev, i, old_fmask);

            msix_handle_mask_update(dev, i, vec_was_masked);
        }
    }
}

Tsirkin static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr, 178315a1350SMichael S. Tsirkin unsigned size) 179315a1350SMichael S. Tsirkin { 180315a1350SMichael S. Tsirkin PCIDevice *dev = opaque; 181315a1350SMichael S. Tsirkin 18258cf0f86SPaolo Bonzini assert(addr + size <= dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE); 183315a1350SMichael S. Tsirkin return pci_get_long(dev->msix_table + addr); 184315a1350SMichael S. Tsirkin } 185315a1350SMichael S. Tsirkin 186315a1350SMichael S. Tsirkin static void msix_table_mmio_write(void *opaque, hwaddr addr, 187315a1350SMichael S. Tsirkin uint64_t val, unsigned size) 188315a1350SMichael S. Tsirkin { 189315a1350SMichael S. Tsirkin PCIDevice *dev = opaque; 190315a1350SMichael S. Tsirkin int vector = addr / PCI_MSIX_ENTRY_SIZE; 191315a1350SMichael S. Tsirkin bool was_masked; 192315a1350SMichael S. Tsirkin 19358cf0f86SPaolo Bonzini assert(addr + size <= dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE); 19458cf0f86SPaolo Bonzini 195315a1350SMichael S. Tsirkin was_masked = msix_is_masked(dev, vector); 196315a1350SMichael S. Tsirkin pci_set_long(dev->msix_table + addr, val); 197315a1350SMichael S. Tsirkin msix_handle_mask_update(dev, vector, was_masked); 198315a1350SMichael S. Tsirkin } 199315a1350SMichael S. Tsirkin 200315a1350SMichael S. Tsirkin static const MemoryRegionOps msix_table_mmio_ops = { 201315a1350SMichael S. Tsirkin .read = msix_table_mmio_read, 202315a1350SMichael S. Tsirkin .write = msix_table_mmio_write, 2036f991980SPaolo Bonzini .endianness = DEVICE_LITTLE_ENDIAN, 204315a1350SMichael S. Tsirkin .valid = { 205315a1350SMichael S. Tsirkin .min_access_size = 4, 206191f90cbSMichael S. Tsirkin .max_access_size = 8, 207191f90cbSMichael S. Tsirkin }, 208191f90cbSMichael S. Tsirkin .impl = { 209315a1350SMichael S. Tsirkin .max_access_size = 4, 210315a1350SMichael S. Tsirkin }, 211315a1350SMichael S. Tsirkin }; 212315a1350SMichael S. Tsirkin 213315a1350SMichael S. 
Tsirkin static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr, 214315a1350SMichael S. Tsirkin unsigned size) 215315a1350SMichael S. Tsirkin { 216315a1350SMichael S. Tsirkin PCIDevice *dev = opaque; 217bbef882cSMichael S. Tsirkin if (dev->msix_vector_poll_notifier) { 218bbef882cSMichael S. Tsirkin unsigned vector_start = addr * 8; 219bbef882cSMichael S. Tsirkin unsigned vector_end = MIN(addr + size * 8, dev->msix_entries_nr); 220bbef882cSMichael S. Tsirkin dev->msix_vector_poll_notifier(dev, vector_start, vector_end); 221bbef882cSMichael S. Tsirkin } 222315a1350SMichael S. Tsirkin 223315a1350SMichael S. Tsirkin return pci_get_long(dev->msix_pba + addr); 224315a1350SMichael S. Tsirkin } 225315a1350SMichael S. Tsirkin 22643b11a91SMarc-André Lureau static void msix_pba_mmio_write(void *opaque, hwaddr addr, 22743b11a91SMarc-André Lureau uint64_t val, unsigned size) 22843b11a91SMarc-André Lureau { 22943b11a91SMarc-André Lureau } 23043b11a91SMarc-André Lureau 231315a1350SMichael S. Tsirkin static const MemoryRegionOps msix_pba_mmio_ops = { 232315a1350SMichael S. Tsirkin .read = msix_pba_mmio_read, 23343b11a91SMarc-André Lureau .write = msix_pba_mmio_write, 2346f991980SPaolo Bonzini .endianness = DEVICE_LITTLE_ENDIAN, 235315a1350SMichael S. Tsirkin .valid = { 236315a1350SMichael S. Tsirkin .min_access_size = 4, 237191f90cbSMichael S. Tsirkin .max_access_size = 8, 238191f90cbSMichael S. Tsirkin }, 239191f90cbSMichael S. Tsirkin .impl = { 240315a1350SMichael S. Tsirkin .max_access_size = 4, 241315a1350SMichael S. Tsirkin }, 242315a1350SMichael S. Tsirkin }; 243315a1350SMichael S. Tsirkin 244315a1350SMichael S. Tsirkin static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) 245315a1350SMichael S. Tsirkin { 246315a1350SMichael S. Tsirkin int vector; 247315a1350SMichael S. Tsirkin 248315a1350SMichael S. Tsirkin for (vector = 0; vector < nentries; ++vector) { 249315a1350SMichael S. Tsirkin unsigned offset = 250315a1350SMichael S. 
Tsirkin vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; 251315a1350SMichael S. Tsirkin bool was_masked = msix_is_masked(dev, vector); 252315a1350SMichael S. Tsirkin 253315a1350SMichael S. Tsirkin dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; 254315a1350SMichael S. Tsirkin msix_handle_mask_update(dev, vector, was_masked); 255315a1350SMichael S. Tsirkin } 256315a1350SMichael S. Tsirkin } 257315a1350SMichael S. Tsirkin 258ee640c62SCao jin /* 259ee640c62SCao jin * Make PCI device @dev MSI-X capable 260ee640c62SCao jin * @nentries is the max number of MSI-X vectors that the device support. 261ee640c62SCao jin * @table_bar is the MemoryRegion that MSI-X table structure resides. 262ee640c62SCao jin * @table_bar_nr is number of base address register corresponding to @table_bar. 263ee640c62SCao jin * @table_offset indicates the offset that the MSI-X table structure starts with 264ee640c62SCao jin * in @table_bar. 265ee640c62SCao jin * @pba_bar is the MemoryRegion that the Pending Bit Array structure resides. 266ee640c62SCao jin * @pba_bar_nr is number of base address register corresponding to @pba_bar. 267ee640c62SCao jin * @pba_offset indicates the offset that the Pending Bit Array structure 268ee640c62SCao jin * starts with in @pba_bar. 269ee640c62SCao jin * Non-zero @cap_pos puts capability MSI-X at that offset in PCI config space. 270ee640c62SCao jin * @errp is for returning errors. 271ee640c62SCao jin * 272ee640c62SCao jin * Return 0 on success; set @errp and return -errno on error: 273ee640c62SCao jin * -ENOTSUP means lacking msi support for a msi-capable platform. 274ee640c62SCao jin * -EINVAL means capability overlap, happens when @cap_pos is non-zero, 275ee640c62SCao jin * also means a programming error, except device assignment, which can check 276ee640c62SCao jin * if a real HW is broken. 277ee640c62SCao jin */ 278315a1350SMichael S. Tsirkin int msix_init(struct PCIDevice *dev, unsigned short nentries, 279315a1350SMichael S. 
Tsirkin MemoryRegion *table_bar, uint8_t table_bar_nr, 280315a1350SMichael S. Tsirkin unsigned table_offset, MemoryRegion *pba_bar, 281ee640c62SCao jin uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos, 282ee640c62SCao jin Error **errp) 283315a1350SMichael S. Tsirkin { 284315a1350SMichael S. Tsirkin int cap; 285315a1350SMichael S. Tsirkin unsigned table_size, pba_size; 286315a1350SMichael S. Tsirkin uint8_t *config; 287315a1350SMichael S. Tsirkin 288315a1350SMichael S. Tsirkin /* Nothing to do if MSI is not supported by interrupt controller */ 289226419d6SMichael S. Tsirkin if (!msi_nonbroken) { 290ee640c62SCao jin error_setg(errp, "MSI-X is not supported by interrupt controller"); 291315a1350SMichael S. Tsirkin return -ENOTSUP; 292315a1350SMichael S. Tsirkin } 293315a1350SMichael S. Tsirkin 294315a1350SMichael S. Tsirkin if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) { 295ee640c62SCao jin error_setg(errp, "The number of MSI-X vectors is invalid"); 296315a1350SMichael S. Tsirkin return -EINVAL; 297315a1350SMichael S. Tsirkin } 298315a1350SMichael S. Tsirkin 299315a1350SMichael S. Tsirkin table_size = nentries * PCI_MSIX_ENTRY_SIZE; 300315a1350SMichael S. Tsirkin pba_size = QEMU_ALIGN_UP(nentries, 64) / 8; 301315a1350SMichael S. Tsirkin 302315a1350SMichael S. Tsirkin /* Sanity test: table & pba don't overlap, fit within BARs, min aligned */ 303315a1350SMichael S. Tsirkin if ((table_bar_nr == pba_bar_nr && 304315a1350SMichael S. Tsirkin ranges_overlap(table_offset, table_size, pba_offset, pba_size)) || 305315a1350SMichael S. Tsirkin table_offset + table_size > memory_region_size(table_bar) || 306315a1350SMichael S. Tsirkin pba_offset + pba_size > memory_region_size(pba_bar) || 307315a1350SMichael S. Tsirkin (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) { 308ee640c62SCao jin error_setg(errp, "table & pba overlap, or they don't fit in BARs," 309ee640c62SCao jin " or don't align"); 310315a1350SMichael S. 
Tsirkin return -EINVAL; 311315a1350SMichael S. Tsirkin } 312315a1350SMichael S. Tsirkin 31327841278SMao Zhongyi cap = pci_add_capability(dev, PCI_CAP_ID_MSIX, 314ee640c62SCao jin cap_pos, MSIX_CAP_LENGTH, errp); 315315a1350SMichael S. Tsirkin if (cap < 0) { 316315a1350SMichael S. Tsirkin return cap; 317315a1350SMichael S. Tsirkin } 318315a1350SMichael S. Tsirkin 319315a1350SMichael S. Tsirkin dev->msix_cap = cap; 320315a1350SMichael S. Tsirkin dev->cap_present |= QEMU_PCI_CAP_MSIX; 321315a1350SMichael S. Tsirkin config = dev->config + cap; 322315a1350SMichael S. Tsirkin 323315a1350SMichael S. Tsirkin pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1); 324315a1350SMichael S. Tsirkin dev->msix_entries_nr = nentries; 325315a1350SMichael S. Tsirkin dev->msix_function_masked = true; 326315a1350SMichael S. Tsirkin 327315a1350SMichael S. Tsirkin pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar_nr); 328315a1350SMichael S. Tsirkin pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar_nr); 329315a1350SMichael S. Tsirkin 330315a1350SMichael S. Tsirkin /* Make flags bit writable. */ 331315a1350SMichael S. Tsirkin dev->wmask[cap + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK | 332315a1350SMichael S. Tsirkin MSIX_MASKALL_MASK; 333315a1350SMichael S. Tsirkin 334315a1350SMichael S. Tsirkin dev->msix_table = g_malloc0(table_size); 335315a1350SMichael S. Tsirkin dev->msix_pba = g_malloc0(pba_size); 336315a1350SMichael S. Tsirkin dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used); 337315a1350SMichael S. Tsirkin 338315a1350SMichael S. Tsirkin msix_mask_all(dev, nentries); 339315a1350SMichael S. Tsirkin 34040c5dce9SPaolo Bonzini memory_region_init_io(&dev->msix_table_mmio, OBJECT(dev), &msix_table_mmio_ops, dev, 341315a1350SMichael S. Tsirkin "msix-table", table_size); 342315a1350SMichael S. 
Tsirkin memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio); 34340c5dce9SPaolo Bonzini memory_region_init_io(&dev->msix_pba_mmio, OBJECT(dev), &msix_pba_mmio_ops, dev, 344315a1350SMichael S. Tsirkin "msix-pba", pba_size); 345315a1350SMichael S. Tsirkin memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio); 346315a1350SMichael S. Tsirkin 347315a1350SMichael S. Tsirkin return 0; 348315a1350SMichael S. Tsirkin } 349315a1350SMichael S. Tsirkin 350315a1350SMichael S. Tsirkin int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries, 351ee640c62SCao jin uint8_t bar_nr, Error **errp) 352315a1350SMichael S. Tsirkin { 353315a1350SMichael S. Tsirkin int ret; 354315a1350SMichael S. Tsirkin char *name; 355a0ccd212SJason Wang uint32_t bar_size = 4096; 356a0ccd212SJason Wang uint32_t bar_pba_offset = bar_size / 2; 35717323e8bSDongli Zhang uint32_t bar_pba_size = QEMU_ALIGN_UP(nentries, 64) / 8; 358315a1350SMichael S. Tsirkin 359315a1350SMichael S. Tsirkin /* 360315a1350SMichael S. Tsirkin * Migration compatibility dictates that this remains a 4k 361315a1350SMichael S. Tsirkin * BAR with the vector table in the lower half and PBA in 362a0ccd212SJason Wang * the upper half for nentries which is lower or equal to 128. 363a0ccd212SJason Wang * No need to care about using more than 65 entries for legacy 364a0ccd212SJason Wang * machine types who has at most 64 queues. 365315a1350SMichael S. Tsirkin */ 366a0ccd212SJason Wang if (nentries * PCI_MSIX_ENTRY_SIZE > bar_pba_offset) { 367a0ccd212SJason Wang bar_pba_offset = nentries * PCI_MSIX_ENTRY_SIZE; 368a0ccd212SJason Wang } 369315a1350SMichael S. Tsirkin 370a0ccd212SJason Wang if (bar_pba_offset + bar_pba_size > 4096) { 371a0ccd212SJason Wang bar_size = bar_pba_offset + bar_pba_size; 372a0ccd212SJason Wang } 373a0ccd212SJason Wang 3749bff5d81SPeter Maydell bar_size = pow2ceil(bar_size); 375315a1350SMichael S. Tsirkin 376315a1350SMichael S. 
Tsirkin name = g_strdup_printf("%s-msix", dev->name); 377a0ccd212SJason Wang memory_region_init(&dev->msix_exclusive_bar, OBJECT(dev), name, bar_size); 378315a1350SMichael S. Tsirkin g_free(name); 379315a1350SMichael S. Tsirkin 380315a1350SMichael S. Tsirkin ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr, 381a0ccd212SJason Wang 0, &dev->msix_exclusive_bar, 382a0ccd212SJason Wang bar_nr, bar_pba_offset, 383ee640c62SCao jin 0, errp); 384315a1350SMichael S. Tsirkin if (ret) { 385315a1350SMichael S. Tsirkin return ret; 386315a1350SMichael S. Tsirkin } 387315a1350SMichael S. Tsirkin 388315a1350SMichael S. Tsirkin pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY, 389315a1350SMichael S. Tsirkin &dev->msix_exclusive_bar); 390315a1350SMichael S. Tsirkin 391315a1350SMichael S. Tsirkin return 0; 392315a1350SMichael S. Tsirkin } 393315a1350SMichael S. Tsirkin 394315a1350SMichael S. Tsirkin static void msix_free_irq_entries(PCIDevice *dev) 395315a1350SMichael S. Tsirkin { 396315a1350SMichael S. Tsirkin int vector; 397315a1350SMichael S. Tsirkin 398315a1350SMichael S. Tsirkin for (vector = 0; vector < dev->msix_entries_nr; ++vector) { 399315a1350SMichael S. Tsirkin dev->msix_entry_used[vector] = 0; 400315a1350SMichael S. Tsirkin msix_clr_pending(dev, vector); 401315a1350SMichael S. Tsirkin } 402315a1350SMichael S. Tsirkin } 403315a1350SMichael S. Tsirkin 404315a1350SMichael S. Tsirkin static void msix_clear_all_vectors(PCIDevice *dev) 405315a1350SMichael S. Tsirkin { 406315a1350SMichael S. Tsirkin int vector; 407315a1350SMichael S. Tsirkin 408315a1350SMichael S. Tsirkin for (vector = 0; vector < dev->msix_entries_nr; ++vector) { 409315a1350SMichael S. Tsirkin msix_clr_pending(dev, vector); 410315a1350SMichael S. Tsirkin } 411315a1350SMichael S. Tsirkin } 412315a1350SMichael S. Tsirkin 413315a1350SMichael S. Tsirkin /* Clean up resources for the device. */ 414315a1350SMichael S. 
Tsirkin void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar) 415315a1350SMichael S. Tsirkin { 416315a1350SMichael S. Tsirkin if (!msix_present(dev)) { 417315a1350SMichael S. Tsirkin return; 418315a1350SMichael S. Tsirkin } 419315a1350SMichael S. Tsirkin pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH); 420315a1350SMichael S. Tsirkin dev->msix_cap = 0; 421315a1350SMichael S. Tsirkin msix_free_irq_entries(dev); 422315a1350SMichael S. Tsirkin dev->msix_entries_nr = 0; 423315a1350SMichael S. Tsirkin memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio); 424315a1350SMichael S. Tsirkin g_free(dev->msix_pba); 425315a1350SMichael S. Tsirkin dev->msix_pba = NULL; 426315a1350SMichael S. Tsirkin memory_region_del_subregion(table_bar, &dev->msix_table_mmio); 427315a1350SMichael S. Tsirkin g_free(dev->msix_table); 428315a1350SMichael S. Tsirkin dev->msix_table = NULL; 429315a1350SMichael S. Tsirkin g_free(dev->msix_entry_used); 430315a1350SMichael S. Tsirkin dev->msix_entry_used = NULL; 431315a1350SMichael S. Tsirkin dev->cap_present &= ~QEMU_PCI_CAP_MSIX; 432315a1350SMichael S. Tsirkin } 433315a1350SMichael S. Tsirkin 434315a1350SMichael S. Tsirkin void msix_uninit_exclusive_bar(PCIDevice *dev) 435315a1350SMichael S. Tsirkin { 436315a1350SMichael S. Tsirkin if (msix_present(dev)) { 437315a1350SMichael S. Tsirkin msix_uninit(dev, &dev->msix_exclusive_bar, &dev->msix_exclusive_bar); 438315a1350SMichael S. Tsirkin } 439315a1350SMichael S. Tsirkin } 440315a1350SMichael S. Tsirkin 441315a1350SMichael S. Tsirkin void msix_save(PCIDevice *dev, QEMUFile *f) 442315a1350SMichael S. Tsirkin { 443315a1350SMichael S. Tsirkin unsigned n = dev->msix_entries_nr; 444315a1350SMichael S. Tsirkin 445315a1350SMichael S. Tsirkin if (!msix_present(dev)) { 446315a1350SMichael S. Tsirkin return; 447315a1350SMichael S. Tsirkin } 448315a1350SMichael S. Tsirkin 449315a1350SMichael S. 
Tsirkin qemu_put_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE); 4500ef1efcfSMarc-André Lureau qemu_put_buffer(f, dev->msix_pba, DIV_ROUND_UP(n, 8)); 451315a1350SMichael S. Tsirkin } 452315a1350SMichael S. Tsirkin 453315a1350SMichael S. Tsirkin /* Should be called after restoring the config space. */ 454315a1350SMichael S. Tsirkin void msix_load(PCIDevice *dev, QEMUFile *f) 455315a1350SMichael S. Tsirkin { 456315a1350SMichael S. Tsirkin unsigned n = dev->msix_entries_nr; 457315a1350SMichael S. Tsirkin unsigned int vector; 458315a1350SMichael S. Tsirkin 459315a1350SMichael S. Tsirkin if (!msix_present(dev)) { 460315a1350SMichael S. Tsirkin return; 461315a1350SMichael S. Tsirkin } 462315a1350SMichael S. Tsirkin 463315a1350SMichael S. Tsirkin msix_clear_all_vectors(dev); 464315a1350SMichael S. Tsirkin qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE); 4650ef1efcfSMarc-André Lureau qemu_get_buffer(f, dev->msix_pba, DIV_ROUND_UP(n, 8)); 466315a1350SMichael S. Tsirkin msix_update_function_masked(dev); 467315a1350SMichael S. Tsirkin 468315a1350SMichael S. Tsirkin for (vector = 0; vector < n; vector++) { 469315a1350SMichael S. Tsirkin msix_handle_mask_update(dev, vector, true); 470315a1350SMichael S. Tsirkin } 471315a1350SMichael S. Tsirkin } 472315a1350SMichael S. Tsirkin 473315a1350SMichael S. Tsirkin /* Does device support MSI-X? */ 474315a1350SMichael S. Tsirkin int msix_present(PCIDevice *dev) 475315a1350SMichael S. Tsirkin { 476315a1350SMichael S. Tsirkin return dev->cap_present & QEMU_PCI_CAP_MSIX; 477315a1350SMichael S. Tsirkin } 478315a1350SMichael S. Tsirkin 479315a1350SMichael S. Tsirkin /* Is MSI-X enabled? */ 480315a1350SMichael S. Tsirkin int msix_enabled(PCIDevice *dev) 481315a1350SMichael S. Tsirkin { 482315a1350SMichael S. Tsirkin return (dev->cap_present & QEMU_PCI_CAP_MSIX) && 483315a1350SMichael S. Tsirkin (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & 484315a1350SMichael S. Tsirkin MSIX_ENABLE_MASK); 485315a1350SMichael S. 
Tsirkin } 486315a1350SMichael S. Tsirkin 487315a1350SMichael S. Tsirkin /* Send an MSI-X message */ 488315a1350SMichael S. Tsirkin void msix_notify(PCIDevice *dev, unsigned vector) 489315a1350SMichael S. Tsirkin { 490315a1350SMichael S. Tsirkin MSIMessage msg; 491315a1350SMichael S. Tsirkin 49293482436SCao jin if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) { 493315a1350SMichael S. Tsirkin return; 49493482436SCao jin } 49593482436SCao jin 496315a1350SMichael S. Tsirkin if (msix_is_masked(dev, vector)) { 497315a1350SMichael S. Tsirkin msix_set_pending(dev, vector); 498315a1350SMichael S. Tsirkin return; 499315a1350SMichael S. Tsirkin } 500315a1350SMichael S. Tsirkin 501315a1350SMichael S. Tsirkin msg = msix_get_message(dev, vector); 502315a1350SMichael S. Tsirkin 50338d40ff1SPavel Fedin msi_send_message(dev, msg); 504315a1350SMichael S. Tsirkin } 505315a1350SMichael S. Tsirkin 506315a1350SMichael S. Tsirkin void msix_reset(PCIDevice *dev) 507315a1350SMichael S. Tsirkin { 508315a1350SMichael S. Tsirkin if (!msix_present(dev)) { 509315a1350SMichael S. Tsirkin return; 510315a1350SMichael S. Tsirkin } 511315a1350SMichael S. Tsirkin msix_clear_all_vectors(dev); 512315a1350SMichael S. Tsirkin dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &= 513315a1350SMichael S. Tsirkin ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET]; 514315a1350SMichael S. Tsirkin memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE); 515315a1350SMichael S. Tsirkin memset(dev->msix_pba, 0, QEMU_ALIGN_UP(dev->msix_entries_nr, 64) / 8); 516315a1350SMichael S. Tsirkin msix_mask_all(dev, dev->msix_entries_nr); 517315a1350SMichael S. Tsirkin } 518315a1350SMichael S. Tsirkin 519315a1350SMichael S. Tsirkin /* PCI spec suggests that devices make it possible for software to configure 520315a1350SMichael S. Tsirkin * less vectors than supported by the device, but does not specify a standard 521315a1350SMichael S. Tsirkin * mechanism for devices to do so. 
522315a1350SMichael S. Tsirkin * 523315a1350SMichael S. Tsirkin * We support this by asking devices to declare vectors software is going to 524315a1350SMichael S. Tsirkin * actually use, and checking this on the notification path. Devices that 525315a1350SMichael S. Tsirkin * don't want to follow the spec suggestion can declare all vectors as used. */ 526315a1350SMichael S. Tsirkin 527315a1350SMichael S. Tsirkin /* Mark vector as used. */ 528315a1350SMichael S. Tsirkin int msix_vector_use(PCIDevice *dev, unsigned vector) 529315a1350SMichael S. Tsirkin { 53093482436SCao jin if (vector >= dev->msix_entries_nr) { 531315a1350SMichael S. Tsirkin return -EINVAL; 53293482436SCao jin } 53393482436SCao jin 534315a1350SMichael S. Tsirkin dev->msix_entry_used[vector]++; 535315a1350SMichael S. Tsirkin return 0; 536315a1350SMichael S. Tsirkin } 537315a1350SMichael S. Tsirkin 538315a1350SMichael S. Tsirkin /* Mark vector as unused. */ 539315a1350SMichael S. Tsirkin void msix_vector_unuse(PCIDevice *dev, unsigned vector) 540315a1350SMichael S. Tsirkin { 541315a1350SMichael S. Tsirkin if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) { 542315a1350SMichael S. Tsirkin return; 543315a1350SMichael S. Tsirkin } 544315a1350SMichael S. Tsirkin if (--dev->msix_entry_used[vector]) { 545315a1350SMichael S. Tsirkin return; 546315a1350SMichael S. Tsirkin } 547315a1350SMichael S. Tsirkin msix_clr_pending(dev, vector); 548315a1350SMichael S. Tsirkin } 549315a1350SMichael S. Tsirkin 550315a1350SMichael S. Tsirkin void msix_unuse_all_vectors(PCIDevice *dev) 551315a1350SMichael S. Tsirkin { 552315a1350SMichael S. Tsirkin if (!msix_present(dev)) { 553315a1350SMichael S. Tsirkin return; 554315a1350SMichael S. Tsirkin } 555315a1350SMichael S. Tsirkin msix_free_irq_entries(dev); 556315a1350SMichael S. Tsirkin } 557315a1350SMichael S. Tsirkin 558315a1350SMichael S. Tsirkin unsigned int msix_nr_vectors_allocated(const PCIDevice *dev) 559315a1350SMichael S. 
Tsirkin { 560315a1350SMichael S. Tsirkin return dev->msix_entries_nr; 561315a1350SMichael S. Tsirkin } 562315a1350SMichael S. Tsirkin 563315a1350SMichael S. Tsirkin static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector) 564315a1350SMichael S. Tsirkin { 565315a1350SMichael S. Tsirkin MSIMessage msg; 566315a1350SMichael S. Tsirkin 567315a1350SMichael S. Tsirkin if (msix_is_masked(dev, vector)) { 568315a1350SMichael S. Tsirkin return 0; 569315a1350SMichael S. Tsirkin } 570315a1350SMichael S. Tsirkin msg = msix_get_message(dev, vector); 571315a1350SMichael S. Tsirkin return dev->msix_vector_use_notifier(dev, vector, msg); 572315a1350SMichael S. Tsirkin } 573315a1350SMichael S. Tsirkin 574315a1350SMichael S. Tsirkin static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector) 575315a1350SMichael S. Tsirkin { 576315a1350SMichael S. Tsirkin if (msix_is_masked(dev, vector)) { 577315a1350SMichael S. Tsirkin return; 578315a1350SMichael S. Tsirkin } 579315a1350SMichael S. Tsirkin dev->msix_vector_release_notifier(dev, vector); 580315a1350SMichael S. Tsirkin } 581315a1350SMichael S. Tsirkin 582315a1350SMichael S. Tsirkin int msix_set_vector_notifiers(PCIDevice *dev, 583315a1350SMichael S. Tsirkin MSIVectorUseNotifier use_notifier, 584bbef882cSMichael S. Tsirkin MSIVectorReleaseNotifier release_notifier, 585bbef882cSMichael S. Tsirkin MSIVectorPollNotifier poll_notifier) 586315a1350SMichael S. Tsirkin { 587315a1350SMichael S. Tsirkin int vector, ret; 588315a1350SMichael S. Tsirkin 589315a1350SMichael S. Tsirkin assert(use_notifier && release_notifier); 590315a1350SMichael S. Tsirkin 591315a1350SMichael S. Tsirkin dev->msix_vector_use_notifier = use_notifier; 592315a1350SMichael S. Tsirkin dev->msix_vector_release_notifier = release_notifier; 593bbef882cSMichael S. Tsirkin dev->msix_vector_poll_notifier = poll_notifier; 594315a1350SMichael S. Tsirkin 595315a1350SMichael S. 
Tsirkin if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & 596315a1350SMichael S. Tsirkin (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) { 597315a1350SMichael S. Tsirkin for (vector = 0; vector < dev->msix_entries_nr; vector++) { 598315a1350SMichael S. Tsirkin ret = msix_set_notifier_for_vector(dev, vector); 599315a1350SMichael S. Tsirkin if (ret < 0) { 600315a1350SMichael S. Tsirkin goto undo; 601315a1350SMichael S. Tsirkin } 602315a1350SMichael S. Tsirkin } 603315a1350SMichael S. Tsirkin } 604bbef882cSMichael S. Tsirkin if (dev->msix_vector_poll_notifier) { 605bbef882cSMichael S. Tsirkin dev->msix_vector_poll_notifier(dev, 0, dev->msix_entries_nr); 606bbef882cSMichael S. Tsirkin } 607315a1350SMichael S. Tsirkin return 0; 608315a1350SMichael S. Tsirkin 609315a1350SMichael S. Tsirkin undo: 610315a1350SMichael S. Tsirkin while (--vector >= 0) { 611315a1350SMichael S. Tsirkin msix_unset_notifier_for_vector(dev, vector); 612315a1350SMichael S. Tsirkin } 613315a1350SMichael S. Tsirkin dev->msix_vector_use_notifier = NULL; 614315a1350SMichael S. Tsirkin dev->msix_vector_release_notifier = NULL; 615315a1350SMichael S. Tsirkin return ret; 616315a1350SMichael S. Tsirkin } 617315a1350SMichael S. Tsirkin 618315a1350SMichael S. Tsirkin void msix_unset_vector_notifiers(PCIDevice *dev) 619315a1350SMichael S. Tsirkin { 620315a1350SMichael S. Tsirkin int vector; 621315a1350SMichael S. Tsirkin 622315a1350SMichael S. Tsirkin assert(dev->msix_vector_use_notifier && 623315a1350SMichael S. Tsirkin dev->msix_vector_release_notifier); 624315a1350SMichael S. Tsirkin 625315a1350SMichael S. Tsirkin if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & 626315a1350SMichael S. Tsirkin (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) { 627315a1350SMichael S. Tsirkin for (vector = 0; vector < dev->msix_entries_nr; vector++) { 628315a1350SMichael S. Tsirkin msix_unset_notifier_for_vector(dev, vector); 629315a1350SMichael S. Tsirkin } 630315a1350SMichael S. 
Tsirkin } 631315a1350SMichael S. Tsirkin dev->msix_vector_use_notifier = NULL; 632315a1350SMichael S. Tsirkin dev->msix_vector_release_notifier = NULL; 633bbef882cSMichael S. Tsirkin dev->msix_vector_poll_notifier = NULL; 634315a1350SMichael S. Tsirkin } 635340b50c7SGerd Hoffmann 6362c21ee76SJianjun Duan static int put_msix_state(QEMUFile *f, void *pv, size_t size, 637*3ddba9a9SMarkus Armbruster const VMStateField *field, JSONWriter *vmdesc) 638340b50c7SGerd Hoffmann { 639340b50c7SGerd Hoffmann msix_save(pv, f); 6402c21ee76SJianjun Duan 6412c21ee76SJianjun Duan return 0; 642340b50c7SGerd Hoffmann } 643340b50c7SGerd Hoffmann 6442c21ee76SJianjun Duan static int get_msix_state(QEMUFile *f, void *pv, size_t size, 64503fee66fSMarc-André Lureau const VMStateField *field) 646340b50c7SGerd Hoffmann { 647340b50c7SGerd Hoffmann msix_load(pv, f); 648340b50c7SGerd Hoffmann return 0; 649340b50c7SGerd Hoffmann } 650340b50c7SGerd Hoffmann 651340b50c7SGerd Hoffmann static VMStateInfo vmstate_info_msix = { 652340b50c7SGerd Hoffmann .name = "msix state", 653340b50c7SGerd Hoffmann .get = get_msix_state, 654340b50c7SGerd Hoffmann .put = put_msix_state, 655340b50c7SGerd Hoffmann }; 656340b50c7SGerd Hoffmann 657340b50c7SGerd Hoffmann const VMStateDescription vmstate_msix = { 658340b50c7SGerd Hoffmann .name = "msix", 659340b50c7SGerd Hoffmann .fields = (VMStateField[]) { 660340b50c7SGerd Hoffmann { 661340b50c7SGerd Hoffmann .name = "msix", 662340b50c7SGerd Hoffmann .version_id = 0, 663340b50c7SGerd Hoffmann .field_exists = NULL, 664340b50c7SGerd Hoffmann .size = 0, /* ouch */ 665340b50c7SGerd Hoffmann .info = &vmstate_info_msix, 666340b50c7SGerd Hoffmann .flags = VMS_SINGLE, 667340b50c7SGerd Hoffmann .offset = 0, 668340b50c7SGerd Hoffmann }, 669340b50c7SGerd Hoffmann VMSTATE_END_OF_LIST() 670340b50c7SGerd Hoffmann } 671340b50c7SGerd Hoffmann }; 672