xref: /qemu/hw/pci/msi.c (revision d884e272)
1 /*
2  * msi.c
3  *
4  * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
5  *                    VA Linux Systems Japan K.K.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11 
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16 
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "hw/pci/msi.h"
23 #include "hw/xen/xen.h"
24 #include "qemu/range.h"
25 #include "qapi/error.h"
26 #include "sysemu/xen.h"
27 
28 #include "hw/i386/kvm/xen_evtchn.h"
29 
/*
 * PCI_MSI_ADDRESS_LO: write mask applied to the low message address dword
 * by msi_init(); the two least significant bits stay read-only.
 */
#define PCI_MSI_ADDRESS_LO_MASK         (~0x3)

/*
 * Capability sizes in bytes, as returned by msi_cap_sizeof():
 * 32 = 32-bit address, 64 = 64-bit address, M = with per-vector masking.
 * If we get rid of cap allocator, we won't need those.
 */
#define PCI_MSI_32_SIZEOF       0x0a
#define PCI_MSI_64_SIZEOF       0x0e
#define PCI_MSI_32M_SIZEOF      0x14
#define PCI_MSI_64M_SIZEOF      0x18

/* Upper bound on the number of MSI vectors accepted by this file. */
#define PCI_MSI_VECTORS_MAX     32
40 
/*
 * Flag for interrupt controllers to declare broken MSI/MSI-X support.
 * values: false - broken; true - non-broken.
 *
 * Setting this flag to false will remove MSI/MSI-X capability from all devices.
 * In this file, msi_init() fails with -ENOTSUP while the flag is false.
 *
 * It is preferable for controllers to set this to true (non-broken) even if
 * they do not actually support MSI/MSI-X: guests normally probe the controller
 * type and do not attempt to enable MSI/MSI-X with interrupt controllers not
 * supporting such, so removing the capability is not required, and
 * it seems cleaner to have a given device look the same for all boards.
 *
 * TODO: some existing controllers violate the above rule. Identify and fix them.
 */
bool msi_nonbroken;
56 
57 /* If we get rid of cap allocator, we won't need this. */
58 static inline uint8_t msi_cap_sizeof(uint16_t flags)
59 {
60     switch (flags & (PCI_MSI_FLAGS_MASKBIT | PCI_MSI_FLAGS_64BIT)) {
61     case PCI_MSI_FLAGS_MASKBIT | PCI_MSI_FLAGS_64BIT:
62         return PCI_MSI_64M_SIZEOF;
63     case PCI_MSI_FLAGS_64BIT:
64         return PCI_MSI_64_SIZEOF;
65     case PCI_MSI_FLAGS_MASKBIT:
66         return PCI_MSI_32M_SIZEOF;
67     case 0:
68         return PCI_MSI_32_SIZEOF;
69     default:
70         abort();
71         break;
72     }
73     return 0;
74 }
75 
/* Uncomment to enable the debug traces emitted through MSI_DPRINTF(). */
//#define MSI_DEBUG

/*
 * MSI_DPRINTF(): printf-style trace to stderr, prefixed with the calling
 * function name and line number. Expands to an empty statement unless
 * MSI_DEBUG is defined, in which case its arguments are not evaluated.
 */
#ifdef MSI_DEBUG
# define MSI_DPRINTF(fmt, ...)                                          \
    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
#else
# define MSI_DPRINTF(fmt, ...)  do { } while (0)
#endif
/* Same as MSI_DPRINTF(), additionally prefixed with the device name and devfn. */
#define MSI_DEV_PRINTF(dev, fmt, ...)                                   \
    MSI_DPRINTF("%s:%x " fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)
86 
87 static inline unsigned int msi_nr_vectors(uint16_t flags)
88 {
89     return 1U <<
90         ((flags & PCI_MSI_FLAGS_QSIZE) >> ctz32(PCI_MSI_FLAGS_QSIZE));
91 }
92 
93 static inline uint8_t msi_flags_off(const PCIDevice* dev)
94 {
95     return dev->msi_cap + PCI_MSI_FLAGS;
96 }
97 
98 static inline uint8_t msi_address_lo_off(const PCIDevice* dev)
99 {
100     return dev->msi_cap + PCI_MSI_ADDRESS_LO;
101 }
102 
103 static inline uint8_t msi_address_hi_off(const PCIDevice* dev)
104 {
105     return dev->msi_cap + PCI_MSI_ADDRESS_HI;
106 }
107 
108 static inline uint8_t msi_data_off(const PCIDevice* dev, bool msi64bit)
109 {
110     return dev->msi_cap + (msi64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32);
111 }
112 
113 static inline uint8_t msi_mask_off(const PCIDevice* dev, bool msi64bit)
114 {
115     return dev->msi_cap + (msi64bit ? PCI_MSI_MASK_64 : PCI_MSI_MASK_32);
116 }
117 
118 static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
119 {
120     return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
121 }
122 
123 /*
124  * Special API for POWER to configure the vectors through
125  * a side channel. Should never be used by devices.
126  */
127 void msi_set_message(PCIDevice *dev, MSIMessage msg)
128 {
129     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
130     bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
131 
132     if (msi64bit) {
133         pci_set_quad(dev->config + msi_address_lo_off(dev), msg.address);
134     } else {
135         pci_set_long(dev->config + msi_address_lo_off(dev), msg.address);
136     }
137     pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
138 }
139 
140 static MSIMessage msi_prepare_message(PCIDevice *dev, unsigned int vector)
141 {
142     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
143     bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
144     unsigned int nr_vectors = msi_nr_vectors(flags);
145     MSIMessage msg;
146 
147     assert(vector < nr_vectors);
148 
149     if (msi64bit) {
150         msg.address = pci_get_quad(dev->config + msi_address_lo_off(dev));
151     } else {
152         msg.address = pci_get_long(dev->config + msi_address_lo_off(dev));
153     }
154 
155     /* upper bit 31:16 is zero */
156     msg.data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
157     if (nr_vectors > 1) {
158         msg.data &= ~(nr_vectors - 1);
159         msg.data |= vector;
160     }
161 
162     return msg;
163 }
164 
165 MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
166 {
167     return dev->msi_prepare_message(dev, vector);
168 }
169 
170 bool msi_enabled(const PCIDevice *dev)
171 {
172     return msi_present(dev) &&
173         (pci_get_word(dev->config + msi_flags_off(dev)) &
174          PCI_MSI_FLAGS_ENABLE);
175 }
176 
177 /*
178  * Make PCI device @dev MSI-capable.
179  * Non-zero @offset puts capability MSI at that offset in PCI config
180  * space.
181  * @nr_vectors is the number of MSI vectors (1, 2, 4, 8, 16 or 32).
182  * If @msi64bit, make the device capable of sending a 64-bit message
183  * address.
184  * If @msi_per_vector_mask, make the device support per-vector masking.
185  * @errp is for returning errors.
186  * Return 0 on success; set @errp and return -errno on error.
187  *
188  * -ENOTSUP means lacking msi support for a msi-capable platform.
189  * -EINVAL means capability overlap, happens when @offset is non-zero,
190  *  also means a programming error, except device assignment, which can check
191  *  if a real HW is broken.
192  */
193 int msi_init(struct PCIDevice *dev, uint8_t offset,
194              unsigned int nr_vectors, bool msi64bit,
195              bool msi_per_vector_mask, Error **errp)
196 {
197     unsigned int vectors_order;
198     uint16_t flags;
199     uint8_t cap_size;
200     int config_offset;
201 
202     if (!msi_nonbroken) {
203         error_setg(errp, "MSI is not supported by interrupt controller");
204         return -ENOTSUP;
205     }
206 
207     MSI_DEV_PRINTF(dev,
208                    "init offset: 0x%"PRIx8" vector: %"PRId8
209                    " 64bit %d mask %d\n",
210                    offset, nr_vectors, msi64bit, msi_per_vector_mask);
211 
212     assert(!(nr_vectors & (nr_vectors - 1)));   /* power of 2 */
213     assert(nr_vectors > 0);
214     assert(nr_vectors <= PCI_MSI_VECTORS_MAX);
215     /* the nr of MSI vectors is up to 32 */
216     vectors_order = ctz32(nr_vectors);
217 
218     flags = vectors_order << ctz32(PCI_MSI_FLAGS_QMASK);
219     if (msi64bit) {
220         flags |= PCI_MSI_FLAGS_64BIT;
221     }
222     if (msi_per_vector_mask) {
223         flags |= PCI_MSI_FLAGS_MASKBIT;
224     }
225 
226     cap_size = msi_cap_sizeof(flags);
227     config_offset = pci_add_capability(dev, PCI_CAP_ID_MSI, offset,
228                                         cap_size, errp);
229     if (config_offset < 0) {
230         return config_offset;
231     }
232 
233     dev->msi_cap = config_offset;
234     dev->cap_present |= QEMU_PCI_CAP_MSI;
235 
236     pci_set_word(dev->config + msi_flags_off(dev), flags);
237     pci_set_word(dev->wmask + msi_flags_off(dev),
238                  PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
239     pci_set_long(dev->wmask + msi_address_lo_off(dev),
240                  PCI_MSI_ADDRESS_LO_MASK);
241     if (msi64bit) {
242         pci_set_long(dev->wmask + msi_address_hi_off(dev), 0xffffffff);
243     }
244     pci_set_word(dev->wmask + msi_data_off(dev, msi64bit), 0xffff);
245 
246     if (msi_per_vector_mask) {
247         /* Make mask bits 0 to nr_vectors - 1 writable. */
248         pci_set_long(dev->wmask + msi_mask_off(dev, msi64bit),
249                      0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors));
250     }
251 
252     dev->msi_prepare_message = msi_prepare_message;
253 
254     return 0;
255 }
256 
257 void msi_uninit(struct PCIDevice *dev)
258 {
259     uint16_t flags;
260     uint8_t cap_size;
261 
262     if (!msi_present(dev)) {
263         return;
264     }
265     flags = pci_get_word(dev->config + msi_flags_off(dev));
266     cap_size = msi_cap_sizeof(flags);
267     pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size);
268     dev->cap_present &= ~QEMU_PCI_CAP_MSI;
269     dev->msi_prepare_message = NULL;
270 
271     MSI_DEV_PRINTF(dev, "uninit\n");
272 }
273 
274 void msi_reset(PCIDevice *dev)
275 {
276     uint16_t flags;
277     bool msi64bit;
278 
279     if (!msi_present(dev)) {
280         return;
281     }
282 
283     flags = pci_get_word(dev->config + msi_flags_off(dev));
284     flags &= ~(PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
285     msi64bit = flags & PCI_MSI_FLAGS_64BIT;
286 
287     pci_set_word(dev->config + msi_flags_off(dev), flags);
288     pci_set_long(dev->config + msi_address_lo_off(dev), 0);
289     if (msi64bit) {
290         pci_set_long(dev->config + msi_address_hi_off(dev), 0);
291     }
292     pci_set_word(dev->config + msi_data_off(dev, msi64bit), 0);
293     if (flags & PCI_MSI_FLAGS_MASKBIT) {
294         pci_set_long(dev->config + msi_mask_off(dev, msi64bit), 0);
295         pci_set_long(dev->config + msi_pending_off(dev, msi64bit), 0);
296     }
297     MSI_DEV_PRINTF(dev, "reset\n");
298 }
299 
300 bool msi_is_masked(const PCIDevice *dev, unsigned int vector)
301 {
302     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
303     uint32_t mask, data;
304     bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
305     assert(vector < PCI_MSI_VECTORS_MAX);
306 
307     if (!(flags & PCI_MSI_FLAGS_MASKBIT)) {
308         return false;
309     }
310 
311     data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
312     if (xen_enabled() && xen_is_pirq_msi(data)) {
313         return false;
314     }
315 
316     mask = pci_get_long(dev->config +
317                         msi_mask_off(dev, flags & PCI_MSI_FLAGS_64BIT));
318     return mask & (1U << vector);
319 }
320 
321 void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp)
322 {
323     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
324     bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
325     uint32_t irq_state, vector_mask, pending;
326 
327     if (vector >= PCI_MSI_VECTORS_MAX) {
328         error_setg(errp, "msi: vector %d not allocated. max vector is %d",
329                    vector, (PCI_MSI_VECTORS_MAX - 1));
330         return;
331     }
332 
333     vector_mask = (1U << vector);
334 
335     irq_state = pci_get_long(dev->config + msi_mask_off(dev, msi64bit));
336 
337     if (mask) {
338         irq_state |= vector_mask;
339     } else {
340         irq_state &= ~vector_mask;
341     }
342 
343     pci_set_long(dev->config + msi_mask_off(dev, msi64bit), irq_state);
344 
345     pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit));
346     if (!mask && (pending & vector_mask)) {
347         pending &= ~vector_mask;
348         pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending);
349         msi_notify(dev, vector);
350     }
351 }
352 
353 void msi_notify(PCIDevice *dev, unsigned int vector)
354 {
355     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
356     bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
357     unsigned int nr_vectors = msi_nr_vectors(flags);
358     MSIMessage msg;
359 
360     assert(vector < nr_vectors);
361     if (msi_is_masked(dev, vector)) {
362         assert(flags & PCI_MSI_FLAGS_MASKBIT);
363         pci_long_test_and_set_mask(
364             dev->config + msi_pending_off(dev, msi64bit), 1U << vector);
365         MSI_DEV_PRINTF(dev, "pending vector 0x%x\n", vector);
366         return;
367     }
368 
369     msg = msi_get_message(dev, vector);
370 
371     MSI_DEV_PRINTF(dev,
372                    "notify vector 0x%x"
373                    " address: 0x%"PRIx64" data: 0x%"PRIx32"\n",
374                    vector, msg.address, msg.data);
375     msi_send_message(dev, msg);
376 }
377 
/* Deliver @msg on behalf of @dev through the device's msi_trigger callback. */
void msi_send_message(PCIDevice *dev, MSIMessage msg)
{
    dev->msi_trigger(dev, msg);
}
382 
/*
 * React to a config-space write of @len bytes at @addr that may touch the
 * MSI capability of @dev. Normally called by pci_default_write_config().
 *
 * Returns immediately when @dev has no MSI capability or the written range
 * does not overlap it. Otherwise, once MSI is enabled: deasserts INTx,
 * clamps the enabled vector count to the advertised maximum, drops pending
 * bits of vectors beyond the enabled count and delivers pending interrupts
 * whose vector is unmasked.
 *
 * @val is only used for debug output here; the register state is read back
 * from dev->config (presumably already updated by the caller -- TODO confirm).
 */
void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len)
{
    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
    bool msi_per_vector_mask = flags & PCI_MSI_FLAGS_MASKBIT;
    unsigned int nr_vectors;
    uint8_t log_num_vecs;
    uint8_t log_max_vecs;
    unsigned int vector;
    uint32_t pending;

    /* Nothing to do unless the write overlaps the MSI capability. */
    if (!msi_present(dev) ||
        !ranges_overlap(addr, len, dev->msi_cap, msi_cap_sizeof(flags))) {
        return;
    }

#ifdef MSI_DEBUG
    /* Dump the written range and the current capability registers. */
    MSI_DEV_PRINTF(dev, "addr 0x%"PRIx32" val 0x%"PRIx32" len %d\n",
                   addr, val, len);
    MSI_DEV_PRINTF(dev, "ctrl: 0x%"PRIx16" address: 0x%"PRIx32,
                   flags,
                   pci_get_long(dev->config + msi_address_lo_off(dev)));
    if (msi64bit) {
        fprintf(stderr, " address-hi: 0x%"PRIx32,
                pci_get_long(dev->config + msi_address_hi_off(dev)));
    }
    fprintf(stderr, " data: 0x%"PRIx16,
            pci_get_word(dev->config + msi_data_off(dev, msi64bit)));
    if (flags & PCI_MSI_FLAGS_MASKBIT) {
        fprintf(stderr, " mask 0x%"PRIx32" pending 0x%"PRIx32,
                pci_get_long(dev->config + msi_mask_off(dev, msi64bit)),
                pci_get_long(dev->config + msi_pending_off(dev, msi64bit)));
    }
    fprintf(stderr, "\n");
#endif

    /*
     * Under Xen emulation, report every vector's current message and mask
     * state to the event-channel code. This happens even when MSI is not
     * enabled, since it precedes the enable check below.
     */
    if (xen_mode == XEN_EMULATE) {
        for (vector = 0; vector < msi_nr_vectors(flags); vector++) {
            MSIMessage msg = msi_prepare_message(dev, vector);

            xen_evtchn_snoop_msi(dev, false, vector, msg.address, msg.data,
                                 msi_is_masked(dev, vector));
        }
    }

    if (!(flags & PCI_MSI_FLAGS_ENABLE)) {
        return;
    }

    /*
     * Now MSI is enabled, clear INTx# interrupts.
     * the driver is prohibited from writing enable bit to mask
     * a service request. But the guest OS could do this.
     * So we just discard the interrupts as moderate fallback.
     *
     * 6.8.3.3. Enabling Operation
     *   While enabled for MSI or MSI-X operation, a function is prohibited
     *   from using its INTx# pin (if implemented) to request
     *   service (MSI, MSI-X, and INTx# are mutually exclusive).
     */
    pci_device_deassert_intx(dev);

    /*
     * nr_vectors might be set bigger than capable. So clamp it.
     * This is not legal by spec, so we can do anything we like,
     * just don't crash the host
     */
    log_num_vecs =
        (flags & PCI_MSI_FLAGS_QSIZE) >> ctz32(PCI_MSI_FLAGS_QSIZE);
    log_max_vecs =
        (flags & PCI_MSI_FLAGS_QMASK) >> ctz32(PCI_MSI_FLAGS_QMASK);
    if (log_num_vecs > log_max_vecs) {
        /* Rewrite QSIZE with the advertised maximum and store it back. */
        flags &= ~PCI_MSI_FLAGS_QSIZE;
        flags |= log_max_vecs << ctz32(PCI_MSI_FLAGS_QSIZE);
        pci_set_word(dev->config + msi_flags_off(dev), flags);
    }

    if (!msi_per_vector_mask) {
        /* if per vector masking isn't supported,
           there is no pending interrupt. */
        return;
    }

    /* Uses the (possibly clamped) flags computed above. */
    nr_vectors = msi_nr_vectors(flags);

    /* This will discard pending interrupts, if any. */
    pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit));
    /* Keep only the pending bits of vectors 0 .. nr_vectors - 1. */
    pending &= 0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors);
    pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending);

    /* deliver pending interrupts which are unmasked */
    for (vector = 0; vector < nr_vectors; ++vector) {
        if (msi_is_masked(dev, vector) || !(pending & (1U << vector))) {
            continue;
        }

        /* Clear the pending bit before notifying. */
        pci_long_test_and_clear_mask(
            dev->config + msi_pending_off(dev, msi64bit), 1U << vector);
        msi_notify(dev, vector);
    }
}
485 
486 unsigned int msi_nr_vectors_allocated(const PCIDevice *dev)
487 {
488     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
489     return msi_nr_vectors(flags);
490 }
491