xref: /qemu/hw/pci/msi.c (revision 370ed600)
1 /*
2  * msi.c
3  *
4  * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
5  *                    VA Linux Systems Japan K.K.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11 
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16 
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "hw/pci/msi.h"
23 #include "hw/xen/xen.h"
24 #include "qemu/range.h"
25 #include "qapi/error.h"
26 
27 #include "hw/i386/kvm/xen_evtchn.h"
28 
/*
 * PCI_MSI_ADDRESS_LO: bits of the low message-address dword that the guest
 * may write.  msi_init() installs this value as the write mask of that
 * register, which leaves bits 1:0 read-only.
 */
#define PCI_MSI_ADDRESS_LO_MASK         (~0x3)

/*
 * Size in bytes of each MSI capability layout, as selected by
 * msi_cap_sizeof(): 32-bit or 64-bit message address, without or with
 * ("M") the per-vector mask and pending registers.
 * If we get rid of cap allocator, we won't need those.
 */
#define PCI_MSI_32_SIZEOF       0x0a
#define PCI_MSI_64_SIZEOF       0x0e
#define PCI_MSI_32M_SIZEOF      0x14
#define PCI_MSI_64M_SIZEOF      0x18

/* Largest vector count msi_init() accepts; also bounds vector indices. */
#define PCI_MSI_VECTORS_MAX     32
39 
/*
 * Flag for interrupt controllers to declare broken MSI/MSI-X support.
 * values: false - broken; true - non-broken.
 *
 * Setting this flag to false will remove MSI/MSI-X capability from all devices
 * (msi_init() refuses to add the capability and returns -ENOTSUP while the
 * flag is false).
 *
 * It is preferable for controllers to set this to true (non-broken) even if
 * they do not actually support MSI/MSI-X: guests normally probe the controller
 * type and do not attempt to enable MSI/MSI-X with interrupt controllers not
 * supporting such, so removing the capability is not required, and
 * it seems cleaner to have a given device look the same for all boards.
 *
 * TODO: some existing controllers violate the above rule. Identify and fix them.
 */
bool msi_nonbroken;
55 
56 /* If we get rid of cap allocator, we won't need this. */
57 static inline uint8_t msi_cap_sizeof(uint16_t flags)
58 {
59     switch (flags & (PCI_MSI_FLAGS_MASKBIT | PCI_MSI_FLAGS_64BIT)) {
60     case PCI_MSI_FLAGS_MASKBIT | PCI_MSI_FLAGS_64BIT:
61         return PCI_MSI_64M_SIZEOF;
62     case PCI_MSI_FLAGS_64BIT:
63         return PCI_MSI_64_SIZEOF;
64     case PCI_MSI_FLAGS_MASKBIT:
65         return PCI_MSI_32M_SIZEOF;
66     case 0:
67         return PCI_MSI_32_SIZEOF;
68     default:
69         abort();
70         break;
71     }
72     return 0;
73 }
74 
/*
 * Debug tracing.  Define MSI_DEBUG (uncomment the line below) to have the
 * MSI_DPRINTF()/MSI_DEV_PRINTF() calls in this file print to stderr,
 * prefixed with the calling function and line number.  Without it both
 * macros expand to an empty statement and their arguments are discarded.
 */
/* #define MSI_DEBUG */

#ifndef MSI_DEBUG
# define MSI_DPRINTF(fmt, ...)  do { } while (0)
#else
# define MSI_DPRINTF(fmt, ...)                                          \
    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
#endif

/* Same as MSI_DPRINTF(), additionally prefixed with the device name/devfn. */
#define MSI_DEV_PRINTF(dev, fmt, ...)                                   \
    MSI_DPRINTF("%s:%x " fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)
85 
86 static inline unsigned int msi_nr_vectors(uint16_t flags)
87 {
88     return 1U <<
89         ((flags & PCI_MSI_FLAGS_QSIZE) >> ctz32(PCI_MSI_FLAGS_QSIZE));
90 }
91 
92 static inline uint8_t msi_flags_off(const PCIDevice* dev)
93 {
94     return dev->msi_cap + PCI_MSI_FLAGS;
95 }
96 
97 static inline uint8_t msi_address_lo_off(const PCIDevice* dev)
98 {
99     return dev->msi_cap + PCI_MSI_ADDRESS_LO;
100 }
101 
102 static inline uint8_t msi_address_hi_off(const PCIDevice* dev)
103 {
104     return dev->msi_cap + PCI_MSI_ADDRESS_HI;
105 }
106 
107 static inline uint8_t msi_data_off(const PCIDevice* dev, bool msi64bit)
108 {
109     return dev->msi_cap + (msi64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32);
110 }
111 
112 static inline uint8_t msi_mask_off(const PCIDevice* dev, bool msi64bit)
113 {
114     return dev->msi_cap + (msi64bit ? PCI_MSI_MASK_64 : PCI_MSI_MASK_32);
115 }
116 
117 static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
118 {
119     return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
120 }
121 
122 /*
123  * Special API for POWER to configure the vectors through
124  * a side channel. Should never be used by devices.
125  */
126 void msi_set_message(PCIDevice *dev, MSIMessage msg)
127 {
128     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
129     bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
130 
131     if (msi64bit) {
132         pci_set_quad(dev->config + msi_address_lo_off(dev), msg.address);
133     } else {
134         pci_set_long(dev->config + msi_address_lo_off(dev), msg.address);
135     }
136     pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
137 }
138 
139 static MSIMessage msi_prepare_message(PCIDevice *dev, unsigned int vector)
140 {
141     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
142     bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
143     unsigned int nr_vectors = msi_nr_vectors(flags);
144     MSIMessage msg;
145 
146     assert(vector < nr_vectors);
147 
148     if (msi64bit) {
149         msg.address = pci_get_quad(dev->config + msi_address_lo_off(dev));
150     } else {
151         msg.address = pci_get_long(dev->config + msi_address_lo_off(dev));
152     }
153 
154     /* upper bit 31:16 is zero */
155     msg.data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
156     if (nr_vectors > 1) {
157         msg.data &= ~(nr_vectors - 1);
158         msg.data |= vector;
159     }
160 
161     return msg;
162 }
163 
164 MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
165 {
166     return dev->msi_prepare_message(dev, vector);
167 }
168 
169 bool msi_enabled(const PCIDevice *dev)
170 {
171     return msi_present(dev) &&
172         (pci_get_word(dev->config + msi_flags_off(dev)) &
173          PCI_MSI_FLAGS_ENABLE);
174 }
175 
176 /*
177  * Make PCI device @dev MSI-capable.
178  * Non-zero @offset puts capability MSI at that offset in PCI config
179  * space.
180  * @nr_vectors is the number of MSI vectors (1, 2, 4, 8, 16 or 32).
181  * If @msi64bit, make the device capable of sending a 64-bit message
182  * address.
183  * If @msi_per_vector_mask, make the device support per-vector masking.
184  * @errp is for returning errors.
185  * Return 0 on success; set @errp and return -errno on error.
186  *
187  * -ENOTSUP means lacking msi support for a msi-capable platform.
188  * -EINVAL means capability overlap, happens when @offset is non-zero,
189  *  also means a programming error, except device assignment, which can check
190  *  if a real HW is broken.
191  */
192 int msi_init(struct PCIDevice *dev, uint8_t offset,
193              unsigned int nr_vectors, bool msi64bit,
194              bool msi_per_vector_mask, Error **errp)
195 {
196     unsigned int vectors_order;
197     uint16_t flags;
198     uint8_t cap_size;
199     int config_offset;
200 
201     if (!msi_nonbroken) {
202         error_setg(errp, "MSI is not supported by interrupt controller");
203         return -ENOTSUP;
204     }
205 
206     MSI_DEV_PRINTF(dev,
207                    "init offset: 0x%"PRIx8" vector: %"PRId8
208                    " 64bit %d mask %d\n",
209                    offset, nr_vectors, msi64bit, msi_per_vector_mask);
210 
211     assert(!(nr_vectors & (nr_vectors - 1)));   /* power of 2 */
212     assert(nr_vectors > 0);
213     assert(nr_vectors <= PCI_MSI_VECTORS_MAX);
214     /* the nr of MSI vectors is up to 32 */
215     vectors_order = ctz32(nr_vectors);
216 
217     flags = vectors_order << ctz32(PCI_MSI_FLAGS_QMASK);
218     if (msi64bit) {
219         flags |= PCI_MSI_FLAGS_64BIT;
220     }
221     if (msi_per_vector_mask) {
222         flags |= PCI_MSI_FLAGS_MASKBIT;
223     }
224 
225     cap_size = msi_cap_sizeof(flags);
226     config_offset = pci_add_capability(dev, PCI_CAP_ID_MSI, offset,
227                                         cap_size, errp);
228     if (config_offset < 0) {
229         return config_offset;
230     }
231 
232     dev->msi_cap = config_offset;
233     dev->cap_present |= QEMU_PCI_CAP_MSI;
234 
235     pci_set_word(dev->config + msi_flags_off(dev), flags);
236     pci_set_word(dev->wmask + msi_flags_off(dev),
237                  PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
238     pci_set_long(dev->wmask + msi_address_lo_off(dev),
239                  PCI_MSI_ADDRESS_LO_MASK);
240     if (msi64bit) {
241         pci_set_long(dev->wmask + msi_address_hi_off(dev), 0xffffffff);
242     }
243     pci_set_word(dev->wmask + msi_data_off(dev, msi64bit), 0xffff);
244 
245     if (msi_per_vector_mask) {
246         /* Make mask bits 0 to nr_vectors - 1 writable. */
247         pci_set_long(dev->wmask + msi_mask_off(dev, msi64bit),
248                      0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors));
249     }
250 
251     dev->msi_prepare_message = msi_prepare_message;
252 
253     return 0;
254 }
255 
256 void msi_uninit(struct PCIDevice *dev)
257 {
258     uint16_t flags;
259     uint8_t cap_size;
260 
261     if (!msi_present(dev)) {
262         return;
263     }
264     flags = pci_get_word(dev->config + msi_flags_off(dev));
265     cap_size = msi_cap_sizeof(flags);
266     pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size);
267     dev->cap_present &= ~QEMU_PCI_CAP_MSI;
268     dev->msi_prepare_message = NULL;
269 
270     MSI_DEV_PRINTF(dev, "uninit\n");
271 }
272 
273 void msi_reset(PCIDevice *dev)
274 {
275     uint16_t flags;
276     bool msi64bit;
277 
278     if (!msi_present(dev)) {
279         return;
280     }
281 
282     flags = pci_get_word(dev->config + msi_flags_off(dev));
283     flags &= ~(PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
284     msi64bit = flags & PCI_MSI_FLAGS_64BIT;
285 
286     pci_set_word(dev->config + msi_flags_off(dev), flags);
287     pci_set_long(dev->config + msi_address_lo_off(dev), 0);
288     if (msi64bit) {
289         pci_set_long(dev->config + msi_address_hi_off(dev), 0);
290     }
291     pci_set_word(dev->config + msi_data_off(dev, msi64bit), 0);
292     if (flags & PCI_MSI_FLAGS_MASKBIT) {
293         pci_set_long(dev->config + msi_mask_off(dev, msi64bit), 0);
294         pci_set_long(dev->config + msi_pending_off(dev, msi64bit), 0);
295     }
296     MSI_DEV_PRINTF(dev, "reset\n");
297 }
298 
299 bool msi_is_masked(const PCIDevice *dev, unsigned int vector)
300 {
301     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
302     uint32_t mask, data;
303     bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
304     assert(vector < PCI_MSI_VECTORS_MAX);
305 
306     if (!(flags & PCI_MSI_FLAGS_MASKBIT)) {
307         return false;
308     }
309 
310     data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
311     if (xen_is_pirq_msi(data)) {
312         return false;
313     }
314 
315     mask = pci_get_long(dev->config +
316                         msi_mask_off(dev, flags & PCI_MSI_FLAGS_64BIT));
317     return mask & (1U << vector);
318 }
319 
320 void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp)
321 {
322     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
323     bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
324     uint32_t irq_state, vector_mask, pending;
325 
326     if (vector >= PCI_MSI_VECTORS_MAX) {
327         error_setg(errp, "msi: vector %d not allocated. max vector is %d",
328                    vector, (PCI_MSI_VECTORS_MAX - 1));
329         return;
330     }
331 
332     vector_mask = (1U << vector);
333 
334     irq_state = pci_get_long(dev->config + msi_mask_off(dev, msi64bit));
335 
336     if (mask) {
337         irq_state |= vector_mask;
338     } else {
339         irq_state &= ~vector_mask;
340     }
341 
342     pci_set_long(dev->config + msi_mask_off(dev, msi64bit), irq_state);
343 
344     pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit));
345     if (!mask && (pending & vector_mask)) {
346         pending &= ~vector_mask;
347         pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending);
348         msi_notify(dev, vector);
349     }
350 }
351 
352 void msi_notify(PCIDevice *dev, unsigned int vector)
353 {
354     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
355     bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
356     unsigned int nr_vectors = msi_nr_vectors(flags);
357     MSIMessage msg;
358 
359     assert(vector < nr_vectors);
360     if (msi_is_masked(dev, vector)) {
361         assert(flags & PCI_MSI_FLAGS_MASKBIT);
362         pci_long_test_and_set_mask(
363             dev->config + msi_pending_off(dev, msi64bit), 1U << vector);
364         MSI_DEV_PRINTF(dev, "pending vector 0x%x\n", vector);
365         return;
366     }
367 
368     msg = msi_get_message(dev, vector);
369 
370     MSI_DEV_PRINTF(dev,
371                    "notify vector 0x%x"
372                    " address: 0x%"PRIx64" data: 0x%"PRIx32"\n",
373                    vector, msg.address, msg.data);
374     msi_send_message(dev, msg);
375 }
376 
377 void msi_send_message(PCIDevice *dev, MSIMessage msg)
378 {
379     dev->msi_trigger(dev, msg);
380 }
381 
382 /* Normally called by pci_default_write_config(). */
383 void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len)
384 {
385     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
386     bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
387     bool msi_per_vector_mask = flags & PCI_MSI_FLAGS_MASKBIT;
388     unsigned int nr_vectors;
389     uint8_t log_num_vecs;
390     uint8_t log_max_vecs;
391     unsigned int vector;
392     uint32_t pending;
393 
394     if (!msi_present(dev) ||
395         !ranges_overlap(addr, len, dev->msi_cap, msi_cap_sizeof(flags))) {
396         return;
397     }
398 
399 #ifdef MSI_DEBUG
400     MSI_DEV_PRINTF(dev, "addr 0x%"PRIx32" val 0x%"PRIx32" len %d\n",
401                    addr, val, len);
402     MSI_DEV_PRINTF(dev, "ctrl: 0x%"PRIx16" address: 0x%"PRIx32,
403                    flags,
404                    pci_get_long(dev->config + msi_address_lo_off(dev)));
405     if (msi64bit) {
406         fprintf(stderr, " address-hi: 0x%"PRIx32,
407                 pci_get_long(dev->config + msi_address_hi_off(dev)));
408     }
409     fprintf(stderr, " data: 0x%"PRIx16,
410             pci_get_word(dev->config + msi_data_off(dev, msi64bit)));
411     if (flags & PCI_MSI_FLAGS_MASKBIT) {
412         fprintf(stderr, " mask 0x%"PRIx32" pending 0x%"PRIx32,
413                 pci_get_long(dev->config + msi_mask_off(dev, msi64bit)),
414                 pci_get_long(dev->config + msi_pending_off(dev, msi64bit)));
415     }
416     fprintf(stderr, "\n");
417 #endif
418 
419     if (xen_mode == XEN_EMULATE) {
420         for (vector = 0; vector < msi_nr_vectors(flags); vector++) {
421             MSIMessage msg = msi_prepare_message(dev, vector);
422 
423             xen_evtchn_snoop_msi(dev, false, vector, msg.address, msg.data,
424                                  msi_is_masked(dev, vector));
425         }
426     }
427 
428     if (!(flags & PCI_MSI_FLAGS_ENABLE)) {
429         return;
430     }
431 
432     /*
433      * Now MSI is enabled, clear INTx# interrupts.
434      * the driver is prohibited from writing enable bit to mask
435      * a service request. But the guest OS could do this.
436      * So we just discard the interrupts as moderate fallback.
437      *
438      * 6.8.3.3. Enabling Operation
439      *   While enabled for MSI or MSI-X operation, a function is prohibited
440      *   from using its INTx# pin (if implemented) to request
441      *   service (MSI, MSI-X, and INTx# are mutually exclusive).
442      */
443     pci_device_deassert_intx(dev);
444 
445     /*
446      * nr_vectors might be set bigger than capable. So clamp it.
447      * This is not legal by spec, so we can do anything we like,
448      * just don't crash the host
449      */
450     log_num_vecs =
451         (flags & PCI_MSI_FLAGS_QSIZE) >> ctz32(PCI_MSI_FLAGS_QSIZE);
452     log_max_vecs =
453         (flags & PCI_MSI_FLAGS_QMASK) >> ctz32(PCI_MSI_FLAGS_QMASK);
454     if (log_num_vecs > log_max_vecs) {
455         flags &= ~PCI_MSI_FLAGS_QSIZE;
456         flags |= log_max_vecs << ctz32(PCI_MSI_FLAGS_QSIZE);
457         pci_set_word(dev->config + msi_flags_off(dev), flags);
458     }
459 
460     if (!msi_per_vector_mask) {
461         /* if per vector masking isn't supported,
462            there is no pending interrupt. */
463         return;
464     }
465 
466     nr_vectors = msi_nr_vectors(flags);
467 
468     /* This will discard pending interrupts, if any. */
469     pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit));
470     pending &= 0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors);
471     pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending);
472 
473     /* deliver pending interrupts which are unmasked */
474     for (vector = 0; vector < nr_vectors; ++vector) {
475         if (msi_is_masked(dev, vector) || !(pending & (1U << vector))) {
476             continue;
477         }
478 
479         pci_long_test_and_clear_mask(
480             dev->config + msi_pending_off(dev, msi64bit), 1U << vector);
481         msi_notify(dev, vector);
482     }
483 }
484 
485 unsigned int msi_nr_vectors_allocated(const PCIDevice *dev)
486 {
487     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
488     return msi_nr_vectors(flags);
489 }
490