xref: /qemu/hw/intc/ioapic.c (revision 423edd9a)
1 /*
2  *  ioapic.c IOAPIC emulation logic
3  *
4  *  Copyright (c) 2004-2005 Fabrice Bellard
5  *
6  *  Split the ioapic logic from apic.c
7  *  Xiantao Zhang <xiantao.zhang@intel.com>
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21  */
22 
23 #include "qemu/osdep.h"
24 #include "qapi/error.h"
25 #include "monitor/monitor.h"
26 #include "hw/i386/apic.h"
27 #include "hw/i386/ioapic.h"
28 #include "hw/i386/ioapic_internal.h"
29 #include "hw/i386/x86.h"
30 #include "hw/intc/i8259.h"
31 #include "hw/pci/msi.h"
32 #include "hw/qdev-properties.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "hw/i386/apic-msidef.h"
36 #include "hw/i386/x86-iommu.h"
37 #include "trace.h"
38 
/*
 * Presumably the bit positions of the delivery-mode, polarity and
 * trigger-mode fields of an APIC message.
 * NOTE(review): none of these three macros is referenced in the visible
 * part of this file -- confirm whether they are still needed.
 */
#define APIC_DELIVERY_MODE_SHIFT 8
#define APIC_POLARITY_SHIFT 14
#define APIC_TRIG_MODE_SHIFT 15

/*
 * Registry of IOAPIC devices: ioapic_realize() stores each device at index
 * ioapic_no, and ioapic_eoi_broadcast() walks the whole array, skipping
 * NULL slots.
 */
static IOAPICCommonState *ioapics[MAX_IOAPICS];

/* global variable from ioapic_common.c */
extern int ioapic_no;
47 
/*
 * Decoded view of one 64-bit redirection table entry, as filled in by
 * ioapic_entry_parse(), together with the MSI address/data pair derived
 * from the decoded fields.
 */
struct ioapic_entry_info {
    /* fields parsed from IOAPIC entries */
    uint8_t masked;         /* mask bit; non-zero means the pin is masked */
    uint8_t trig_mode;      /* trigger-mode bit (see IOAPIC_TRIGGER_EDGE) */
    uint16_t dest_idx;      /* 16-bit destination / interrupt-index field */
    uint8_t dest_mode;      /* destination-mode bit */
    uint8_t delivery_mode;  /* delivery-mode field (e.g. IOAPIC_DM_EXTINT) */
    uint8_t vector;         /* vector; taken from the PIC for ExtINT entries */

    /* MSI message generated from above parsed fields */
    uint32_t addr;          /* MSI address built from dest_idx and dest_mode */
    uint32_t data;          /* MSI data built from vector, trigger and delivery mode */
};
61 
62 static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info)
63 {
64     memset(info, 0, sizeof(*info));
65     info->masked = (entry >> IOAPIC_LVT_MASKED_SHIFT) & 1;
66     info->trig_mode = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1;
67     /*
68      * By default, this would be dest_id[8] + reserved[8]. When IR
69      * is enabled, this would be interrupt_index[15] +
70      * interrupt_format[1]. This field never means anything, but
71      * only used to generate corresponding MSI.
72      */
73     info->dest_idx = (entry >> IOAPIC_LVT_DEST_IDX_SHIFT) & 0xffff;
74     info->dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1;
75     info->delivery_mode = (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) \
76         & IOAPIC_DM_MASK;
77     if (info->delivery_mode == IOAPIC_DM_EXTINT) {
78         info->vector = pic_read_irq(isa_pic);
79     } else {
80         info->vector = entry & IOAPIC_VECTOR_MASK;
81     }
82 
83     info->addr = APIC_DEFAULT_ADDRESS | \
84         (info->dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | \
85         (info->dest_mode << MSI_ADDR_DEST_MODE_SHIFT);
86     info->data = (info->vector << MSI_DATA_VECTOR_SHIFT) | \
87         (info->trig_mode << MSI_DATA_TRIGGER_SHIFT) | \
88         (info->delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT);
89 }
90 
91 static void ioapic_service(IOAPICCommonState *s)
92 {
93     AddressSpace *ioapic_as = X86_MACHINE(qdev_get_machine())->ioapic_as;
94     struct ioapic_entry_info info;
95     uint8_t i;
96     uint32_t mask;
97     uint64_t entry;
98 
99     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
100         mask = 1 << i;
101         if (s->irr & mask) {
102             int coalesce = 0;
103 
104             entry = s->ioredtbl[i];
105             ioapic_entry_parse(entry, &info);
106             if (!info.masked) {
107                 if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
108                     s->irr &= ~mask;
109                 } else {
110                     coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR;
111                     trace_ioapic_set_remote_irr(i);
112                     s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR;
113                 }
114 
115                 if (coalesce) {
116                     /* We are level triggered interrupts, and the
117                      * guest should be still working on previous one,
118                      * so skip it. */
119                     continue;
120                 }
121 
122 #ifdef CONFIG_KVM
123                 if (kvm_irqchip_is_split()) {
124                     if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
125                         kvm_set_irq(kvm_state, i, 1);
126                         kvm_set_irq(kvm_state, i, 0);
127                     } else {
128                         kvm_set_irq(kvm_state, i, 1);
129                     }
130                     continue;
131                 }
132 #endif
133 
134                 /* No matter whether IR is enabled, we translate
135                  * the IOAPIC message into a MSI one, and its
136                  * address space will decide whether we need a
137                  * translation. */
138                 stl_le_phys(ioapic_as, info.addr, info.data);
139             }
140         }
141     }
142 }
143 
/*
 * Number of back-to-back re-deliveries of a still-asserted pin from
 * ioapic_eoi_broadcast() after which the next one is postponed to the
 * delayed-service timer.
 */
#define SUCCESSIVE_IRQ_MAX_COUNT 10000

/*
 * Callback of the delayed-service timer.
 *
 * @opaque: the IOAPICCommonState the timer was created for
 */
static void delayed_ioapic_service_cb(void *opaque)
{
    ioapic_service(opaque);
}
152 
153 static void ioapic_set_irq(void *opaque, int vector, int level)
154 {
155     IOAPICCommonState *s = opaque;
156 
157     /* ISA IRQs map to GSI 1-1 except for IRQ0 which maps
158      * to GSI 2.  GSI maps to ioapic 1-1.  This is not
159      * the cleanest way of doing it but it should work. */
160 
161     trace_ioapic_set_irq(vector, level);
162     ioapic_stat_update_irq(s, vector, level);
163     if (vector == 0) {
164         vector = 2;
165     }
166     if (vector < IOAPIC_NUM_PINS) {
167         uint32_t mask = 1 << vector;
168         uint64_t entry = s->ioredtbl[vector];
169 
170         if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) ==
171             IOAPIC_TRIGGER_LEVEL) {
172             /* level triggered */
173             if (level) {
174                 s->irr |= mask;
175                 if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
176                     ioapic_service(s);
177                 }
178             } else {
179                 s->irr &= ~mask;
180             }
181         } else {
182             /* According to the 82093AA manual, we must ignore edge requests
183              * if the input pin is masked. */
184             if (level && !(entry & IOAPIC_LVT_MASKED)) {
185                 s->irr |= mask;
186                 ioapic_service(s);
187             }
188         }
189     }
190 }
191 
192 static void ioapic_update_kvm_routes(IOAPICCommonState *s)
193 {
194 #ifdef CONFIG_KVM
195     int i;
196 
197     if (kvm_irqchip_is_split()) {
198         for (i = 0; i < IOAPIC_NUM_PINS; i++) {
199             MSIMessage msg;
200             struct ioapic_entry_info info;
201             ioapic_entry_parse(s->ioredtbl[i], &info);
202             if (!info.masked) {
203                 msg.address = info.addr;
204                 msg.data = info.data;
205                 kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL);
206             }
207         }
208         kvm_irqchip_commit_routes(kvm_state);
209     }
210 #endif
211 }
212 
#ifdef CONFIG_KVM
/*
 * IEC notifier registered with the IOMMU by ioapic_machine_done_notify().
 *
 * @private is the IOAPICCommonState passed at registration.  The
 * @global, @index and @mask arguments are ignored: for simplicity all
 * routes are refreshed on every notification.
 */
static void ioapic_iec_notifier(void *private, bool global,
                                uint32_t index, uint32_t mask)
{
    ioapic_update_kvm_routes(private);
}
#endif
222 
223 void ioapic_eoi_broadcast(int vector)
224 {
225     IOAPICCommonState *s;
226     uint64_t entry;
227     int i, n;
228 
229     trace_ioapic_eoi_broadcast(vector);
230 
231     for (i = 0; i < MAX_IOAPICS; i++) {
232         s = ioapics[i];
233         if (!s) {
234             continue;
235         }
236         for (n = 0; n < IOAPIC_NUM_PINS; n++) {
237             entry = s->ioredtbl[n];
238 
239             if ((entry & IOAPIC_VECTOR_MASK) != vector ||
240                 ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) != IOAPIC_TRIGGER_LEVEL) {
241                 continue;
242             }
243 
244             if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
245                 continue;
246             }
247 
248             trace_ioapic_clear_remote_irr(n, vector);
249             s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
250 
251             if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) {
252                 ++s->irq_eoi[n];
253                 if (s->irq_eoi[n] >= SUCCESSIVE_IRQ_MAX_COUNT) {
254                     /*
255                      * Real hardware does not deliver the interrupt immediately
256                      * during eoi broadcast, and this lets a buggy guest make
257                      * slow progress even if it does not correctly handle a
258                      * level-triggered interrupt. Emulate this behavior if we
259                      * detect an interrupt storm.
260                      */
261                     s->irq_eoi[n] = 0;
262                     timer_mod_anticipate(s->delayed_ioapic_service_timer,
263                                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
264                                          NANOSECONDS_PER_SECOND / 100);
265                     trace_ioapic_eoi_delayed_reassert(n);
266                 } else {
267                     ioapic_service(s);
268                 }
269             } else {
270                 s->irq_eoi[n] = 0;
271             }
272         }
273     }
274 }
275 
276 static uint64_t
277 ioapic_mem_read(void *opaque, hwaddr addr, unsigned int size)
278 {
279     IOAPICCommonState *s = opaque;
280     int index;
281     uint32_t val = 0;
282 
283     addr &= 0xff;
284 
285     switch (addr) {
286     case IOAPIC_IOREGSEL:
287         val = s->ioregsel;
288         break;
289     case IOAPIC_IOWIN:
290         if (size != 4) {
291             break;
292         }
293         switch (s->ioregsel) {
294         case IOAPIC_REG_ID:
295         case IOAPIC_REG_ARB:
296             val = s->id << IOAPIC_ID_SHIFT;
297             break;
298         case IOAPIC_REG_VER:
299             val = s->version |
300                 ((IOAPIC_NUM_PINS - 1) << IOAPIC_VER_ENTRIES_SHIFT);
301             break;
302         default:
303             index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
304             if (index >= 0 && index < IOAPIC_NUM_PINS) {
305                 if (s->ioregsel & 1) {
306                     val = s->ioredtbl[index] >> 32;
307                 } else {
308                     val = s->ioredtbl[index] & 0xffffffff;
309                 }
310             }
311         }
312         break;
313     }
314 
315     trace_ioapic_mem_read(addr, s->ioregsel, size, val);
316 
317     return val;
318 }
319 
320 /*
321  * This is to satisfy the hack in Linux kernel. One hack of it is to
322  * simulate clearing the Remote IRR bit of IOAPIC entry using the
323  * following:
324  *
325  * "For IO-APIC's with EOI register, we use that to do an explicit EOI.
326  * Otherwise, we simulate the EOI message manually by changing the trigger
327  * mode to edge and then back to level, with RTE being masked during
328  * this."
329  *
330  * (See linux kernel __eoi_ioapic_pin() comment in commit c0205701)
331  *
332  * This is based on the assumption that, Remote IRR bit will be
333  * cleared by IOAPIC hardware when configured as edge-triggered
334  * interrupts.
335  *
336  * Without this, level-triggered interrupts in IR mode might fail to
337  * work correctly.
338  */
339 static inline void
340 ioapic_fix_edge_remote_irr(uint64_t *entry)
341 {
342     if (!(*entry & IOAPIC_LVT_TRIGGER_MODE)) {
343         /* Edge-triggered interrupts, make sure remote IRR is zero */
344         *entry &= ~((uint64_t)IOAPIC_LVT_REMOTE_IRR);
345     }
346 }
347 
348 static void
349 ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val,
350                  unsigned int size)
351 {
352     IOAPICCommonState *s = opaque;
353     int index;
354 
355     addr &= 0xff;
356     trace_ioapic_mem_write(addr, s->ioregsel, size, val);
357 
358     switch (addr) {
359     case IOAPIC_IOREGSEL:
360         s->ioregsel = val;
361         break;
362     case IOAPIC_IOWIN:
363         if (size != 4) {
364             break;
365         }
366         switch (s->ioregsel) {
367         case IOAPIC_REG_ID:
368             s->id = (val >> IOAPIC_ID_SHIFT) & IOAPIC_ID_MASK;
369             break;
370         case IOAPIC_REG_VER:
371         case IOAPIC_REG_ARB:
372             break;
373         default:
374             index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
375             if (index >= 0 && index < IOAPIC_NUM_PINS) {
376                 uint64_t ro_bits = s->ioredtbl[index] & IOAPIC_RO_BITS;
377                 if (s->ioregsel & 1) {
378                     s->ioredtbl[index] &= 0xffffffff;
379                     s->ioredtbl[index] |= (uint64_t)val << 32;
380                 } else {
381                     s->ioredtbl[index] &= ~0xffffffffULL;
382                     s->ioredtbl[index] |= val;
383                 }
384                 /* restore RO bits */
385                 s->ioredtbl[index] &= IOAPIC_RW_BITS;
386                 s->ioredtbl[index] |= ro_bits;
387                 s->irq_eoi[index] = 0;
388                 ioapic_fix_edge_remote_irr(&s->ioredtbl[index]);
389                 ioapic_service(s);
390             }
391         }
392         break;
393     case IOAPIC_EOI:
394         /* Explicit EOI is only supported for IOAPIC version 0x20 */
395         if (size != 4 || s->version != 0x20) {
396             break;
397         }
398         ioapic_eoi_broadcast(val);
399         break;
400     }
401 
402     ioapic_update_kvm_routes(s);
403 }
404 
/*
 * Access callbacks of the IOAPIC MMIO region; ioapic_realize() attaches
 * them to s->io_memory via memory_region_init_io().
 */
static const MemoryRegionOps ioapic_io_ops = {
    .read = ioapic_mem_read,
    .write = ioapic_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
410 
411 static void ioapic_machine_done_notify(Notifier *notifier, void *data)
412 {
413 #ifdef CONFIG_KVM
414     IOAPICCommonState *s = container_of(notifier, IOAPICCommonState,
415                                         machine_done);
416 
417     if (kvm_irqchip_is_split()) {
418         X86IOMMUState *iommu = x86_iommu_get_default();
419         if (iommu) {
420             /* Register this IOAPIC with IOMMU IEC notifier, so that
421              * when there are IR invalidates, we can be notified to
422              * update kernel IR cache. */
423             x86_iommu_iec_register_notifier(iommu, ioapic_iec_notifier, s);
424         }
425     }
426 #endif
427 }
428 
/* Default of the "version" property (see ioapic_properties). */
#define IOAPIC_VER_DEF 0x20

/*
 * Realize hook of the emulated IOAPIC.
 *
 * @dev:  the device being realized
 * @errp: set when the "version" property is neither 0x11 nor 0x20; in
 *        that case nothing else is initialized
 *
 * Creates the 0x1000-byte "ioapic" MMIO region, the delayed-service
 * timer and the IOAPIC_NUM_PINS GPIO inputs, records the device in
 * ioapics[] at index ioapic_no and installs the machine-init-done
 * notifier.
 */
static void ioapic_realize(DeviceState *dev, Error **errp)
{
    IOAPICCommonState *s = IOAPIC_COMMON(dev);

    /* Only these two versions are accepted. */
    if (s->version != 0x11 && s->version != 0x20) {
        error_setg(errp, "IOAPIC only supports version 0x11 or 0x20 "
                   "(default: 0x%x).", IOAPIC_VER_DEF);
        return;
    }

    memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s,
                          "ioapic", 0x1000);

    /* Armed by ioapic_eoi_broadcast() to postpone a re-delivery. */
    s->delayed_ioapic_service_timer =
        timer_new_ns(QEMU_CLOCK_VIRTUAL, delayed_ioapic_service_cb, s);

    /* One input line per pin, all handled by ioapic_set_irq(). */
    qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS);

    /* Make the device visible to ioapic_eoi_broadcast(). */
    ioapics[ioapic_no] = s;
    s->machine_done.notify = ioapic_machine_done_notify;
    qemu_add_machine_init_done_notifier(&s->machine_done);
}
453 
454 static void ioapic_unrealize(DeviceState *dev, Error **errp)
455 {
456     IOAPICCommonState *s = IOAPIC_COMMON(dev);
457 
458     timer_del(s->delayed_ioapic_service_timer);
459     timer_free(s->delayed_ioapic_service_timer);
460 }
461 
/*
 * Device properties.  "version" is stored in IOAPICCommonState.version,
 * defaults to IOAPIC_VER_DEF and is validated by ioapic_realize().
 */
static Property ioapic_properties[] = {
    DEFINE_PROP_UINT8("version", IOAPICCommonState, version, IOAPIC_VER_DEF),
    DEFINE_PROP_END_OF_LIST(),
};
466 
467 static void ioapic_class_init(ObjectClass *klass, void *data)
468 {
469     IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass);
470     DeviceClass *dc = DEVICE_CLASS(klass);
471 
472     k->realize = ioapic_realize;
473     k->unrealize = ioapic_unrealize;
474     /*
475      * If APIC is in kernel, we need to update the kernel cache after
476      * migration, otherwise first 24 gsi routes will be invalid.
477      */
478     k->post_load = ioapic_update_kvm_routes;
479     dc->reset = ioapic_reset_common;
480     device_class_set_props(dc, ioapic_properties);
481 }
482 
/*
 * Type description of TYPE_IOAPIC, a child of TYPE_IOAPIC_COMMON.  The
 * instance size is that of IOAPICCommonState, i.e. this type declares no
 * instance state beyond the common structure.
 */
static const TypeInfo ioapic_info = {
    .name          = TYPE_IOAPIC,
    .parent        = TYPE_IOAPIC_COMMON,
    .instance_size = sizeof(IOAPICCommonState),
    .class_init    = ioapic_class_init,
};
489 
/* Register the IOAPIC type described by ioapic_info. */
static void ioapic_register_types(void)
{
    type_register_static(&ioapic_info);
}

/* Hand the registration function to the type_init() hook. */
type_init(ioapic_register_types)
496