xref: /qemu/hw/intc/spapr_xive_kvm.c (revision dc5e9ac7)
1 /*
2  * QEMU PowerPC sPAPR XIVE interrupt controller model
3  *
4  * Copyright (c) 2017-2019, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/log.h"
12 #include "qemu/error-report.h"
13 #include "qapi/error.h"
14 #include "target/ppc/cpu.h"
15 #include "sysemu/cpus.h"
16 #include "sysemu/kvm.h"
17 #include "hw/ppc/spapr.h"
18 #include "hw/ppc/spapr_cpu_core.h"
19 #include "hw/ppc/spapr_xive.h"
20 #include "hw/ppc/xive.h"
21 #include "kvm_ppc.h"
22 
23 #include <sys/ioctl.h>
24 
25 /*
26  * Helpers for CPU hotplug
27  *
28  * TODO: make a common KVMEnabledCPU layer for XICS and XIVE
29  */
30 typedef struct KVMEnabledCPU {
31     unsigned long vcpu_id;
32     QLIST_ENTRY(KVMEnabledCPU) node;
33 } KVMEnabledCPU;
34 
35 static QLIST_HEAD(, KVMEnabledCPU)
36     kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);
37 
38 static bool kvm_cpu_is_enabled(CPUState *cs)
39 {
40     KVMEnabledCPU *enabled_cpu;
41     unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
42 
43     QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
44         if (enabled_cpu->vcpu_id == vcpu_id) {
45             return true;
46         }
47     }
48     return false;
49 }
50 
51 static void kvm_cpu_enable(CPUState *cs)
52 {
53     KVMEnabledCPU *enabled_cpu;
54     unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
55 
56     enabled_cpu = g_malloc(sizeof(*enabled_cpu));
57     enabled_cpu->vcpu_id = vcpu_id;
58     QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
59 }
60 
61 static void kvm_cpu_disable_all(void)
62 {
63     KVMEnabledCPU *enabled_cpu, *next;
64 
65     QLIST_FOREACH_SAFE(enabled_cpu, &kvm_enabled_cpus, node, next) {
66         QLIST_REMOVE(enabled_cpu, node);
67         g_free(enabled_cpu);
68     }
69 }
70 
71 /*
72  * XIVE Thread Interrupt Management context (KVM)
73  */
74 
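/*
 * The OS ring of the thread interrupt context is saved and restored
 * through the KVM_REG_PPC_VP_STATE one_reg register. Only the first
 * 64-bit word (word0 and word1 of the OS ring) is used by QEMU; the
 * register itself is wider, hence the two-element state[] buffers
 * below.
 */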
75 void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp)
76 {
77     SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
78     uint64_t state[2];
79     int ret;
80 
81     /* The KVM XIVE device is not in use yet */
82     if (xive->fd == -1) {
83         return;
84     }
85 
86     /* word0 and word1 of the OS ring. */
87     state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);
88 
89     ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
90     if (ret != 0) {
91         error_setg_errno(errp, errno,
92                          "XIVE: could not restore KVM state of CPU %ld",
93                          kvm_arch_vcpu_id(tctx->cs));
94     }
95 }
96 
97 void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
98 {
99     SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
100     uint64_t state[2] = { 0 };
101     int ret;
102 
103     /* The KVM XIVE device is not in use */
104     if (xive->fd == -1) {
105         return;
106     }
107 
108     ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
109     if (ret != 0) {
110         error_setg_errno(errp, errno,
111                          "XIVE: could not capture KVM state of CPU %ld",
112                          kvm_arch_vcpu_id(tctx->cs));
113         return;
114     }
115 
116     /* word0 and word1 of the OS ring. */
117     *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
118 }
119 
120 typedef struct {
121     XiveTCTX *tctx;
122     Error *err;
123 } XiveCpuGetState;
124 
125 static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
126                                                  run_on_cpu_data arg)
127 {
128     XiveCpuGetState *s = arg.host_ptr;
129 
130     kvmppc_xive_cpu_get_state(s->tctx, &s->err);
131 }
132 
133 void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
134 {
135     XiveCpuGetState s = {
136         .tctx = tctx,
137         .err = NULL,
138     };
139 
140     /*
141      * Kick the vCPU to make sure it is available for the KVM ioctl.
142      */
143     run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
144                RUN_ON_CPU_HOST_PTR(&s));
145 
146     if (s.err) {
147         error_propagate(errp, s.err);
148         return;
149     }
150 }
151 
152 void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
153 {
154     SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
155     unsigned long vcpu_id;
156     int ret;
157 
158     /* The KVM XIVE device is not in use */
159     if (xive->fd == -1) {
160         return;
161     }
162 
163     /* Check if CPU was hot unplugged and replugged. */
164     if (kvm_cpu_is_enabled(tctx->cs)) {
165         return;
166     }
167 
168     vcpu_id = kvm_arch_vcpu_id(tctx->cs);
169 
170     ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
171                               vcpu_id, 0);
172     if (ret < 0) {
173         error_setg(errp, "XIVE: unable to connect CPU%ld to KVM device: %s",
174                    vcpu_id, strerror(errno));
175         return;
176     }
177 
178     kvm_cpu_enable(tctx->cs);
179 }
180 
181 /*
182  * XIVE Interrupt Source (KVM)
183  */
184 
185 void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
186                                    Error **errp)
187 {
188     uint32_t end_idx;
189     uint32_t end_blk;
190     uint8_t priority;
191     uint32_t server;
192     bool masked;
193     uint32_t eisn;
194     uint64_t kvm_src;
195     Error *local_err = NULL;
196 
197     assert(xive_eas_is_valid(eas));
198 
199     end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
200     end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
201     eisn = xive_get_field64(EAS_END_DATA, eas->w);
202     masked = xive_eas_is_masked(eas);
203 
204     spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
205 
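    /*
     * Pack the routing information (priority, server, masked state
     * and effective IRQ number) into the 64-bit source configuration
     * value expected by the KVM device.
     */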
206     kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
207         KVM_XIVE_SOURCE_PRIORITY_MASK;
208     kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
209         KVM_XIVE_SOURCE_SERVER_MASK;
210     kvm_src |= ((uint64_t) masked << KVM_XIVE_SOURCE_MASKED_SHIFT) &
211         KVM_XIVE_SOURCE_MASKED_MASK;
212     kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
213         KVM_XIVE_SOURCE_EISN_MASK;
214 
215     kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
216                       &kvm_src, true, &local_err);
217     if (local_err) {
218         error_propagate(errp, local_err);
219         return;
220     }
221 }
222 
223 void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp)
224 {
225     kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn,
226                       NULL, true, errp);
227 }
228 
229 /*
230  * At reset, the interrupt sources are simply created and MASKED. We
231  * only need to inform the KVM XIVE device about their type: LSI or
232  * MSI.
233  */
234 void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
235 {
236     SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
237     uint64_t state = 0;
238 
239     /* The KVM XIVE device is not in use */
240     if (xive->fd == -1) {
241         return;
242     }
243 
244     if (xive_source_irq_is_lsi(xsrc, srcno)) {
245         state |= KVM_XIVE_LEVEL_SENSITIVE;
246         if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
247             state |= KVM_XIVE_LEVEL_ASSERTED;
248         }
249     }
250 
251     kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state,
252                       true, errp);
253 }
254 
255 static void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
256 {
257     int i;
258 
259     for (i = 0; i < xsrc->nr_irqs; i++) {
260         Error *local_err = NULL;
261 
262         kvmppc_xive_source_reset_one(xsrc, i, &local_err);
263         if (local_err) {
264             error_propagate(errp, local_err);
265             return;
266         }
267     }
268 }
269 
270 /*
271  * This is used to perform the magic loads on the ESB pages, described
272  * in xive.h.
273  *
274  * Memory barriers should not be needed for loads (no store for now).
275  */
276 static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
277                             uint64_t data, bool write)
278 {
279     uint64_t *addr = xsrc->esb_mmap + xive_source_esb_mgmt(xsrc, srcno) +
280         offset;
281 
282     if (write) {
283         *addr = cpu_to_be64(data);
284         return -1;
285     } else {
286         /* Prevent the compiler from optimizing away the load */
287         volatile uint64_t value = be64_to_cpu(*addr);
288         return value;
289     }
290 }
291 
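/*
 * ESB management loads return the PQ state of the source (prior to
 * any side effect of the load) in the two low-order bits.
 */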
292 static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
293 {
294     return xive_esb_rw(xsrc, srcno, offset, 0, 0) & 0x3;
295 }
296 
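/*
 * A store to the source ESB trigger page injects a new event for the
 * source, as if it had fired.
 */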
297 static void xive_esb_trigger(XiveSource *xsrc, int srcno)
298 {
299     uint64_t *addr = xsrc->esb_mmap + xive_source_esb_page(xsrc, srcno);
300 
301     *addr = 0x0;
302 }
303 
304 uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
305                             uint64_t data, bool write)
306 {
307     if (write) {
308         return xive_esb_rw(xsrc, srcno, offset, data, 1);
309     }
310 
311     /*
312      * Special Load EOI handling for LSI sources. Q bit is never set
313      * and the interrupt should be re-triggered if the level is still
314      * asserted.
315      */
316     if (xive_source_irq_is_lsi(xsrc, srcno) &&
317         offset == XIVE_ESB_LOAD_EOI) {
318         xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00);
319         if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
320             xive_esb_trigger(xsrc, srcno);
321         }
322         return 0;
323     } else {
324         return xive_esb_rw(xsrc, srcno, offset, 0, 0);
325     }
326 }
327 
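/*
 * Collect the PQ state of all sources from the ESB pages and cache it
 * in the XiveSource PQ array.
 */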
328 static void kvmppc_xive_source_get_state(XiveSource *xsrc)
329 {
330     int i;
331 
332     for (i = 0; i < xsrc->nr_irqs; i++) {
333         /* Perform a load without side effect to retrieve the PQ bits */
334         uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
335 
336         /* and save PQ locally */
337         xive_source_esb_set(xsrc, i, pq);
338     }
339 }
340 
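/*
 * qemu_irq handler for sources backed by the KVM device. MSIs only
 * need the rising edge to be forwarded to KVM, whereas LSIs track the
 * assertion level locally and pass it down with
 * KVM_INTERRUPT_SET_LEVEL / KVM_INTERRUPT_UNSET.
 */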
341 void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
342 {
343     XiveSource *xsrc = opaque;
344     SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
345     struct kvm_irq_level args;
346     int rc;
347 
348     /* The KVM XIVE device should be in use */
349     assert(xive->fd != -1);
350 
351     args.irq = srcno;
352     if (!xive_source_irq_is_lsi(xsrc, srcno)) {
353         if (!val) {
354             return;
355         }
356         args.level = KVM_INTERRUPT_SET;
357     } else {
358         if (val) {
359             xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
360             args.level = KVM_INTERRUPT_SET_LEVEL;
361         } else {
362             xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
363             args.level = KVM_INTERRUPT_UNSET;
364         }
365     }
366     rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args);
367     if (rc < 0) {
368         error_report("XIVE: kvm_irq_line() failed: %s", strerror(errno));
369     }
370 }
371 
372 /*
373  * sPAPR XIVE interrupt controller (KVM)
374  */
375 void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
376                                   uint32_t end_idx, XiveEND *end,
377                                   Error **errp)
378 {
379     struct kvm_ppc_xive_eq kvm_eq = { 0 };
380     uint64_t kvm_eq_idx;
381     uint8_t priority;
382     uint32_t server;
383     Error *local_err = NULL;
384 
385     assert(xive_end_is_valid(end));
386 
387     /* Encode the tuple (server, prio) as a KVM EQ index */
388     spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
389 
390     kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
391             KVM_XIVE_EQ_PRIORITY_MASK;
392     kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
393         KVM_XIVE_EQ_SERVER_MASK;
394 
395     kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
396                       &kvm_eq, false, &local_err);
397     if (local_err) {
398         error_propagate(errp, local_err);
399         return;
400     }
401 
402     /*
403      * The EQ index and toggle bit are updated by HW. These are the
404      * only fields from KVM we want to update QEMU with. The other END
405      * fields should already be in the QEMU END table.
406      */
407     end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
408         xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
409 }
410 
411 void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
412                                   uint32_t end_idx, XiveEND *end,
413                                   Error **errp)
414 {
415     struct kvm_ppc_xive_eq kvm_eq = { 0 };
416     uint64_t kvm_eq_idx;
417     uint8_t priority;
418     uint32_t server;
419     Error *local_err = NULL;
420 
421     /*
422      * Build the KVM state from the local END structure.
423      */
424 
425     kvm_eq.flags = 0;
426     if (xive_get_field32(END_W0_UCOND_NOTIFY, end->w0)) {
427         kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
428     }
429 
430     /*
431      * If the hcall is disabling the EQ, set the size and page address
432      * to zero. When migrating, only valid ENDs are taken into
433      * account.
434      */
435     if (xive_end_is_valid(end)) {
436         kvm_eq.qshift = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
437         kvm_eq.qaddr  = xive_end_qaddr(end);
438         /*
439          * The EQ toggle bit and index should only be relevant when
440          * restoring the EQ state
441          */
442         kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1);
443         kvm_eq.qindex  = xive_get_field32(END_W1_PAGE_OFF, end->w1);
444     } else {
445         kvm_eq.qshift = 0;
446         kvm_eq.qaddr  = 0;
447     }
448 
449     /* Encode the tuple (server, prio) as a KVM EQ index */
450     spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
451 
452     kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
453             KVM_XIVE_EQ_PRIORITY_MASK;
454     kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
455         KVM_XIVE_EQ_SERVER_MASK;
456 
457     kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
458                       &kvm_eq, true, &local_err);
459     if (local_err) {
460         error_propagate(errp, local_err);
461         return;
462     }
463 }
464 
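/* Ask the KVM device to reset its XIVE interrupt controller state */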
465 void kvmppc_xive_reset(SpaprXive *xive, Error **errp)
466 {
467     kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET,
468                       NULL, true, errp);
469 }
470 
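/*
 * Retrieve the EQ state (queue index and toggle bit) of all valid
 * ENDs from KVM.
 */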
471 static void kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
472 {
473     Error *local_err = NULL;
474     int i;
475 
476     for (i = 0; i < xive->nr_ends; i++) {
477         if (!xive_end_is_valid(&xive->endt[i])) {
478             continue;
479         }
480 
481         kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
482                                      &xive->endt[i], &local_err);
483         if (local_err) {
484             error_propagate(errp, local_err);
485             return;
486         }
487     }
488 }
489 
490 /*
491  * The primary goal of the XIVE VM change handler is to mark the EQ
492  * pages dirty when all XIVE event notifications have stopped.
493  *
494  * Whenever the VM is stopped, the VM change handler sets the source
495  * PQs to PENDING to stop the flow of events and to possibly catch a
496  * triggered interrupt occurring while the VM is stopped. The previous
497  * state is saved in anticipation of a migration. The XIVE controller
498  * is then synced through KVM to flush any in-flight event
499  * notification and stabilize the EQs.
500  *
501  * At this stage, we can mark the EQ page dirty and let a migration
502  * sequence transfer the EQ pages to the destination, which is done
503  * just after the stop state.
504  *
505  * The previous configuration of the sources is restored when the VM
506  * runs again. If an interrupt was queued while the VM was stopped,
507  * simply generate a trigger.
508  */
509 static void kvmppc_xive_change_state_handler(void *opaque, int running,
510                                              RunState state)
511 {
512     SpaprXive *xive = opaque;
513     XiveSource *xsrc = &xive->source;
514     Error *local_err = NULL;
515     int i;
516 
517     /*
518      * Restore the sources to their initial state. This is called when
519      * the VM resumes after a stop or a migration.
520      */
521     if (running) {
522         for (i = 0; i < xsrc->nr_irqs; i++) {
523             uint8_t pq = xive_source_esb_get(xsrc, i);
524             uint8_t old_pq;
525 
526             old_pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));
527 
528             /*
529              * An interrupt was queued while the VM was stopped,
530              * generate a trigger.
531              */
532             if (pq == XIVE_ESB_RESET && old_pq == XIVE_ESB_QUEUED) {
533                 xive_esb_trigger(xsrc, i);
534             }
535         }
536 
537         return;
538     }
539 
540     /*
541      * Mask the sources, to stop the flow of event notifications, and
542      * save the PQs locally in the XiveSource object. The XiveSource
543      * state will be collected later on by its vmstate handler if a
544      * migration is in progress.
545      */
546     for (i = 0; i < xsrc->nr_irqs; i++) {
547         uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
548 
549         /*
550          * PQ is set to PENDING to possibly catch a triggered
551          * interrupt occurring while the VM is stopped (a hotplug event,
552          * for instance).
553          */
554         if (pq != XIVE_ESB_OFF) {
555             pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_10);
556         }
557         xive_source_esb_set(xsrc, i, pq);
558     }
559 
560     /*
561      * Sync the XIVE controller in KVM, to flush in-flight event
562      * notifications that should be enqueued in the EQs and mark the
563      * XIVE EQ pages dirty to collect all updates.
564      */
565     kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
566                       KVM_DEV_XIVE_EQ_SYNC, NULL, true, &local_err);
567     if (local_err) {
568         error_report_err(local_err);
569         return;
570     }
571 }
572 
573 void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp)
574 {
575     /* The KVM XIVE device is not in use */
576     if (xive->fd == -1) {
577         return;
578     }
579 
580     /*
581      * When the VM is stopped, the sources are masked and the previous
582      * state is saved in anticipation of a migration. We should not
583      * synchronize the source state in that case, otherwise we would
584      * overwrite the saved state.
585      */
586     if (runstate_is_running()) {
587         kvmppc_xive_source_get_state(&xive->source);
588     }
589 
590     /* EAT: there is no extra state to query from KVM */
591 
592     /* ENDT */
593     kvmppc_xive_get_queues(xive, errp);
594 }
595 
596 /*
597  * The SpaprXive 'pre_save' method is called by the vmstate handler of
598  * the SpaprXive model, after the XIVE controller is synced in the VM
599  * change handler.
600  */
601 int kvmppc_xive_pre_save(SpaprXive *xive)
602 {
603     Error *local_err = NULL;
604 
605     /* The KVM XIVE device is not in use */
606     if (xive->fd == -1) {
607         return 0;
608     }
609 
610     /* EAT: there is no extra state to query from KVM */
611 
612     /* ENDT */
613     kvmppc_xive_get_queues(xive, &local_err);
614     if (local_err) {
615         error_report_err(local_err);
616         return -1;
617     }
618 
619     return 0;
620 }
621 
622 /*
623  * The SpaprXive 'post_load' method is not called by a vmstate
624  * handler. It is called at the sPAPR machine level at the end of the
625  * migration sequence by the sPAPR IRQ backend 'post_load' method,
626  * when all XIVE states have been transferred and loaded.
627  */
628 int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
629 {
630     Error *local_err = NULL;
631     CPUState *cs;
632     int i;
633 
634     /* The KVM XIVE device should be in use */
635     assert(xive->fd != -1);
636 
637     /* Restore the ENDT first. The targeting depends on it. */
638     for (i = 0; i < xive->nr_ends; i++) {
639         if (!xive_end_is_valid(&xive->endt[i])) {
640             continue;
641         }
642 
643         kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
644                                      &xive->endt[i], &local_err);
645         if (local_err) {
646             error_report_err(local_err);
647             return -1;
648         }
649     }
650 
651     /* Restore the EAT */
652     for (i = 0; i < xive->nr_irqs; i++) {
653         if (!xive_eas_is_valid(&xive->eat[i])) {
654             continue;
655         }
656 
657         kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err);
658         if (local_err) {
659             error_report_err(local_err);
660             return -1;
661         }
662     }
663 
664     /*
665      * Restore the thread interrupt contexts of initial CPUs.
666      *
667      * The context of hotplugged CPUs is restored later, by the
668      * 'post_load' handler of the XiveTCTX model because they are not
669      * available at the time the SpaprXive 'post_load' method is
670      * called. We can not restore the context of all CPUs in the
671      * 'post_load' handler of XiveTCTX because the machine is not
672      * necessarily connected to the KVM device at that time.
673      */
674     CPU_FOREACH(cs) {
675         PowerPCCPU *cpu = POWERPC_CPU(cs);
676 
677         kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err);
678         if (local_err) {
679             error_report_err(local_err);
680             return -1;
681         }
682     }
683 
684     /* The source states will be restored when the machine starts running */
685     return 0;
686 }
687 
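/*
 * Helper to mmap one of the memory regions exposed by the KVM XIVE
 * device fd (ESB or TIMA pages) at the given page offset.
 */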
688 static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
689                               Error **errp)
690 {
691     void *addr;
692     uint32_t page_shift = 16; /* TODO: fix page_shift */
693 
694     addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd,
695                 pgoff << page_shift);
696     if (addr == MAP_FAILED) {
697         error_setg_errno(errp, errno, "XIVE: unable to set memory mapping");
698         return NULL;
699     }
700 
701     return addr;
702 }
703 
704 /*
705  * All the XIVE memory regions are now backed by mappings from the KVM
706  * XIVE device.
707  */
708 void kvmppc_xive_connect(SpaprXive *xive, Error **errp)
709 {
710     XiveSource *xsrc = &xive->source;
711     Error *local_err = NULL;
712     size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
713     size_t tima_len = 4ull << TM_SHIFT;
714     CPUState *cs;
715 
716     /*
717      * The KVM XIVE device is already in use. This is the case when
718      * rebooting under the XIVE-only interrupt mode.
719      */
720     if (xive->fd != -1) {
721         return;
722     }
723 
724     if (!kvmppc_has_cap_xive()) {
725         error_setg(errp, "IRQ_XIVE capability must be present for KVM");
726         return;
727     }
728 
729     /* First, create the KVM XIVE device */
730     xive->fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false);
731     if (xive->fd < 0) {
732         error_setg_errno(errp, -xive->fd, "XIVE: error creating KVM device");
733         return;
734     }
735 
736     /*
737      * 1. Source ESB pages - KVM mapping
738      */
739     xsrc->esb_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len,
740                                       &local_err);
741     if (local_err) {
742         goto fail;
743     }
744 
745     memory_region_init_ram_device_ptr(&xsrc->esb_mmio_kvm, OBJECT(xsrc),
746                                       "xive.esb", esb_len, xsrc->esb_mmap);
747     memory_region_add_subregion_overlap(&xsrc->esb_mmio, 0,
748                                         &xsrc->esb_mmio_kvm, 1);
749 
750     /*
751      * 2. END ESB pages (No KVM support yet)
752      */
753 
754     /*
755      * 3. TIMA pages - KVM mapping
756      */
757     xive->tm_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len,
758                                      &local_err);
759     if (local_err) {
760         goto fail;
761     }
762     memory_region_init_ram_device_ptr(&xive->tm_mmio_kvm, OBJECT(xive),
763                                       "xive.tima", tima_len, xive->tm_mmap);
764     memory_region_add_subregion_overlap(&xive->tm_mmio, 0,
765                                         &xive->tm_mmio_kvm, 1);
766 
767     xive->change = qemu_add_vm_change_state_handler(
768         kvmppc_xive_change_state_handler, xive);
769 
770     /* Connect the presenters to the initial VCPUs of the machine */
771     CPU_FOREACH(cs) {
772         PowerPCCPU *cpu = POWERPC_CPU(cs);
773 
774         kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, &local_err);
775         if (local_err) {
776             goto fail;
777         }
778     }
779 
780     /* Update the KVM sources */
781     kvmppc_xive_source_reset(xsrc, &local_err);
782     if (local_err) {
783         goto fail;
784     }
785 
786     kvm_kernel_irqchip = true;
787     kvm_msi_via_irqfd_allowed = true;
788     kvm_gsi_direct_mapping = true;
789     return;
790 
791 fail:
792     error_propagate(errp, local_err);
793     kvmppc_xive_disconnect(xive, NULL);
794 }
795 
796 void kvmppc_xive_disconnect(SpaprXive *xive, Error **errp)
797 {
798     XiveSource *xsrc;
799     size_t esb_len;
800 
801     /* The KVM XIVE device is not in use */
802     if (!xive || xive->fd == -1) {
803         return;
804     }
805 
806     if (!kvmppc_has_cap_xive()) {
807         error_setg(errp, "IRQ_XIVE capability must be present for KVM");
808         return;
809     }
810 
811     /* Clear the KVM mappings */
812     xsrc = &xive->source;
813     esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
814 
815     if (xsrc->esb_mmap) {
816         memory_region_del_subregion(&xsrc->esb_mmio, &xsrc->esb_mmio_kvm);
817         object_unparent(OBJECT(&xsrc->esb_mmio_kvm));
818         munmap(xsrc->esb_mmap, esb_len);
819         xsrc->esb_mmap = NULL;
820     }
821 
822     if (xive->tm_mmap) {
823         memory_region_del_subregion(&xive->tm_mmio, &xive->tm_mmio_kvm);
824         object_unparent(OBJECT(&xive->tm_mmio_kvm));
825         munmap(xive->tm_mmap, 4ull << TM_SHIFT);
826         xive->tm_mmap = NULL;
827     }
828 
829     /*
830      * When the KVM device fd is closed, the KVM device is destroyed
831      * and removed from the list of devices of the VM. The VCPU
832      * presenters are also detached from the device.
833      */
834     if (xive->fd != -1) {
835         close(xive->fd);
836         xive->fd = -1;
837     }
838 
839     kvm_kernel_irqchip = false;
840     kvm_msi_via_irqfd_allowed = false;
841     kvm_gsi_direct_mapping = false;
842 
843     /* Clear the local list of presenters (hotplug) */
844     kvm_cpu_disable_all();
845 
846     /* The VM change state handler is not needed anymore */
847     if (xive->change) {
848         qemu_del_vm_change_state_handler(xive->change);
849         xive->change = NULL;
850     }
851 }
852