xref: /qemu/hw/xen/xen-hvm-common.c (revision 195801d7)
1 #include "qemu/osdep.h"
2 #include "qemu/units.h"
3 #include "qapi/error.h"
4 #include "trace.h"
5 
6 #include "hw/pci/pci_host.h"
7 #include "hw/xen/xen-hvm-common.h"
8 #include "hw/xen/xen-bus.h"
9 #include "hw/boards.h"
10 #include "hw/xen/arch_hvm.h"
11 
/*
 * Global memory region.  xen_ram_alloc() and xen_set_memory() in this file
 * return early when handed this region, so it is neither populated nor
 * (un)mapped through those paths.
 */
MemoryRegion ram_memory;
13 
14 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
15                    Error **errp)
16 {
17     unsigned long nr_pfn;
18     xen_pfn_t *pfn_list;
19     int i;
20 
21     if (runstate_check(RUN_STATE_INMIGRATE)) {
22         /* RAM already populated in Xen */
23         fprintf(stderr, "%s: do not alloc "RAM_ADDR_FMT
24                 " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE\n",
25                 __func__, size, ram_addr);
26         return;
27     }
28 
29     if (mr == &ram_memory) {
30         return;
31     }
32 
33     trace_xen_ram_alloc(ram_addr, size);
34 
35     nr_pfn = size >> TARGET_PAGE_BITS;
36     pfn_list = g_new(xen_pfn_t, nr_pfn);
37 
38     for (i = 0; i < nr_pfn; i++) {
39         pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
40     }
41 
42     if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
43         error_setg(errp, "xen: failed to populate ram at " RAM_ADDR_FMT,
44                    ram_addr);
45     }
46 
47     g_free(pfn_list);
48 }
49 
50 static void xen_set_memory(struct MemoryListener *listener,
51                            MemoryRegionSection *section,
52                            bool add)
53 {
54     XenIOState *state = container_of(listener, XenIOState, memory_listener);
55 
56     if (section->mr == &ram_memory) {
57         return;
58     } else {
59         if (add) {
60             xen_map_memory_section(xen_domid, state->ioservid,
61                                    section);
62         } else {
63             xen_unmap_memory_section(xen_domid, state->ioservid,
64                                      section);
65         }
66     }
67 
68     arch_xen_set_memory(state, section, add);
69 }
70 
71 void xen_region_add(MemoryListener *listener,
72                            MemoryRegionSection *section)
73 {
74     memory_region_ref(section->mr);
75     xen_set_memory(listener, section, true);
76 }
77 
78 void xen_region_del(MemoryListener *listener,
79                            MemoryRegionSection *section)
80 {
81     xen_set_memory(listener, section, false);
82     memory_region_unref(section->mr);
83 }
84 
85 void xen_io_add(MemoryListener *listener,
86                        MemoryRegionSection *section)
87 {
88     XenIOState *state = container_of(listener, XenIOState, io_listener);
89     MemoryRegion *mr = section->mr;
90 
91     if (mr->ops == &unassigned_io_ops) {
92         return;
93     }
94 
95     memory_region_ref(mr);
96 
97     xen_map_io_section(xen_domid, state->ioservid, section);
98 }
99 
100 void xen_io_del(MemoryListener *listener,
101                        MemoryRegionSection *section)
102 {
103     XenIOState *state = container_of(listener, XenIOState, io_listener);
104     MemoryRegion *mr = section->mr;
105 
106     if (mr->ops == &unassigned_io_ops) {
107         return;
108     }
109 
110     xen_unmap_io_section(xen_domid, state->ioservid, section);
111 
112     memory_region_unref(mr);
113 }
114 
115 void xen_device_realize(DeviceListener *listener,
116                                DeviceState *dev)
117 {
118     XenIOState *state = container_of(listener, XenIOState, device_listener);
119 
120     if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
121         PCIDevice *pci_dev = PCI_DEVICE(dev);
122         XenPciDevice *xendev = g_new(XenPciDevice, 1);
123 
124         xendev->pci_dev = pci_dev;
125         xendev->sbdf = PCI_BUILD_BDF(pci_dev_bus_num(pci_dev),
126                                      pci_dev->devfn);
127         QLIST_INSERT_HEAD(&state->dev_list, xendev, entry);
128 
129         xen_map_pcidev(xen_domid, state->ioservid, pci_dev);
130     }
131 }
132 
133 void xen_device_unrealize(DeviceListener *listener,
134                                  DeviceState *dev)
135 {
136     XenIOState *state = container_of(listener, XenIOState, device_listener);
137 
138     if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
139         PCIDevice *pci_dev = PCI_DEVICE(dev);
140         XenPciDevice *xendev, *next;
141 
142         xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev);
143 
144         QLIST_FOREACH_SAFE(xendev, &state->dev_list, entry, next) {
145             if (xendev->pci_dev == pci_dev) {
146                 QLIST_REMOVE(xendev, entry);
147                 g_free(xendev);
148                 break;
149             }
150         }
151     }
152 }
153 
154 MemoryListener xen_io_listener = {
155     .name = "xen-io",
156     .region_add = xen_io_add,
157     .region_del = xen_io_del,
158     .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
159 };
160 
161 DeviceListener xen_device_listener = {
162     .realize = xen_device_realize,
163     .unrealize = xen_device_unrealize,
164 };
165 
166 /* get the ioreq packets from share mem */
167 static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
168 {
169     ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu);
170 
171     if (req->state != STATE_IOREQ_READY) {
172         DPRINTF("I/O request not ready: "
173                 "%x, ptr: %x, port: %"PRIx64", "
174                 "data: %"PRIx64", count: %u, size: %u\n",
175                 req->state, req->data_is_ptr, req->addr,
176                 req->data, req->count, req->size);
177         return NULL;
178     }
179 
180     xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */
181 
182     req->state = STATE_IOREQ_INPROCESS;
183     return req;
184 }
185 
186 /* use poll to get the port notification */
187 /* ioreq_vec--out,the */
188 /* retval--the number of ioreq packet */
189 static ioreq_t *cpu_get_ioreq(XenIOState *state)
190 {
191     MachineState *ms = MACHINE(qdev_get_machine());
192     unsigned int max_cpus = ms->smp.max_cpus;
193     int i;
194     evtchn_port_t port;
195 
196     port = qemu_xen_evtchn_pending(state->xce_handle);
197     if (port == state->bufioreq_local_port) {
198         timer_mod(state->buffered_io_timer,
199                 BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
200         return NULL;
201     }
202 
203     if (port != -1) {
204         for (i = 0; i < max_cpus; i++) {
205             if (state->ioreq_local_port[i] == port) {
206                 break;
207             }
208         }
209 
210         if (i == max_cpus) {
211             hw_error("Fatal error while trying to get io event!\n");
212         }
213 
214         /* unmask the wanted port again */
215         qemu_xen_evtchn_unmask(state->xce_handle, port);
216 
217         /* get the io packet from shared memory */
218         state->send_vcpu = i;
219         return cpu_get_ioreq_from_shared_memory(state, i);
220     }
221 
222     /* read error or read nothing */
223     return NULL;
224 }
225 
/*
 * Perform a port read of @size bytes (1, 2 or 4) at @addr and return the
 * value.  Any other size is reported through hw_error().
 */
static uint32_t do_inp(uint32_t addr, unsigned long size)
{
    if (size == 1) {
        return cpu_inb(addr);
    }
    if (size == 2) {
        return cpu_inw(addr);
    }
    if (size == 4) {
        return cpu_inl(addr);
    }

    hw_error("inp: bad size: %04x %lx", addr, size);
}
239 
/*
 * Perform a port write of @val, @size bytes wide (1, 2 or 4), at @addr.
 * Any other size is reported through hw_error().
 */
static void do_outp(uint32_t addr,
        unsigned long size, uint32_t val)
{
    /*
     * Call the helpers as plain statements: ISO C does not allow
     * "return <expression>;" in a function returning void.
     */
    switch (size) {
    case 1:
        cpu_outb(addr, val);
        break;
    case 2:
        cpu_outw(addr, val);
        break;
    case 4:
        cpu_outl(addr, val);
        break;
    default:
        hw_error("outp: bad size: %04x %lx", addr, size);
    }
}
254 
255 /*
256  * Helper functions which read/write an object from/to physical guest
257  * memory, as part of the implementation of an ioreq.
258  *
259  * Equivalent to
260  *   cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i,
261  *                          val, req->size, 0/1)
262  * except without the integer overflow problems.
263  */
264 static void rw_phys_req_item(hwaddr addr,
265                              ioreq_t *req, uint32_t i, void *val, int rw)
266 {
267     /* Do everything unsigned so overflow just results in a truncated result
268      * and accesses to undesired parts of guest memory, which is up
269      * to the guest */
270     hwaddr offset = (hwaddr)req->size * i;
271     if (req->df) {
272         addr -= offset;
273     } else {
274         addr += offset;
275     }
276     cpu_physical_memory_rw(addr, val, req->size, rw);
277 }
278 
279 static inline void read_phys_req_item(hwaddr addr,
280                                       ioreq_t *req, uint32_t i, void *val)
281 {
282     rw_phys_req_item(addr, req, i, val, 0);
283 }
284 static inline void write_phys_req_item(hwaddr addr,
285                                        ioreq_t *req, uint32_t i, void *val)
286 {
287     rw_phys_req_item(addr, req, i, val, 1);
288 }
289 
290 
291 void cpu_ioreq_pio(ioreq_t *req)
292 {
293     uint32_t i;
294 
295     trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr,
296                          req->data, req->count, req->size);
297 
298     if (req->size > sizeof(uint32_t)) {
299         hw_error("PIO: bad size (%u)", req->size);
300     }
301 
302     if (req->dir == IOREQ_READ) {
303         if (!req->data_is_ptr) {
304             req->data = do_inp(req->addr, req->size);
305             trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr,
306                                          req->size);
307         } else {
308             uint32_t tmp;
309 
310             for (i = 0; i < req->count; i++) {
311                 tmp = do_inp(req->addr, req->size);
312                 write_phys_req_item(req->data, req, i, &tmp);
313             }
314         }
315     } else if (req->dir == IOREQ_WRITE) {
316         if (!req->data_is_ptr) {
317             trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr,
318                                           req->size);
319             do_outp(req->addr, req->size, req->data);
320         } else {
321             for (i = 0; i < req->count; i++) {
322                 uint32_t tmp = 0;
323 
324                 read_phys_req_item(req->data, req, i, &tmp);
325                 do_outp(req->addr, req->size, tmp);
326             }
327         }
328     }
329 }
330 
331 static void cpu_ioreq_move(ioreq_t *req)
332 {
333     uint32_t i;
334 
335     trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr,
336                          req->data, req->count, req->size);
337 
338     if (req->size > sizeof(req->data)) {
339         hw_error("MMIO: bad size (%u)", req->size);
340     }
341 
342     if (!req->data_is_ptr) {
343         if (req->dir == IOREQ_READ) {
344             for (i = 0; i < req->count; i++) {
345                 read_phys_req_item(req->addr, req, i, &req->data);
346             }
347         } else if (req->dir == IOREQ_WRITE) {
348             for (i = 0; i < req->count; i++) {
349                 write_phys_req_item(req->addr, req, i, &req->data);
350             }
351         }
352     } else {
353         uint64_t tmp;
354 
355         if (req->dir == IOREQ_READ) {
356             for (i = 0; i < req->count; i++) {
357                 read_phys_req_item(req->addr, req, i, &tmp);
358                 write_phys_req_item(req->data, req, i, &tmp);
359             }
360         } else if (req->dir == IOREQ_WRITE) {
361             for (i = 0; i < req->count; i++) {
362                 read_phys_req_item(req->data, req, i, &tmp);
363                 write_phys_req_item(req->addr, req, i, &tmp);
364             }
365         }
366     }
367 }
368 
369 static void cpu_ioreq_config(XenIOState *state, ioreq_t *req)
370 {
371     uint32_t sbdf = req->addr >> 32;
372     uint32_t reg = req->addr;
373     XenPciDevice *xendev;
374 
375     if (req->size != sizeof(uint8_t) && req->size != sizeof(uint16_t) &&
376         req->size != sizeof(uint32_t)) {
377         hw_error("PCI config access: bad size (%u)", req->size);
378     }
379 
380     if (req->count != 1) {
381         hw_error("PCI config access: bad count (%u)", req->count);
382     }
383 
384     QLIST_FOREACH(xendev, &state->dev_list, entry) {
385         if (xendev->sbdf != sbdf) {
386             continue;
387         }
388 
389         if (!req->data_is_ptr) {
390             if (req->dir == IOREQ_READ) {
391                 req->data = pci_host_config_read_common(
392                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
393                     req->size);
394                 trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
395                                             req->size, req->data);
396             } else if (req->dir == IOREQ_WRITE) {
397                 trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
398                                              req->size, req->data);
399                 pci_host_config_write_common(
400                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
401                     req->data, req->size);
402             }
403         } else {
404             uint32_t tmp;
405 
406             if (req->dir == IOREQ_READ) {
407                 tmp = pci_host_config_read_common(
408                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
409                     req->size);
410                 trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
411                                             req->size, tmp);
412                 write_phys_req_item(req->data, req, 0, &tmp);
413             } else if (req->dir == IOREQ_WRITE) {
414                 read_phys_req_item(req->data, req, 0, &tmp);
415                 trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
416                                              req->size, tmp);
417                 pci_host_config_write_common(
418                     xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
419                     tmp, req->size);
420             }
421         }
422     }
423 }
424 
425 static void handle_ioreq(XenIOState *state, ioreq_t *req)
426 {
427     trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr,
428                        req->addr, req->data, req->count, req->size);
429 
430     if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) &&
431             (req->size < sizeof (target_ulong))) {
432         req->data &= ((target_ulong) 1 << (8 * req->size)) - 1;
433     }
434 
435     if (req->dir == IOREQ_WRITE)
436         trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr,
437                                  req->addr, req->data, req->count, req->size);
438 
439     switch (req->type) {
440         case IOREQ_TYPE_PIO:
441             cpu_ioreq_pio(req);
442             break;
443         case IOREQ_TYPE_COPY:
444             cpu_ioreq_move(req);
445             break;
446         case IOREQ_TYPE_TIMEOFFSET:
447             break;
448         case IOREQ_TYPE_INVALIDATE:
449             xen_invalidate_map_cache();
450             break;
451         case IOREQ_TYPE_PCI_CONFIG:
452             cpu_ioreq_config(state, req);
453             break;
454         default:
455             arch_handle_ioreq(state, req);
456     }
457     if (req->dir == IOREQ_READ) {
458         trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr,
459                                 req->addr, req->data, req->count, req->size);
460     }
461 }
462 
463 static bool handle_buffered_iopage(XenIOState *state)
464 {
465     buffered_iopage_t *buf_page = state->buffered_io_page;
466     buf_ioreq_t *buf_req = NULL;
467     bool handled_ioreq = false;
468     ioreq_t req;
469     int qw;
470 
471     if (!buf_page) {
472         return 0;
473     }
474 
475     memset(&req, 0x00, sizeof(req));
476     req.state = STATE_IOREQ_READY;
477     req.count = 1;
478     req.dir = IOREQ_WRITE;
479 
480     for (;;) {
481         uint32_t rdptr = buf_page->read_pointer, wrptr;
482 
483         xen_rmb();
484         wrptr = buf_page->write_pointer;
485         xen_rmb();
486         if (rdptr != buf_page->read_pointer) {
487             continue;
488         }
489         if (rdptr == wrptr) {
490             break;
491         }
492         buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
493         req.size = 1U << buf_req->size;
494         req.addr = buf_req->addr;
495         req.data = buf_req->data;
496         req.type = buf_req->type;
497         xen_rmb();
498         qw = (req.size == 8);
499         if (qw) {
500             if (rdptr + 1 == wrptr) {
501                 hw_error("Incomplete quad word buffered ioreq");
502             }
503             buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
504                                            IOREQ_BUFFER_SLOT_NUM];
505             req.data |= ((uint64_t)buf_req->data) << 32;
506             xen_rmb();
507         }
508 
509         handle_ioreq(state, &req);
510 
511         /* Only req.data may get updated by handle_ioreq(), albeit even that
512          * should not happen as such data would never make it to the guest (we
513          * can only usefully see writes here after all).
514          */
515         assert(req.state == STATE_IOREQ_READY);
516         assert(req.count == 1);
517         assert(req.dir == IOREQ_WRITE);
518         assert(!req.data_is_ptr);
519 
520         qatomic_add(&buf_page->read_pointer, qw + 1);
521         handled_ioreq = true;
522     }
523 
524     return handled_ioreq;
525 }
526 
527 static void handle_buffered_io(void *opaque)
528 {
529     XenIOState *state = opaque;
530 
531     if (handle_buffered_iopage(state)) {
532         timer_mod(state->buffered_io_timer,
533                 BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
534     } else {
535         timer_del(state->buffered_io_timer);
536         qemu_xen_evtchn_unmask(state->xce_handle, state->bufioreq_local_port);
537     }
538 }
539 
540 static void cpu_handle_ioreq(void *opaque)
541 {
542     XenIOState *state = opaque;
543     ioreq_t *req = cpu_get_ioreq(state);
544 
545     handle_buffered_iopage(state);
546     if (req) {
547         ioreq_t copy = *req;
548 
549         xen_rmb();
550         handle_ioreq(state, &copy);
551         req->data = copy.data;
552 
553         if (req->state != STATE_IOREQ_INPROCESS) {
554             fprintf(stderr, "Badness in I/O request ... not in service?!: "
555                     "%x, ptr: %x, port: %"PRIx64", "
556                     "data: %"PRIx64", count: %u, size: %u, type: %u\n",
557                     req->state, req->data_is_ptr, req->addr,
558                     req->data, req->count, req->size, req->type);
559             destroy_hvm_domain(false);
560             return;
561         }
562 
563         xen_wmb(); /* Update ioreq contents /then/ update state. */
564 
565         /*
566          * We do this before we send the response so that the tools
567          * have the opportunity to pick up on the reset before the
568          * guest resumes and does a hlt with interrupts disabled which
569          * causes Xen to powerdown the domain.
570          */
571         if (runstate_is_running()) {
572             ShutdownCause request;
573 
574             if (qemu_shutdown_requested_get()) {
575                 destroy_hvm_domain(false);
576             }
577             request = qemu_reset_requested_get();
578             if (request) {
579                 qemu_system_reset(request);
580                 destroy_hvm_domain(true);
581             }
582         }
583 
584         req->state = STATE_IORESP_READY;
585         qemu_xen_evtchn_notify(state->xce_handle,
586                                state->ioreq_local_port[state->send_vcpu]);
587     }
588 }
589 
590 static void xen_main_loop_prepare(XenIOState *state)
591 {
592     int evtchn_fd = -1;
593 
594     if (state->xce_handle != NULL) {
595         evtchn_fd = qemu_xen_evtchn_fd(state->xce_handle);
596     }
597 
598     state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
599                                                  state);
600 
601     if (evtchn_fd != -1) {
602         CPUState *cpu_state;
603 
604         DPRINTF("%s: Init cpu_by_vcpu_id\n", __func__);
605         CPU_FOREACH(cpu_state) {
606             DPRINTF("%s: cpu_by_vcpu_id[%d]=%p\n",
607                     __func__, cpu_state->cpu_index, cpu_state);
608             state->cpu_by_vcpu_id[cpu_state->cpu_index] = cpu_state;
609         }
610         qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
611     }
612 }
613 
614 
615 void xen_hvm_change_state_handler(void *opaque, bool running,
616                                          RunState rstate)
617 {
618     XenIOState *state = opaque;
619 
620     if (running) {
621         xen_main_loop_prepare(state);
622     }
623 
624     xen_set_ioreq_server_state(xen_domid,
625                                state->ioservid,
626                                running);
627 }
628 
629 void xen_exit_notifier(Notifier *n, void *data)
630 {
631     XenIOState *state = container_of(n, XenIOState, exit);
632 
633     xen_destroy_ioreq_server(xen_domid, state->ioservid);
634     if (state->fres != NULL) {
635         xenforeignmemory_unmap_resource(xen_fmem, state->fres);
636     }
637 
638     qemu_xen_evtchn_close(state->xce_handle);
639     xs_daemon_close(state->xenstore);
640 }
641 
642 static int xen_map_ioreq_server(XenIOState *state)
643 {
644     void *addr = NULL;
645     xen_pfn_t ioreq_pfn;
646     xen_pfn_t bufioreq_pfn;
647     evtchn_port_t bufioreq_evtchn;
648     int rc;
649 
650     /*
651      * Attempt to map using the resource API and fall back to normal
652      * foreign mapping if this is not supported.
653      */
654     QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0);
655     QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1);
656     state->fres = xenforeignmemory_map_resource(xen_fmem, xen_domid,
657                                          XENMEM_resource_ioreq_server,
658                                          state->ioservid, 0, 2,
659                                          &addr,
660                                          PROT_READ | PROT_WRITE, 0);
661     if (state->fres != NULL) {
662         trace_xen_map_resource_ioreq(state->ioservid, addr);
663         state->buffered_io_page = addr;
664         state->shared_page = addr + XC_PAGE_SIZE;
665     } else if (errno != EOPNOTSUPP) {
666         error_report("failed to map ioreq server resources: error %d handle=%p",
667                      errno, xen_xc);
668         return -1;
669     }
670 
671     rc = xen_get_ioreq_server_info(xen_domid, state->ioservid,
672                                    (state->shared_page == NULL) ?
673                                    &ioreq_pfn : NULL,
674                                    (state->buffered_io_page == NULL) ?
675                                    &bufioreq_pfn : NULL,
676                                    &bufioreq_evtchn);
677     if (rc < 0) {
678         error_report("failed to get ioreq server info: error %d handle=%p",
679                      errno, xen_xc);
680         return rc;
681     }
682 
683     if (state->shared_page == NULL) {
684         DPRINTF("shared page at pfn %lx\n", ioreq_pfn);
685 
686         state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid,
687                                                   PROT_READ | PROT_WRITE,
688                                                   1, &ioreq_pfn, NULL);
689         if (state->shared_page == NULL) {
690             error_report("map shared IO page returned error %d handle=%p",
691                          errno, xen_xc);
692         }
693     }
694 
695     if (state->buffered_io_page == NULL) {
696         DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn);
697 
698         state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid,
699                                                        PROT_READ | PROT_WRITE,
700                                                        1, &bufioreq_pfn,
701                                                        NULL);
702         if (state->buffered_io_page == NULL) {
703             error_report("map buffered IO page returned error %d", errno);
704             return -1;
705         }
706     }
707 
708     if (state->shared_page == NULL || state->buffered_io_page == NULL) {
709         return -1;
710     }
711 
712     DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn);
713 
714     state->bufioreq_remote_port = bufioreq_evtchn;
715 
716     return 0;
717 }
718 
719 void destroy_hvm_domain(bool reboot)
720 {
721     xc_interface *xc_handle;
722     int sts;
723     int rc;
724 
725     unsigned int reason = reboot ? SHUTDOWN_reboot : SHUTDOWN_poweroff;
726 
727     if (xen_dmod) {
728         rc = xendevicemodel_shutdown(xen_dmod, xen_domid, reason);
729         if (!rc) {
730             return;
731         }
732         if (errno != ENOTTY /* old Xen */) {
733             error_report("xendevicemodel_shutdown failed with error %d", errno);
734         }
735         /* well, try the old thing then */
736     }
737 
738     xc_handle = xc_interface_open(0, 0, 0);
739     if (xc_handle == NULL) {
740         fprintf(stderr, "Cannot acquire xenctrl handle\n");
741     } else {
742         sts = xc_domain_shutdown(xc_handle, xen_domid, reason);
743         if (sts != 0) {
744             fprintf(stderr, "xc_domain_shutdown failed to issue %s, "
745                     "sts %d, %s\n", reboot ? "reboot" : "poweroff",
746                     sts, strerror(errno));
747         } else {
748             fprintf(stderr, "Issued domain %d %s\n", xen_domid,
749                     reboot ? "reboot" : "poweroff");
750         }
751         xc_interface_close(xc_handle);
752     }
753 }
754 
755 void xen_shutdown_fatal_error(const char *fmt, ...)
756 {
757     va_list ap;
758 
759     va_start(ap, fmt);
760     vfprintf(stderr, fmt, ap);
761     va_end(ap);
762     fprintf(stderr, "Will destroy the domain.\n");
763     /* destroy the domain */
764     qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR);
765 }
766 
767 static void xen_do_ioreq_register(XenIOState *state,
768                                   unsigned int max_cpus,
769                                   const MemoryListener *xen_memory_listener)
770 {
771     int i, rc;
772 
773     state->exit.notify = xen_exit_notifier;
774     qemu_add_exit_notifier(&state->exit);
775 
776     /*
777      * Register wake-up support in QMP query-current-machine API
778      */
779     qemu_register_wakeup_support();
780 
781     rc = xen_map_ioreq_server(state);
782     if (rc < 0) {
783         goto err;
784     }
785 
786     /* Note: cpus is empty at this point in init */
787     state->cpu_by_vcpu_id = g_new0(CPUState *, max_cpus);
788 
789     rc = xen_set_ioreq_server_state(xen_domid, state->ioservid, true);
790     if (rc < 0) {
791         error_report("failed to enable ioreq server info: error %d handle=%p",
792                      errno, xen_xc);
793         goto err;
794     }
795 
796     state->ioreq_local_port = g_new0(evtchn_port_t, max_cpus);
797 
798     /* FIXME: how about if we overflow the page here? */
799     for (i = 0; i < max_cpus; i++) {
800         rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
801                                               xen_vcpu_eport(state->shared_page,
802                                                              i));
803         if (rc == -1) {
804             error_report("shared evtchn %d bind error %d", i, errno);
805             goto err;
806         }
807         state->ioreq_local_port[i] = rc;
808     }
809 
810     rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
811                                           state->bufioreq_remote_port);
812     if (rc == -1) {
813         error_report("buffered evtchn bind error %d", errno);
814         goto err;
815     }
816     state->bufioreq_local_port = rc;
817 
818     /* Init RAM management */
819 #ifdef XEN_COMPAT_PHYSMAP
820     xen_map_cache_init(xen_phys_offset_to_gaddr, state);
821 #else
822     xen_map_cache_init(NULL, state);
823 #endif
824 
825     qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);
826 
827     state->memory_listener = *xen_memory_listener;
828     memory_listener_register(&state->memory_listener, &address_space_memory);
829 
830     state->io_listener = xen_io_listener;
831     memory_listener_register(&state->io_listener, &address_space_io);
832 
833     state->device_listener = xen_device_listener;
834     QLIST_INIT(&state->dev_list);
835     device_listener_register(&state->device_listener);
836 
837     return;
838 
839 err:
840     error_report("xen hardware virtual machine initialisation failed");
841     exit(1);
842 }
843 
844 void xen_register_ioreq(XenIOState *state, unsigned int max_cpus,
845                         const MemoryListener *xen_memory_listener)
846 {
847     int rc;
848 
849     setup_xen_backend_ops();
850 
851     state->xce_handle = qemu_xen_evtchn_open();
852     if (state->xce_handle == NULL) {
853         error_report("xen: event channel open failed with error %d", errno);
854         goto err;
855     }
856 
857     state->xenstore = xs_daemon_open();
858     if (state->xenstore == NULL) {
859         error_report("xen: xenstore open failed with error %d", errno);
860         goto err;
861     }
862 
863     rc = xen_create_ioreq_server(xen_domid, &state->ioservid);
864     if (!rc) {
865         xen_do_ioreq_register(state, max_cpus, xen_memory_listener);
866     } else {
867         warn_report("xen: failed to create ioreq server");
868     }
869 
870     xen_bus_init();
871 
872     xen_be_init();
873 
874     return;
875 
876 err:
877     error_report("xen hardware virtual machine backend registration failed");
878     exit(1);
879 }
880