/* xref: /qemu/hw/i386/kvm/xen_gnttab.c (revision 370ed600) */
/*
 * QEMU Xen emulation: Grant table support
 *
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "qapi/error.h"
#include "qom/object.h"
#include "exec/target_page.h"
#include "exec/address-spaces.h"
#include "migration/vmstate.h"

#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "hw/xen/xen_backend_ops.h"
#include "xen_overlay.h"
#include "xen_gnttab.h"

#include "sysemu/kvm.h"
#include "sysemu/kvm_xen.h"

#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/grant_table.h"

#define TYPE_XEN_GNTTAB "xen-gnttab"
OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)

#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))

static struct gnttab_backend_ops emu_gnttab_backend_ops;

struct XenGnttabState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    QemuMutex gnt_lock;

    uint32_t nr_frames;
    uint32_t max_frames;

    union {
        grant_entry_v1_t *v1;
        /* Theoretically, v2 support could be added here. */
    } entries;

    MemoryRegion gnt_frames;
    MemoryRegion *gnt_aliases;
    uint64_t *gnt_frame_gpas;

    uint8_t *map_track;
};

struct XenGnttabState *xen_gnttab_singleton;

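/*
 * Allocate the RAM block backing every possible grant table frame up front,
 * plus one page-sized alias per frame so that individual frames can later be
 * mapped into guest physical address space. The reserved XenStore entry is
 * seeded to point at the special XenStore page, so that grant exists before
 * the guest ever writes the table itself.
 */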
static void xen_gnttab_realize(DeviceState *dev, Error **errp)
{
    XenGnttabState *s = XEN_GNTTAB(dev);
    int i;

    if (xen_mode != XEN_EMULATE) {
        error_setg(errp, "Xen grant table support is for Xen emulation");
        return;
    }
    s->max_frames = kvm_xen_get_gnttab_max_frames();
    memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table",
                           XEN_PAGE_SIZE * s->max_frames, &error_abort);
    memory_region_set_enabled(&s->gnt_frames, true);
    s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames);

    /* Create individual page-sized aliases for overlays */
    s->gnt_aliases = g_new0(MemoryRegion, s->max_frames);
    s->gnt_frame_gpas = g_new(uint64_t, s->max_frames);
    for (i = 0; i < s->max_frames; i++) {
        memory_region_init_alias(&s->gnt_aliases[i], OBJECT(dev),
                                 NULL, &s->gnt_frames,
                                 i * XEN_PAGE_SIZE, XEN_PAGE_SIZE);
        s->gnt_frame_gpas[i] = INVALID_GPA;
    }

    s->nr_frames = 0;
    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);

    qemu_mutex_init(&s->gnt_lock);

    xen_gnttab_singleton = s;

    s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);

    xen_gnttab_ops = &emu_gnttab_backend_ops;
}

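/*
 * After migration, re-establish the grant table frame mappings the guest
 * had made; the GPAs travel in the vmstate, the alias mappings do not.
 */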
static int xen_gnttab_post_load(void *opaque, int version_id)
{
    XenGnttabState *s = XEN_GNTTAB(opaque);
    uint32_t i;

    for (i = 0; i < s->nr_frames; i++) {
        if (s->gnt_frame_gpas[i] != INVALID_GPA) {
            xen_overlay_do_map_page(&s->gnt_aliases[i], s->gnt_frame_gpas[i]);
        }
    }
    return 0;
}

static bool xen_gnttab_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}

static const VMStateDescription xen_gnttab_vmstate = {
    .name = "xen_gnttab",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_gnttab_is_needed,
    .post_load = xen_gnttab_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(nr_frames, XenGnttabState),
        VMSTATE_VARRAY_UINT32(gnt_frame_gpas, XenGnttabState, nr_frames, 0,
                              vmstate_info_uint64, uint64_t),
        VMSTATE_END_OF_LIST()
    }
};

static void xen_gnttab_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = xen_gnttab_realize;
    dc->vmsd = &xen_gnttab_vmstate;
}

static const TypeInfo xen_gnttab_info = {
    .name          = TYPE_XEN_GNTTAB,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenGnttabState),
    .class_init    = xen_gnttab_class_init,
};

void xen_gnttab_create(void)
{
    xen_gnttab_singleton = XEN_GNTTAB(sysbus_create_simple(TYPE_XEN_GNTTAB,
                                                           -1, NULL));
}

static void xen_gnttab_register_types(void)
{
    type_register_static(&xen_gnttab_info);
}

type_init(xen_gnttab_register_types)

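/*
 * Map grant table frame @idx at guest frame @gfn, typically on behalf of a
 * guest XENMEM_add_to_physmap call with XENMAPSPACE_grant_table. The GPA is
 * recorded so the mapping can be restored in post_load after migration.
 */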
int xen_gnttab_map_page(uint64_t idx, uint64_t gfn)
{
    XenGnttabState *s = xen_gnttab_singleton;
    uint64_t gpa = gfn << XEN_PAGE_SHIFT;

    if (!s) {
        return -ENOTSUP;
    }

    if (idx >= s->max_frames) {
        return -EINVAL;
    }

    QEMU_IOTHREAD_LOCK_GUARD();
    QEMU_LOCK_GUARD(&s->gnt_lock);

    xen_overlay_do_map_page(&s->gnt_aliases[idx], gpa);

    s->gnt_frame_gpas[idx] = gpa;

    if (s->nr_frames <= idx) {
        s->nr_frames = idx + 1;
    }

    return 0;
}

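/*
 * Only grant table v1 is implemented here; a request for v2 fails with
 * -ENOSYS, just as a hypervisor without set_version support would.
 */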
int xen_gnttab_set_version_op(struct gnttab_set_version *set)
{
    int ret;

    switch (set->version) {
    case 1:
        ret = 0;
        break;

    case 2:
        /* Behave as before set_version was introduced. */
        ret = -ENOSYS;
        break;

    default:
        ret = -EINVAL;
    }

    set->version = 1;
    return ret;
}

int xen_gnttab_get_version_op(struct gnttab_get_version *get)
{
    if (get->dom != DOMID_SELF && get->dom != xen_domid) {
        return -ESRCH;
    }

    get->version = 1;
    return 0;
}

int xen_gnttab_query_size_op(struct gnttab_query_size *size)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    if (size->dom != DOMID_SELF && size->dom != xen_domid) {
        size->status = GNTST_bad_domain;
        return 0;
    }

    size->status = GNTST_okay;
    size->nr_frames = s->nr_frames;
    size->max_nr_frames = s->max_frames;
    return 0;
}

/* Track per-open refs, to allow close() to clean up. */
struct active_ref {
    MemoryRegionSection mrs;
    void *virtaddr;
    uint32_t refcnt;
    int prot;
};

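/*
 * Drop one tracked mapping of @ref. If the page was mapped writable, mark
 * the backing memory region dirty first so dirty-tracking consumers (e.g.
 * live migration) see the backend's writes. When the last mapping goes
 * away, clear GTF_reading/GTF_writing so the guest can observe that the
 * grant is no longer in use.
 */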
static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
                      MemoryRegionSection *mrs, int prot)
{
    if (mrs && mrs->mr) {
        if (prot & PROT_WRITE) {
            memory_region_set_dirty(mrs->mr, mrs->offset_within_region,
                                    XEN_PAGE_SIZE);
        }
        memory_region_unref(mrs->mr);
        mrs->mr = NULL;
    }
    assert(s->map_track[ref] != 0);

    if (--s->map_track[ref] == 0) {
        grant_entry_v1_t *gnt_p = &s->entries.v1[ref];
        qatomic_and(&gnt_p->flags, (uint16_t)~(GTF_reading | GTF_writing));
    }
}

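/*
 * Take a reference to grant entry @ref for @prot access: validate the
 * entry, set GTF_reading (and GTF_writing for writable mappings) with a
 * bounded cmpxchg loop, and return the guest physical address of the
 * granted frame, or INVALID_GPA on failure.
 */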
static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
{
    uint16_t mask = GTF_type_mask | GTF_sub_page;
    grant_entry_v1_t gnt, *gnt_p;
    int retries = 0;

    if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
        s->map_track[ref] == UINT8_MAX) {
        return INVALID_GPA;
    }

    if (prot & PROT_WRITE) {
        mask |= GTF_readonly;
    }

    gnt_p = &s->entries.v1[ref];

    /*
     * The guest can legitimately be changing the GTF_readonly flag. Allow
     * that, but don't let a malicious guest cause a livelock.
     */
    for (retries = 0; retries < 5; retries++) {
        uint16_t new_flags;

        /* Read the entry before an atomic operation on its flags */
        gnt = *(volatile grant_entry_v1_t *)gnt_p;

        if ((gnt.flags & mask) != GTF_permit_access ||
            gnt.domid != DOMID_QEMU) {
            return INVALID_GPA;
        }

        new_flags = gnt.flags | GTF_reading;
        if (prot & PROT_WRITE) {
            new_flags |= GTF_writing;
        }

        if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) {
            return (uint64_t)gnt.frame << XEN_PAGE_SHIFT;
        }
    }

    return INVALID_GPA;
}

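/* Per-open state: a hash table mapping grant_ref_t to its active_ref. */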
struct xengntdev_handle {
    GHashTable *active_maps;
};

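/*
 * Nothing to do here; the size of the emulated table is fixed when the
 * device is realized.
 */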
static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
                                        uint32_t nr_grants)
{
    return 0;
}

static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt,
                                    uint32_t count, uint32_t domid,
                                    uint32_t *refs, int prot)
{
    XenGnttabState *s = xen_gnttab_singleton;
    struct active_ref *act;

    if (!s) {
        errno = ENOTSUP;
        return NULL;
    }

    if (domid != xen_domid) {
        errno = EINVAL;
        return NULL;
    }

    if (!count || count > 4096) {
        errno = EINVAL;
        return NULL;
    }

    /*
     * Making a contiguous mapping from potentially discontiguous grant
     * references would be... distinctly non-trivial. We don't support it.
     * Even changing the API to return an array of pointers, one per page,
     * wouldn't be simple to use in PV backends because some structures
     * actually cross page boundaries (e.g. 32-bit blkif_response ring
     * entries are 12 bytes).
     */
    if (count != 1) {
        errno = EINVAL;
        return NULL;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    if (act) {
        if ((prot & PROT_WRITE) && !(act->prot & PROT_WRITE)) {
            if (gnt_ref(s, refs[0], prot) == INVALID_GPA) {
                errno = EINVAL;
                return NULL;
            }
            act->prot |= PROT_WRITE;
        }
        act->refcnt++;
    } else {
        uint64_t gpa = gnt_ref(s, refs[0], prot);
        if (gpa == INVALID_GPA) {
            errno = EINVAL;
            return NULL;
        }

        act = g_new0(struct active_ref, 1);
        act->prot = prot;
        act->refcnt = 1;
        act->mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE);

        if (act->mrs.mr &&
            !int128_lt(act->mrs.size, int128_make64(XEN_PAGE_SIZE)) &&
            memory_region_get_ram_addr(act->mrs.mr) != RAM_ADDR_INVALID) {
            act->virtaddr = qemu_map_ram_ptr(act->mrs.mr->ram_block,
                                             act->mrs.offset_within_region);
        }
        if (!act->virtaddr) {
            gnt_unref(s, refs[0], &act->mrs, 0);
            g_free(act);
            errno = EINVAL;
            return NULL;
        }

        s->map_track[refs[0]]++;
        g_hash_table_insert(xgt->active_maps, GINT_TO_POINTER(refs[0]), act);
    }

    return act->virtaddr;
}

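/*
 * GHRFunc helper: tear down one active_ref. Used both for explicit unmaps
 * and to clean up everything left in the hash table on close().
 */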
static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data)
{
    XenGnttabState *s = user_data;
    grant_ref_t gref = GPOINTER_TO_INT(key);
    struct active_ref *act = value;

    gnt_unref(s, gref, &act->mrs, act->prot);
    g_free(act);
    return true;
}

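/*
 * Unmap a previously mapped grant ref. Only single-page mappings are
 * supported, matching map_refs() above; the mapping is torn down only
 * once its refcount drops to zero.
 */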
static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt,
                               void *start_address, uint32_t *refs,
                               uint32_t count)
{
    XenGnttabState *s = xen_gnttab_singleton;
    struct active_ref *act;

    if (!s) {
        return -ENOTSUP;
    }

    if (count != 1) {
        return -EINVAL;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    if (!act) {
        return -ENOENT;
    }

    if (act->virtaddr != start_address) {
        return -EINVAL;
    }

    if (!--act->refcnt) {
        do_unmap(GINT_TO_POINTER(refs[0]), act, s);
        g_hash_table_remove(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    }

    return 0;
}

/*
 * This looks a bit like the one for true Xen in xen-operations.c but
 * in emulation we don't support multi-page mappings. And under Xen we
 * *want* the multi-page mappings so we have fewer bounces through the
 * kernel and the hypervisor. So the code paths end up being similar,
 * but different.
 */
static int xen_be_gnttab_copy(struct xengntdev_handle *xgt, bool to_domain,
                              uint32_t domid, XenGrantCopySegment *segs,
                              uint32_t nr_segs, Error **errp)
{
    int prot = to_domain ? PROT_WRITE : PROT_READ;
    unsigned int i;

    for (i = 0; i < nr_segs; i++) {
        XenGrantCopySegment *seg = &segs[i];
        void *page;
        uint32_t ref = to_domain ? seg->dest.foreign.ref :
            seg->source.foreign.ref;

        page = xen_be_gnttab_map_refs(xgt, 1, domid, &ref, prot);
        if (!page) {
            if (errp) {
                error_setg_errno(errp, errno,
                                 "xen_be_gnttab_map_refs failed");
            }
            return -errno;
        }

        if (to_domain) {
            memcpy(page + seg->dest.foreign.offset, seg->source.virt,
                   seg->len);
        } else {
            memcpy(seg->dest.virt, page + seg->source.foreign.offset,
                   seg->len);
        }

        if (xen_be_gnttab_unmap(xgt, page, &ref, 1)) {
            if (errp) {
                error_setg_errno(errp, errno, "xen_be_gnttab_unmap failed");
            }
            return -errno;
        }
    }

    return 0;
}

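/* Create a per-open handle with an empty table of active mappings. */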
static struct xengntdev_handle *xen_be_gnttab_open(void)
{
    struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1);

    xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal);
    return xgt;
}

static int xen_be_gnttab_close(struct xengntdev_handle *xgt)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s);
    g_hash_table_destroy(xgt->active_maps);
    g_free(xgt);
    return 0;
}

static struct gnttab_backend_ops emu_gnttab_backend_ops = {
    .open = xen_be_gnttab_open,
    .close = xen_be_gnttab_close,
    .grant_copy = xen_be_gnttab_copy,
    .set_max_grants = xen_be_gnttab_set_max_grants,
    .map_refs = xen_be_gnttab_map_refs,
    .unmap = xen_be_gnttab_unmap,
};

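/*
 * Restore the table to its freshly-realized state (e.g. on Xen soft
 * reset): no frames mapped, all entries and mapping counts cleared, and
 * only the reserved XenStore entry populated.
 */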
int xen_gnttab_reset(void)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    s->nr_frames = 0;

    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);

    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);

    memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1);

    return 0;
}
548