/*
 * QEMU Xen emulation: Shared/overlay pages support
 *
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/main-loop.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qom/object.h"
#include "migration/vmstate.h"

#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "hw/xen/xen_backend_ops.h"
#include "xen_overlay.h"
#include "xen_evtchn.h"
#include "xen_xenstore.h"

#include "sysemu/kvm.h"
#include "sysemu/kvm_xen.h"

#include "trace.h"

#include "xenstore_impl.h"

#include "hw/xen/interface/io/xs_wire.h"
#include "hw/xen/interface/event_channel.h"
#include "hw/xen/interface/grant_table.h"

#define TYPE_XEN_XENSTORE "xen-xenstore"
OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)

#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
#define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))

#define XENSTORE_HEADER_SIZE ((unsigned int)sizeof(struct xsd_sockmsg))
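
/*
 * For reference: struct xsd_sockmsg (from xs_wire.h) is four uint32_t
 * fields (type, req_id, tx_id, len), so XENSTORE_HEADER_SIZE is 16 bytes.
 * Every message on the ring is that header followed by len bytes of
 * payload, and len may not exceed XENSTORE_PAYLOAD_MAX (4096).
 */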

struct XenXenstoreState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    XenstoreImplState *impl;
    GList *watch_events; /* for the guest */

    MemoryRegion xenstore_page;
    struct xenstore_domain_interface *xs;
    uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint8_t rsp_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint32_t req_offset;
    uint32_t rsp_offset;
    bool rsp_pending;
    bool fatal_error;

    evtchn_port_t guest_port;
    evtchn_port_t be_port;
    struct xenevtchn_handle *eh;

    uint8_t *impl_state;
    uint32_t impl_state_size;

    struct xengntdev_handle *gt;
    void *granted_xs;
};

struct XenXenstoreState *xen_xenstore_singleton;

static void xen_xenstore_event(void *opaque);
static void fire_watch_cb(void *opaque, const char *path, const char *token);

static struct xenstore_backend_ops emu_xenstore_backend_ops;

static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s,
                                                GList *perms,
                                                const char *relpath,
                                                const char *fmt, ...)
{
    gchar *abspath;
    gchar *value;
    va_list args;
    GByteArray *data;
    int err;

    abspath = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath);
    va_start(args, fmt);
    value = g_strdup_vprintf(fmt, args);
    va_end(args);

    data = g_byte_array_new_take((void *)value, strlen(value));

    err = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, abspath, data);
    assert(!err);

    g_byte_array_unref(data);

    err = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, abspath, perms);
    assert(!err);

    g_free(abspath);
}

static void xen_xenstore_realize(DeviceState *dev, Error **errp)
{
    XenXenstoreState *s = XEN_XENSTORE(dev);
    GList *perms;

    if (xen_mode != XEN_EMULATE) {
        error_setg(errp, "Xen xenstore support is for Xen emulation");
        return;
    }
    memory_region_init_ram(&s->xenstore_page, OBJECT(dev), "xen:xenstore_page",
                           XEN_PAGE_SIZE, &error_abort);
    memory_region_set_enabled(&s->xenstore_page, true);
    s->xs = memory_region_get_ram_ptr(&s->xenstore_page);
    memset(s->xs, 0, XEN_PAGE_SIZE);

    /* We can't map it this early as KVM isn't ready */
    xen_xenstore_singleton = s;

    s->eh = xen_be_evtchn_open();
    if (!s->eh) {
        error_setg(errp, "Xenstore evtchn port init failed");
        return;
    }
    aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
                       xen_xenstore_event, NULL, NULL, NULL, s);

    s->impl = xs_impl_create(xen_domid);

    /* Populate the default nodes */

    /* Nodes owned by 'dom0' but readable by the guest */
    perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
    perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));

    relpath_printf(s, perms, "", "%s", "");

    relpath_printf(s, perms, "domid", "%u", xen_domid);

    relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", "%u", 1);
    relpath_printf(s, perms, "control/platform-feature-multiprocessor-suspend", "%u", 1);

    relpath_printf(s, perms, "platform/acpi", "%u", 1);
    relpath_printf(s, perms, "platform/acpi_s3", "%u", 1);
    relpath_printf(s, perms, "platform/acpi_s4", "%u", 1);
    relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0);

    g_list_free_full(perms, g_free);

    /* Nodes owned by the guest */
    perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid));

    relpath_printf(s, perms, "attr", "%s", "");

    relpath_printf(s, perms, "control/shutdown", "%s", "");
    relpath_printf(s, perms, "control/feature-poweroff", "%u", 1);
    relpath_printf(s, perms, "control/feature-reboot", "%u", 1);
    relpath_printf(s, perms, "control/feature-suspend", "%u", 1);
    relpath_printf(s, perms, "control/feature-s3", "%u", 1);
    relpath_printf(s, perms, "control/feature-s4", "%u", 1);

    relpath_printf(s, perms, "data", "%s", "");
    relpath_printf(s, perms, "device", "%s", "");
    relpath_printf(s, perms, "drivers", "%s", "");
    relpath_printf(s, perms, "error", "%s", "");
    relpath_printf(s, perms, "feature", "%s", "");

    g_list_free_full(perms, g_free);

    xen_xenstore_ops = &emu_xenstore_backend_ops;
}

static bool xen_xenstore_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}

static int xen_xenstore_pre_save(void *opaque)
{
    XenXenstoreState *s = opaque;
    GByteArray *save;

    if (s->eh) {
        s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
    }

    g_free(s->impl_state);
    save = xs_impl_serialize(s->impl);
    s->impl_state = save->data;
    s->impl_state_size = save->len;
    g_byte_array_free(save, false);

    return 0;
}

static int xen_xenstore_post_load(void *opaque, int ver)
{
    XenXenstoreState *s = opaque;
    GByteArray *save;
    int ret;

    /*
     * As qemu/dom0, rebind to the guest's port. The Windows drivers may
     * unbind the XenStore evtchn and rebind to it, having obtained the
     * "remote" port through EVTCHNOP_status. In the case that migration
     * occurs while it's unbound, the "remote" port needs to be the same
     * as before so that the guest can find it, but should remain unbound.
     */
    if (s->guest_port) {
        int be_port = xen_be_evtchn_bind_interdomain(s->eh, xen_domid,
                                                     s->guest_port);
        if (be_port < 0) {
            return be_port;
        }
        s->be_port = be_port;
    }

    save = g_byte_array_new_take(s->impl_state, s->impl_state_size);
    s->impl_state = NULL;
    s->impl_state_size = 0;

    ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
    return ret;
}

static const VMStateDescription xen_xenstore_vmstate = {
    .name = "xen_xenstore",
    .unmigratable = 1, /* The PV back ends don't migrate yet */
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_xenstore_is_needed,
    .pre_save = xen_xenstore_pre_save,
    .post_load = xen_xenstore_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(req_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, req_data)),
        VMSTATE_UINT8_ARRAY(rsp_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, rsp_data)),
        VMSTATE_UINT32(req_offset, XenXenstoreState),
        VMSTATE_UINT32(rsp_offset, XenXenstoreState),
        VMSTATE_BOOL(rsp_pending, XenXenstoreState),
        VMSTATE_UINT32(guest_port, XenXenstoreState),
        VMSTATE_BOOL(fatal_error, XenXenstoreState),
        VMSTATE_UINT32(impl_state_size, XenXenstoreState),
        VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState,
                                    impl_state_size, 0,
                                    vmstate_info_uint8, uint8_t),
        VMSTATE_END_OF_LIST()
    }
};
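
/*
 * Note on the fields above: impl_state is not a fixed buffer but a
 * serialized snapshot of the xenstore database, produced in pre_save().
 * VMSTATE_VARRAY_UINT32_ALLOC migrates impl_state_size bytes and
 * allocates the buffer on the destination, where post_load() hands it
 * back to xs_impl_deserialize().
 */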

static void xen_xenstore_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = xen_xenstore_realize;
    dc->vmsd = &xen_xenstore_vmstate;
}

static const TypeInfo xen_xenstore_info = {
    .name          = TYPE_XEN_XENSTORE,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenXenstoreState),
    .class_init    = xen_xenstore_class_init,
};

void xen_xenstore_create(void)
{
    DeviceState *dev = sysbus_create_simple(TYPE_XEN_XENSTORE, -1, NULL);

    xen_xenstore_singleton = XEN_XENSTORE(dev);

    /*
     * Defer the init (xen_xenstore_reset()) until KVM is set up and the
     * overlay page can be mapped.
     */
}

static void xen_xenstore_register_types(void)
{
    type_register_static(&xen_xenstore_info);
}

type_init(xen_xenstore_register_types)

uint16_t xen_xenstore_get_port(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    if (!s) {
        return 0;
    }
    return s->guest_port;
}

static bool req_pending(XenXenstoreState *s)
{
    struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;

    return s->req_offset == XENSTORE_HEADER_SIZE + req->len;
}

static void reset_req(XenXenstoreState *s)
{
    memset(s->req_data, 0, sizeof(s->req_data));
    s->req_offset = 0;
}

static void reset_rsp(XenXenstoreState *s)
{
    s->rsp_pending = false;

    memset(s->rsp_data, 0, sizeof(s->rsp_data));
    s->rsp_offset = 0;
}

static void xs_error(XenXenstoreState *s, unsigned int id,
                     xs_transaction_t tx_id, int errnum)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    const char *errstr = NULL;

    for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) {
        struct xsd_errors *xsd_error = &xsd_errors[i];

        if (xsd_error->errnum == errnum) {
            errstr = xsd_error->errstring;
            break;
        }
    }
    assert(errstr);

    trace_xenstore_error(id, tx_id, errstr);

    rsp->type = XS_ERROR;
    rsp->req_id = id;
    rsp->tx_id = tx_id;
    rsp->len = (uint32_t)strlen(errstr) + 1;

    memcpy(&rsp[1], errstr, rsp->len);
}

static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id,
                  xs_transaction_t tx_id)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    const char *okstr = "OK";

    rsp->type = type;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = (uint32_t)strlen(okstr) + 1;

    memcpy(&rsp[1], okstr, rsp->len);
}

/*
 * The correct request and response formats are documented in xen.git:
 * docs/misc/xenstore.txt. A summary is given below for convenience.
 * The '|' symbol represents a NUL character.
 *
 * ---------- Database read, write and permissions operations ----------
 *
 * READ                    <path>|                 <value|>
 * WRITE                   <path>|<value|>
 *         Store and read the octet string <value> at <path>.
 *         WRITE creates any missing parent paths, with empty values.
 *
 * MKDIR                   <path>|
 *         Ensures that the <path> exists, if necessary by creating
 *         it and any missing parents with empty values.  If <path>
 *         or any parent already exists, its value is left unchanged.
 *
 * RM                      <path>|
 *         Ensures that the <path> does not exist, by deleting
 *         it and all of its children.  It is not an error if <path> does
 *         not exist, but it _is_ an error if <path>'s immediate parent
 *         does not exist either.
 *
 * DIRECTORY               <path>|                 <child-leaf-name>|*
 *         Gives a list of the immediate children of <path>, as only the
 *         leafnames.  The resulting children are each named
 *         <path>/<child-leaf-name>.
 *
 * DIRECTORY_PART          <path>|<offset>         <gencnt>|<child-leaf-name>|*
 *         Same as DIRECTORY, but to be used for children lists longer than
 *         XENSTORE_PAYLOAD_MAX. Input are <path> and the byte offset into
 *         the list of children to return. Return values are the generation
 *         count <gencnt> of the node (to be used to ensure the node hasn't
 *         changed between two reads: <gencnt> being the same for multiple
 *         reads guarantees the node hasn't changed) and the list of children
 *         starting at the specified <offset> of the complete list.
 *
 * GET_PERMS               <path>|                 <perm-as-string>|+
 * SET_PERMS               <path>|<perm-as-string>|+?
 *         <perm-as-string> is one of the following
 *                 w<domid>        write only
 *                 r<domid>        read only
 *                 b<domid>        both read and write
 *                 n<domid>        no access
 *         See https://wiki.xen.org/wiki/XenBus section
 *         `Permissions' for details of the permissions system.
 *         It is possible to set permissions for the special watch paths
 *         "@introduceDomain" and "@releaseDomain" to enable receiving those
 *         watches in unprivileged domains.
 *
 * ---------- Watches ----------
 *
 * WATCH                   <wpath>|<token>|?
 *         Adds a watch.
 *
 *         When a <path> is modified (including path creation, removal,
 *         contents change or permissions change) this generates an event
 *         on the changed <path>.  Changes made in transactions cause an
 *         event only if and when committed.  Each occurring event is
 *         matched against all the watches currently set up, and each
 *         matching watch results in a WATCH_EVENT message (see below).
 *
 *         The event's path matches the watch's <wpath> if it is a child
 *         of <wpath>.
 *
 *         <wpath> can be a <path> to watch or @<wspecial>.  In the
 *         latter case <wspecial> may have any syntax but it matches
 *         (according to the rules above) only the following special
 *         events which are invented by xenstored:
 *             @introduceDomain    occurs on INTRODUCE
 *             @releaseDomain      occurs on any domain crash or
 *                                 shutdown, and also on RELEASE
 *                                 and domain destruction
 *         <wspecial> events are sent to privileged callers or explicitly
 *         via SET_PERMS enabled domains only.
 *
 *         When a watch is first set up it is triggered once straight
 *         away, with <path> equal to <wpath>.  Watches may be triggered
 *         spuriously.  The tx_id in a WATCH request is ignored.
 *
 *         Watches are supposed to be restricted by the permissions
 *         system but in practice the implementation is imperfect.
 *         Applications should not rely on being sent a notification for
 *         paths that they cannot read; however, an application may rely
 *         on being sent a watch when a path which it _is_ able to read
 *         is deleted even if that leaves only a nonexistent unreadable
 *         parent.  A notification may be omitted if a node's permissions
 *         are changed so as to make it unreadable, in which case future
 *         notifications may be suppressed (and if the node is later made
 *         readable, some notifications may have been lost).
 *
 * WATCH_EVENT                                     <epath>|<token>|
 *         Unsolicited `reply' generated for matching modification events
 *         as described above.  req_id and tx_id are both 0.
 *
 *         <epath> is the event's path, ie the actual path that was
 *         modified; however if the event was the recursive removal of a
 *         parent of <wpath>, <epath> is just
 *         <wpath> (rather than the actual path which was removed).  So
 *         <epath> is a child of <wpath>, regardless.
 *
 *         Iff <wpath> for the watch was specified as a relative pathname,
 *         the <epath> path will also be relative (with the same base,
 *         obviously).
 *
 * UNWATCH                 <wpath>|<token>|?
 *
 * RESET_WATCHES           |
 *         Reset all watches and transactions of the caller.
 *
 * ---------- Transactions ----------
 *
 * TRANSACTION_START       |                       <transid>|
 *         <transid> is an opaque uint32_t allocated by xenstored
 *         represented as unsigned decimal.  After this, transaction may
 *         be referenced by using <transid> (as 32-bit binary) in the
 *         tx_id request header field.  When transaction is started whole
 *         db is copied; reads and writes happen on the copy.
 *         It is not legal to send non-0 tx_id in TRANSACTION_START.
 *
 * TRANSACTION_END         T|
 * TRANSACTION_END         F|
 *         tx_id must refer to existing transaction.  After this
 *         request the tx_id is no longer valid and may be reused by
 *         xenstore.  If F, the transaction is discarded.  If T,
 *         it is committed: if there were any other intervening writes
 *         then our END gets EAGAIN.
 *
 *         The plan is that in the future only intervening `conflicting'
 *         writes cause EAGAIN, meaning only writes or other commits
 *         which changed paths which were read or written in the
 *         transaction at hand.
 *
 */
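
/*
 * A worked example of the framing (illustrative, not from xenstore.txt):
 * a guest writing "1" to "control/shutdown" outside a transaction puts
 * a struct xsd_sockmsg on the ring with type = XS_WRITE, req_id chosen
 * by the guest, tx_id = 0 and len = 18, followed by the payload bytes
 * "control/shutdown" NUL "1" (the value carries no trailing NUL). The
 * reply, as generated by xs_ok() above, echoes type/req_id/tx_id and
 * has len = 3 with payload "OK" NUL.
 */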

static void xs_read(XenXenstoreState *s, unsigned int req_id,
                    xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
{
    const char *path = (const char *)req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    g_autoptr(GByteArray) data = g_byte_array_new();
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_read(tx_id, path);
    err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_READ;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    len = data->len;
    if (len > XENSTORE_PAYLOAD_MAX) {
        xs_error(s, req_id, tx_id, E2BIG);
        return;
    }

    memcpy(&rsp_data[rsp->len], data->data, len);
    rsp->len += len;
}

static void xs_write(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    g_autoptr(GByteArray) data = g_byte_array_new();
    const char *path;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    path = (const char *)req_data;

    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    g_byte_array_append(data, req_data, len);

    trace_xenstore_write(tx_id, path);
    err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_WRITE, req_id, tx_id);
}

static void xs_mkdir(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    g_autoptr(GByteArray) data = g_byte_array_new();
    const char *path;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    path = (const char *)req_data;

    trace_xenstore_mkdir(tx_id, path);
    err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
    if (err == ENOENT) {
        err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
    }

    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_MKDIR, req_id, tx_id);
}

static void xs_append_strings(XenXenstoreState *s, struct xsd_sockmsg *rsp,
                              GList *strings, unsigned int start, bool truncate)
{
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    GList *l;

    for (l = strings; l; l = l->next) {
        size_t len = strlen(l->data) + 1; /* Including the NUL termination */
        char *str = l->data;

        if (rsp->len + len > XENSTORE_PAYLOAD_MAX) {
            if (truncate) {
                len = XENSTORE_PAYLOAD_MAX - rsp->len;
                if (!len) {
                    return;
                }
            } else {
                xs_error(s, rsp->req_id, rsp->tx_id, E2BIG);
                return;
            }
        }

        if (start) {
            if (start >= len) {
                start -= len;
                continue;
            }

            str += start;
            len -= start;
            start = 0;
        }

        memcpy(&rsp_data[rsp->len], str, len);
        rsp->len += len;
    }
    /* XS_DIRECTORY_PART wants an extra NUL to indicate the end */
    if (truncate && rsp->len < XENSTORE_PAYLOAD_MAX) {
        rsp_data[rsp->len++] = '\0';
    }
}
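
/*
 * Illustrative example: for a node with children "backend" and "device",
 * xs_append_strings() packs the payload as "backend" NUL "device" NUL
 * (15 bytes); this is the format of XS_DIRECTORY and XS_GET_PERMS
 * replies.
 */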

static void xs_directory(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    GList *items = NULL;
    const char *path;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    path = (const char *)req_data;

    trace_xenstore_directory(tx_id, path);
    err = xs_impl_directory(s->impl, xen_domid, tx_id, path, NULL, &items);
    if (err != 0) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_DIRECTORY;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    xs_append_strings(s, rsp, items, 0, false);

    g_list_free_full(items, g_free);
}

static void xs_directory_part(XenXenstoreState *s, unsigned int req_id,
                              xs_transaction_t tx_id, uint8_t *req_data,
                              unsigned int len)
{
    const char *offset_str, *path = (const char *)req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    char *rsp_data = (char *)&rsp[1];
    uint64_t gencnt = 0;
    unsigned int offset;
    GList *items = NULL;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    offset_str = (const char *)req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    if (len) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    if (qemu_strtoui(offset_str, NULL, 10, &offset) < 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_directory_part(tx_id, path, offset);
    err = xs_impl_directory(s->impl, xen_domid, tx_id, path, &gencnt, &items);
    if (err != 0) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_DIRECTORY_PART;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%" PRIu64, gencnt) + 1;

    xs_append_strings(s, rsp, items, offset, true);

    g_list_free_full(items, g_free);
}

static void xs_transaction_start(XenXenstoreState *s, unsigned int req_id,
                                 xs_transaction_t tx_id, uint8_t *req_data,
                                 unsigned int len)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    char *rsp_data = (char *)&rsp[1];
    int err;

    if (len != 1 || req_data[0] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    rsp->type = XS_TRANSACTION_START;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    err = xs_impl_transaction_start(s->impl, xen_domid, &tx_id);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    trace_xenstore_transaction_start(tx_id);

    rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%u", tx_id);
    assert(rsp->len < XENSTORE_PAYLOAD_MAX);
    rsp->len++;
}

static void xs_transaction_end(XenXenstoreState *s, unsigned int req_id,
                               xs_transaction_t tx_id, uint8_t *req_data,
                               unsigned int len)
{
    bool commit;
    int err;

    if (len != 2 || req_data[1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    switch (req_data[0]) {
    case 'T':
        commit = true;
        break;
    case 'F':
        commit = false;
        break;
    default:
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_transaction_end(tx_id, commit);
    err = xs_impl_transaction_end(s->impl, xen_domid, tx_id, commit);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_TRANSACTION_END, req_id, tx_id);
}

static void xs_rm(XenXenstoreState *s, unsigned int req_id,
                  xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
{
    const char *path = (const char *)req_data;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_rm(tx_id, path);
    err = xs_impl_rm(s->impl, xen_domid, tx_id, path);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_RM, req_id, tx_id);
}

static void xs_get_perms(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    const char *path = (const char *)req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    GList *perms = NULL;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_get_perms(tx_id, path);
    err = xs_impl_get_perms(s->impl, xen_domid, tx_id, path, &perms);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_GET_PERMS;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    xs_append_strings(s, rsp, perms, 0, false);

    g_list_free_full(perms, g_free);
}

static void xs_set_perms(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    const char *path = (const char *)req_data;
    uint8_t *perm;
    GList *perms = NULL;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    perm = req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            perms = g_list_append(perms, perm);
            perm = req_data;
        }
    }

    /*
     * Note that there may be trailing garbage at the end of the buffer.
     * This is explicitly permitted by the '?' at the end of the definition:
     *
     *    SET_PERMS         <path>|<perm-as-string>|+?
     */

    trace_xenstore_set_perms(tx_id, path);
    err = xs_impl_set_perms(s->impl, xen_domid, tx_id, path, perms);
    g_list_free(perms);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_SET_PERMS, req_id, tx_id);
}

static void xs_watch(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    const char *token, *path = (const char *)req_data;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    token = (const char *)req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /*
     * Note that there may be trailing garbage at the end of the buffer.
     * This is explicitly permitted by the '?' at the end of the definition:
     *
     *    WATCH             <wpath>|<token>|?
     */

    trace_xenstore_watch(path, token);
    err = xs_impl_watch(s->impl, xen_domid, path, token, fire_watch_cb, s);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_WATCH, req_id, tx_id);
}

static void xs_unwatch(XenXenstoreState *s, unsigned int req_id,
                       xs_transaction_t tx_id, uint8_t *req_data,
                       unsigned int len)
{
    const char *token, *path = (const char *)req_data;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    token = (const char *)req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    trace_xenstore_unwatch(path, token);
    err = xs_impl_unwatch(s->impl, xen_domid, path, token, fire_watch_cb, s);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_UNWATCH, req_id, tx_id);
}

static void xs_reset_watches(XenXenstoreState *s, unsigned int req_id,
                             xs_transaction_t tx_id, uint8_t *req_data,
                             unsigned int len)
{
    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_reset_watches();
    xs_impl_reset_watches(s->impl, xen_domid);

    xs_ok(s, XS_RESET_WATCHES, req_id, tx_id);
}

static void xs_priv(XenXenstoreState *s, unsigned int req_id,
                    xs_transaction_t tx_id, uint8_t *data,
                    unsigned int len)
{
    xs_error(s, req_id, tx_id, EACCES);
}

static void xs_unimpl(XenXenstoreState *s, unsigned int req_id,
                      xs_transaction_t tx_id, uint8_t *data,
                      unsigned int len)
{
    xs_error(s, req_id, tx_id, ENOSYS);
}

typedef void (*xs_impl)(XenXenstoreState *s, unsigned int req_id,
                        xs_transaction_t tx_id, uint8_t *data,
                        unsigned int len);

struct xsd_req {
    const char *name;
    xs_impl fn;
};
#define XSD_REQ(_type, _fn)                           \
    [_type] = { .name = #_type, .fn = _fn }

struct xsd_req xsd_reqs[] = {
    XSD_REQ(XS_READ, xs_read),
    XSD_REQ(XS_WRITE, xs_write),
    XSD_REQ(XS_MKDIR, xs_mkdir),
    XSD_REQ(XS_DIRECTORY, xs_directory),
    XSD_REQ(XS_DIRECTORY_PART, xs_directory_part),
    XSD_REQ(XS_TRANSACTION_START, xs_transaction_start),
    XSD_REQ(XS_TRANSACTION_END, xs_transaction_end),
    XSD_REQ(XS_RM, xs_rm),
    XSD_REQ(XS_GET_PERMS, xs_get_perms),
    XSD_REQ(XS_SET_PERMS, xs_set_perms),
    XSD_REQ(XS_WATCH, xs_watch),
    XSD_REQ(XS_UNWATCH, xs_unwatch),
    XSD_REQ(XS_CONTROL, xs_priv),
    XSD_REQ(XS_INTRODUCE, xs_priv),
    XSD_REQ(XS_RELEASE, xs_priv),
    XSD_REQ(XS_IS_DOMAIN_INTRODUCED, xs_priv),
    XSD_REQ(XS_RESUME, xs_priv),
    XSD_REQ(XS_SET_TARGET, xs_priv),
    XSD_REQ(XS_RESET_WATCHES, xs_reset_watches),
};
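
/*
 * The designated initializers above leave every unlisted message type
 * as a NULL entry, so process_req() below falls back to xs_unimpl()
 * both for types beyond the end of the array and for gaps within it.
 */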

static void process_req(XenXenstoreState *s)
{
    struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
    xs_impl handler = NULL;

    assert(req_pending(s));
    assert(!s->rsp_pending);

    if (req->type < ARRAY_SIZE(xsd_reqs)) {
        handler = xsd_reqs[req->type].fn;
    }
    if (!handler) {
        handler = &xs_unimpl;
    }

    handler(s, req->req_id, req->tx_id, (uint8_t *)&req[1], req->len);

    s->rsp_pending = true;
    reset_req(s);
}

static unsigned int copy_from_ring(XenXenstoreState *s, uint8_t *ptr,
                                   unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->req_prod);
    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->req_cons);
    unsigned int copied = 0;

    /* Ensure the ring contents don't cross the req_prod access. */
    smp_rmb();

    while (len) {
        unsigned int avail = prod - cons;
        unsigned int offset = MASK_XENSTORE_IDX(cons);
        unsigned int copylen = avail;

        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        if (copylen > len) {
            copylen = len;
        }
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }

        memcpy(ptr, &s->xs->req[offset], copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        cons += copylen;
    }

    /*
     * Not sure this ever mattered except on Alpha, but this barrier
     * is to ensure that the update to req_cons is globally visible
     * only after we have consumed all the data from the ring, and we
     * don't end up seeing data written to the ring *after* the other
     * end sees the update and writes more to the ring. Xen's own
     * xenstored has the same barrier here (although with no comment
     * at all, obviously, because it's Xen code).
     */
    smp_mb();

    qatomic_set(&s->xs->req_cons, cons);

    return copied;
}
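
/*
 * A note on the arithmetic above: req_cons and req_prod are free-running
 * uint32_t indices, only reduced modulo the ring size (XENSTORE_RING_SIZE,
 * 1024 bytes) by MASK_XENSTORE_IDX() when addressing the buffer. Unsigned
 * wraparound keeps "prod - cons" correct even across the 2^32 boundary:
 * e.g. with cons = 0xfffffffe and prod = 0x00000002, avail is 4.
 */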

static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
                                 unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->rsp_cons);
    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->rsp_prod);
    unsigned int copied = 0;

    /*
     * This matches the barrier in copy_to_ring() (or the guest's
     * equivalent) between writing the data to the ring and updating
     * rsp_prod. It protects against the pathological case (which
     * again I think never happened except on Alpha) where our
     * subsequent writes to the ring could *cross* the read of
     * rsp_cons and the guest could see the new data when it was
     * intending to read the old.
     */
    smp_mb();

    while (len) {
        unsigned int avail = cons + XENSTORE_RING_SIZE - prod;
        unsigned int offset = MASK_XENSTORE_IDX(prod);
        unsigned int copylen = len;

        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        if (copylen > avail) {
            copylen = avail;
        }
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }

        memcpy(&s->xs->rsp[offset], ptr, copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        prod += copylen;
    }

    /* Ensure the ring contents are seen before rsp_prod update. */
    smp_wmb();

    qatomic_set(&s->xs->rsp_prod, prod);

    return copied;
}

static unsigned int get_req(XenXenstoreState *s)
{
    unsigned int copied = 0;

    if (s->fatal_error) {
        return 0;
    }

    assert(!req_pending(s));

    if (s->req_offset < XENSTORE_HEADER_SIZE) {
        void *ptr = s->req_data + s->req_offset;
        unsigned int len = XENSTORE_HEADER_SIZE;
        unsigned int copylen = copy_from_ring(s, ptr, len);

        copied += copylen;
        s->req_offset += copylen;
    }

    if (s->req_offset >= XENSTORE_HEADER_SIZE) {
        struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;

        if (req->len > (uint32_t)XENSTORE_PAYLOAD_MAX) {
            error_report("Illegal XenStore request");
            s->fatal_error = true;
            return 0;
        }

        void *ptr = s->req_data + s->req_offset;
        unsigned int len = XENSTORE_HEADER_SIZE + req->len - s->req_offset;
        unsigned int copylen = copy_from_ring(s, ptr, len);

        copied += copylen;
        s->req_offset += copylen;
    }

    return copied;
}
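
/*
 * Requests can arrive in fragments: req_offset persists across calls, so
 * for example if only 10 bytes have arrived, the first block above leaves
 * req_offset = 10 and a later event resumes from there. Once the 16-byte
 * header is complete, req->len is validated and the rest of the payload
 * is drained; req_pending() only becomes true when both are complete.
 */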

static unsigned int put_rsp(XenXenstoreState *s)
{
    if (s->fatal_error) {
        return 0;
    }

    assert(s->rsp_pending);

    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    assert(s->rsp_offset < XENSTORE_HEADER_SIZE + rsp->len);

    void *ptr = s->rsp_data + s->rsp_offset;
    unsigned int len = XENSTORE_HEADER_SIZE + rsp->len - s->rsp_offset;
    unsigned int copylen = copy_to_ring(s, ptr, len);

    s->rsp_offset += copylen;

    /* Have we produced a complete response? */
    if (s->rsp_offset == XENSTORE_HEADER_SIZE + rsp->len) {
        reset_rsp(s);
    }

    return copylen;
}

static void deliver_watch(XenXenstoreState *s, const char *path,
                          const char *token)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    unsigned int len;

    assert(!s->rsp_pending);

    trace_xenstore_watch_event(path, token);

    rsp->type = XS_WATCH_EVENT;
    rsp->req_id = 0;
    rsp->tx_id = 0;
    rsp->len = 0;

    len = strlen(path);

    /* XENSTORE_ABS/REL_PATH_MAX should ensure there can be no overflow */
    assert(rsp->len + len < XENSTORE_PAYLOAD_MAX);

    memcpy(&rsp_data[rsp->len], path, len);
    rsp->len += len;
    rsp_data[rsp->len] = '\0';
    rsp->len++;

    len = strlen(token);
    /*
     * It is possible for the guest to have chosen a token that will
     * not fit (along with the path) into a watch event. We have no
     * choice but to drop the event if this is the case.
     */
    if (rsp->len + len >= XENSTORE_PAYLOAD_MAX) {
        return;
    }

    memcpy(&rsp_data[rsp->len], token, len);
    rsp->len += len;
    rsp_data[rsp->len] = '\0';
    rsp->len++;

    s->rsp_pending = true;
}

struct watch_event {
    char *path;
    char *token;
};

static void free_watch_event(struct watch_event *ev)
{
    if (ev) {
        g_free(ev->path);
        g_free(ev->token);
        g_free(ev);
    }
}

static void queue_watch(XenXenstoreState *s, const char *path,
                        const char *token)
{
    struct watch_event *ev = g_new0(struct watch_event, 1);

    ev->path = g_strdup(path);
    ev->token = g_strdup(token);

    s->watch_events = g_list_append(s->watch_events, ev);
}

static void fire_watch_cb(void *opaque, const char *path, const char *token)
{
    XenXenstoreState *s = opaque;

    assert(qemu_mutex_iothread_locked());

    /*
     * If there's a response pending, we obviously can't scribble over
     * it. But if there's a request pending, it has dibs on the buffer
     * too.
     *
     * In the common case of a watch firing due to backend activity
     * when the ring was otherwise idle, we should be able to copy the
     * strings directly into the rsp_data and thence the actual ring,
     * without needing to perform any allocations and queue them.
     */
    if (s->rsp_pending || req_pending(s)) {
        queue_watch(s, path, token);
    } else {
        deliver_watch(s, path, token);
        /*
         * If the message was queued because there was already ring activity,
         * no need to wake the guest. But if not, we need to send the evtchn.
         */
        xen_be_evtchn_notify(s->eh, s->be_port);
    }
}

static void process_watch_events(XenXenstoreState *s)
{
    struct watch_event *ev = s->watch_events->data;

    deliver_watch(s, ev->path, ev->token);

    s->watch_events = g_list_remove(s->watch_events, ev);
    free_watch_event(ev);
}

static void xen_xenstore_event(void *opaque)
{
    XenXenstoreState *s = opaque;
    evtchn_port_t port = xen_be_evtchn_pending(s->eh);
    unsigned int copied_to, copied_from;
    bool processed, notify = false;

    if (port != s->be_port) {
        return;
    }

    /* We know this is a no-op. */
    xen_be_evtchn_unmask(s->eh, port);

    do {
        copied_to = copied_from = 0;
        processed = false;

        if (!s->rsp_pending && s->watch_events) {
            process_watch_events(s);
        }

        if (s->rsp_pending) {
            copied_to = put_rsp(s);
        }

        if (!req_pending(s)) {
            copied_from = get_req(s);
        }

        if (req_pending(s) && !s->rsp_pending && !s->watch_events) {
            process_req(s);
            processed = true;
        }

        notify |= copied_to || copied_from;
    } while (copied_to || copied_from || processed);

    if (notify) {
        xen_be_evtchn_notify(s->eh, s->be_port);
    }
}

static void alloc_guest_port(XenXenstoreState *s)
{
    struct evtchn_alloc_unbound alloc = {
        .dom = DOMID_SELF,
        .remote_dom = DOMID_QEMU,
    };

    if (!xen_evtchn_alloc_unbound_op(&alloc)) {
        s->guest_port = alloc.port;
    }
}

int xen_xenstore_reset(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    int err;

    if (!s) {
        return -ENOTSUP;
    }

    s->req_offset = s->rsp_offset = 0;
    s->rsp_pending = false;

    if (!memory_region_is_mapped(&s->xenstore_page)) {
        uint64_t gpa = XEN_SPECIAL_PFN(XENSTORE) << TARGET_PAGE_BITS;
        xen_overlay_do_map_page(&s->xenstore_page, gpa);
    }

    alloc_guest_port(s);

    /*
     * As qemu/dom0, bind to the guest's port. For incoming migration, this
     * will be unbound as the guest's evtchn table is overwritten. We then
     * rebind to the correct guest port in xen_xenstore_post_load().
     */
    err = xen_be_evtchn_bind_interdomain(s->eh, xen_domid, s->guest_port);
    if (err < 0) {
        return err;
    }
    s->be_port = err;

    /*
     * We don't actually access the guest's page through the grant, because
     * this isn't real Xen, and we can just use the page we gave it in the
     * first place. Map the grant anyway, mostly for cosmetic purposes so
     * it *looks* like it's in use in the guest-visible grant table.
     */
    s->gt = qemu_xen_gnttab_open();
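    /* GNTTAB_RESERVED_XENSTORE is the well-known grant reference (1). */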
    uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE;
    s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref,
                                             PROT_READ | PROT_WRITE);

    return 0;
}

struct qemu_xs_handle {
    XenstoreImplState *impl;
    GList *watches;
    QEMUBH *watch_bh;
};

struct qemu_xs_watch {
    struct qemu_xs_handle *h;
    char *path;
    xs_watch_fn fn;
    void *opaque;
    GList *events;
};

static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int domid)
{
    return g_strdup_printf("/local/domain/%u", domid);
}

static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t,
                              const char *path, unsigned int *num)
{
    GList *items = NULL, *l;
    unsigned int i = 0;
    char **items_ret;
    int err;

    err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items);
    if (err) {
        errno = err;
        return NULL;
    }

    items_ret = g_new0(char *, g_list_length(items) + 1);
    *num = 0;
    for (l = items; l; l = l->next) {
        items_ret[i++] = l->data;
        (*num)++;
    }
    g_list_free(items);
    return items_ret;
}

static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t,
                        const char *path, unsigned int *len)
{
    GByteArray *data = g_byte_array_new();
    bool free_segment = false;
    int err;

    err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
    if (err) {
        free_segment = true;
        errno = err;
    } else {
        if (len) {
            *len = data->len;
        }
        /* The xen-bus-helper code expects to get NUL terminated string! */
        g_byte_array_append(data, (void *)"", 1);
    }

    return g_byte_array_free(data, free_segment);
}

static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t,
                        const char *path, const void *data, unsigned int len)
{
    GByteArray *gdata = g_byte_array_new();
    int err;

    g_byte_array_append(gdata, data, len);
    err = xs_impl_write(h->impl, DOMID_QEMU, t, path, gdata);
    g_byte_array_unref(gdata);
    if (err) {
        errno = err;
        return false;
    }
    return true;
}

static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t,
                         unsigned int owner, unsigned int domid,
                         unsigned int perms, const char *path)
{
    g_autoptr(GByteArray) data = g_byte_array_new();
    GList *perms_list = NULL;
    int err;

    /* mkdir does this */
    err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
    if (err == ENOENT) {
        err = xs_impl_write(h->impl, DOMID_QEMU, t, path, data);
    }
    if (err) {
        errno = err;
        return false;
    }

    perms_list = g_list_append(perms_list,
                               xs_perm_as_string(XS_PERM_NONE, owner));
    perms_list = g_list_append(perms_list,
                               xs_perm_as_string(perms, domid));

    err = xs_impl_set_perms(h->impl, DOMID_QEMU, t, path, perms_list);
    g_list_free_full(perms_list, g_free);
    if (err) {
        errno = err;
        return false;
    }
    return true;
}

static bool xs_be_destroy(struct qemu_xs_handle *h, xs_transaction_t t,
                          const char *path)
{
    int err = xs_impl_rm(h->impl, DOMID_QEMU, t, path);
    if (err) {
        errno = err;
        return false;
    }
    return true;
}

static void be_watch_bh(void *_h)
{
    struct qemu_xs_handle *h = _h;
    GList *l;

    for (l = h->watches; l; l = l->next) {
        struct qemu_xs_watch *w = l->data;

        while (w->events) {
            struct watch_event *ev = w->events->data;

            w->fn(w->opaque, ev->path);

            w->events = g_list_remove(w->events, ev);
            free_watch_event(ev);
        }
    }
}

static void xs_be_watch_cb(void *opaque, const char *path, const char *token)
{
    struct watch_event *ev = g_new0(struct watch_event, 1);
    struct qemu_xs_watch *w = opaque;

    /* We don't care about the token */
    ev->path = g_strdup(path);
    w->events = g_list_append(w->events, ev);

    qemu_bh_schedule(w->h->watch_bh);
}

static struct qemu_xs_watch *xs_be_watch(struct qemu_xs_handle *h,
                                         const char *path, xs_watch_fn fn,
                                         void *opaque)
{
    struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1);
    int err;

    w->h = h;
    w->fn = fn;
    w->opaque = opaque;

    err = xs_impl_watch(h->impl, DOMID_QEMU, path, NULL, xs_be_watch_cb, w);
    if (err) {
        errno = err;
        g_free(w);
        return NULL;
    }

    w->path = g_strdup(path);
    h->watches = g_list_append(h->watches, w);
    return w;
}

static void xs_be_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
{
    xs_impl_unwatch(h->impl, DOMID_QEMU, w->path, NULL, xs_be_watch_cb, w);

    h->watches = g_list_remove(h->watches, w);
    g_list_free_full(w->events, (GDestroyNotify)free_watch_event);
    g_free(w->path);
    g_free(w);
}

static xs_transaction_t xs_be_transaction_start(struct qemu_xs_handle *h)
{
    unsigned int new_tx = XBT_NULL;
    int err = xs_impl_transaction_start(h->impl, DOMID_QEMU, &new_tx);
    if (err) {
        errno = err;
        return XBT_NULL;
    }
    return new_tx;
}

static bool xs_be_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t,
                                  bool abort)
{
    int err = xs_impl_transaction_end(h->impl, DOMID_QEMU, t, !abort);
    if (err) {
        errno = err;
        return false;
    }
    return true;
}

static struct qemu_xs_handle *xs_be_open(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    struct qemu_xs_handle *h;

    if (!s || !s->impl) {
        errno = ENOSYS;
        return NULL;
    }

    h = g_new0(struct qemu_xs_handle, 1);
    h->impl = s->impl;

    h->watch_bh = aio_bh_new(qemu_get_aio_context(), be_watch_bh, h);

    return h;
}

static void xs_be_close(struct qemu_xs_handle *h)
{
    while (h->watches) {
        struct qemu_xs_watch *w = h->watches->data;
        xs_be_unwatch(h, w);
    }

    qemu_bh_delete(h->watch_bh);
    g_free(h);
}

static struct xenstore_backend_ops emu_xenstore_backend_ops = {
    .open = xs_be_open,
    .close = xs_be_close,
    .get_domain_path = xs_be_get_domain_path,
    .directory = xs_be_directory,
    .read = xs_be_read,
    .write = xs_be_write,
    .create = xs_be_create,
    .destroy = xs_be_destroy,
    .watch = xs_be_watch,
    .unwatch = xs_be_unwatch,
    .transaction_start = xs_be_transaction_start,
    .transaction_end = xs_be_transaction_end,
};