xref: /qemu/hw/i386/kvm/xen_xenstore.c (revision 370ed600)
1 /*
2  * QEMU Xen emulation: Shared/overlay pages support
3  *
4  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5  *
6  * Authors: David Woodhouse <dwmw2@infradead.org>
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
9  * See the COPYING file in the top-level directory.
10  */
11 
12 #include "qemu/osdep.h"
13 
14 #include "qemu/host-utils.h"
15 #include "qemu/module.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/cutils.h"
18 #include "qemu/error-report.h"
19 #include "qapi/error.h"
20 #include "qom/object.h"
21 #include "migration/vmstate.h"
22 
23 #include "hw/sysbus.h"
24 #include "hw/xen/xen.h"
25 #include "hw/xen/xen_backend_ops.h"
26 #include "xen_overlay.h"
27 #include "xen_evtchn.h"
28 #include "xen_xenstore.h"
29 
30 #include "sysemu/kvm.h"
31 #include "sysemu/kvm_xen.h"
32 
33 #include "trace.h"
34 
35 #include "xenstore_impl.h"
36 
37 #include "hw/xen/interface/io/xs_wire.h"
38 #include "hw/xen/interface/event_channel.h"
39 #include "hw/xen/interface/grant_table.h"
40 
41 #define TYPE_XEN_XENSTORE "xen-xenstore"
42 OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
43 
44 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
45 #define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
46 
47 #define XENSTORE_HEADER_SIZE ((unsigned int)sizeof(struct xsd_sockmsg))
48 
/* Per-instance state of the emulated xenstore sysbus device. */
struct XenXenstoreState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    /* In-QEMU xenstore database implementation */
    XenstoreImplState *impl;
    GList *watch_events; /* for the guest */

    /* RAM backing the shared xenstore ring page */
    MemoryRegion xenstore_page;
    struct xenstore_domain_interface *xs;
    /* Staging buffers for one request and one response (header + payload) */
    uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint8_t rsp_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint32_t req_offset;  /* cursor into req_data */
    uint32_t rsp_offset;  /* cursor into rsp_data */
    bool rsp_pending;     /* a response is queued for the guest */
    bool fatal_error;

    /* Event channel: guest-side port and our backend-side binding */
    evtchn_port_t guest_port;
    evtchn_port_t be_port;
    struct xenevtchn_handle *eh;

    /* Serialized database image, only valid across save/load */
    uint8_t *impl_state;
    uint32_t impl_state_size;

    struct xengntdev_handle *gt;
    void *granted_xs;
};
76 
77 struct XenXenstoreState *xen_xenstore_singleton;
78 
79 static void xen_xenstore_event(void *opaque);
80 static void fire_watch_cb(void *opaque, const char *path, const char *token);
81 
82 static struct xenstore_backend_ops emu_xenstore_backend_ops;
83 
84 static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s,
85                                                 GList *perms,
86                                                 const char *relpath,
87                                                 const char *fmt, ...)
88 {
89     gchar *abspath;
90     gchar *value;
91     va_list args;
92     GByteArray *data;
93     int err;
94 
95     abspath = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath);
96     va_start(args, fmt);
97     value = g_strdup_vprintf(fmt, args);
98     va_end(args);
99 
100     data = g_byte_array_new_take((void *)value, strlen(value));
101 
102     err = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, abspath, data);
103     assert(!err);
104 
105     g_byte_array_unref(data);
106 
107     err = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, abspath, perms);
108     assert(!err);
109 
110     g_free(abspath);
111 }
112 
/*
 * Realize handler: allocate the shared xenstore ring page, open the
 * backend event channel, create the in-QEMU xenstore database and
 * populate the default nodes for the guest domain.
 */
static void xen_xenstore_realize(DeviceState *dev, Error **errp)
{
    XenXenstoreState *s = XEN_XENSTORE(dev);
    GList *perms;

    if (xen_mode != XEN_EMULATE) {
        error_setg(errp, "Xen xenstore support is for Xen emulation");
        return;
    }
    memory_region_init_ram(&s->xenstore_page, OBJECT(dev), "xen:xenstore_page",
                           XEN_PAGE_SIZE, &error_abort);
    memory_region_set_enabled(&s->xenstore_page, true);
    s->xs = memory_region_get_ram_ptr(&s->xenstore_page);
    memset(s->xs, 0, XEN_PAGE_SIZE);

    /* We can't map it this early as KVM isn't ready */
    xen_xenstore_singleton = s;

    s->eh = xen_be_evtchn_open();
    if (!s->eh) {
        error_setg(errp, "Xenstore evtchn port init failed");
        return;
    }
    /* Dispatch xen_xenstore_event() whenever the backend evtchn fd fires. */
    aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
                       xen_xenstore_event, NULL, NULL, NULL, s);

    s->impl = xs_impl_create(xen_domid);

    /* Populate the default nodes */

    /* Nodes owned by 'dom0' but readable by the guest */
    perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
    perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));

    relpath_printf(s, perms, "", "%s", "");

    relpath_printf(s, perms, "domid", "%u", xen_domid);

    relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", "%u", 1);
    relpath_printf(s, perms, "control/platform-feature-multiprocessor-suspend", "%u", 1);

    relpath_printf(s, perms, "platform/acpi", "%u", 1);
    relpath_printf(s, perms, "platform/acpi_s3", "%u", 1);
    relpath_printf(s, perms, "platform/acpi_s4", "%u", 1);
    relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0);

    g_list_free_full(perms, g_free);

    /* Nodes owned by the guest */
    perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid));

    relpath_printf(s, perms, "attr", "%s", "");

    relpath_printf(s, perms, "control/shutdown", "%s", "");
    relpath_printf(s, perms, "control/feature-poweroff", "%u", 1);
    relpath_printf(s, perms, "control/feature-reboot", "%u", 1);
    relpath_printf(s, perms, "control/feature-suspend", "%u", 1);
    relpath_printf(s, perms, "control/feature-s3", "%u", 1);
    relpath_printf(s, perms, "control/feature-s4", "%u", 1);

    relpath_printf(s, perms, "data", "%s", "");
    relpath_printf(s, perms, "device", "%s", "");
    relpath_printf(s, perms, "drivers", "%s", "");
    relpath_printf(s, perms, "error", "%s", "");
    relpath_printf(s, perms, "feature", "%s", "");

    g_list_free_full(perms, g_free);

    /* Route the emulated xenstore backend operations through this device. */
    xen_xenstore_ops = &emu_xenstore_backend_ops;
}
183 
184 static bool xen_xenstore_is_needed(void *opaque)
185 {
186     return xen_mode == XEN_EMULATE;
187 }
188 
189 static int xen_xenstore_pre_save(void *opaque)
190 {
191     XenXenstoreState *s = opaque;
192     GByteArray *save;
193 
194     if (s->eh) {
195         s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
196     }
197 
198     g_free(s->impl_state);
199     save = xs_impl_serialize(s->impl);
200     s->impl_state = save->data;
201     s->impl_state_size = save->len;
202     g_byte_array_free(save, false);
203 
204     return 0;
205 }
206 
/*
 * Post-load handler: rebind the backend event channel to the saved guest
 * port and rebuild the xenstore database from the migrated image.
 * Returns 0 on success or a negative error from the evtchn bind /
 * the deserializer.
 */
static int xen_xenstore_post_load(void *opaque, int ver)
{
    XenXenstoreState *s = opaque;
    GByteArray *save;
    int ret;

    /*
     * As qemu/dom0, rebind to the guest's port. The Windows drivers may
     * unbind the XenStore evtchn and rebind to it, having obtained the
     * "remote" port through EVTCHNOP_status. In the case that migration
     * occurs while it's unbound, the "remote" port needs to be the same
     * as before so that the guest can find it, but should remain unbound.
     */
    if (s->guest_port) {
        int be_port = xen_be_evtchn_bind_interdomain(s->eh, xen_domid,
                                                     s->guest_port);
        if (be_port < 0) {
            return be_port;
        }
        s->be_port = be_port;
    }

    /* Wrap the migrated blob; ownership passes to the byte array. */
    save = g_byte_array_new_take(s->impl_state, s->impl_state_size);
    s->impl_state = NULL;
    s->impl_state_size = 0;

    ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
    return ret;
}
236 
/*
 * Migration state description for the emulated xenstore device.
 * Currently marked unmigratable (the PV back ends can't migrate yet),
 * but the fields are fully described so migration can be enabled later.
 */
static const VMStateDescription xen_xenstore_vmstate = {
    .name = "xen_xenstore",
    .unmigratable = 1, /* The PV back ends don't migrate yet */
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_xenstore_is_needed,
    .pre_save = xen_xenstore_pre_save,
    .post_load = xen_xenstore_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(req_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, req_data)),
        VMSTATE_UINT8_ARRAY(rsp_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, rsp_data)),
        VMSTATE_UINT32(req_offset, XenXenstoreState),
        VMSTATE_UINT32(rsp_offset, XenXenstoreState),
        VMSTATE_BOOL(rsp_pending, XenXenstoreState),
        VMSTATE_UINT32(guest_port, XenXenstoreState),
        VMSTATE_BOOL(fatal_error, XenXenstoreState),
        /* Serialized database produced by pre_save, sized by impl_state_size */
        VMSTATE_UINT32(impl_state_size, XenXenstoreState),
        VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState,
                                    impl_state_size, 0,
                                    vmstate_info_uint8, uint8_t),
        VMSTATE_END_OF_LIST()
    }
};
262 
263 static void xen_xenstore_class_init(ObjectClass *klass, void *data)
264 {
265     DeviceClass *dc = DEVICE_CLASS(klass);
266 
267     dc->realize = xen_xenstore_realize;
268     dc->vmsd = &xen_xenstore_vmstate;
269 }
270 
/* QOM type registration data for the emulated xenstore sysbus device. */
static const TypeInfo xen_xenstore_info = {
    .name          = TYPE_XEN_XENSTORE,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenXenstoreState),
    .class_init    = xen_xenstore_class_init,
};
277 
278 void xen_xenstore_create(void)
279 {
280     DeviceState *dev = sysbus_create_simple(TYPE_XEN_XENSTORE, -1, NULL);
281 
282     xen_xenstore_singleton = XEN_XENSTORE(dev);
283 
284     /*
285      * Defer the init (xen_xenstore_reset()) until KVM is set up and the
286      * overlay page can be mapped.
287      */
288 }
289 
/* Register the xen-xenstore QOM type at module init time. */
static void xen_xenstore_register_types(void)
{
    type_register_static(&xen_xenstore_info);
}

type_init(xen_xenstore_register_types)
296 
297 uint16_t xen_xenstore_get_port(void)
298 {
299     XenXenstoreState *s = xen_xenstore_singleton;
300     if (!s) {
301         return 0;
302     }
303     return s->guest_port;
304 }
305 
306 static bool req_pending(XenXenstoreState *s)
307 {
308     struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
309 
310     return s->req_offset == XENSTORE_HEADER_SIZE + req->len;
311 }
312 
313 static void reset_req(XenXenstoreState *s)
314 {
315     memset(s->req_data, 0, sizeof(s->req_data));
316     s->req_offset = 0;
317 }
318 
319 static void reset_rsp(XenXenstoreState *s)
320 {
321     s->rsp_pending = false;
322 
323     memset(s->rsp_data, 0, sizeof(s->rsp_data));
324     s->rsp_offset = 0;
325 }
326 
327 static void xs_error(XenXenstoreState *s, unsigned int id,
328                      xs_transaction_t tx_id, int errnum)
329 {
330     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
331     const char *errstr = NULL;
332 
333     for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) {
334         struct xsd_errors *xsd_error = &xsd_errors[i];
335 
336         if (xsd_error->errnum == errnum) {
337             errstr = xsd_error->errstring;
338             break;
339         }
340     }
341     assert(errstr);
342 
343     trace_xenstore_error(id, tx_id, errstr);
344 
345     rsp->type = XS_ERROR;
346     rsp->req_id = id;
347     rsp->tx_id = tx_id;
348     rsp->len = (uint32_t)strlen(errstr) + 1;
349 
350     memcpy(&rsp[1], errstr, rsp->len);
351 }
352 
353 static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id,
354                   xs_transaction_t tx_id)
355 {
356     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
357     const char *okstr = "OK";
358 
359     rsp->type = type;
360     rsp->req_id = req_id;
361     rsp->tx_id = tx_id;
362     rsp->len = (uint32_t)strlen(okstr) + 1;
363 
364     memcpy(&rsp[1], okstr, rsp->len);
365 }
366 
367 /*
368  * The correct request and response formats are documented in xen.git:
369  * docs/misc/xenstore.txt. A summary is given below for convenience.
370  * The '|' symbol represents a NUL character.
371  *
372  * ---------- Database read, write and permissions operations ----------
373  *
374  * READ                    <path>|                 <value|>
375  * WRITE                   <path>|<value|>
376  *         Store and read the octet string <value> at <path>.
377  *         WRITE creates any missing parent paths, with empty values.
378  *
379  * MKDIR                   <path>|
 *         Ensures that the <path> exists, if necessary by creating
381  *         it and any missing parents with empty values.  If <path>
382  *         or any parent already exists, its value is left unchanged.
383  *
384  * RM                      <path>|
385  *         Ensures that the <path> does not exist, by deleting
386  *         it and all of its children.  It is not an error if <path> does
387  *         not exist, but it _is_ an error if <path>'s immediate parent
388  *         does not exist either.
389  *
390  * DIRECTORY               <path>|                 <child-leaf-name>|*
391  *         Gives a list of the immediate children of <path>, as only the
392  *         leafnames.  The resulting children are each named
393  *         <path>/<child-leaf-name>.
394  *
395  * DIRECTORY_PART          <path>|<offset>         <gencnt>|<child-leaf-name>|*
396  *         Same as DIRECTORY, but to be used for children lists longer than
397  *         XENSTORE_PAYLOAD_MAX. Input are <path> and the byte offset into
398  *         the list of children to return. Return values are the generation
399  *         count <gencnt> of the node (to be used to ensure the node hasn't
400  *         changed between two reads: <gencnt> being the same for multiple
401  *         reads guarantees the node hasn't changed) and the list of children
402  *         starting at the specified <offset> of the complete list.
403  *
404  * GET_PERMS               <path>|                 <perm-as-string>|+
405  * SET_PERMS               <path>|<perm-as-string>|+?
406  *         <perm-as-string> is one of the following
407  *                 w<domid>        write only
408  *                 r<domid>        read only
409  *                 b<domid>        both read and write
410  *                 n<domid>        no access
411  *         See https://wiki.xen.org/wiki/XenBus section
412  *         `Permissions' for details of the permissions system.
413  *         It is possible to set permissions for the special watch paths
414  *         "@introduceDomain" and "@releaseDomain" to enable receiving those
415  *         watches in unprivileged domains.
416  *
417  * ---------- Watches ----------
418  *
419  * WATCH                   <wpath>|<token>|?
420  *         Adds a watch.
421  *
422  *         When a <path> is modified (including path creation, removal,
423  *         contents change or permissions change) this generates an event
424  *         on the changed <path>.  Changes made in transactions cause an
425  *         event only if and when committed.  Each occurring event is
426  *         matched against all the watches currently set up, and each
427  *         matching watch results in a WATCH_EVENT message (see below).
428  *
 *         The event's path matches the watch's <wpath> if it is a child
430  *         of <wpath>.
431  *
432  *         <wpath> can be a <path> to watch or @<wspecial>.  In the
433  *         latter case <wspecial> may have any syntax but it matches
434  *         (according to the rules above) only the following special
435  *         events which are invented by xenstored:
436  *             @introduceDomain    occurs on INTRODUCE
437  *             @releaseDomain      occurs on any domain crash or
438  *                                 shutdown, and also on RELEASE
439  *                                 and domain destruction
440  *         <wspecial> events are sent to privileged callers or explicitly
441  *         via SET_PERMS enabled domains only.
442  *
443  *         When a watch is first set up it is triggered once straight
444  *         away, with <path> equal to <wpath>.  Watches may be triggered
445  *         spuriously.  The tx_id in a WATCH request is ignored.
446  *
447  *         Watches are supposed to be restricted by the permissions
448  *         system but in practice the implementation is imperfect.
449  *         Applications should not rely on being sent a notification for
450  *         paths that they cannot read; however, an application may rely
451  *         on being sent a watch when a path which it _is_ able to read
452  *         is deleted even if that leaves only a nonexistent unreadable
 *         parent.  A notification may be omitted if a node's permissions
454  *         are changed so as to make it unreadable, in which case future
455  *         notifications may be suppressed (and if the node is later made
456  *         readable, some notifications may have been lost).
457  *
458  * WATCH_EVENT                                     <epath>|<token>|
459  *         Unsolicited `reply' generated for matching modification events
460  *         as described above.  req_id and tx_id are both 0.
461  *
462  *         <epath> is the event's path, ie the actual path that was
 *         modified; however if the event was the recursive removal of a
 *         parent of <wpath>, <epath> is just
465  *         <wpath> (rather than the actual path which was removed).  So
466  *         <epath> is a child of <wpath>, regardless.
467  *
468  *         Iff <wpath> for the watch was specified as a relative pathname,
469  *         the <epath> path will also be relative (with the same base,
470  *         obviously).
471  *
472  * UNWATCH                 <wpath>|<token>|?
473  *
474  * RESET_WATCHES           |
475  *         Reset all watches and transactions of the caller.
476  *
477  * ---------- Transactions ----------
478  *
479  * TRANSACTION_START       |                       <transid>|
480  *         <transid> is an opaque uint32_t allocated by xenstored
481  *         represented as unsigned decimal.  After this, transaction may
482  *         be referenced by using <transid> (as 32-bit binary) in the
483  *         tx_id request header field.  When transaction is started whole
484  *         db is copied; reads and writes happen on the copy.
485  *         It is not legal to send non-0 tx_id in TRANSACTION_START.
486  *
487  * TRANSACTION_END         T|
488  * TRANSACTION_END         F|
489  *         tx_id must refer to existing transaction.  After this
490  *         request the tx_id is no longer valid and may be reused by
491  *         xenstore.  If F, the transaction is discarded.  If T,
492  *         it is committed: if there were any other intervening writes
 *         then our END gets EAGAIN.
494  *
495  *         The plan is that in the future only intervening `conflicting'
496  *         writes cause EAGAIN, meaning only writes or other commits
497  *         which changed paths which were read or written in the
498  *         transaction at hand.
499  *
500  */
501 
502 static void xs_read(XenXenstoreState *s, unsigned int req_id,
503                     xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
504 {
505     const char *path = (const char *)req_data;
506     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
507     uint8_t *rsp_data = (uint8_t *)&rsp[1];
508     g_autoptr(GByteArray) data = g_byte_array_new();
509     int err;
510 
511     if (len == 0 || req_data[len - 1] != '\0') {
512         xs_error(s, req_id, tx_id, EINVAL);
513         return;
514     }
515 
516     trace_xenstore_read(tx_id, path);
517     err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
518     if (err) {
519         xs_error(s, req_id, tx_id, err);
520         return;
521     }
522 
523     rsp->type = XS_READ;
524     rsp->req_id = req_id;
525     rsp->tx_id = tx_id;
526     rsp->len = 0;
527 
528     len = data->len;
529     if (len > XENSTORE_PAYLOAD_MAX) {
530         xs_error(s, req_id, tx_id, E2BIG);
531         return;
532     }
533 
534     memcpy(&rsp_data[rsp->len], data->data, len);
535     rsp->len += len;
536 }
537 
/*
 * XS_WRITE: payload is <path>|<value|>.  Store <value> — everything after
 * the path's NUL terminator — at <path>.
 */
static void xs_write(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    g_autoptr(GByteArray) data = g_byte_array_new();
    const char *path;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    path = (const char *)req_data;

    /* Advance past the NUL-terminated path; EINVAL if no NUL is found. */
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /* The remaining bytes are the value, taken verbatim. */
    g_byte_array_append(data, req_data, len);

    trace_xenstore_write(tx_id, path);
    err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_WRITE, req_id, tx_id);
}
574 
575 static void xs_mkdir(XenXenstoreState *s, unsigned int req_id,
576                      xs_transaction_t tx_id, uint8_t *req_data,
577                      unsigned int len)
578 {
579     g_autoptr(GByteArray) data = g_byte_array_new();
580     const char *path;
581     int err;
582 
583     if (len == 0 || req_data[len - 1] != '\0') {
584         xs_error(s, req_id, tx_id, EINVAL);
585         return;
586     }
587 
588     path = (const char *)req_data;
589 
590     trace_xenstore_mkdir(tx_id, path);
591     err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
592     if (err == ENOENT) {
593         err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
594     }
595 
596     if (!err) {
597         xs_error(s, req_id, tx_id, err);
598         return;
599     }
600 
601     xs_ok(s, XS_MKDIR, req_id, tx_id);
602 }
603 
/*
 * Append a list of NUL-terminated strings to the response payload.
 *
 * @start: number of bytes of the concatenated list to skip before copying
 *         (used by XS_DIRECTORY_PART to resume partway through the list).
 * @truncate: if true, clip the output at XENSTORE_PAYLOAD_MAX rather than
 *         failing, and append a trailing NUL as an end marker; if false,
 *         overflow produces an E2BIG error response instead.
 */
static void xs_append_strings(XenXenstoreState *s, struct xsd_sockmsg *rsp,
                              GList *strings, unsigned int start, bool truncate)
{
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    GList *l;

    for (l = strings; l; l = l->next) {
        size_t len = strlen(l->data) + 1; /* Including the NUL termination */
        char *str = l->data;

        if (rsp->len + len > XENSTORE_PAYLOAD_MAX) {
            if (truncate) {
                /* Clip this string to the space remaining, if any. */
                len = XENSTORE_PAYLOAD_MAX - rsp->len;
                if (!len) {
                    return;
                }
            } else {
                xs_error(s, rsp->req_id, rsp->tx_id, E2BIG);
                return;
            }
        }

        if (start) {
            /* Consume @start by skipping whole strings, then a partial one. */
            if (start >= len) {
                start -= len;
                continue;
            }

            str += start;
            len -= start;
            start = 0;
        }

        memcpy(&rsp_data[rsp->len], str, len);
        rsp->len += len;
    }
    /* XS_DIRECTORY_PART wants an extra NUL to indicate the end */
    if (truncate && rsp->len < XENSTORE_PAYLOAD_MAX) {
        rsp_data[rsp->len++] = '\0';
    }
}
645 
646 static void xs_directory(XenXenstoreState *s, unsigned int req_id,
647                          xs_transaction_t tx_id, uint8_t *req_data,
648                          unsigned int len)
649 {
650     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
651     GList *items = NULL;
652     const char *path;
653     int err;
654 
655     if (len == 0 || req_data[len - 1] != '\0') {
656         xs_error(s, req_id, tx_id, EINVAL);
657         return;
658     }
659 
660     path = (const char *)req_data;
661 
662     trace_xenstore_directory(tx_id, path);
663     err = xs_impl_directory(s->impl, xen_domid, tx_id, path, NULL, &items);
664     if (err != 0) {
665         xs_error(s, req_id, tx_id, err);
666         return;
667     }
668 
669     rsp->type = XS_DIRECTORY;
670     rsp->req_id = req_id;
671     rsp->tx_id = tx_id;
672     rsp->len = 0;
673 
674     xs_append_strings(s, rsp, items, 0, false);
675 
676     g_list_free_full(items, g_free);
677 }
678 
/*
 * XS_DIRECTORY_PART: payload is <path>|<offset>|.  Return the node's
 * generation count followed by the child-name list starting at byte
 * <offset>, truncated to fit XENSTORE_PAYLOAD_MAX.
 */
static void xs_directory_part(XenXenstoreState *s, unsigned int req_id,
                              xs_transaction_t tx_id, uint8_t *req_data,
                              unsigned int len)
{
    const char *offset_str, *path = (const char *)req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    char *rsp_data = (char *)&rsp[1];
    uint64_t gencnt = 0;
    unsigned int offset;
    GList *items = NULL;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    /* Advance past the NUL-terminated path; EINVAL if unterminated. */
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /* The decimal byte offset follows, also NUL-terminated. */
    offset_str = (const char *)req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /* No trailing bytes are permitted after the offset. */
    if (len) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    if (qemu_strtoui(offset_str, NULL, 10, &offset) < 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_directory_part(tx_id, path, offset);
    err = xs_impl_directory(s->impl, xen_domid, tx_id, path, &gencnt, &items);
    if (err != 0) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_DIRECTORY_PART;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    /* Payload starts with "<gencnt>|", then the (truncated) child list. */
    rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%" PRIu64, gencnt) + 1;

    xs_append_strings(s, rsp, items, offset, true);

    g_list_free_full(items, g_free);
}
743 
/*
 * XS_TRANSACTION_START: payload is a single empty string.  Allocate a new
 * transaction and return its ID as an unsigned-decimal string.
 */
static void xs_transaction_start(XenXenstoreState *s, unsigned int req_id,
                                 xs_transaction_t tx_id, uint8_t *req_data,
                                 unsigned int len)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    char *rsp_data = (char *)&rsp[1];
    int err;

    if (len != 1 || req_data[0] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    rsp->type = XS_TRANSACTION_START;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    /* tx_id is updated in place with the newly allocated transaction ID. */
    err = xs_impl_transaction_start(s->impl, xen_domid, &tx_id);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    trace_xenstore_transaction_start(tx_id);

    rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%u", tx_id);
    assert(rsp->len < XENSTORE_PAYLOAD_MAX);
    rsp->len++; /* Include the NUL terminator in the payload length */
}
774 
775 static void xs_transaction_end(XenXenstoreState *s, unsigned int req_id,
776                                xs_transaction_t tx_id, uint8_t *req_data,
777                                unsigned int len)
778 {
779     bool commit;
780     int err;
781 
782     if (len != 2 || req_data[1] != '\0') {
783         xs_error(s, req_id, tx_id, EINVAL);
784         return;
785     }
786 
787     switch (req_data[0]) {
788     case 'T':
789         commit = true;
790         break;
791     case 'F':
792         commit = false;
793         break;
794     default:
795         xs_error(s, req_id, tx_id, EINVAL);
796         return;
797     }
798 
799     trace_xenstore_transaction_end(tx_id, commit);
800     err = xs_impl_transaction_end(s->impl, xen_domid, tx_id, commit);
801     if (err) {
802         xs_error(s, req_id, tx_id, err);
803         return;
804     }
805 
806     xs_ok(s, XS_TRANSACTION_END, req_id, tx_id);
807 }
808 
809 static void xs_rm(XenXenstoreState *s, unsigned int req_id,
810                   xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
811 {
812     const char *path = (const char *)req_data;
813     int err;
814 
815     if (len == 0 || req_data[len - 1] != '\0') {
816         xs_error(s, req_id, tx_id, EINVAL);
817         return;
818     }
819 
820     trace_xenstore_rm(tx_id, path);
821     err = xs_impl_rm(s->impl, xen_domid, tx_id, path);
822     if (err) {
823         xs_error(s, req_id, tx_id, err);
824         return;
825     }
826 
827     xs_ok(s, XS_RM, req_id, tx_id);
828 }
829 
830 static void xs_get_perms(XenXenstoreState *s, unsigned int req_id,
831                          xs_transaction_t tx_id, uint8_t *req_data,
832                          unsigned int len)
833 {
834     const char *path = (const char *)req_data;
835     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
836     GList *perms = NULL;
837     int err;
838 
839     if (len == 0 || req_data[len - 1] != '\0') {
840         xs_error(s, req_id, tx_id, EINVAL);
841         return;
842     }
843 
844     trace_xenstore_get_perms(tx_id, path);
845     err = xs_impl_get_perms(s->impl, xen_domid, tx_id, path, &perms);
846     if (err) {
847         xs_error(s, req_id, tx_id, err);
848         return;
849     }
850 
851     rsp->type = XS_GET_PERMS;
852     rsp->req_id = req_id;
853     rsp->tx_id = tx_id;
854     rsp->len = 0;
855 
856     xs_append_strings(s, rsp, perms, 0, false);
857 
858     g_list_free_full(perms, g_free);
859 }
860 
/*
 * XS_SET_PERMS: payload is <path>|<perm-as-string>|+?.  Parse the path and
 * the NUL-separated permission strings (pointers into the request buffer;
 * nothing is copied) and apply them to the node.
 */
static void xs_set_perms(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    const char *path = (const char *)req_data;
    uint8_t *perm;
    GList *perms = NULL;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    /* Advance past the NUL-terminated path; EINVAL if no NUL is found. */
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /* Collect each NUL-terminated permission string that follows. */
    perm = req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            perms = g_list_append(perms, perm);
            perm = req_data;
        }
    }

    /*
     * Note that there may be trailing garbage at the end of the buffer.
     * This is explicitly permitted by the '?' at the end of the definition:
     *
     *    SET_PERMS         <path>|<perm-as-string>|+?
     */

    trace_xenstore_set_perms(tx_id, path);
    err = xs_impl_set_perms(s->impl, xen_domid, tx_id, path, perms);
    g_list_free(perms);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_SET_PERMS, req_id, tx_id);
}
910 
/*
 * XS_WATCH: payload is <wpath>|<token>|?.  Register fire_watch_cb() to be
 * invoked with <token> when <wpath> (or a descendant) changes.
 */
static void xs_watch(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    const char *token, *path = (const char *)req_data;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    /* Advance past the NUL-terminated watch path; EINVAL if unterminated. */
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /* The NUL-terminated token follows the path. */
    token = (const char *)req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /*
     * Note that there may be trailing garbage at the end of the buffer.
     * This is explicitly permitted by the '?' at the end of the definition:
     *
     *    WATCH             <wpath>|<token>|?
     */

    trace_xenstore_watch(path, token);
    err = xs_impl_watch(s->impl, xen_domid, path, token, fire_watch_cb, s);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_WATCH, req_id, tx_id);
}
960 
961 static void xs_unwatch(XenXenstoreState *s, unsigned int req_id,
962                        xs_transaction_t tx_id, uint8_t *req_data,
963                        unsigned int len)
964 {
965     const char *token, *path = (const char *)req_data;
966     int err;
967 
968     if (len == 0) {
969         xs_error(s, req_id, tx_id, EINVAL);
970         return;
971     }
972 
973     while (len--) {
974         if (*req_data++ == '\0') {
975             break;
976         }
977         if (len == 0) {
978             xs_error(s, req_id, tx_id, EINVAL);
979             return;
980         }
981     }
982 
983     token = (const char *)req_data;
984     while (len--) {
985         if (*req_data++ == '\0') {
986             break;
987         }
988         if (len == 0) {
989             xs_error(s, req_id, tx_id, EINVAL);
990             return;
991         }
992     }
993 
994     trace_xenstore_unwatch(path, token);
995     err = xs_impl_unwatch(s->impl, xen_domid, path, token, fire_watch_cb, s);
996     if (err) {
997         xs_error(s, req_id, tx_id, err);
998         return;
999     }
1000 
1001     xs_ok(s, XS_UNWATCH, req_id, tx_id);
1002 }
1003 
1004 static void xs_reset_watches(XenXenstoreState *s, unsigned int req_id,
1005                              xs_transaction_t tx_id, uint8_t *req_data,
1006                              unsigned int len)
1007 {
1008     if (len == 0 || req_data[len - 1] != '\0') {
1009         xs_error(s, req_id, tx_id, EINVAL);
1010         return;
1011     }
1012 
1013     trace_xenstore_reset_watches();
1014     xs_impl_reset_watches(s->impl, xen_domid);
1015 
1016     xs_ok(s, XS_RESET_WATCHES, req_id, tx_id);
1017 }
1018 
/*
 * Handler for privileged operations (XS_CONTROL, XS_INTRODUCE, etc.)
 * which the guest is not permitted to perform; always fails with EACCES.
 */
static void xs_priv(XenXenstoreState *s, unsigned int req_id,
                    xs_transaction_t tx_id, uint8_t *data,
                    unsigned int len)
{
    xs_error(s, req_id, tx_id, EACCES);
}
1025 
/*
 * Fallback handler for message types with no implementation; always
 * fails with ENOSYS.
 */
static void xs_unimpl(XenXenstoreState *s, unsigned int req_id,
                      xs_transaction_t tx_id, uint8_t *data,
                      unsigned int len)
{
    xs_error(s, req_id, tx_id, ENOSYS);
}
1032 
/* Signature shared by all request handlers dispatched from process_req(). */
typedef void (*xs_impl)(XenXenstoreState *s, unsigned int req_id,
                        xs_transaction_t tx_id, uint8_t *data,
                        unsigned int len);

/* One entry in the request dispatch table, indexed by XS_* message type. */
struct xsd_req {
    const char *name; /* stringified XS_* constant, for debugging */
    xs_impl fn;       /* handler, or NULL for unhandled message types */
};
/* Designated initializer so the table is indexed directly by message type. */
#define XSD_REQ(_type, _fn)                           \
    [_type] = { .name = #_type, .fn = _fn }
1043 
/*
 * Dispatch table for guest xenstore requests, indexed by message type.
 * Types not listed (or with a NULL fn) fall through to xs_unimpl();
 * privileged operations map to xs_priv(), which rejects with EACCES.
 */
struct xsd_req xsd_reqs[] = {
    XSD_REQ(XS_READ, xs_read),
    XSD_REQ(XS_WRITE, xs_write),
    XSD_REQ(XS_MKDIR, xs_mkdir),
    XSD_REQ(XS_DIRECTORY, xs_directory),
    XSD_REQ(XS_DIRECTORY_PART, xs_directory_part),
    XSD_REQ(XS_TRANSACTION_START, xs_transaction_start),
    XSD_REQ(XS_TRANSACTION_END, xs_transaction_end),
    XSD_REQ(XS_RM, xs_rm),
    XSD_REQ(XS_GET_PERMS, xs_get_perms),
    XSD_REQ(XS_SET_PERMS, xs_set_perms),
    XSD_REQ(XS_WATCH, xs_watch),
    XSD_REQ(XS_UNWATCH, xs_unwatch),
    XSD_REQ(XS_CONTROL, xs_priv),
    XSD_REQ(XS_INTRODUCE, xs_priv),
    XSD_REQ(XS_RELEASE, xs_priv),
    XSD_REQ(XS_IS_DOMAIN_INTRODUCED, xs_priv),
    XSD_REQ(XS_RESUME, xs_priv),
    XSD_REQ(XS_SET_TARGET, xs_priv),
    XSD_REQ(XS_RESET_WATCHES, xs_reset_watches),
};
1065 
1066 static void process_req(XenXenstoreState *s)
1067 {
1068     struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
1069     xs_impl handler = NULL;
1070 
1071     assert(req_pending(s));
1072     assert(!s->rsp_pending);
1073 
1074     if (req->type < ARRAY_SIZE(xsd_reqs)) {
1075         handler = xsd_reqs[req->type].fn;
1076     }
1077     if (!handler) {
1078         handler = &xs_unimpl;
1079     }
1080 
1081     handler(s, req->req_id, req->tx_id, (uint8_t *)&req[1], req->len);
1082 
1083     s->rsp_pending = true;
1084     reset_req(s);
1085 }
1086 
/*
 * Copy up to @len bytes of pending request data from the shared ring
 * into @ptr, advancing req_cons accordingly.
 *
 * Returns the number of bytes actually copied, which may be less than
 * @len (including zero) if the ring does not contain that much data.
 * Sets s->fatal_error and stops consuming if the ring indices are
 * inconsistent.
 */
static unsigned int copy_from_ring(XenXenstoreState *s, uint8_t *ptr,
                                   unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->req_prod);
    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->req_cons);
    unsigned int copied = 0;

    /* Ensure the ring contents don't cross the req_prod access. */
    smp_rmb();

    while (len) {
        /* The ring indices are free-running; they are masked on use. */
        unsigned int avail = prod - cons;
        unsigned int offset = MASK_XENSTORE_IDX(cons);
        unsigned int copylen = avail;

        if (avail > XENSTORE_RING_SIZE) {
            /* The guest has corrupted its producer/consumer indices. */
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        if (copylen > len) {
            copylen = len;
        }
        /* Don't read past the end of the ring buffer; wrap around. */
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }

        memcpy(ptr, &s->xs->req[offset], copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        cons += copylen;
    }

    /*
     * Not sure this ever mattered except on Alpha, but this barrier
     * is to ensure that the update to req_cons is globally visible
     * only after we have consumed all the data from the ring, and we
     * don't end up seeing data written to the ring *after* the other
     * end sees the update and writes more to the ring. Xen's own
     * xenstored has the same barrier here (although with no comment
     * at all, obviously, because it's Xen code).
     */
    smp_mb();

    qatomic_set(&s->xs->req_cons, cons);

    return copied;
}
1145 
/*
 * Copy up to @len bytes of response data from @ptr into the shared
 * ring, advancing rsp_prod accordingly.
 *
 * Returns the number of bytes actually written, which may be less than
 * @len (including zero) if the ring lacks free space. Sets
 * s->fatal_error and stops producing if the ring indices are
 * inconsistent.
 */
static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
                                 unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->rsp_cons);
    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->rsp_prod);
    unsigned int copied = 0;

    /*
     * This matches the barrier in copy_to_ring() (or the guest's
     * equivalent) between writing the data to the ring and updating
     * rsp_prod. It protects against the pathological case (which
     * again I think never happened except on Alpha) where our
     * subsequent writes to the ring could *cross* the read of
     * rsp_cons and the guest could see the new data when it was
     * intending to read the old.
     */
    smp_mb();

    while (len) {
        /* Free space in the ring; the indices are free-running. */
        unsigned int avail = cons + XENSTORE_RING_SIZE - prod;
        unsigned int offset = MASK_XENSTORE_IDX(prod);
        unsigned int copylen = len;

        if (avail > XENSTORE_RING_SIZE) {
            /* The guest has corrupted its producer/consumer indices. */
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        if (copylen > avail) {
            copylen = avail;
        }
        /* Don't write past the end of the ring buffer; wrap around. */
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }


        memcpy(&s->xs->rsp[offset], ptr, copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        prod += copylen;
    }

    /* Ensure the ring contents are seen before rsp_prod update. */
    smp_wmb();

    qatomic_set(&s->xs->rsp_prod, prod);

    return copied;
}
1205 
1206 static unsigned int get_req(XenXenstoreState *s)
1207 {
1208     unsigned int copied = 0;
1209 
1210     if (s->fatal_error) {
1211         return 0;
1212     }
1213 
1214     assert(!req_pending(s));
1215 
1216     if (s->req_offset < XENSTORE_HEADER_SIZE) {
1217         void *ptr = s->req_data + s->req_offset;
1218         unsigned int len = XENSTORE_HEADER_SIZE;
1219         unsigned int copylen = copy_from_ring(s, ptr, len);
1220 
1221         copied += copylen;
1222         s->req_offset += copylen;
1223     }
1224 
1225     if (s->req_offset >= XENSTORE_HEADER_SIZE) {
1226         struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
1227 
1228         if (req->len > (uint32_t)XENSTORE_PAYLOAD_MAX) {
1229             error_report("Illegal XenStore request");
1230             s->fatal_error = true;
1231             return 0;
1232         }
1233 
1234         void *ptr = s->req_data + s->req_offset;
1235         unsigned int len = XENSTORE_HEADER_SIZE + req->len - s->req_offset;
1236         unsigned int copylen = copy_from_ring(s, ptr, len);
1237 
1238         copied += copylen;
1239         s->req_offset += copylen;
1240     }
1241 
1242     return copied;
1243 }
1244 
/*
 * Push as much of the pending response (header + payload) from
 * s->rsp_data onto the ring as will fit, tracking progress through
 * s->rsp_offset. Releases the response buffer once the whole message
 * has been produced. Returns the number of bytes written this call,
 * or 0 after a fatal error.
 */
static unsigned int put_rsp(XenXenstoreState *s)
{
    if (s->fatal_error) {
        return 0;
    }

    assert(s->rsp_pending);

    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    assert(s->rsp_offset < XENSTORE_HEADER_SIZE + rsp->len);

    void *ptr = s->rsp_data + s->rsp_offset;
    unsigned int len = XENSTORE_HEADER_SIZE + rsp->len - s->rsp_offset;
    unsigned int copylen = copy_to_ring(s, ptr, len);

    s->rsp_offset += copylen;

    /* Have we produced a complete response? */
    if (s->rsp_offset == XENSTORE_HEADER_SIZE + rsp->len) {
        reset_rsp(s);
    }

    return copylen;
}
1269 
/*
 * Compose an XS_WATCH_EVENT message (<path>|<token>|) directly into the
 * response buffer. The caller must ensure the buffer is free. The event
 * is silently dropped if the token makes the message exceed
 * XENSTORE_PAYLOAD_MAX.
 */
static void deliver_watch(XenXenstoreState *s, const char *path,
                          const char *token)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    unsigned int len;

    assert(!s->rsp_pending);

    trace_xenstore_watch_event(path, token);

    rsp->type = XS_WATCH_EVENT;
    rsp->req_id = 0;
    rsp->tx_id = 0;
    rsp->len = 0;

    len = strlen(path);

    /* XENSTORE_ABS/REL_PATH_MAX should ensure there can be no overflow */
    assert(rsp->len + len < XENSTORE_PAYLOAD_MAX);

    memcpy(&rsp_data[rsp->len], path, len);
    rsp->len += len;
    rsp_data[rsp->len] = '\0';
    rsp->len++;

    len = strlen(token);
    /*
     * It is possible for the guest to have chosen a token that will
     * not fit (along with the path) into a watch event. We have no
     * choice but to drop the event if this is the case.
     */
    if (rsp->len + len >= XENSTORE_PAYLOAD_MAX) {
        return;
    }

    memcpy(&rsp_data[rsp->len], token, len);
    rsp->len += len;
    rsp_data[rsp->len] = '\0';
    rsp->len++;

    s->rsp_pending = true;
}
1313 
/* A queued watch event (path + token) awaiting delivery. */
struct watch_event {
    char *path;
    char *token;
};
1318 
1319 static void free_watch_event(struct watch_event *ev)
1320 {
1321     if (ev) {
1322         g_free(ev->path);
1323         g_free(ev->token);
1324         g_free(ev);
1325     }
1326 }
1327 
1328 static void queue_watch(XenXenstoreState *s, const char *path,
1329                         const char *token)
1330 {
1331     struct watch_event *ev = g_new0(struct watch_event, 1);
1332 
1333     ev->path = g_strdup(path);
1334     ev->token = g_strdup(token);
1335 
1336     s->watch_events = g_list_append(s->watch_events, ev);
1337 }
1338 
/*
 * Callback invoked by the xenstore implementation when a guest watch
 * fires: either delivers the event straight into the response buffer,
 * or queues it for later delivery from xen_xenstore_event() if the
 * buffer is busy.
 */
static void fire_watch_cb(void *opaque, const char *path, const char *token)
{
    XenXenstoreState *s = opaque;

    assert(qemu_mutex_iothread_locked());

    /*
     * If there's a response pending, we obviously can't scribble over
     * it. But if there's a request pending, it has dibs on the buffer
     * too.
     *
     * In the common case of a watch firing due to backend activity
     * when the ring was otherwise idle, we should be able to copy the
     * strings directly into the rsp_data and thence the actual ring,
     * without needing to perform any allocations and queue them.
     */
    if (s->rsp_pending || req_pending(s)) {
        queue_watch(s, path, token);
    } else {
        deliver_watch(s, path, token);
        /*
         * If the message was queued because there was already ring activity,
         * no need to wake the guest. But if not, we need to send the evtchn.
         */
        xen_be_evtchn_notify(s->eh, s->be_port);
    }
}
1366 
1367 static void process_watch_events(XenXenstoreState *s)
1368 {
1369     struct watch_event *ev = s->watch_events->data;
1370 
1371     deliver_watch(s, ev->path, ev->token);
1372 
1373     s->watch_events = g_list_remove(s->watch_events, ev);
1374     free_watch_event(ev);
1375 }
1376 
/*
 * Event channel handler: pump the shared ring. Loops moving response
 * bytes out, request bytes in, and processing queued watch events and
 * complete requests, until no further progress can be made. Notifies
 * the guest's event channel if anything moved on the ring.
 */
static void xen_xenstore_event(void *opaque)
{
    XenXenstoreState *s = opaque;
    evtchn_port_t port = xen_be_evtchn_pending(s->eh);
    unsigned int copied_to, copied_from;
    bool processed, notify = false;

    if (port != s->be_port) {
        return;
    }

    /* We know this is a no-op. */
    xen_be_evtchn_unmask(s->eh, port);

    do {
        copied_to = copied_from = 0;
        processed = false;

        /* Queued watch events take priority over processing new requests. */
        if (!s->rsp_pending && s->watch_events) {
            process_watch_events(s);
        }

        if (s->rsp_pending) {
            copied_to = put_rsp(s);
        }

        if (!req_pending(s)) {
            copied_from = get_req(s);
        }

        if (req_pending(s) && !s->rsp_pending && !s->watch_events) {
            process_req(s);
            processed = true;
        }

        notify |= copied_to || copied_from;
    } while (copied_to || copied_from || processed);

    if (notify) {
        xen_be_evtchn_notify(s->eh, s->be_port);
    }
}
1419 
1420 static void alloc_guest_port(XenXenstoreState *s)
1421 {
1422     struct evtchn_alloc_unbound alloc = {
1423         .dom = DOMID_SELF,
1424         .remote_dom = DOMID_QEMU,
1425     };
1426 
1427     if (!xen_evtchn_alloc_unbound_op(&alloc)) {
1428         s->guest_port = alloc.port;
1429     }
1430 }
1431 
/*
 * (Re-)initialise the guest-facing xenstore: reset ring progress, map
 * the xenstore page into the guest if needed, allocate the guest event
 * channel port, bind our backend end to it, and map the reserved
 * xenstore grant. Returns 0 on success or a negative errno value.
 */
int xen_xenstore_reset(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    int err;

    if (!s) {
        return -ENOTSUP;
    }

    s->req_offset = s->rsp_offset = 0;
    s->rsp_pending = false;

    if (!memory_region_is_mapped(&s->xenstore_page)) {
        uint64_t gpa = XEN_SPECIAL_PFN(XENSTORE) << TARGET_PAGE_BITS;
        xen_overlay_do_map_page(&s->xenstore_page, gpa);
    }

    alloc_guest_port(s);

    /*
     * As qemu/dom0, bind to the guest's port. For incoming migration, this
     * will be unbound as the guest's evtchn table is overwritten. We then
     * rebind to the correct guest port in xen_xenstore_post_load().
     */
    err = xen_be_evtchn_bind_interdomain(s->eh, xen_domid, s->guest_port);
    if (err < 0) {
        return err;
    }
    /* On success, bind_interdomain returns our local (backend) port. */
    s->be_port = err;

    /*
     * We don't actually access the guest's page through the grant, because
     * this isn't real Xen, and we can just use the page we gave it in the
     * first place. Map the grant anyway, mostly for cosmetic purposes so
     * it *looks* like it's in use in the guest-visible grant table.
     */
    s->gt = qemu_xen_gnttab_open();
    uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE;
    s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref,
                                             PROT_READ | PROT_WRITE);

    return 0;
}
1475 
/*
 * Handle used by QEMU's own backend code to access the emulated
 * xenstore (via emu_xenstore_backend_ops below).
 */
struct qemu_xs_handle {
    XenstoreImplState *impl;
    GList *watches;    /* list of struct qemu_xs_watch */
    QEMUBH *watch_bh;  /* bottom half which delivers queued watch events */
};

/* One backend watch registration and its queue of pending events. */
struct qemu_xs_watch {
    struct qemu_xs_handle *h;
    char *path;
    xs_watch_fn fn;
    void *opaque;      /* passed back to fn */
    GList *events;     /* list of struct watch_event awaiting delivery */
};
1489 
/* Return the domain's xenstore home path; the caller frees the string. */
static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int domid)
{
    return g_strdup_printf("/local/domain/%u", domid);
}
1494 
1495 static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t,
1496                               const char *path, unsigned int *num)
1497 {
1498     GList *items = NULL, *l;
1499     unsigned int i = 0;
1500     char **items_ret;
1501     int err;
1502 
1503     err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items);
1504     if (err) {
1505         errno = err;
1506         return NULL;
1507     }
1508 
1509     items_ret = g_new0(char *, g_list_length(items) + 1);
1510     *num = 0;
1511     for (l = items; l; l = l->next) {
1512         items_ret[i++] = l->data;
1513         (*num)++;
1514     }
1515     g_list_free(items);
1516     return items_ret;
1517 }
1518 
/*
 * Read the value at @path as QEMU's own (DOMID_QEMU) backend.
 *
 * On success returns a g_malloc()ed, NUL-terminated copy of the data
 * (the terminator is not counted in *len); the caller frees it. On
 * failure returns NULL with errno set.
 */
static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t,
                        const char *path, unsigned int *len)
{
    GByteArray *data = g_byte_array_new();
    bool free_segment = false;
    int err;

    err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
    if (err) {
        free_segment = true;
        errno = err;
    } else {
        if (len) {
            *len = data->len;
        }
        /* The xen-bus-helper code expects to get NUL terminated string! */
        g_byte_array_append(data, (void *)"", 1);
    }

    /*
     * With free_segment set (the error case) this also frees the byte
     * data and yields NULL; otherwise it hands the segment to the caller.
     */
    return g_byte_array_free(data, free_segment);
}
1540 
1541 static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t,
1542                         const char *path, const void *data, unsigned int len)
1543 {
1544     GByteArray *gdata = g_byte_array_new();
1545     int err;
1546 
1547     g_byte_array_append(gdata, data, len);
1548     err = xs_impl_write(h->impl, DOMID_QEMU, t, path, gdata);
1549     g_byte_array_unref(gdata);
1550     if (err) {
1551         errno = err;
1552         return false;
1553     }
1554     return true;
1555 }
1556 
1557 static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t,
1558                          unsigned int owner, unsigned int domid,
1559                          unsigned int perms, const char *path)
1560 {
1561     g_autoptr(GByteArray) data = g_byte_array_new();
1562     GList *perms_list = NULL;
1563     int err;
1564 
1565     /* mkdir does this */
1566     err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
1567     if (err == ENOENT) {
1568         err = xs_impl_write(h->impl, DOMID_QEMU, t, path, data);
1569     }
1570     if (err) {
1571         errno = err;
1572         return false;
1573     }
1574 
1575     perms_list = g_list_append(perms_list,
1576                                xs_perm_as_string(XS_PERM_NONE, owner));
1577     perms_list = g_list_append(perms_list,
1578                                xs_perm_as_string(perms, domid));
1579 
1580     err = xs_impl_set_perms(h->impl, DOMID_QEMU, t, path, perms_list);
1581     g_list_free_full(perms_list, g_free);
1582     if (err) {
1583         errno = err;
1584         return false;
1585     }
1586     return true;
1587 }
1588 
1589 static bool xs_be_destroy(struct qemu_xs_handle *h, xs_transaction_t t,
1590                           const char *path)
1591 {
1592     int err = xs_impl_rm(h->impl, DOMID_QEMU, t, path);
1593     if (err) {
1594         errno = err;
1595         return false;
1596     }
1597     return true;
1598 }
1599 
1600 static void be_watch_bh(void *_h)
1601 {
1602     struct qemu_xs_handle *h = _h;
1603     GList *l;
1604 
1605     for (l = h->watches; l; l = l->next) {
1606         struct qemu_xs_watch *w = l->data;
1607 
1608         while (w->events) {
1609             struct watch_event *ev = w->events->data;
1610 
1611             w->fn(w->opaque, ev->path);
1612 
1613             w->events = g_list_remove(w->events, ev);
1614             free_watch_event(ev);
1615         }
1616     }
1617 }
1618 
1619 static void xs_be_watch_cb(void *opaque, const char *path, const char *token)
1620 {
1621     struct watch_event *ev = g_new0(struct watch_event, 1);
1622     struct qemu_xs_watch *w = opaque;
1623 
1624     /* We don't care about the token */
1625     ev->path = g_strdup(path);
1626     w->events = g_list_append(w->events, ev);
1627 
1628     qemu_bh_schedule(w->h->watch_bh);
1629 }
1630 
1631 static struct qemu_xs_watch *xs_be_watch(struct qemu_xs_handle *h,
1632                                          const char *path, xs_watch_fn fn,
1633                                          void *opaque)
1634 {
1635     struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1);
1636     int err;
1637 
1638     w->h = h;
1639     w->fn = fn;
1640     w->opaque = opaque;
1641 
1642     err = xs_impl_watch(h->impl, DOMID_QEMU, path, NULL, xs_be_watch_cb, w);
1643     if (err) {
1644         errno = err;
1645         g_free(w);
1646         return NULL;
1647     }
1648 
1649     w->path = g_strdup(path);
1650     h->watches = g_list_append(h->watches, w);
1651     return w;
1652 }
1653 
/*
 * Drop a backend watch: deregister it from the xenstore implementation,
 * unlink it from the handle's list, and free it along with any events
 * still queued for delivery.
 */
static void xs_be_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
{
    /* NOTE(review): xs_impl_unwatch()'s return value is ignored here. */
    xs_impl_unwatch(h->impl, DOMID_QEMU, w->path, NULL, xs_be_watch_cb, w);

    h->watches = g_list_remove(h->watches, w);
    g_list_free_full(w->events, (GDestroyNotify)free_watch_event);
    g_free(w->path);
    g_free(w);
}
1663 
1664 static xs_transaction_t xs_be_transaction_start(struct qemu_xs_handle *h)
1665 {
1666     unsigned int new_tx = XBT_NULL;
1667     int err = xs_impl_transaction_start(h->impl, DOMID_QEMU, &new_tx);
1668     if (err) {
1669         errno = err;
1670         return XBT_NULL;
1671     }
1672     return new_tx;
1673 }
1674 
1675 static bool xs_be_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t,
1676                                   bool abort)
1677 {
1678     int err = xs_impl_transaction_end(h->impl, DOMID_QEMU, t, !abort);
1679     if (err) {
1680         errno = err;
1681         return false;
1682     }
1683     return true;
1684 }
1685 
1686 static struct qemu_xs_handle *xs_be_open(void)
1687 {
1688     XenXenstoreState *s = xen_xenstore_singleton;
1689     struct qemu_xs_handle *h;
1690 
1691     if (!s && !s->impl) {
1692         errno = -ENOSYS;
1693         return NULL;
1694     }
1695 
1696     h = g_new0(struct qemu_xs_handle, 1);
1697     h->impl = s->impl;
1698 
1699     h->watch_bh = aio_bh_new(qemu_get_aio_context(), be_watch_bh, h);
1700 
1701     return h;
1702 }
1703 
1704 static void xs_be_close(struct qemu_xs_handle *h)
1705 {
1706     while (h->watches) {
1707         struct qemu_xs_watch *w = h->watches->data;
1708         xs_be_unwatch(h, w);
1709     }
1710 
1711     qemu_bh_delete(h->watch_bh);
1712     g_free(h);
1713 }
1714 
/*
 * Operations used by QEMU's own Xen backends to talk to the emulated
 * xenstore, mirroring the libxenstore interface.
 */
static struct xenstore_backend_ops emu_xenstore_backend_ops = {
    .open = xs_be_open,
    .close = xs_be_close,
    .get_domain_path = xs_be_get_domain_path,
    .directory = xs_be_directory,
    .read = xs_be_read,
    .write = xs_be_write,
    .create = xs_be_create,
    .destroy = xs_be_destroy,
    .watch = xs_be_watch,
    .unwatch = xs_be_unwatch,
    .transaction_start = xs_be_transaction_start,
    .transaction_end = xs_be_transaction_end,
};
1729