xref: /qemu/hw/i386/kvm/xen_xenstore.c (revision 2abf0da2)
1 /*
2  * QEMU Xen emulation: Shared/overlay pages support
3  *
4  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5  *
6  * Authors: David Woodhouse <dwmw2@infradead.org>
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
9  * See the COPYING file in the top-level directory.
10  */
11 
12 #include "qemu/osdep.h"
13 
14 #include "qemu/host-utils.h"
15 #include "qemu/module.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/cutils.h"
18 #include "qemu/error-report.h"
19 #include "qapi/error.h"
20 #include "qom/object.h"
21 #include "migration/vmstate.h"
22 
23 #include "hw/sysbus.h"
24 #include "hw/xen/xen.h"
25 #include "hw/xen/xen_backend_ops.h"
26 #include "xen_overlay.h"
27 #include "xen_evtchn.h"
28 #include "xen_primary_console.h"
29 #include "xen_xenstore.h"
30 
31 #include "sysemu/kvm.h"
32 #include "sysemu/kvm_xen.h"
33 
34 #include "trace.h"
35 
36 #include "xenstore_impl.h"
37 
38 #include "hw/xen/interface/io/xs_wire.h"
39 #include "hw/xen/interface/event_channel.h"
40 #include "hw/xen/interface/grant_table.h"
41 
42 #define TYPE_XEN_XENSTORE "xen-xenstore"
43 OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
44 
45 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
46 #define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
47 
48 #define XENSTORE_HEADER_SIZE ((unsigned int)sizeof(struct xsd_sockmsg))
49 
50 struct XenXenstoreState {
51     /*< private >*/
52     SysBusDevice busdev;
53     /*< public >*/
54 
55     XenstoreImplState *impl;
56     GList *watch_events; /* for the guest */
57 
58     MemoryRegion xenstore_page;
59     struct xenstore_domain_interface *xs;
60     uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
61     uint8_t rsp_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
62     uint32_t req_offset;
63     uint32_t rsp_offset;
64     bool rsp_pending;
65     bool fatal_error;
66 
67     evtchn_port_t guest_port;
68     evtchn_port_t be_port;
69     struct xenevtchn_handle *eh;
70 
71     uint8_t *impl_state;
72     uint32_t impl_state_size;
73 
74     struct xengntdev_handle *gt;
75     void *granted_xs;
76 };
77 
78 struct XenXenstoreState *xen_xenstore_singleton;
79 
80 static void xen_xenstore_event(void *opaque);
81 static void fire_watch_cb(void *opaque, const char *path, const char *token);
82 
83 static struct xenstore_backend_ops emu_xenstore_backend_ops;
84 
85 static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s,
86                                                 GList *perms,
87                                                 const char *relpath,
88                                                 const char *fmt, ...)
89 {
90     gchar *abspath;
91     gchar *value;
92     va_list args;
93     GByteArray *data;
94     int err;
95 
96     abspath = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath);
97     va_start(args, fmt);
98     value = g_strdup_vprintf(fmt, args);
99     va_end(args);
100 
101     data = g_byte_array_new_take((void *)value, strlen(value));
102 
103     err = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, abspath, data);
104     assert(!err);
105 
106     g_byte_array_unref(data);
107 
108     err = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, abspath, perms);
109     assert(!err);
110 
111     g_free(abspath);
112 }
113 
114 static void xen_xenstore_realize(DeviceState *dev, Error **errp)
115 {
116     XenXenstoreState *s = XEN_XENSTORE(dev);
117     GList *perms;
118 
119     if (xen_mode != XEN_EMULATE) {
120         error_setg(errp, "Xen xenstore support is for Xen emulation");
121         return;
122     }
123     memory_region_init_ram(&s->xenstore_page, OBJECT(dev), "xen:xenstore_page",
124                            XEN_PAGE_SIZE, &error_abort);
125     memory_region_set_enabled(&s->xenstore_page, true);
126     s->xs = memory_region_get_ram_ptr(&s->xenstore_page);
127     memset(s->xs, 0, XEN_PAGE_SIZE);
128 
129     /* We can't map it this early as KVM isn't ready */
130     xen_xenstore_singleton = s;
131 
132     s->eh = xen_be_evtchn_open();
133     if (!s->eh) {
134         error_setg(errp, "Xenstore evtchn port init failed");
135         return;
136     }
137     aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh),
138                        xen_xenstore_event, NULL, NULL, NULL, s);
139 
140     s->impl = xs_impl_create(xen_domid);
141 
142     /* Populate the default nodes */
143 
144     /* Nodes owned by 'dom0' but readable by the guest */
145     perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
146     perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));
147 
148     relpath_printf(s, perms, "", "%s", "");
149 
150     relpath_printf(s, perms, "domid", "%u", xen_domid);
151 
152     relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", "%u", 1);
153     relpath_printf(s, perms, "control/platform-feature-multiprocessor-suspend", "%u", 1);
154 
155     relpath_printf(s, perms, "platform/acpi", "%u", 1);
156     relpath_printf(s, perms, "platform/acpi_s3", "%u", 1);
157     relpath_printf(s, perms, "platform/acpi_s4", "%u", 1);
158     relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0);
159 
160     g_list_free_full(perms, g_free);
161 
162     /* Nodes owned by the guest */
163     perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid));
164 
165     relpath_printf(s, perms, "attr", "%s", "");
166 
167     relpath_printf(s, perms, "control/shutdown", "%s", "");
168     relpath_printf(s, perms, "control/feature-poweroff", "%u", 1);
169     relpath_printf(s, perms, "control/feature-reboot", "%u", 1);
170     relpath_printf(s, perms, "control/feature-suspend", "%u", 1);
171     relpath_printf(s, perms, "control/feature-s3", "%u", 1);
172     relpath_printf(s, perms, "control/feature-s4", "%u", 1);
173 
174     relpath_printf(s, perms, "data", "%s", "");
175     relpath_printf(s, perms, "device", "%s", "");
176     relpath_printf(s, perms, "drivers", "%s", "");
177     relpath_printf(s, perms, "error", "%s", "");
178     relpath_printf(s, perms, "feature", "%s", "");
179 
180     g_list_free_full(perms, g_free);
181 
182     xen_xenstore_ops = &emu_xenstore_backend_ops;
183 }
184 
185 static bool xen_xenstore_is_needed(void *opaque)
186 {
187     return xen_mode == XEN_EMULATE;
188 }
189 
190 static int xen_xenstore_pre_save(void *opaque)
191 {
192     XenXenstoreState *s = opaque;
193     GByteArray *save;
194 
195     if (s->eh) {
196         s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
197     }
198 
199     g_free(s->impl_state);
200     save = xs_impl_serialize(s->impl);
201     s->impl_state = save->data;
202     s->impl_state_size = save->len;
203     g_byte_array_free(save, false);
204 
205     return 0;
206 }
207 
208 static int xen_xenstore_post_load(void *opaque, int ver)
209 {
210     XenXenstoreState *s = opaque;
211     GByteArray *save;
212     int ret;
213 
214     /*
215      * As qemu/dom0, rebind to the guest's port. The Windows drivers may
216      * unbind the XenStore evtchn and rebind to it, having obtained the
217      * "remote" port through EVTCHNOP_status. In the case that migration
218      * occurs while it's unbound, the "remote" port needs to be the same
219      * as before so that the guest can find it, but should remain unbound.
220      */
221     if (s->guest_port) {
222         int be_port = xen_be_evtchn_bind_interdomain(s->eh, xen_domid,
223                                                      s->guest_port);
224         if (be_port < 0) {
225             return be_port;
226         }
227         s->be_port = be_port;
228     }
229 
230     save = g_byte_array_new_take(s->impl_state, s->impl_state_size);
231     s->impl_state = NULL;
232     s->impl_state_size = 0;
233 
234     ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
235     return ret;
236 }
237 
238 static const VMStateDescription xen_xenstore_vmstate = {
239     .name = "xen_xenstore",
240     .unmigratable = 1, /* The PV back ends don't migrate yet */
241     .version_id = 1,
242     .minimum_version_id = 1,
243     .needed = xen_xenstore_is_needed,
244     .pre_save = xen_xenstore_pre_save,
245     .post_load = xen_xenstore_post_load,
246     .fields = (const VMStateField[]) {
247         VMSTATE_UINT8_ARRAY(req_data, XenXenstoreState,
248                             sizeof_field(XenXenstoreState, req_data)),
249         VMSTATE_UINT8_ARRAY(rsp_data, XenXenstoreState,
250                             sizeof_field(XenXenstoreState, rsp_data)),
251         VMSTATE_UINT32(req_offset, XenXenstoreState),
252         VMSTATE_UINT32(rsp_offset, XenXenstoreState),
253         VMSTATE_BOOL(rsp_pending, XenXenstoreState),
254         VMSTATE_UINT32(guest_port, XenXenstoreState),
255         VMSTATE_BOOL(fatal_error, XenXenstoreState),
256         VMSTATE_UINT32(impl_state_size, XenXenstoreState),
257         VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState,
258                                     impl_state_size, 0,
259                                     vmstate_info_uint8, uint8_t),
260         VMSTATE_END_OF_LIST()
261     }
262 };
263 
264 static void xen_xenstore_class_init(ObjectClass *klass, void *data)
265 {
266     DeviceClass *dc = DEVICE_CLASS(klass);
267 
268     dc->realize = xen_xenstore_realize;
269     dc->vmsd = &xen_xenstore_vmstate;
270 }
271 
272 static const TypeInfo xen_xenstore_info = {
273     .name          = TYPE_XEN_XENSTORE,
274     .parent        = TYPE_SYS_BUS_DEVICE,
275     .instance_size = sizeof(XenXenstoreState),
276     .class_init    = xen_xenstore_class_init,
277 };
278 
279 void xen_xenstore_create(void)
280 {
281     DeviceState *dev = sysbus_create_simple(TYPE_XEN_XENSTORE, -1, NULL);
282 
283     xen_xenstore_singleton = XEN_XENSTORE(dev);
284 
285     /*
286      * Defer the init (xen_xenstore_reset()) until KVM is set up and the
287      * overlay page can be mapped.
288      */
289 }
290 
291 static void xen_xenstore_register_types(void)
292 {
293     type_register_static(&xen_xenstore_info);
294 }
295 
296 type_init(xen_xenstore_register_types)
297 
298 uint16_t xen_xenstore_get_port(void)
299 {
300     XenXenstoreState *s = xen_xenstore_singleton;
301     if (!s) {
302         return 0;
303     }
304     return s->guest_port;
305 }
306 
307 static bool req_pending(XenXenstoreState *s)
308 {
309     struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
310 
311     return s->req_offset == XENSTORE_HEADER_SIZE + req->len;
312 }
313 
314 static void reset_req(XenXenstoreState *s)
315 {
316     memset(s->req_data, 0, sizeof(s->req_data));
317     s->req_offset = 0;
318 }
319 
320 static void reset_rsp(XenXenstoreState *s)
321 {
322     s->rsp_pending = false;
323 
324     memset(s->rsp_data, 0, sizeof(s->rsp_data));
325     s->rsp_offset = 0;
326 }
327 
328 static void xs_error(XenXenstoreState *s, unsigned int id,
329                      xs_transaction_t tx_id, int errnum)
330 {
331     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
332     const char *errstr = NULL;
333 
334     for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) {
335         const struct xsd_errors *xsd_error = &xsd_errors[i];
336 
337         if (xsd_error->errnum == errnum) {
338             errstr = xsd_error->errstring;
339             break;
340         }
341     }
342     assert(errstr);
343 
344     trace_xenstore_error(id, tx_id, errstr);
345 
346     rsp->type = XS_ERROR;
347     rsp->req_id = id;
348     rsp->tx_id = tx_id;
349     rsp->len = (uint32_t)strlen(errstr) + 1;
350 
351     memcpy(&rsp[1], errstr, rsp->len);
352 }
353 
354 static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id,
355                   xs_transaction_t tx_id)
356 {
357     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
358     const char *okstr = "OK";
359 
360     rsp->type = type;
361     rsp->req_id = req_id;
362     rsp->tx_id = tx_id;
363     rsp->len = (uint32_t)strlen(okstr) + 1;
364 
365     memcpy(&rsp[1], okstr, rsp->len);
366 }
367 
368 /*
369  * The correct request and response formats are documented in xen.git:
370  * docs/misc/xenstore.txt. A summary is given below for convenience.
371  * The '|' symbol represents a NUL character.
372  *
373  * ---------- Database read, write and permissions operations ----------
374  *
375  * READ                    <path>|                 <value|>
376  * WRITE                   <path>|<value|>
377  *         Store and read the octet string <value> at <path>.
378  *         WRITE creates any missing parent paths, with empty values.
379  *
380  * MKDIR                   <path>|
381  *         Ensures that the <path> exists, if necessary by creating
382  *         it and any missing parents with empty values.  If <path>
383  *         or any parent already exists, its value is left unchanged.
384  *
385  * RM                      <path>|
386  *         Ensures that the <path> does not exist, by deleting
387  *         it and all of its children.  It is not an error if <path> does
388  *         not exist, but it _is_ an error if <path>'s immediate parent
389  *         does not exist either.
390  *
391  * DIRECTORY               <path>|                 <child-leaf-name>|*
392  *         Gives a list of the immediate children of <path>, as only the
393  *         leafnames.  The resulting children are each named
394  *         <path>/<child-leaf-name>.
395  *
396  * DIRECTORY_PART          <path>|<offset>         <gencnt>|<child-leaf-name>|*
397  *         Same as DIRECTORY, but to be used for children lists longer than
398  *         XENSTORE_PAYLOAD_MAX. Input are <path> and the byte offset into
399  *         the list of children to return. Return values are the generation
400  *         count <gencnt> of the node (to be used to ensure the node hasn't
401  *         changed between two reads: <gencnt> being the same for multiple
402  *         reads guarantees the node hasn't changed) and the list of children
403  *         starting at the specified <offset> of the complete list.
404  *
405  * GET_PERMS               <path>|                 <perm-as-string>|+
406  * SET_PERMS               <path>|<perm-as-string>|+?
407  *         <perm-as-string> is one of the following
408  *                 w<domid>        write only
409  *                 r<domid>        read only
410  *                 b<domid>        both read and write
411  *                 n<domid>        no access
412  *         See https://wiki.xen.org/wiki/XenBus section
413  *         `Permissions' for details of the permissions system.
414  *         It is possible to set permissions for the special watch paths
415  *         "@introduceDomain" and "@releaseDomain" to enable receiving those
416  *         watches in unprivileged domains.
417  *
418  * ---------- Watches ----------
419  *
420  * WATCH                   <wpath>|<token>|?
421  *         Adds a watch.
422  *
423  *         When a <path> is modified (including path creation, removal,
424  *         contents change or permissions change) this generates an event
425  *         on the changed <path>.  Changes made in transactions cause an
426  *         event only if and when committed.  Each occurring event is
427  *         matched against all the watches currently set up, and each
428  *         matching watch results in a WATCH_EVENT message (see below).
429  *
430  *         The event's path matches the watch's <wpath> if it is a child
431  *         of <wpath>.
432  *
433  *         <wpath> can be a <path> to watch or @<wspecial>.  In the
434  *         latter case <wspecial> may have any syntax but it matches
435  *         (according to the rules above) only the following special
436  *         events which are invented by xenstored:
437  *             @introduceDomain    occurs on INTRODUCE
438  *             @releaseDomain      occurs on any domain crash or
439  *                                 shutdown, and also on RELEASE
440  *                                 and domain destruction
441  *         <wspecial> events are sent to privileged callers or explicitly
442  *         via SET_PERMS enabled domains only.
443  *
444  *         When a watch is first set up it is triggered once straight
445  *         away, with <path> equal to <wpath>.  Watches may be triggered
446  *         spuriously.  The tx_id in a WATCH request is ignored.
447  *
448  *         Watches are supposed to be restricted by the permissions
449  *         system but in practice the implementation is imperfect.
450  *         Applications should not rely on being sent a notification for
451  *         paths that they cannot read; however, an application may rely
452  *         on being sent a watch when a path which it _is_ able to read
453  *         is deleted even if that leaves only a nonexistent unreadable
454  *         parent.  A notification may be omitted if a node's permissions
455  *         are changed so as to make it unreadable, in which case future
456  *         notifications may be suppressed (and if the node is later made
457  *         readable, some notifications may have been lost).
458  *
459  * WATCH_EVENT                                     <epath>|<token>|
460  *         Unsolicited `reply' generated for matching modification events
461  *         as described above.  req_id and tx_id are both 0.
462  *
463  *         <epath> is the event's path, ie the actual path that was
464  *         modified; however if the event was the recursive removal of a
465  *         parent of <wpath>, <epath> is just
466  *         <wpath> (rather than the actual path which was removed).  So
467  *         <epath> is a child of <wpath>, regardless.
468  *
469  *         Iff <wpath> for the watch was specified as a relative pathname,
470  *         the <epath> path will also be relative (with the same base,
471  *         obviously).
472  *
473  * UNWATCH                 <wpath>|<token>|?
474  *
475  * RESET_WATCHES           |
476  *         Reset all watches and transactions of the caller.
477  *
478  * ---------- Transactions ----------
479  *
480  * TRANSACTION_START       |                       <transid>|
481  *         <transid> is an opaque uint32_t allocated by xenstored
482  *         represented as unsigned decimal.  After this, transaction may
483  *         be referenced by using <transid> (as 32-bit binary) in the
484  *         tx_id request header field.  When transaction is started whole
485  *         db is copied; reads and writes happen on the copy.
486  *         It is not legal to send non-0 tx_id in TRANSACTION_START.
487  *
488  * TRANSACTION_END         T|
489  * TRANSACTION_END         F|
490  *         tx_id must refer to existing transaction.  After this
491  *         request the tx_id is no longer valid and may be reused by
492  *         xenstore.  If F, the transaction is discarded.  If T,
493  *         it is committed: if there were any other intervening writes
494  *         then our END gets EAGAIN.
495  *
496  *         The plan is that in the future only intervening `conflicting'
497  *         writes cause EAGAIN, meaning only writes or other commits
498  *         which changed paths which were read or written in the
499  *         transaction at hand.
500  *
501  */
502 
503 static void xs_read(XenXenstoreState *s, unsigned int req_id,
504                     xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
505 {
506     const char *path = (const char *)req_data;
507     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
508     uint8_t *rsp_data = (uint8_t *)&rsp[1];
509     g_autoptr(GByteArray) data = g_byte_array_new();
510     int err;
511 
512     if (len == 0 || req_data[len - 1] != '\0') {
513         xs_error(s, req_id, tx_id, EINVAL);
514         return;
515     }
516 
517     trace_xenstore_read(tx_id, path);
518     err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
519     if (err) {
520         xs_error(s, req_id, tx_id, err);
521         return;
522     }
523 
524     rsp->type = XS_READ;
525     rsp->req_id = req_id;
526     rsp->tx_id = tx_id;
527     rsp->len = 0;
528 
529     len = data->len;
530     if (len > XENSTORE_PAYLOAD_MAX) {
531         xs_error(s, req_id, tx_id, E2BIG);
532         return;
533     }
534 
535     memcpy(&rsp_data[rsp->len], data->data, len);
536     rsp->len += len;
537 }
538 
539 static void xs_write(XenXenstoreState *s, unsigned int req_id,
540                      xs_transaction_t tx_id, uint8_t *req_data,
541                      unsigned int len)
542 {
543     g_autoptr(GByteArray) data = g_byte_array_new();
544     const char *path;
545     int err;
546 
547     if (len == 0) {
548         xs_error(s, req_id, tx_id, EINVAL);
549         return;
550     }
551 
552     path = (const char *)req_data;
553 
554     while (len--) {
555         if (*req_data++ == '\0') {
556             break;
557         }
558         if (len == 0) {
559             xs_error(s, req_id, tx_id, EINVAL);
560             return;
561         }
562     }
563 
564     g_byte_array_append(data, req_data, len);
565 
566     trace_xenstore_write(tx_id, path);
567     err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
568     if (err) {
569         xs_error(s, req_id, tx_id, err);
570         return;
571     }
572 
573     xs_ok(s, XS_WRITE, req_id, tx_id);
574 }
575 
576 static void xs_mkdir(XenXenstoreState *s, unsigned int req_id,
577                      xs_transaction_t tx_id, uint8_t *req_data,
578                      unsigned int len)
579 {
580     g_autoptr(GByteArray) data = g_byte_array_new();
581     const char *path;
582     int err;
583 
584     if (len == 0 || req_data[len - 1] != '\0') {
585         xs_error(s, req_id, tx_id, EINVAL);
586         return;
587     }
588 
589     path = (const char *)req_data;
590 
591     trace_xenstore_mkdir(tx_id, path);
592     err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
593     if (err == ENOENT) {
594         err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
595     }
596 
597     if (!err) {
598         xs_error(s, req_id, tx_id, err);
599         return;
600     }
601 
602     xs_ok(s, XS_MKDIR, req_id, tx_id);
603 }
604 
605 static void xs_append_strings(XenXenstoreState *s, struct xsd_sockmsg *rsp,
606                               GList *strings, unsigned int start, bool truncate)
607 {
608     uint8_t *rsp_data = (uint8_t *)&rsp[1];
609     GList *l;
610 
611     for (l = strings; l; l = l->next) {
612         size_t len = strlen(l->data) + 1; /* Including the NUL termination */
613         char *str = l->data;
614 
615         if (rsp->len + len > XENSTORE_PAYLOAD_MAX) {
616             if (truncate) {
617                 len = XENSTORE_PAYLOAD_MAX - rsp->len;
618                 if (!len) {
619                     return;
620                 }
621             } else {
622                 xs_error(s, rsp->req_id, rsp->tx_id, E2BIG);
623                 return;
624             }
625         }
626 
627         if (start) {
628             if (start >= len) {
629                 start -= len;
630                 continue;
631             }
632 
633             str += start;
634             len -= start;
635             start = 0;
636         }
637 
638         memcpy(&rsp_data[rsp->len], str, len);
639         rsp->len += len;
640     }
641     /* XS_DIRECTORY_PART wants an extra NUL to indicate the end */
642     if (truncate && rsp->len < XENSTORE_PAYLOAD_MAX) {
643         rsp_data[rsp->len++] = '\0';
644     }
645 }
646 
647 static void xs_directory(XenXenstoreState *s, unsigned int req_id,
648                          xs_transaction_t tx_id, uint8_t *req_data,
649                          unsigned int len)
650 {
651     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
652     GList *items = NULL;
653     const char *path;
654     int err;
655 
656     if (len == 0 || req_data[len - 1] != '\0') {
657         xs_error(s, req_id, tx_id, EINVAL);
658         return;
659     }
660 
661     path = (const char *)req_data;
662 
663     trace_xenstore_directory(tx_id, path);
664     err = xs_impl_directory(s->impl, xen_domid, tx_id, path, NULL, &items);
665     if (err != 0) {
666         xs_error(s, req_id, tx_id, err);
667         return;
668     }
669 
670     rsp->type = XS_DIRECTORY;
671     rsp->req_id = req_id;
672     rsp->tx_id = tx_id;
673     rsp->len = 0;
674 
675     xs_append_strings(s, rsp, items, 0, false);
676 
677     g_list_free_full(items, g_free);
678 }
679 
680 static void xs_directory_part(XenXenstoreState *s, unsigned int req_id,
681                               xs_transaction_t tx_id, uint8_t *req_data,
682                               unsigned int len)
683 {
684     const char *offset_str, *path = (const char *)req_data;
685     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
686     char *rsp_data = (char *)&rsp[1];
687     uint64_t gencnt = 0;
688     unsigned int offset;
689     GList *items = NULL;
690     int err;
691 
692     if (len == 0) {
693         xs_error(s, req_id, tx_id, EINVAL);
694         return;
695     }
696 
697     while (len--) {
698         if (*req_data++ == '\0') {
699             break;
700         }
701         if (len == 0) {
702             xs_error(s, req_id, tx_id, EINVAL);
703             return;
704         }
705     }
706 
707     offset_str = (const char *)req_data;
708     while (len--) {
709         if (*req_data++ == '\0') {
710             break;
711         }
712         if (len == 0) {
713             xs_error(s, req_id, tx_id, EINVAL);
714             return;
715         }
716     }
717 
718     if (len) {
719         xs_error(s, req_id, tx_id, EINVAL);
720         return;
721     }
722 
723     if (qemu_strtoui(offset_str, NULL, 10, &offset) < 0) {
724         xs_error(s, req_id, tx_id, EINVAL);
725         return;
726     }
727 
728     trace_xenstore_directory_part(tx_id, path, offset);
729     err = xs_impl_directory(s->impl, xen_domid, tx_id, path, &gencnt, &items);
730     if (err != 0) {
731         xs_error(s, req_id, tx_id, err);
732         return;
733     }
734 
735     rsp->type = XS_DIRECTORY_PART;
736     rsp->req_id = req_id;
737     rsp->tx_id = tx_id;
738     rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%" PRIu64, gencnt) + 1;
739 
740     xs_append_strings(s, rsp, items, offset, true);
741 
742     g_list_free_full(items, g_free);
743 }
744 
745 static void xs_transaction_start(XenXenstoreState *s, unsigned int req_id,
746                                  xs_transaction_t tx_id, uint8_t *req_data,
747                                  unsigned int len)
748 {
749     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
750     char *rsp_data = (char *)&rsp[1];
751     int err;
752 
753     if (len != 1 || req_data[0] != '\0') {
754         xs_error(s, req_id, tx_id, EINVAL);
755         return;
756     }
757 
758     rsp->type = XS_TRANSACTION_START;
759     rsp->req_id = req_id;
760     rsp->tx_id = tx_id;
761     rsp->len = 0;
762 
763     err = xs_impl_transaction_start(s->impl, xen_domid, &tx_id);
764     if (err) {
765         xs_error(s, req_id, tx_id, err);
766         return;
767     }
768 
769     trace_xenstore_transaction_start(tx_id);
770 
771     rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%u", tx_id);
772     assert(rsp->len < XENSTORE_PAYLOAD_MAX);
773     rsp->len++;
774 }
775 
776 static void xs_transaction_end(XenXenstoreState *s, unsigned int req_id,
777                                xs_transaction_t tx_id, uint8_t *req_data,
778                                unsigned int len)
779 {
780     bool commit;
781     int err;
782 
783     if (len != 2 || req_data[1] != '\0') {
784         xs_error(s, req_id, tx_id, EINVAL);
785         return;
786     }
787 
788     switch (req_data[0]) {
789     case 'T':
790         commit = true;
791         break;
792     case 'F':
793         commit = false;
794         break;
795     default:
796         xs_error(s, req_id, tx_id, EINVAL);
797         return;
798     }
799 
800     trace_xenstore_transaction_end(tx_id, commit);
801     err = xs_impl_transaction_end(s->impl, xen_domid, tx_id, commit);
802     if (err) {
803         xs_error(s, req_id, tx_id, err);
804         return;
805     }
806 
807     xs_ok(s, XS_TRANSACTION_END, req_id, tx_id);
808 }
809 
810 static void xs_rm(XenXenstoreState *s, unsigned int req_id,
811                   xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
812 {
813     const char *path = (const char *)req_data;
814     int err;
815 
816     if (len == 0 || req_data[len - 1] != '\0') {
817         xs_error(s, req_id, tx_id, EINVAL);
818         return;
819     }
820 
821     trace_xenstore_rm(tx_id, path);
822     err = xs_impl_rm(s->impl, xen_domid, tx_id, path);
823     if (err) {
824         xs_error(s, req_id, tx_id, err);
825         return;
826     }
827 
828     xs_ok(s, XS_RM, req_id, tx_id);
829 }
830 
831 static void xs_get_perms(XenXenstoreState *s, unsigned int req_id,
832                          xs_transaction_t tx_id, uint8_t *req_data,
833                          unsigned int len)
834 {
835     const char *path = (const char *)req_data;
836     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
837     GList *perms = NULL;
838     int err;
839 
840     if (len == 0 || req_data[len - 1] != '\0') {
841         xs_error(s, req_id, tx_id, EINVAL);
842         return;
843     }
844 
845     trace_xenstore_get_perms(tx_id, path);
846     err = xs_impl_get_perms(s->impl, xen_domid, tx_id, path, &perms);
847     if (err) {
848         xs_error(s, req_id, tx_id, err);
849         return;
850     }
851 
852     rsp->type = XS_GET_PERMS;
853     rsp->req_id = req_id;
854     rsp->tx_id = tx_id;
855     rsp->len = 0;
856 
857     xs_append_strings(s, rsp, perms, 0, false);
858 
859     g_list_free_full(perms, g_free);
860 }
861 
862 static void xs_set_perms(XenXenstoreState *s, unsigned int req_id,
863                          xs_transaction_t tx_id, uint8_t *req_data,
864                          unsigned int len)
865 {
866     const char *path = (const char *)req_data;
867     uint8_t *perm;
868     GList *perms = NULL;
869     int err;
870 
871     if (len == 0) {
872         xs_error(s, req_id, tx_id, EINVAL);
873         return;
874     }
875 
876     while (len--) {
877         if (*req_data++ == '\0') {
878             break;
879         }
880         if (len == 0) {
881             xs_error(s, req_id, tx_id, EINVAL);
882             return;
883         }
884     }
885 
886     perm = req_data;
887     while (len--) {
888         if (*req_data++ == '\0') {
889             perms = g_list_append(perms, perm);
890             perm = req_data;
891         }
892     }
893 
894     /*
895      * Note that there may be trailing garbage at the end of the buffer.
896      * This is explicitly permitted by the '?' at the end of the definition:
897      *
898      *    SET_PERMS         <path>|<perm-as-string>|+?
899      */
900 
901     trace_xenstore_set_perms(tx_id, path);
902     err = xs_impl_set_perms(s->impl, xen_domid, tx_id, path, perms);
903     g_list_free(perms);
904     if (err) {
905         xs_error(s, req_id, tx_id, err);
906         return;
907     }
908 
909     xs_ok(s, XS_SET_PERMS, req_id, tx_id);
910 }
911 
912 static void xs_watch(XenXenstoreState *s, unsigned int req_id,
913                      xs_transaction_t tx_id, uint8_t *req_data,
914                      unsigned int len)
915 {
916     const char *token, *path = (const char *)req_data;
917     int err;
918 
919     if (len == 0) {
920         xs_error(s, req_id, tx_id, EINVAL);
921         return;
922     }
923 
924     while (len--) {
925         if (*req_data++ == '\0') {
926             break;
927         }
928         if (len == 0) {
929             xs_error(s, req_id, tx_id, EINVAL);
930             return;
931         }
932     }
933 
934     token = (const char *)req_data;
935     while (len--) {
936         if (*req_data++ == '\0') {
937             break;
938         }
939         if (len == 0) {
940             xs_error(s, req_id, tx_id, EINVAL);
941             return;
942         }
943     }
944 
945     /*
946      * Note that there may be trailing garbage at the end of the buffer.
947      * This is explicitly permitted by the '?' at the end of the definition:
948      *
949      *    WATCH             <wpath>|<token>|?
950      */
951 
952     trace_xenstore_watch(path, token);
953     err = xs_impl_watch(s->impl, xen_domid, path, token, fire_watch_cb, s);
954     if (err) {
955         xs_error(s, req_id, tx_id, err);
956         return;
957     }
958 
959     xs_ok(s, XS_WATCH, req_id, tx_id);
960 }
961 
962 static void xs_unwatch(XenXenstoreState *s, unsigned int req_id,
963                        xs_transaction_t tx_id, uint8_t *req_data,
964                        unsigned int len)
965 {
966     const char *token, *path = (const char *)req_data;
967     int err;
968 
969     if (len == 0) {
970         xs_error(s, req_id, tx_id, EINVAL);
971         return;
972     }
973 
974     while (len--) {
975         if (*req_data++ == '\0') {
976             break;
977         }
978         if (len == 0) {
979             xs_error(s, req_id, tx_id, EINVAL);
980             return;
981         }
982     }
983 
984     token = (const char *)req_data;
985     while (len--) {
986         if (*req_data++ == '\0') {
987             break;
988         }
989         if (len == 0) {
990             xs_error(s, req_id, tx_id, EINVAL);
991             return;
992         }
993     }
994 
995     trace_xenstore_unwatch(path, token);
996     err = xs_impl_unwatch(s->impl, xen_domid, path, token, fire_watch_cb, s);
997     if (err) {
998         xs_error(s, req_id, tx_id, err);
999         return;
1000     }
1001 
1002     xs_ok(s, XS_UNWATCH, req_id, tx_id);
1003 }
1004 
1005 static void xs_reset_watches(XenXenstoreState *s, unsigned int req_id,
1006                              xs_transaction_t tx_id, uint8_t *req_data,
1007                              unsigned int len)
1008 {
1009     if (len == 0 || req_data[len - 1] != '\0') {
1010         xs_error(s, req_id, tx_id, EINVAL);
1011         return;
1012     }
1013 
1014     trace_xenstore_reset_watches();
1015     xs_impl_reset_watches(s->impl, xen_domid);
1016 
1017     xs_ok(s, XS_RESET_WATCHES, req_id, tx_id);
1018 }
1019 
1020 static void xs_priv(XenXenstoreState *s, unsigned int req_id,
1021                     xs_transaction_t tx_id, uint8_t *data,
1022                     unsigned int len)
1023 {
1024     xs_error(s, req_id, tx_id, EACCES);
1025 }
1026 
1027 static void xs_unimpl(XenXenstoreState *s, unsigned int req_id,
1028                       xs_transaction_t tx_id, uint8_t *data,
1029                       unsigned int len)
1030 {
1031     xs_error(s, req_id, tx_id, ENOSYS);
1032 }
1033 
/*
 * Signature shared by all XenStore request handlers: the device state,
 * the request ID and transaction ID taken from the message header, and
 * the request payload together with its length in bytes.
 */
typedef void (*xs_impl)(XenXenstoreState *s, unsigned int req_id,
                        xs_transaction_t tx_id, uint8_t *data,
                        unsigned int len);
1037 
/* One entry of the request dispatch table. */
struct xsd_req {
    const char *name;   /* Stringified request type, e.g. "XS_READ" */
    xs_impl fn;         /* Handler invoked for this request type */
};
/*
 * Build a table entry placed at index _type (designated initializer),
 * using the spelling of _type itself as the entry's name.
 */
#define XSD_REQ(_type, _fn)                           \
    [_type] = { .name = #_type, .fn = _fn }
1044 
/*
 * Dispatch table indexed by request type. Indices that have no entry
 * here are zero-initialized, so their .fn is NULL; process_req() treats
 * a NULL handler (or an out-of-range type) as unimplemented.
 */
struct xsd_req xsd_reqs[] = {
    XSD_REQ(XS_READ, xs_read),
    XSD_REQ(XS_WRITE, xs_write),
    XSD_REQ(XS_MKDIR, xs_mkdir),
    XSD_REQ(XS_DIRECTORY, xs_directory),
    XSD_REQ(XS_DIRECTORY_PART, xs_directory_part),
    XSD_REQ(XS_TRANSACTION_START, xs_transaction_start),
    XSD_REQ(XS_TRANSACTION_END, xs_transaction_end),
    XSD_REQ(XS_RM, xs_rm),
    XSD_REQ(XS_GET_PERMS, xs_get_perms),
    XSD_REQ(XS_SET_PERMS, xs_set_perms),
    XSD_REQ(XS_WATCH, xs_watch),
    XSD_REQ(XS_UNWATCH, xs_unwatch),
    /* The following request types are always refused with EACCES. */
    XSD_REQ(XS_CONTROL, xs_priv),
    XSD_REQ(XS_INTRODUCE, xs_priv),
    XSD_REQ(XS_RELEASE, xs_priv),
    XSD_REQ(XS_IS_DOMAIN_INTRODUCED, xs_priv),
    XSD_REQ(XS_RESUME, xs_priv),
    XSD_REQ(XS_SET_TARGET, xs_priv),
    XSD_REQ(XS_RESET_WATCHES, xs_reset_watches),
};
1066 
1067 static void process_req(XenXenstoreState *s)
1068 {
1069     struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
1070     xs_impl handler = NULL;
1071 
1072     assert(req_pending(s));
1073     assert(!s->rsp_pending);
1074 
1075     if (req->type < ARRAY_SIZE(xsd_reqs)) {
1076         handler = xsd_reqs[req->type].fn;
1077     }
1078     if (!handler) {
1079         handler = &xs_unimpl;
1080     }
1081 
1082     handler(s, req->req_id, req->tx_id, (uint8_t *)&req[1], req->len);
1083 
1084     s->rsp_pending = true;
1085     reset_req(s);
1086 }
1087 
/*
 * Copy up to @len bytes of request data out of the shared request ring
 * into @ptr, then publish the advanced consumer index.
 *
 * Returns the number of bytes actually copied, which is less than @len
 * when the ring does not hold enough data. If the producer index is more
 * than a full ring ahead of the consumer index, an error is reported and
 * s->fatal_error is set.
 */
static unsigned int copy_from_ring(XenXenstoreState *s, uint8_t *ptr,
                                   unsigned int len)
{
    if (!len) {
        return 0;
    }

    /* Both indices are sampled once; the loop works on these local copies. */
    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->req_prod);
    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->req_cons);
    unsigned int copied = 0;

    /* Ensure the ring contents don't cross the req_prod access. */
    smp_rmb();

    while (len) {
        unsigned int avail = prod - cons;
        unsigned int offset = MASK_XENSTORE_IDX(cons);
        unsigned int copylen = avail;

        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        /* Limit the chunk to what was asked for... */
        if (copylen > len) {
            copylen = len;
        }
        /* ...and to the contiguous space before the ring wraps. */
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }

        memcpy(ptr, &s->xs->req[offset], copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        cons += copylen;
    }

    /*
     * Not sure this ever mattered except on Alpha, but this barrier
     * is to ensure that the update to req_cons is globally visible
     * only after we have consumed all the data from the ring, and we
     * don't end up seeing data written to the ring *after* the other
     * end sees the update and writes more to the ring. Xen's own
     * xenstored has the same barrier here (although with no comment
     * at all, obviously, because it's Xen code).
     */
    smp_mb();

    qatomic_set(&s->xs->req_cons, cons);

    return copied;
}
1146 
/*
 * Copy up to @len bytes of response data from @ptr into the shared
 * response ring, then publish the advanced producer index.
 *
 * Returns the number of bytes actually copied, which is less than @len
 * when the ring does not have enough free space. If the computed free
 * space exceeds the ring size, an error is reported and s->fatal_error
 * is set.
 */
static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
                                 unsigned int len)
{
    if (!len) {
        return 0;
    }

    /* Both indices are sampled once; the loop works on these local copies. */
    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->rsp_cons);
    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->rsp_prod);
    unsigned int copied = 0;

    /*
     * This matches the barrier in the guest's equivalent of
     * copy_from_ring(), between reading the data from the ring and
     * updating rsp_cons. It protects against the pathological case
     * (which again I think never happened except on Alpha) where our
     * subsequent writes to the ring could *cross* the read of
     * rsp_cons and the guest could see the new data when it was
     * intending to read the old.
     */
    smp_mb();

    while (len) {
        unsigned int avail = cons + XENSTORE_RING_SIZE - prod;
        unsigned int offset = MASK_XENSTORE_IDX(prod);
        unsigned int copylen = len;

        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        /* Limit the chunk to the free space... */
        if (copylen > avail) {
            copylen = avail;
        }
        /* ...and to the contiguous space before the ring wraps. */
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }


        memcpy(&s->xs->rsp[offset], ptr, copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        prod += copylen;
    }

    /* Ensure the ring contents are seen before rsp_prod update. */
    smp_wmb();

    qatomic_set(&s->xs->rsp_prod, prod);

    return copied;
}
1206 
1207 static unsigned int get_req(XenXenstoreState *s)
1208 {
1209     unsigned int copied = 0;
1210 
1211     if (s->fatal_error) {
1212         return 0;
1213     }
1214 
1215     assert(!req_pending(s));
1216 
1217     if (s->req_offset < XENSTORE_HEADER_SIZE) {
1218         void *ptr = s->req_data + s->req_offset;
1219         unsigned int len = XENSTORE_HEADER_SIZE;
1220         unsigned int copylen = copy_from_ring(s, ptr, len);
1221 
1222         copied += copylen;
1223         s->req_offset += copylen;
1224     }
1225 
1226     if (s->req_offset >= XENSTORE_HEADER_SIZE) {
1227         struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
1228 
1229         if (req->len > (uint32_t)XENSTORE_PAYLOAD_MAX) {
1230             error_report("Illegal XenStore request");
1231             s->fatal_error = true;
1232             return 0;
1233         }
1234 
1235         void *ptr = s->req_data + s->req_offset;
1236         unsigned int len = XENSTORE_HEADER_SIZE + req->len - s->req_offset;
1237         unsigned int copylen = copy_from_ring(s, ptr, len);
1238 
1239         copied += copylen;
1240         s->req_offset += copylen;
1241     }
1242 
1243     return copied;
1244 }
1245 
1246 static unsigned int put_rsp(XenXenstoreState *s)
1247 {
1248     if (s->fatal_error) {
1249         return 0;
1250     }
1251 
1252     assert(s->rsp_pending);
1253 
1254     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
1255     assert(s->rsp_offset < XENSTORE_HEADER_SIZE + rsp->len);
1256 
1257     void *ptr = s->rsp_data + s->rsp_offset;
1258     unsigned int len = XENSTORE_HEADER_SIZE + rsp->len - s->rsp_offset;
1259     unsigned int copylen = copy_to_ring(s, ptr, len);
1260 
1261     s->rsp_offset += copylen;
1262 
1263     /* Have we produced a complete response? */
1264     if (s->rsp_offset == XENSTORE_HEADER_SIZE + rsp->len) {
1265         reset_rsp(s);
1266     }
1267 
1268     return copylen;
1269 }
1270 
1271 static void deliver_watch(XenXenstoreState *s, const char *path,
1272                           const char *token)
1273 {
1274     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
1275     uint8_t *rsp_data = (uint8_t *)&rsp[1];
1276     unsigned int len;
1277 
1278     assert(!s->rsp_pending);
1279 
1280     trace_xenstore_watch_event(path, token);
1281 
1282     rsp->type = XS_WATCH_EVENT;
1283     rsp->req_id = 0;
1284     rsp->tx_id = 0;
1285     rsp->len = 0;
1286 
1287     len = strlen(path);
1288 
1289     /* XENSTORE_ABS/REL_PATH_MAX should ensure there can be no overflow */
1290     assert(rsp->len + len < XENSTORE_PAYLOAD_MAX);
1291 
1292     memcpy(&rsp_data[rsp->len], path, len);
1293     rsp->len += len;
1294     rsp_data[rsp->len] = '\0';
1295     rsp->len++;
1296 
1297     len = strlen(token);
1298     /*
1299      * It is possible for the guest to have chosen a token that will
1300      * not fit (along with the patch) into a watch event. We have no
1301      * choice but to drop the event if this is the case.
1302      */
1303     if (rsp->len + len >= XENSTORE_PAYLOAD_MAX) {
1304         return;
1305     }
1306 
1307     memcpy(&rsp_data[rsp->len], token, len);
1308     rsp->len += len;
1309     rsp_data[rsp->len] = '\0';
1310     rsp->len++;
1311 
1312     s->rsp_pending = true;
1313 }
1314 
/*
 * A watch event waiting to be delivered. Both strings are released with
 * g_free() by free_watch_event(); token is left NULL by producers that
 * do not need it.
 */
struct watch_event {
    char *path;     /* Path that triggered the watch */
    char *token;    /* Token associated with the watch, may be NULL */
};
1319 
1320 static void free_watch_event(struct watch_event *ev)
1321 {
1322     if (ev) {
1323         g_free(ev->path);
1324         g_free(ev->token);
1325         g_free(ev);
1326     }
1327 }
1328 
1329 static void queue_watch(XenXenstoreState *s, const char *path,
1330                         const char *token)
1331 {
1332     struct watch_event *ev = g_new0(struct watch_event, 1);
1333 
1334     ev->path = g_strdup(path);
1335     ev->token = g_strdup(token);
1336 
1337     s->watch_events = g_list_append(s->watch_events, ev);
1338 }
1339 
1340 static void fire_watch_cb(void *opaque, const char *path, const char *token)
1341 {
1342     XenXenstoreState *s = opaque;
1343 
1344     assert(bql_locked());
1345 
1346     /*
1347      * If there's a response pending, we obviously can't scribble over
1348      * it. But if there's a request pending, it has dibs on the buffer
1349      * too.
1350      *
1351      * In the common case of a watch firing due to backend activity
1352      * when the ring was otherwise idle, we should be able to copy the
1353      * strings directly into the rsp_data and thence the actual ring,
1354      * without needing to perform any allocations and queue them.
1355      */
1356     if (s->rsp_pending || req_pending(s)) {
1357         queue_watch(s, path, token);
1358     } else {
1359         deliver_watch(s, path, token);
1360         /*
1361          * Attempt to queue the message into the actual ring, and send
1362          * the event channel notification if any bytes are copied.
1363          */
1364         if (s->rsp_pending && put_rsp(s) > 0) {
1365             xen_be_evtchn_notify(s->eh, s->be_port);
1366         }
1367     }
1368 }
1369 
1370 static void process_watch_events(XenXenstoreState *s)
1371 {
1372     struct watch_event *ev = s->watch_events->data;
1373 
1374     deliver_watch(s, ev->path, ev->token);
1375 
1376     s->watch_events = g_list_remove(s->watch_events, ev);
1377     free_watch_event(ev);
1378 }
1379 
/*
 * Event channel handler; @opaque is the XenXenstoreState.
 *
 * Repeatedly moves data between the shared rings and the private
 * request/response buffers, and processes complete requests, until a
 * pass makes no progress. The guest is notified once at the end if any
 * bytes were transferred in either direction.
 */
static void xen_xenstore_event(void *opaque)
{
    XenXenstoreState *s = opaque;
    evtchn_port_t port = xen_be_evtchn_pending(s->eh);
    unsigned int copied_to, copied_from;
    bool processed, notify = false;

    /* Only act on events for our own backend port. */
    if (port != s->be_port) {
        return;
    }

    /* We know this is a no-op. */
    xen_be_evtchn_unmask(s->eh, port);

    do {
        copied_to = copied_from = 0;
        processed = false;

        /* The response buffer is free: fill it with a queued watch event. */
        if (!s->rsp_pending && s->watch_events) {
            process_watch_events(s);
        }

        /* Push out whatever part of the pending response fits. */
        if (s->rsp_pending) {
            copied_to = put_rsp(s);
        }

        /* Pull in more of the next request unless one is already complete. */
        if (!req_pending(s)) {
            copied_from = get_req(s);
        }

        /*
         * A complete request is handled only once the response buffer is
         * free and no queued watch events are waiting for it.
         */
        if (req_pending(s) && !s->rsp_pending && !s->watch_events) {
            process_req(s);
            processed = true;
        }

        notify |= copied_to || copied_from;
    } while (copied_to || copied_from || processed);

    if (notify) {
        xen_be_evtchn_notify(s->eh, s->be_port);
    }
}
1422 
1423 static void alloc_guest_port(XenXenstoreState *s)
1424 {
1425     struct evtchn_alloc_unbound alloc = {
1426         .dom = DOMID_SELF,
1427         .remote_dom = DOMID_QEMU,
1428     };
1429 
1430     if (!xen_evtchn_alloc_unbound_op(&alloc)) {
1431         s->guest_port = alloc.port;
1432     }
1433 }
1434 
1435 int xen_xenstore_reset(void)
1436 {
1437     XenXenstoreState *s = xen_xenstore_singleton;
1438     int console_port;
1439     GList *perms;
1440     int err;
1441 
1442     if (!s) {
1443         return -ENOTSUP;
1444     }
1445 
1446     s->req_offset = s->rsp_offset = 0;
1447     s->rsp_pending = false;
1448 
1449     if (!memory_region_is_mapped(&s->xenstore_page)) {
1450         uint64_t gpa = XEN_SPECIAL_PFN(XENSTORE) << TARGET_PAGE_BITS;
1451         xen_overlay_do_map_page(&s->xenstore_page, gpa);
1452     }
1453 
1454     alloc_guest_port(s);
1455 
1456     /*
1457      * As qemu/dom0, bind to the guest's port. For incoming migration, this
1458      * will be unbound as the guest's evtchn table is overwritten. We then
1459      * rebind to the correct guest port in xen_xenstore_post_load().
1460      */
1461     err = xen_be_evtchn_bind_interdomain(s->eh, xen_domid, s->guest_port);
1462     if (err < 0) {
1463         return err;
1464     }
1465     s->be_port = err;
1466 
1467     /* Create frontend store nodes */
1468     perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
1469     perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));
1470 
1471     relpath_printf(s, perms, "store/port", "%u", s->guest_port);
1472     relpath_printf(s, perms, "store/ring-ref", "%lu",
1473                    XEN_SPECIAL_PFN(XENSTORE));
1474 
1475     console_port = xen_primary_console_get_port();
1476     if (console_port) {
1477         relpath_printf(s, perms, "console/ring-ref", "%lu",
1478                        XEN_SPECIAL_PFN(CONSOLE));
1479         relpath_printf(s, perms, "console/port", "%u", console_port);
1480         relpath_printf(s, perms, "console/state", "%u", XenbusStateInitialised);
1481     }
1482 
1483     g_list_free_full(perms, g_free);
1484 
1485     /*
1486      * We don't actually access the guest's page through the grant, because
1487      * this isn't real Xen, and we can just use the page we gave it in the
1488      * first place. Map the grant anyway, mostly for cosmetic purposes so
1489      * it *looks* like it's in use in the guest-visible grant table.
1490      */
1491     s->gt = qemu_xen_gnttab_open();
1492     uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE;
1493     s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref,
1494                                              PROT_READ | PROT_WRITE);
1495 
1496     return 0;
1497 }
1498 
/* Per-client handle returned by the backend ops' open(). */
struct qemu_xs_handle {
    XenstoreImplState *impl;    /* Store implementation, borrowed from the device */
    GList *watches;             /* List of struct qemu_xs_watch owned by this handle */
    QEMUBH *watch_bh;           /* Bottom half that runs be_watch_bh() */
};
1504 
/* One watch registered through a struct qemu_xs_handle. */
struct qemu_xs_watch {
    struct qemu_xs_handle *h;   /* Handle the watch belongs to */
    char *path;                 /* Copy of the watched path, freed on unwatch */
    xs_watch_fn fn;             /* Callback invoked with (opaque, path) per event */
    void *opaque;               /* Caller context passed to fn */
    GList *events;              /* Queued struct watch_event awaiting the bottom half */
};
1512 
/*
 * Return a newly allocated string "/local/domain/<domid>".
 * The handle is not used.
 */
static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int domid)
{
    char *dom_path = g_strdup_printf("/local/domain/%u", domid);

    return dom_path;
}
1517 
1518 static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t,
1519                               const char *path, unsigned int *num)
1520 {
1521     GList *items = NULL, *l;
1522     unsigned int i = 0;
1523     char **items_ret;
1524     int err;
1525 
1526     err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items);
1527     if (err) {
1528         errno = err;
1529         return NULL;
1530     }
1531 
1532     items_ret = g_new0(char *, g_list_length(items) + 1);
1533     *num = 0;
1534     for (l = items; l; l = l->next) {
1535         items_ret[i++] = l->data;
1536         (*num)++;
1537     }
1538     g_list_free(items);
1539     return items_ret;
1540 }
1541 
1542 static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t,
1543                         const char *path, unsigned int *len)
1544 {
1545     GByteArray *data = g_byte_array_new();
1546     bool free_segment = false;
1547     int err;
1548 
1549     err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
1550     if (err) {
1551         free_segment = true;
1552         errno = err;
1553     } else {
1554         if (len) {
1555             *len = data->len;
1556         }
1557         /* The xen-bus-helper code expects to get NUL terminated string! */
1558         g_byte_array_append(data, (void *)"", 1);
1559     }
1560 
1561     return g_byte_array_free(data, free_segment);
1562 }
1563 
1564 static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t,
1565                         const char *path, const void *data, unsigned int len)
1566 {
1567     GByteArray *gdata = g_byte_array_new();
1568     int err;
1569 
1570     g_byte_array_append(gdata, data, len);
1571     err = xs_impl_write(h->impl, DOMID_QEMU, t, path, gdata);
1572     g_byte_array_unref(gdata);
1573     if (err) {
1574         errno = err;
1575         return false;
1576     }
1577     return true;
1578 }
1579 
1580 static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t,
1581                          unsigned int owner, unsigned int domid,
1582                          unsigned int perms, const char *path)
1583 {
1584     g_autoptr(GByteArray) data = g_byte_array_new();
1585     GList *perms_list = NULL;
1586     int err;
1587 
1588     /* mkdir does this */
1589     err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
1590     if (err == ENOENT) {
1591         err = xs_impl_write(h->impl, DOMID_QEMU, t, path, data);
1592     }
1593     if (err) {
1594         errno = err;
1595         return false;
1596     }
1597 
1598     perms_list = g_list_append(perms_list,
1599                                xs_perm_as_string(XS_PERM_NONE, owner));
1600     perms_list = g_list_append(perms_list,
1601                                xs_perm_as_string(perms, domid));
1602 
1603     err = xs_impl_set_perms(h->impl, DOMID_QEMU, t, path, perms_list);
1604     g_list_free_full(perms_list, g_free);
1605     if (err) {
1606         errno = err;
1607         return false;
1608     }
1609     return true;
1610 }
1611 
1612 static bool xs_be_destroy(struct qemu_xs_handle *h, xs_transaction_t t,
1613                           const char *path)
1614 {
1615     int err = xs_impl_rm(h->impl, DOMID_QEMU, t, path);
1616     if (err) {
1617         errno = err;
1618         return false;
1619     }
1620     return true;
1621 }
1622 
1623 static void be_watch_bh(void *_h)
1624 {
1625     struct qemu_xs_handle *h = _h;
1626     GList *l;
1627 
1628     for (l = h->watches; l; l = l->next) {
1629         struct qemu_xs_watch *w = l->data;
1630 
1631         while (w->events) {
1632             struct watch_event *ev = w->events->data;
1633 
1634             w->fn(w->opaque, ev->path);
1635 
1636             w->events = g_list_remove(w->events, ev);
1637             free_watch_event(ev);
1638         }
1639     }
1640 }
1641 
1642 static void xs_be_watch_cb(void *opaque, const char *path, const char *token)
1643 {
1644     struct watch_event *ev = g_new0(struct watch_event, 1);
1645     struct qemu_xs_watch *w = opaque;
1646 
1647     /* We don't care about the token */
1648     ev->path = g_strdup(path);
1649     w->events = g_list_append(w->events, ev);
1650 
1651     qemu_bh_schedule(w->h->watch_bh);
1652 }
1653 
1654 static struct qemu_xs_watch *xs_be_watch(struct qemu_xs_handle *h,
1655                                          const char *path, xs_watch_fn fn,
1656                                          void *opaque)
1657 {
1658     struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1);
1659     int err;
1660 
1661     w->h = h;
1662     w->fn = fn;
1663     w->opaque = opaque;
1664 
1665     err = xs_impl_watch(h->impl, DOMID_QEMU, path, NULL, xs_be_watch_cb, w);
1666     if (err) {
1667         errno = err;
1668         g_free(w);
1669         return NULL;
1670     }
1671 
1672     w->path = g_strdup(path);
1673     h->watches = g_list_append(h->watches, w);
1674     return w;
1675 }
1676 
1677 static void xs_be_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
1678 {
1679     xs_impl_unwatch(h->impl, DOMID_QEMU, w->path, NULL, xs_be_watch_cb, w);
1680 
1681     h->watches = g_list_remove(h->watches, w);
1682     g_list_free_full(w->events, (GDestroyNotify)free_watch_event);
1683     g_free(w->path);
1684     g_free(w);
1685 }
1686 
1687 static xs_transaction_t xs_be_transaction_start(struct qemu_xs_handle *h)
1688 {
1689     unsigned int new_tx = XBT_NULL;
1690     int err = xs_impl_transaction_start(h->impl, DOMID_QEMU, &new_tx);
1691     if (err) {
1692         errno = err;
1693         return XBT_NULL;
1694     }
1695     return new_tx;
1696 }
1697 
1698 static bool xs_be_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t,
1699                                   bool abort)
1700 {
1701     int err = xs_impl_transaction_end(h->impl, DOMID_QEMU, t, !abort);
1702     if (err) {
1703         errno = err;
1704         return false;
1705     }
1706     return true;
1707 }
1708 
1709 static struct qemu_xs_handle *xs_be_open(void)
1710 {
1711     XenXenstoreState *s = xen_xenstore_singleton;
1712     struct qemu_xs_handle *h;
1713 
1714     if (!s || !s->impl) {
1715         errno = -ENOSYS;
1716         return NULL;
1717     }
1718 
1719     h = g_new0(struct qemu_xs_handle, 1);
1720     h->impl = s->impl;
1721 
1722     h->watch_bh = aio_bh_new(qemu_get_aio_context(), be_watch_bh, h);
1723 
1724     return h;
1725 }
1726 
1727 static void xs_be_close(struct qemu_xs_handle *h)
1728 {
1729     while (h->watches) {
1730         struct qemu_xs_watch *w = h->watches->data;
1731         xs_be_unwatch(h, w);
1732     }
1733 
1734     qemu_bh_delete(h->watch_bh);
1735     g_free(h);
1736 }
1737 
/*
 * XenStore backend operations implemented on top of the emulated store;
 * each member points at the corresponding xs_be_* function in this file.
 */
static struct xenstore_backend_ops emu_xenstore_backend_ops = {
    .open = xs_be_open,
    .close = xs_be_close,
    .get_domain_path = xs_be_get_domain_path,
    .directory = xs_be_directory,
    .read = xs_be_read,
    .write = xs_be_write,
    .create = xs_be_create,
    .destroy = xs_be_destroy,
    .watch = xs_be_watch,
    .unwatch = xs_be_unwatch,
    .transaction_start = xs_be_transaction_start,
    .transaction_end = xs_be_transaction_end,
};
1752