xref: /qemu/hw/i386/kvm/xen_xenstore.c (revision 03247512)
1 /*
2  * QEMU Xen emulation: Shared/overlay pages support
3  *
4  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5  *
6  * Authors: David Woodhouse <dwmw2@infradead.org>
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
9  * See the COPYING file in the top-level directory.
10  */
11 
12 #include "qemu/osdep.h"
13 
14 #include "qemu/host-utils.h"
15 #include "qemu/module.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/cutils.h"
18 #include "qapi/error.h"
19 #include "qom/object.h"
20 #include "migration/vmstate.h"
21 
22 #include "hw/sysbus.h"
23 #include "hw/xen/xen.h"
24 #include "xen_overlay.h"
25 #include "xen_evtchn.h"
26 #include "xen_xenstore.h"
27 
28 #include "sysemu/kvm.h"
29 #include "sysemu/kvm_xen.h"
30 
31 #include "trace.h"
32 
33 #include "xenstore_impl.h"
34 
35 #include "hw/xen/interface/io/xs_wire.h"
36 #include "hw/xen/interface/event_channel.h"
37 
38 #define TYPE_XEN_XENSTORE "xen-xenstore"
39 OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
40 
41 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
42 #define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
43 
44 #define XENSTORE_HEADER_SIZE ((unsigned int)sizeof(struct xsd_sockmsg))
45 
46 struct XenXenstoreState {
47     /*< private >*/
48     SysBusDevice busdev;
49     /*< public >*/
50 
51     XenstoreImplState *impl;
52     GList *watch_events; /* for the guest */
53 
54     MemoryRegion xenstore_page;
55     struct xenstore_domain_interface *xs;
56     uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
57     uint8_t rsp_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
58     uint32_t req_offset;
59     uint32_t rsp_offset;
60     bool rsp_pending;
61     bool fatal_error;
62 
63     evtchn_port_t guest_port;
64     evtchn_port_t be_port;
65     struct xenevtchn_handle *eh;
66 
67     uint8_t *impl_state;
68     uint32_t impl_state_size;
69 };
70 
71 struct XenXenstoreState *xen_xenstore_singleton;
72 
73 static void xen_xenstore_event(void *opaque);
74 static void fire_watch_cb(void *opaque, const char *path, const char *token);
75 
76 static struct xenstore_backend_ops emu_xenstore_backend_ops;
77 
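/*
 * Helper to populate a node under this guest's /local/domain/<domid>/
 * subtree and set its permissions. For example (illustrative values),
 * relpath_printf(s, perms, "domid", "%u", xen_domid) writes the node
 * /local/domain/<xen_domid>/domid.
 */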
78 static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s,
79                                                 GList *perms,
80                                                 const char *relpath,
81                                                 const char *fmt, ...)
82 {
83     gchar *abspath;
84     gchar *value;
85     va_list args;
86     GByteArray *data;
87     int err;
88 
89     abspath = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath);
90     va_start(args, fmt);
91     value = g_strdup_vprintf(fmt, args);
92     va_end(args);
93 
94     data = g_byte_array_new_take((void *)value, strlen(value));
95 
96     err = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, abspath, data);
97     assert(!err);
98 
99     g_byte_array_unref(data);
100 
101     err = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, abspath, perms);
102     assert(!err);
103 
104     g_free(abspath);
105 }
106 
107 static void xen_xenstore_realize(DeviceState *dev, Error **errp)
108 {
109     XenXenstoreState *s = XEN_XENSTORE(dev);
110     GList *perms;
111 
112     if (xen_mode != XEN_EMULATE) {
113         error_setg(errp, "Xen xenstore support is for Xen emulation");
114         return;
115     }
116     memory_region_init_ram(&s->xenstore_page, OBJECT(dev), "xen:xenstore_page",
117                            XEN_PAGE_SIZE, &error_abort);
118     memory_region_set_enabled(&s->xenstore_page, true);
119     s->xs = memory_region_get_ram_ptr(&s->xenstore_page);
120     memset(s->xs, 0, XEN_PAGE_SIZE);
121 
122     /* We can't map it this early as KVM isn't ready */
123     xen_xenstore_singleton = s;
124 
125     s->eh = xen_be_evtchn_open();
126     if (!s->eh) {
127         error_setg(errp, "Xenstore evtchn port init failed");
128         return;
129     }
130     aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
131                        xen_xenstore_event, NULL, NULL, NULL, s);
132 
133     s->impl = xs_impl_create(xen_domid);
134 
135     /* Populate the default nodes */
136 
137     /* Nodes owned by 'dom0' but readable by the guest */
138     perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
139     perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));
140 
141     relpath_printf(s, perms, "", "%s", "");
142 
143     relpath_printf(s, perms, "domid", "%u", xen_domid);
144 
145     relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", "%u", 1);
146     relpath_printf(s, perms, "control/platform-feature-multiprocessor-suspend", "%u", 1);
147 
148     relpath_printf(s, perms, "platform/acpi", "%u", 1);
149     relpath_printf(s, perms, "platform/acpi_s3", "%u", 1);
150     relpath_printf(s, perms, "platform/acpi_s4", "%u", 1);
151     relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0);
152 
153     g_list_free_full(perms, g_free);
154 
155     /* Nodes owned by the guest */
156     perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid));
157 
158     relpath_printf(s, perms, "attr", "%s", "");
159 
160     relpath_printf(s, perms, "control/shutdown", "%s", "");
161     relpath_printf(s, perms, "control/feature-poweroff", "%u", 1);
162     relpath_printf(s, perms, "control/feature-reboot", "%u", 1);
163     relpath_printf(s, perms, "control/feature-suspend", "%u", 1);
164     relpath_printf(s, perms, "control/feature-s3", "%u", 1);
165     relpath_printf(s, perms, "control/feature-s4", "%u", 1);
166 
167     relpath_printf(s, perms, "data", "%s", "");
168     relpath_printf(s, perms, "device", "%s", "");
169     relpath_printf(s, perms, "drivers", "%s", "");
170     relpath_printf(s, perms, "error", "%s", "");
171     relpath_printf(s, perms, "feature", "%s", "");
172 
173     g_list_free_full(perms, g_free);
174 
175     xen_xenstore_ops = &emu_xenstore_backend_ops;
176 }
177 
178 static bool xen_xenstore_is_needed(void *opaque)
179 {
180     return xen_mode == XEN_EMULATE;
181 }
182 
183 static int xen_xenstore_pre_save(void *opaque)
184 {
185     XenXenstoreState *s = opaque;
186     GByteArray *save;
187 
188     if (s->eh) {
189         s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
190     }
191 
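    /*
     * Snapshot the xenstore tree into a flat buffer; it travels as the
     * impl_state/impl_state_size fields and is consumed by post_load.
     */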
192     g_free(s->impl_state);
193     save = xs_impl_serialize(s->impl);
194     s->impl_state = save->data;
195     s->impl_state_size = save->len;
196     g_byte_array_free(save, false);
197 
198     return 0;
199 }
200 
201 static int xen_xenstore_post_load(void *opaque, int ver)
202 {
203     XenXenstoreState *s = opaque;
204     GByteArray *save;
205     int ret;
206 
207     /*
208      * As qemu/dom0, rebind to the guest's port. The Windows drivers may
209      * unbind the XenStore evtchn and rebind to it, having obtained the
210      * "remote" port through EVTCHNOP_status. In the case that migration
211      * occurs while it's unbound, the "remote" port needs to be the same
212      * as before so that the guest can find it, but should remain unbound.
213      */
214     if (s->guest_port) {
215         int be_port = xen_be_evtchn_bind_interdomain(s->eh, xen_domid,
216                                                      s->guest_port);
217         if (be_port < 0) {
218             return be_port;
219         }
220         s->be_port = be_port;
221     }
222 
223     save = g_byte_array_new_take(s->impl_state, s->impl_state_size);
224     s->impl_state = NULL;
225     s->impl_state_size = 0;
226 
227     ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
228     return ret;
229 }
230 
231 static const VMStateDescription xen_xenstore_vmstate = {
232     .name = "xen_xenstore",
233     .unmigratable = 1, /* The PV back ends don't migrate yet */
234     .version_id = 1,
235     .minimum_version_id = 1,
236     .needed = xen_xenstore_is_needed,
237     .pre_save = xen_xenstore_pre_save,
238     .post_load = xen_xenstore_post_load,
239     .fields = (VMStateField[]) {
240         VMSTATE_UINT8_ARRAY(req_data, XenXenstoreState,
241                             sizeof_field(XenXenstoreState, req_data)),
242         VMSTATE_UINT8_ARRAY(rsp_data, XenXenstoreState,
243                             sizeof_field(XenXenstoreState, rsp_data)),
244         VMSTATE_UINT32(req_offset, XenXenstoreState),
245         VMSTATE_UINT32(rsp_offset, XenXenstoreState),
246         VMSTATE_BOOL(rsp_pending, XenXenstoreState),
247         VMSTATE_UINT32(guest_port, XenXenstoreState),
248         VMSTATE_BOOL(fatal_error, XenXenstoreState),
249         VMSTATE_UINT32(impl_state_size, XenXenstoreState),
250         VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState,
251                                     impl_state_size, 0,
252                                     vmstate_info_uint8, uint8_t),
253         VMSTATE_END_OF_LIST()
254     }
255 };
256 
257 static void xen_xenstore_class_init(ObjectClass *klass, void *data)
258 {
259     DeviceClass *dc = DEVICE_CLASS(klass);
260 
261     dc->realize = xen_xenstore_realize;
262     dc->vmsd = &xen_xenstore_vmstate;
263 }
264 
265 static const TypeInfo xen_xenstore_info = {
266     .name          = TYPE_XEN_XENSTORE,
267     .parent        = TYPE_SYS_BUS_DEVICE,
268     .instance_size = sizeof(XenXenstoreState),
269     .class_init    = xen_xenstore_class_init,
270 };
271 
272 void xen_xenstore_create(void)
273 {
274     DeviceState *dev = sysbus_create_simple(TYPE_XEN_XENSTORE, -1, NULL);
275 
276     xen_xenstore_singleton = XEN_XENSTORE(dev);
277 
278     /*
279      * Defer the init (xen_xenstore_reset()) until KVM is set up and the
280      * overlay page can be mapped.
281      */
282 }
283 
284 static void xen_xenstore_register_types(void)
285 {
286     type_register_static(&xen_xenstore_info);
287 }
288 
289 type_init(xen_xenstore_register_types)
290 
291 uint16_t xen_xenstore_get_port(void)
292 {
293     XenXenstoreState *s = xen_xenstore_singleton;
294     if (!s) {
295         return 0;
296     }
297     return s->guest_port;
298 }
299 
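/*
 * A request is complete once req_data holds the full xsd_sockmsg header
 * plus the req->len payload bytes the header advertises.
 */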
300 static bool req_pending(XenXenstoreState *s)
301 {
302     struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
303 
304     return s->req_offset == XENSTORE_HEADER_SIZE + req->len;
305 }
306 
307 static void reset_req(XenXenstoreState *s)
308 {
309     memset(s->req_data, 0, sizeof(s->req_data));
310     s->req_offset = 0;
311 }
312 
313 static void reset_rsp(XenXenstoreState *s)
314 {
315     s->rsp_pending = false;
316 
317     memset(s->rsp_data, 0, sizeof(s->rsp_data));
318     s->rsp_offset = 0;
319 }
320 
321 static void xs_error(XenXenstoreState *s, unsigned int id,
322                      xs_transaction_t tx_id, int errnum)
323 {
324     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
325     const char *errstr = NULL;
326 
327     for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) {
328         struct xsd_errors *xsd_error = &xsd_errors[i];
329 
330         if (xsd_error->errnum == errnum) {
331             errstr = xsd_error->errstring;
332             break;
333         }
334     }
335     assert(errstr);
336 
337     trace_xenstore_error(id, tx_id, errstr);
338 
339     rsp->type = XS_ERROR;
340     rsp->req_id = id;
341     rsp->tx_id = tx_id;
342     rsp->len = (uint32_t)strlen(errstr) + 1;
343 
344     memcpy(&rsp[1], errstr, rsp->len);
345 }
346 
347 static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id,
348                   xs_transaction_t tx_id)
349 {
350     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
351     const char *okstr = "OK";
352 
353     rsp->type = type;
354     rsp->req_id = req_id;
355     rsp->tx_id = tx_id;
356     rsp->len = (uint32_t)strlen(okstr) + 1;
357 
358     memcpy(&rsp[1], okstr, rsp->len);
359 }
360 
361 /*
362  * The correct request and response formats are documented in xen.git:
363  * docs/misc/xenstore.txt. A summary is given below for convenience.
364  * The '|' symbol represents a NUL character.
365  *
366  * ---------- Database read, write and permissions operations ----------
367  *
368  * READ                    <path>|                 <value|>
369  * WRITE                   <path>|<value|>
370  *         Store and read the octet string <value> at <path>.
371  *         WRITE creates any missing parent paths, with empty values.
372  *
373  * MKDIR                   <path>|
374  *         Ensures that the <path> exists, if necessary by creating
375  *         it and any missing parents with empty values.  If <path>
376  *         or any parent already exists, its value is left unchanged.
377  *
378  * RM                      <path>|
379  *         Ensures that the <path> does not exist, by deleting
380  *         it and all of its children.  It is not an error if <path> does
381  *         not exist, but it _is_ an error if <path>'s immediate parent
382  *         does not exist either.
383  *
384  * DIRECTORY               <path>|                 <child-leaf-name>|*
385  *         Gives a list of the immediate children of <path>, as only the
386  *         leafnames.  The resulting children are each named
387  *         <path>/<child-leaf-name>.
388  *
389  * DIRECTORY_PART          <path>|<offset>         <gencnt>|<child-leaf-name>|*
390  *         Same as DIRECTORY, but to be used for children lists longer than
391  *         XENSTORE_PAYLOAD_MAX. Inputs are <path> and the byte offset into
392  *         the list of children to return. Return values are the generation
393  *         count <gencnt> of the node (to be used to ensure the node hasn't
394  *         changed between two reads: <gencnt> being the same for multiple
395  *         reads guarantees the node hasn't changed) and the list of children
396  *         starting at the specified <offset> of the complete list.
397  *
398  * GET_PERMS               <path>|                 <perm-as-string>|+
399  * SET_PERMS               <path>|<perm-as-string>|+?
400  *         <perm-as-string> is one of the following
401  *                 w<domid>        write only
402  *                 r<domid>        read only
403  *                 b<domid>        both read and write
404  *                 n<domid>        no access
405  *         See https://wiki.xen.org/wiki/XenBus section
406  *         `Permissions' for details of the permissions system.
407  *         It is possible to set permissions for the special watch paths
408  *         "@introduceDomain" and "@releaseDomain" to enable receiving those
409  *         watches in unprivileged domains.
410  *
411  * ---------- Watches ----------
412  *
413  * WATCH                   <wpath>|<token>|?
414  *         Adds a watch.
415  *
416  *         When a <path> is modified (including path creation, removal,
417  *         contents change or permissions change) this generates an event
418  *         on the changed <path>.  Changes made in transactions cause an
419  *         event only if and when committed.  Each occurring event is
420  *         matched against all the watches currently set up, and each
421  *         matching watch results in a WATCH_EVENT message (see below).
422  *
423  *         The event's path matches the watch's <wpath> if it is a child
424  *         of <wpath>.
425  *
426  *         <wpath> can be a <path> to watch or @<wspecial>.  In the
427  *         latter case <wspecial> may have any syntax but it matches
428  *         (according to the rules above) only the following special
429  *         events which are invented by xenstored:
430  *             @introduceDomain    occurs on INTRODUCE
431  *             @releaseDomain      occurs on any domain crash or
432  *                                 shutdown, and also on RELEASE
433  *                                 and domain destruction
434  *         <wspecial> events are sent only to privileged callers, or to
435  *         domains explicitly enabled via SET_PERMS.
436  *
437  *         When a watch is first set up it is triggered once straight
438  *         away, with <path> equal to <wpath>.  Watches may be triggered
439  *         spuriously.  The tx_id in a WATCH request is ignored.
440  *
441  *         Watches are supposed to be restricted by the permissions
442  *         system but in practice the implementation is imperfect.
443  *         Applications should not rely on being sent a notification for
444  *         paths that they cannot read; however, an application may rely
445  *         on being sent a watch when a path which it _is_ able to read
446  *         is deleted even if that leaves only a nonexistent unreadable
447  *         parent.  A notification may be omitted if a node's permissions
448  *         are changed so as to make it unreadable, in which case future
449  *         notifications may be suppressed (and if the node is later made
450  *         readable, some notifications may have been lost).
451  *
452  * WATCH_EVENT                                     <epath>|<token>|
453  *         Unsolicited `reply' generated for matching modification events
454  *         as described above.  req_id and tx_id are both 0.
455  *
456  *         <epath> is the event's path, ie the actual path that was
457  *         modified; however if the event was the recursive removal of a
458  *         parent of <wpath>, <epath> is just
459  *         <wpath> (rather than the actual path which was removed).  So
460  *         <epath> is a child of <wpath>, regardless.
461  *
462  *         Iff <wpath> for the watch was specified as a relative pathname,
463  *         the <epath> path will also be relative (with the same base,
464  *         obviously).
465  *
466  * UNWATCH                 <wpath>|<token>|?
467  *
468  * RESET_WATCHES           |
469  *         Reset all watches and transactions of the caller.
470  *
471  * ---------- Transactions ----------
472  *
473  * TRANSACTION_START       |                       <transid>|
474  *         <transid> is an opaque uint32_t allocated by xenstored
475  *         represented as unsigned decimal.  After this, the transaction
476  *         may be referenced by using <transid> (as 32-bit binary) in the
477  *         tx_id request header field.  When a transaction is started the
478  *         whole db is copied; reads and writes happen on the copy.
479  *         It is not legal to send non-0 tx_id in TRANSACTION_START.
480  *
481  * TRANSACTION_END         T|
482  * TRANSACTION_END         F|
483  *         tx_id must refer to existing transaction.  After this
484  *         request the tx_id is no longer valid and may be reused by
485  *         xenstore.  If F, the transaction is discarded.  If T,
486  *         it is committed: if there were any other intervening writes
487  *         then our END gets EAGAIN.
488  *
489  *         The plan is that in the future only intervening `conflicting'
490  *         writes cause EAGAIN, meaning only writes or other commits
491  *         which changed paths which were read or written in the
492  *         transaction at hand.
493  *
494  */
495 
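/*
 * As an illustrative sketch (values hypothetical), a guest reading the
 * node "domid" outside a transaction places this on the ring:
 *
 *     struct xsd_sockmsg { .type = XS_READ, .req_id = 1, .tx_id = 0,
 *                          .len = 6 }   followed by   "domid\0"
 *
 * and xs_read() below replies with the same type/req_id/tx_id and the
 * raw value bytes (no trailing NUL) as the payload. On failure the
 * reply instead has type XS_ERROR and carries a NUL-terminated errno
 * string such as "EINVAL".
 */
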
496 static void xs_read(XenXenstoreState *s, unsigned int req_id,
497                     xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
498 {
499     const char *path = (const char *)req_data;
500     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
501     uint8_t *rsp_data = (uint8_t *)&rsp[1];
502     g_autoptr(GByteArray) data = g_byte_array_new();
503     int err;
504 
505     if (len == 0 || req_data[len - 1] != '\0') {
506         xs_error(s, req_id, tx_id, EINVAL);
507         return;
508     }
509 
510     trace_xenstore_read(tx_id, path);
511     err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
512     if (err) {
513         xs_error(s, req_id, tx_id, err);
514         return;
515     }
516 
517     rsp->type = XS_READ;
518     rsp->req_id = req_id;
519     rsp->tx_id = tx_id;
520     rsp->len = 0;
521 
522     len = data->len;
523     if (len > XENSTORE_PAYLOAD_MAX) {
524         xs_error(s, req_id, tx_id, E2BIG);
525         return;
526     }
527 
528     memcpy(&rsp_data[rsp->len], data->data, len);
529     rsp->len += len;
530 }
531 
532 static void xs_write(XenXenstoreState *s, unsigned int req_id,
533                      xs_transaction_t tx_id, uint8_t *req_data,
534                      unsigned int len)
535 {
536     g_autoptr(GByteArray) data = g_byte_array_new();
537     const char *path;
538     int err;
539 
540     if (len == 0) {
541         xs_error(s, req_id, tx_id, EINVAL);
542         return;
543     }
544 
545     path = (const char *)req_data;
546 
547     while (len--) {
548         if (*req_data++ == '\0') {
549             break;
550         }
551         if (len == 0) {
552             xs_error(s, req_id, tx_id, EINVAL);
553             return;
554         }
555     }
556 
557     g_byte_array_append(data, req_data, len);
558 
559     trace_xenstore_write(tx_id, path);
560     err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
561     if (err) {
562         xs_error(s, req_id, tx_id, err);
563         return;
564     }
565 
566     xs_ok(s, XS_WRITE, req_id, tx_id);
567 }
568 
569 static void xs_mkdir(XenXenstoreState *s, unsigned int req_id,
570                      xs_transaction_t tx_id, uint8_t *req_data,
571                      unsigned int len)
572 {
573     g_autoptr(GByteArray) data = g_byte_array_new();
574     const char *path;
575     int err;
576 
577     if (len == 0 || req_data[len - 1] != '\0') {
578         xs_error(s, req_id, tx_id, EINVAL);
579         return;
580     }
581 
582     path = (const char *)req_data;
583 
584     trace_xenstore_mkdir(tx_id, path);
585     err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
586     if (err == ENOENT) {
587         err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
588     }
589 
590     if (err) {
591         xs_error(s, req_id, tx_id, err);
592         return;
593     }
594 
595     xs_ok(s, XS_MKDIR, req_id, tx_id);
596 }
597 
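/*
 * Append a list of strings to the response payload, each NUL-terminated.
 * 'start' is a byte offset into the concatenated list (used by
 * XS_DIRECTORY_PART); with 'truncate' set, output is clipped at
 * XENSTORE_PAYLOAD_MAX instead of failing with E2BIG.
 */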
598 static void xs_append_strings(XenXenstoreState *s, struct xsd_sockmsg *rsp,
599                               GList *strings, unsigned int start, bool truncate)
600 {
601     uint8_t *rsp_data = (uint8_t *)&rsp[1];
602     GList *l;
603 
604     for (l = strings; l; l = l->next) {
605         size_t len = strlen(l->data) + 1; /* Including the NUL termination */
606         char *str = l->data;
607 
608         if (rsp->len + len > XENSTORE_PAYLOAD_MAX) {
609             if (truncate) {
610                 len = XENSTORE_PAYLOAD_MAX - rsp->len;
611                 if (!len) {
612                     return;
613                 }
614             } else {
615                 xs_error(s, rsp->req_id, rsp->tx_id, E2BIG);
616                 return;
617             }
618         }
619 
620         if (start) {
621             if (start >= len) {
622                 start -= len;
623                 continue;
624             }
625 
626             str += start;
627             len -= start;
628             start = 0;
629         }
630 
631         memcpy(&rsp_data[rsp->len], str, len);
632         rsp->len += len;
633     }
634     /* XS_DIRECTORY_PART wants an extra NUL to indicate the end */
635     if (truncate && rsp->len < XENSTORE_PAYLOAD_MAX) {
636         rsp_data[rsp->len++] = '\0';
637     }
638 }
639 
640 static void xs_directory(XenXenstoreState *s, unsigned int req_id,
641                          xs_transaction_t tx_id, uint8_t *req_data,
642                          unsigned int len)
643 {
644     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
645     GList *items = NULL;
646     const char *path;
647     int err;
648 
649     if (len == 0 || req_data[len - 1] != '\0') {
650         xs_error(s, req_id, tx_id, EINVAL);
651         return;
652     }
653 
654     path = (const char *)req_data;
655 
656     trace_xenstore_directory(tx_id, path);
657     err = xs_impl_directory(s->impl, xen_domid, tx_id, path, NULL, &items);
658     if (err != 0) {
659         xs_error(s, req_id, tx_id, err);
660         return;
661     }
662 
663     rsp->type = XS_DIRECTORY;
664     rsp->req_id = req_id;
665     rsp->tx_id = tx_id;
666     rsp->len = 0;
667 
668     xs_append_strings(s, rsp, items, 0, false);
669 
670     g_list_free_full(items, g_free);
671 }
672 
673 static void xs_directory_part(XenXenstoreState *s, unsigned int req_id,
674                               xs_transaction_t tx_id, uint8_t *req_data,
675                               unsigned int len)
676 {
677     const char *offset_str, *path = (const char *)req_data;
678     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
679     char *rsp_data = (char *)&rsp[1];
680     uint64_t gencnt = 0;
681     unsigned int offset;
682     GList *items = NULL;
683     int err;
684 
685     if (len == 0) {
686         xs_error(s, req_id, tx_id, EINVAL);
687         return;
688     }
689 
690     while (len--) {
691         if (*req_data++ == '\0') {
692             break;
693         }
694         if (len == 0) {
695             xs_error(s, req_id, tx_id, EINVAL);
696             return;
697         }
698     }
699 
700     offset_str = (const char *)req_data;
701     while (len--) {
702         if (*req_data++ == '\0') {
703             break;
704         }
705         if (len == 0) {
706             xs_error(s, req_id, tx_id, EINVAL);
707             return;
708         }
709     }
710 
711     if (len) {
712         xs_error(s, req_id, tx_id, EINVAL);
713         return;
714     }
715 
716     if (qemu_strtoui(offset_str, NULL, 10, &offset) < 0) {
717         xs_error(s, req_id, tx_id, EINVAL);
718         return;
719     }
720 
721     trace_xenstore_directory_part(tx_id, path, offset);
722     err = xs_impl_directory(s->impl, xen_domid, tx_id, path, &gencnt, &items);
723     if (err != 0) {
724         xs_error(s, req_id, tx_id, err);
725         return;
726     }
727 
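    /*
     * The reply payload is <gencnt>|<child-leaf-name>|* with one extra
     * NUL appended to mark the end of the (possibly truncated) list.
     */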
728     rsp->type = XS_DIRECTORY_PART;
729     rsp->req_id = req_id;
730     rsp->tx_id = tx_id;
731     rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%" PRIu64, gencnt) + 1;
732 
733     xs_append_strings(s, rsp, items, offset, true);
734 
735     g_list_free_full(items, g_free);
736 }
737 
738 static void xs_transaction_start(XenXenstoreState *s, unsigned int req_id,
739                                  xs_transaction_t tx_id, uint8_t *req_data,
740                                  unsigned int len)
741 {
742     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
743     char *rsp_data = (char *)&rsp[1];
744     int err;
745 
746     if (len != 1 || req_data[0] != '\0') {
747         xs_error(s, req_id, tx_id, EINVAL);
748         return;
749     }
750 
751     rsp->type = XS_TRANSACTION_START;
752     rsp->req_id = req_id;
753     rsp->tx_id = tx_id;
754     rsp->len = 0;
755 
756     err = xs_impl_transaction_start(s->impl, xen_domid, &tx_id);
757     if (err) {
758         xs_error(s, req_id, tx_id, err);
759         return;
760     }
761 
762     trace_xenstore_transaction_start(tx_id);
763 
764     rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%u", tx_id);
765     assert(rsp->len < XENSTORE_PAYLOAD_MAX);
766     rsp->len++;
767 }
768 
769 static void xs_transaction_end(XenXenstoreState *s, unsigned int req_id,
770                                xs_transaction_t tx_id, uint8_t *req_data,
771                                unsigned int len)
772 {
773     bool commit;
774     int err;
775 
776     if (len != 2 || req_data[1] != '\0') {
777         xs_error(s, req_id, tx_id, EINVAL);
778         return;
779     }
780 
781     switch (req_data[0]) {
782     case 'T':
783         commit = true;
784         break;
785     case 'F':
786         commit = false;
787         break;
788     default:
789         xs_error(s, req_id, tx_id, EINVAL);
790         return;
791     }
792 
793     trace_xenstore_transaction_end(tx_id, commit);
794     err = xs_impl_transaction_end(s->impl, xen_domid, tx_id, commit);
795     if (err) {
796         xs_error(s, req_id, tx_id, err);
797         return;
798     }
799 
800     xs_ok(s, XS_TRANSACTION_END, req_id, tx_id);
801 }
802 
803 static void xs_rm(XenXenstoreState *s, unsigned int req_id,
804                   xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
805 {
806     const char *path = (const char *)req_data;
807     int err;
808 
809     if (len == 0 || req_data[len - 1] != '\0') {
810         xs_error(s, req_id, tx_id, EINVAL);
811         return;
812     }
813 
814     trace_xenstore_rm(tx_id, path);
815     err = xs_impl_rm(s->impl, xen_domid, tx_id, path);
816     if (err) {
817         xs_error(s, req_id, tx_id, err);
818         return;
819     }
820 
821     xs_ok(s, XS_RM, req_id, tx_id);
822 }
823 
824 static void xs_get_perms(XenXenstoreState *s, unsigned int req_id,
825                          xs_transaction_t tx_id, uint8_t *req_data,
826                          unsigned int len)
827 {
828     const char *path = (const char *)req_data;
829     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
830     GList *perms = NULL;
831     int err;
832 
833     if (len == 0 || req_data[len - 1] != '\0') {
834         xs_error(s, req_id, tx_id, EINVAL);
835         return;
836     }
837 
838     trace_xenstore_get_perms(tx_id, path);
839     err = xs_impl_get_perms(s->impl, xen_domid, tx_id, path, &perms);
840     if (err) {
841         xs_error(s, req_id, tx_id, err);
842         return;
843     }
844 
845     rsp->type = XS_GET_PERMS;
846     rsp->req_id = req_id;
847     rsp->tx_id = tx_id;
848     rsp->len = 0;
849 
850     xs_append_strings(s, rsp, perms, 0, false);
851 
852     g_list_free_full(perms, g_free);
853 }
854 
855 static void xs_set_perms(XenXenstoreState *s, unsigned int req_id,
856                          xs_transaction_t tx_id, uint8_t *req_data,
857                          unsigned int len)
858 {
859     const char *path = (const char *)req_data;
860     uint8_t *perm;
861     GList *perms = NULL;
862     int err;
863 
864     if (len == 0) {
865         xs_error(s, req_id, tx_id, EINVAL);
866         return;
867     }
868 
869     while (len--) {
870         if (*req_data++ == '\0') {
871             break;
872         }
873         if (len == 0) {
874             xs_error(s, req_id, tx_id, EINVAL);
875             return;
876         }
877     }
878 
879     perm = req_data;
880     while (len--) {
881         if (*req_data++ == '\0') {
882             perms = g_list_append(perms, perm);
883             perm = req_data;
884         }
885     }
886 
887     /*
888      * Note that there may be trailing garbage at the end of the buffer.
889      * This is explicitly permitted by the '?' at the end of the definition:
890      *
891      *    SET_PERMS         <path>|<perm-as-string>|+?
892      */
893 
894     trace_xenstore_set_perms(tx_id, path);
895     err = xs_impl_set_perms(s->impl, xen_domid, tx_id, path, perms);
896     g_list_free(perms);
897     if (err) {
898         xs_error(s, req_id, tx_id, err);
899         return;
900     }
901 
902     xs_ok(s, XS_SET_PERMS, req_id, tx_id);
903 }
904 
905 static void xs_watch(XenXenstoreState *s, unsigned int req_id,
906                      xs_transaction_t tx_id, uint8_t *req_data,
907                      unsigned int len)
908 {
909     const char *token, *path = (const char *)req_data;
910     int err;
911 
912     if (len == 0) {
913         xs_error(s, req_id, tx_id, EINVAL);
914         return;
915     }
916 
917     while (len--) {
918         if (*req_data++ == '\0') {
919             break;
920         }
921         if (len == 0) {
922             xs_error(s, req_id, tx_id, EINVAL);
923             return;
924         }
925     }
926 
927     token = (const char *)req_data;
928     while (len--) {
929         if (*req_data++ == '\0') {
930             break;
931         }
932         if (len == 0) {
933             xs_error(s, req_id, tx_id, EINVAL);
934             return;
935         }
936     }
937 
938     /*
939      * Note that there may be trailing garbage at the end of the buffer.
940      * This is explicitly permitted by the '?' at the end of the definition:
941      *
942      *    WATCH             <wpath>|<token>|?
943      */
944 
945     trace_xenstore_watch(path, token);
946     err = xs_impl_watch(s->impl, xen_domid, path, token, fire_watch_cb, s);
947     if (err) {
948         xs_error(s, req_id, tx_id, err);
949         return;
950     }
951 
952     xs_ok(s, XS_WATCH, req_id, tx_id);
953 }
954 
955 static void xs_unwatch(XenXenstoreState *s, unsigned int req_id,
956                        xs_transaction_t tx_id, uint8_t *req_data,
957                        unsigned int len)
958 {
959     const char *token, *path = (const char *)req_data;
960     int err;
961 
962     if (len == 0) {
963         xs_error(s, req_id, tx_id, EINVAL);
964         return;
965     }
966 
967     while (len--) {
968         if (*req_data++ == '\0') {
969             break;
970         }
971         if (len == 0) {
972             xs_error(s, req_id, tx_id, EINVAL);
973             return;
974         }
975     }
976 
977     token = (const char *)req_data;
978     while (len--) {
979         if (*req_data++ == '\0') {
980             break;
981         }
982         if (len == 0) {
983             xs_error(s, req_id, tx_id, EINVAL);
984             return;
985         }
986     }
987 
988     trace_xenstore_unwatch(path, token);
989     err = xs_impl_unwatch(s->impl, xen_domid, path, token, fire_watch_cb, s);
990     if (err) {
991         xs_error(s, req_id, tx_id, err);
992         return;
993     }
994 
995     xs_ok(s, XS_UNWATCH, req_id, tx_id);
996 }
997 
998 static void xs_reset_watches(XenXenstoreState *s, unsigned int req_id,
999                              xs_transaction_t tx_id, uint8_t *req_data,
1000                              unsigned int len)
1001 {
1002     if (len == 0 || req_data[len - 1] != '\0') {
1003         xs_error(s, req_id, tx_id, EINVAL);
1004         return;
1005     }
1006 
1007     trace_xenstore_reset_watches();
1008     xs_impl_reset_watches(s->impl, xen_domid);
1009 
1010     xs_ok(s, XS_RESET_WATCHES, req_id, tx_id);
1011 }
1012 
1013 static void xs_priv(XenXenstoreState *s, unsigned int req_id,
1014                     xs_transaction_t tx_id, uint8_t *data,
1015                     unsigned int len)
1016 {
1017     xs_error(s, req_id, tx_id, EACCES);
1018 }
1019 
1020 static void xs_unimpl(XenXenstoreState *s, unsigned int req_id,
1021                       xs_transaction_t tx_id, uint8_t *data,
1022                       unsigned int len)
1023 {
1024     xs_error(s, req_id, tx_id, ENOSYS);
1025 }
1026 
1027 typedef void (*xs_impl)(XenXenstoreState *s, unsigned int req_id,
1028                         xs_transaction_t tx_id, uint8_t *data,
1029                         unsigned int len);
1030 
1031 struct xsd_req {
1032     const char *name;
1033     xs_impl fn;
1034 };
1035 #define XSD_REQ(_type, _fn)                           \
1036     [_type] = { .name = #_type, .fn = _fn }
1037 
1038 struct xsd_req xsd_reqs[] = {
1039     XSD_REQ(XS_READ, xs_read),
1040     XSD_REQ(XS_WRITE, xs_write),
1041     XSD_REQ(XS_MKDIR, xs_mkdir),
1042     XSD_REQ(XS_DIRECTORY, xs_directory),
1043     XSD_REQ(XS_DIRECTORY_PART, xs_directory_part),
1044     XSD_REQ(XS_TRANSACTION_START, xs_transaction_start),
1045     XSD_REQ(XS_TRANSACTION_END, xs_transaction_end),
1046     XSD_REQ(XS_RM, xs_rm),
1047     XSD_REQ(XS_GET_PERMS, xs_get_perms),
1048     XSD_REQ(XS_SET_PERMS, xs_set_perms),
1049     XSD_REQ(XS_WATCH, xs_watch),
1050     XSD_REQ(XS_UNWATCH, xs_unwatch),
1051     XSD_REQ(XS_CONTROL, xs_priv),
1052     XSD_REQ(XS_INTRODUCE, xs_priv),
1053     XSD_REQ(XS_RELEASE, xs_priv),
1054     XSD_REQ(XS_IS_DOMAIN_INTRODUCED, xs_priv),
1055     XSD_REQ(XS_RESUME, xs_priv),
1056     XSD_REQ(XS_SET_TARGET, xs_priv),
1057     XSD_REQ(XS_RESET_WATCHES, xs_reset_watches),
1058 };
1059 
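/*
 * Dispatch an assembled request. Wire types with no entry in xsd_reqs[]
 * (or beyond the end of the table) fall through to xs_unimpl(), which
 * returns ENOSYS to the guest.
 */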
1060 static void process_req(XenXenstoreState *s)
1061 {
1062     struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
1063     xs_impl handler = NULL;
1064 
1065     assert(req_pending(s));
1066     assert(!s->rsp_pending);
1067 
1068     if (req->type < ARRAY_SIZE(xsd_reqs)) {
1069         handler = xsd_reqs[req->type].fn;
1070     }
1071     if (!handler) {
1072         handler = &xs_unimpl;
1073     }
1074 
1075     handler(s, req->req_id, req->tx_id, (uint8_t *)&req[1], req->len);
1076 
1077     s->rsp_pending = true;
1078     reset_req(s);
1079 }
1080 
1081 static unsigned int copy_from_ring(XenXenstoreState *s, uint8_t *ptr,
1082                                    unsigned int len)
1083 {
1084     if (!len) {
1085         return 0;
1086     }
1087 
1088     XENSTORE_RING_IDX prod = qatomic_read(&s->xs->req_prod);
1089     XENSTORE_RING_IDX cons = qatomic_read(&s->xs->req_cons);
1090     unsigned int copied = 0;
1091 
1092     /* Ensure the ring contents don't cross the req_prod access. */
1093     smp_rmb();
1094 
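    /*
     * prod and cons are free-running indices; unsigned wraparound makes
     * 'prod - cons' the number of bytes available, and MASK_XENSTORE_IDX()
     * maps an index into the ring buffer.
     */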
1095     while (len) {
1096         unsigned int avail = prod - cons;
1097         unsigned int offset = MASK_XENSTORE_IDX(cons);
1098         unsigned int copylen = avail;
1099 
1100         if (avail > XENSTORE_RING_SIZE) {
1101             error_report("XenStore ring handling error");
1102             s->fatal_error = true;
1103             break;
1104         } else if (avail == 0) {
1105             break;
1106         }
1107 
1108         if (copylen > len) {
1109             copylen = len;
1110         }
1111         if (copylen > XENSTORE_RING_SIZE - offset) {
1112             copylen = XENSTORE_RING_SIZE - offset;
1113         }
1114 
1115         memcpy(ptr, &s->xs->req[offset], copylen);
1116         copied += copylen;
1117 
1118         ptr += copylen;
1119         len -= copylen;
1120 
1121         cons += copylen;
1122     }
1123 
1124     /*
1125      * Not sure this ever mattered except on Alpha, but this barrier
1126      * is to ensure that the update to req_cons is globally visible
1127      * only after we have consumed all the data from the ring, and we
1128      * don't end up seeing data written to the ring *after* the other
1129      * end sees the update and writes more to the ring. Xen's own
1130      * xenstored has the same barrier here (although with no comment
1131      * at all, obviously, because it's Xen code).
1132      */
1133     smp_mb();
1134 
1135     qatomic_set(&s->xs->req_cons, cons);
1136 
1137     return copied;
1138 }
1139 
1140 static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
1141                                  unsigned int len)
1142 {
1143     if (!len) {
1144         return 0;
1145     }
1146 
1147     XENSTORE_RING_IDX cons = qatomic_read(&s->xs->rsp_cons);
1148     XENSTORE_RING_IDX prod = qatomic_read(&s->xs->rsp_prod);
1149     unsigned int copied = 0;
1150 
1151     /*
1152      * This matches the barrier in copy_from_ring() (or the guest's
1153      * equivalent) between consuming the data from the ring and updating
1154      * rsp_cons. It protects against the pathological case (which
1155      * again I think never happened except on Alpha) where our
1156      * subsequent writes to the ring could *cross* the read of
1157      * rsp_cons and the guest could see the new data when it was
1158      * intending to read the old.
1159      */
1160     smp_mb();
1161 
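    /*
     * Here 'cons + XENSTORE_RING_SIZE - prod' is the free space in the
     * response ring; prod must never advance more than XENSTORE_RING_SIZE
     * beyond cons.
     */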
1162     while (len) {
1163         unsigned int avail = cons + XENSTORE_RING_SIZE - prod;
1164         unsigned int offset = MASK_XENSTORE_IDX(prod);
1165         unsigned int copylen = len;
1166 
1167         if (avail > XENSTORE_RING_SIZE) {
1168             error_report("XenStore ring handling error");
1169             s->fatal_error = true;
1170             break;
1171         } else if (avail == 0) {
1172             break;
1173         }
1174 
1175         if (copylen > avail) {
1176             copylen = avail;
1177         }
1178         if (copylen > XENSTORE_RING_SIZE - offset) {
1179             copylen = XENSTORE_RING_SIZE - offset;
1180         }
1181 
1183         memcpy(&s->xs->rsp[offset], ptr, copylen);
1184         copied += copylen;
1185 
1186         ptr += copylen;
1187         len -= copylen;
1188 
1189         prod += copylen;
1190     }
1191 
1192     /* Ensure the ring contents are seen before rsp_prod update. */
1193     smp_wmb();
1194 
1195     qatomic_set(&s->xs->rsp_prod, prod);
1196 
1197     return copied;
1198 }
1199 
1200 static unsigned int get_req(XenXenstoreState *s)
1201 {
1202     unsigned int copied = 0;
1203 
1204     if (s->fatal_error) {
1205         return 0;
1206     }
1207 
1208     assert(!req_pending(s));
1209 
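    /*
     * Copy in two phases: first the fixed-size header, so that req->len
     * becomes valid, then the advertised payload (bounded by
     * XENSTORE_PAYLOAD_MAX). Either copy may be partial; we resume from
     * s->req_offset on the next event.
     */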
1210     if (s->req_offset < XENSTORE_HEADER_SIZE) {
1211         void *ptr = s->req_data + s->req_offset;
1212         unsigned int len = XENSTORE_HEADER_SIZE;
1213         unsigned int copylen = copy_from_ring(s, ptr, len);
1214 
1215         copied += copylen;
1216         s->req_offset += copylen;
1217     }
1218 
1219     if (s->req_offset >= XENSTORE_HEADER_SIZE) {
1220         struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
1221 
1222         if (req->len > (uint32_t)XENSTORE_PAYLOAD_MAX) {
1223             error_report("Illegal XenStore request");
1224             s->fatal_error = true;
1225             return 0;
1226         }
1227 
1228         void *ptr = s->req_data + s->req_offset;
1229         unsigned int len = XENSTORE_HEADER_SIZE + req->len - s->req_offset;
1230         unsigned int copylen = copy_from_ring(s, ptr, len);
1231 
1232         copied += copylen;
1233         s->req_offset += copylen;
1234     }
1235 
1236     return copied;
1237 }
1238 
1239 static unsigned int put_rsp(XenXenstoreState *s)
1240 {
1241     if (s->fatal_error) {
1242         return 0;
1243     }
1244 
1245     assert(s->rsp_pending);
1246 
1247     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
1248     assert(s->rsp_offset < XENSTORE_HEADER_SIZE + rsp->len);
1249 
1250     void *ptr = s->rsp_data + s->rsp_offset;
1251     unsigned int len = XENSTORE_HEADER_SIZE + rsp->len - s->rsp_offset;
1252     unsigned int copylen = copy_to_ring(s, ptr, len);
1253 
1254     s->rsp_offset += copylen;
1255 
1256     /* Have we produced a complete response? */
1257     if (s->rsp_offset == XENSTORE_HEADER_SIZE + rsp->len) {
1258         reset_rsp(s);
1259     }
1260 
1261     return copylen;
1262 }
1263 
1264 static void deliver_watch(XenXenstoreState *s, const char *path,
1265                           const char *token)
1266 {
1267     struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
1268     uint8_t *rsp_data = (uint8_t *)&rsp[1];
1269     unsigned int len;
1270 
1271     assert(!s->rsp_pending);
1272 
1273     trace_xenstore_watch_event(path, token);
1274 
1275     rsp->type = XS_WATCH_EVENT;
1276     rsp->req_id = 0;
1277     rsp->tx_id = 0;
1278     rsp->len = 0;
1279 
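    /* The WATCH_EVENT payload is <epath>|<token>|, both NUL-terminated. */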
1280     len = strlen(path);
1281 
1282     /* XENSTORE_ABS/REL_PATH_MAX should ensure there can be no overflow */
1283     assert(rsp->len + len < XENSTORE_PAYLOAD_MAX);
1284 
1285     memcpy(&rsp_data[rsp->len], path, len);
1286     rsp->len += len;
1287     rsp_data[rsp->len] = '\0';
1288     rsp->len++;
1289 
1290     len = strlen(token);
1291     /*
1292      * It is possible for the guest to have chosen a token that will
1293      * not fit (along with the path) into a watch event. We have no
1294      * choice but to drop the event if this is the case.
1295      */
1296     if (rsp->len + len >= XENSTORE_PAYLOAD_MAX) {
1297         return;
1298     }
1299 
1300     memcpy(&rsp_data[rsp->len], token, len);
1301     rsp->len += len;
1302     rsp_data[rsp->len] = '\0';
1303     rsp->len++;
1304 
1305     s->rsp_pending = true;
1306 }
1307 
1308 struct watch_event {
1309     char *path;
1310     char *token;
1311 };
1312 
1313 static void free_watch_event(struct watch_event *ev)
1314 {
1315     if (ev) {
1316         g_free(ev->path);
1317         g_free(ev->token);
1318         g_free(ev);
1319     }
1320 }
1321 
1322 static void queue_watch(XenXenstoreState *s, const char *path,
1323                         const char *token)
1324 {
1325     struct watch_event *ev = g_new0(struct watch_event, 1);
1326 
1327     ev->path = g_strdup(path);
1328     ev->token = g_strdup(token);
1329 
1330     s->watch_events = g_list_append(s->watch_events, ev);
1331 }
1332 
1333 static void fire_watch_cb(void *opaque, const char *path, const char *token)
1334 {
1335     XenXenstoreState *s = opaque;
1336 
1337     assert(qemu_mutex_iothread_locked());
1338 
1339     /*
1340      * If there's a response pending, we obviously can't scribble over
1341      * it. But if there's a request pending, it has dibs on the buffer
1342      * too.
1343      *
1344      * In the common case of a watch firing due to backend activity
1345      * when the ring was otherwise idle, we should be able to copy the
1346      * strings directly into the rsp_data and thence the actual ring,
1347      * without needing to perform any allocations and queue them.
1348      */
1349     if (s->rsp_pending || req_pending(s)) {
1350         queue_watch(s, path, token);
1351     } else {
1352         deliver_watch(s, path, token);
1353         /*
1354          * If the message was queued because there was already ring activity,
1355          * no need to wake the guest. But if not, we need to send the evtchn.
1356          */
1357         xen_be_evtchn_notify(s->eh, s->be_port);
1358     }
1359 }
1360 
1361 static void process_watch_events(XenXenstoreState *s)
1362 {
1363     struct watch_event *ev = s->watch_events->data;
1364 
1365     deliver_watch(s, ev->path, ev->token);
1366 
1367     s->watch_events = g_list_remove(s->watch_events, ev);
1368     free_watch_event(ev);
1369 }
1370 
1371 static void xen_xenstore_event(void *opaque)
1372 {
1373     XenXenstoreState *s = opaque;
1374     evtchn_port_t port = xen_be_evtchn_pending(s->eh);
1375     unsigned int copied_to, copied_from;
1376     bool processed, notify = false;
1377 
1378     if (port != s->be_port) {
1379         return;
1380     }
1381 
1382     /* We know this is a no-op. */
1383     xen_be_evtchn_unmask(s->eh, port);
1384 
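    /*
     * Keep making progress until a full pass moves no data: drain any
     * queued watch events, flush the pending response, pull in request
     * bytes, and process a completed request. Notify the guest via the
     * event channel if anything was copied in either direction.
     */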
1385     do {
1386         copied_to = copied_from = 0;
1387         processed = false;
1388 
1389         if (!s->rsp_pending && s->watch_events) {
1390             process_watch_events(s);
1391         }
1392 
1393         if (s->rsp_pending) {
1394             copied_to = put_rsp(s);
1395         }
1396 
1397         if (!req_pending(s)) {
1398             copied_from = get_req(s);
1399         }
1400 
1401         if (req_pending(s) && !s->rsp_pending && !s->watch_events) {
1402             process_req(s);
1403             processed = true;
1404         }
1405 
1406         notify |= copied_to || copied_from;
1407     } while (copied_to || copied_from || processed);
1408 
1409     if (notify) {
1410         xen_be_evtchn_notify(s->eh, s->be_port);
1411     }
1412 }
1413 
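/*
 * Allocate an unbound event channel in the guest's own table, with QEMU
 * (DOMID_QEMU) as the remote end, to serve as the guest's xenstore port.
 */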
1414 static void alloc_guest_port(XenXenstoreState *s)
1415 {
1416     struct evtchn_alloc_unbound alloc = {
1417         .dom = DOMID_SELF,
1418         .remote_dom = DOMID_QEMU,
1419     };
1420 
1421     if (!xen_evtchn_alloc_unbound_op(&alloc)) {
1422         s->guest_port = alloc.port;
1423     }
1424 }
1425 
1426 int xen_xenstore_reset(void)
1427 {
1428     XenXenstoreState *s = xen_xenstore_singleton;
1429     int err;
1430 
1431     if (!s) {
1432         return -ENOTSUP;
1433     }
1434 
1435     s->req_offset = s->rsp_offset = 0;
1436     s->rsp_pending = false;
1437 
1438     if (!memory_region_is_mapped(&s->xenstore_page)) {
1439         uint64_t gpa = XEN_SPECIAL_PFN(XENSTORE) << TARGET_PAGE_BITS;
1440         xen_overlay_do_map_page(&s->xenstore_page, gpa);
1441     }
1442 
1443     alloc_guest_port(s);
1444 
1445     /*
1446      * As qemu/dom0, bind to the guest's port. For incoming migration, this
1447      * will be unbound as the guest's evtchn table is overwritten. We then
1448      * rebind to the correct guest port in xen_xenstore_post_load().
1449      */
1450     err = xen_be_evtchn_bind_interdomain(s->eh, xen_domid, s->guest_port);
1451     if (err < 0) {
1452         return err;
1453     }
1454     s->be_port = err;
1455 
1456     return 0;
1457 }
1458 
1459 struct qemu_xs_handle {
1460     XenstoreImplState *impl;
1461     GList *watches;
1462     QEMUBH *watch_bh;
1463 };
1464 
1465 struct qemu_xs_watch {
1466     struct qemu_xs_handle *h;
1467     char *path;
1468     xs_watch_fn fn;
1469     void *opaque;
1470     GList *events;
1471 };
1472 
1473 static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int domid)
1474 {
1475     return g_strdup_printf("/local/domain/%u", domid);
1476 }
1477 
1478 static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t,
1479                               const char *path, unsigned int *num)
1480 {
1481     GList *items = NULL, *l;
1482     unsigned int i = 0;
1483     char **items_ret;
1484     int err;
1485 
1486     err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items);
1487     if (err) {
1488         errno = err;
1489         return NULL;
1490     }
1491 
1492     items_ret = g_new0(char *, g_list_length(items) + 1);
1493     *num = 0;
1494     for (l = items; l; l = l->next) {
1495         items_ret[i++] = l->data;
1496         (*num)++;
1497     }
1498     g_list_free(items);
1499     return items_ret;
1500 }
1501 
1502 static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t,
1503                         const char *path, unsigned int *len)
1504 {
1505     GByteArray *data = g_byte_array_new();
1506     bool free_segment = false;
1507     int err;
1508 
1509     err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
1510     if (err) {
1511         free_segment = true;
1512         errno = err;
1513     } else {
1514         if (len) {
1515             *len = data->len;
1516         }
1517         /* The xen-bus-helper code expects to get a NUL-terminated string! */
1518         g_byte_array_append(data, (void *)"", 1);
1519     }
1520 
1521     return g_byte_array_free(data, free_segment);
1522 }
1523 
1524 static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t,
1525                         const char *path, const void *data, unsigned int len)
1526 {
1527     GByteArray *gdata = g_byte_array_new();
1528     int err;
1529 
1530     g_byte_array_append(gdata, data, len);
1531     err = xs_impl_write(h->impl, DOMID_QEMU, t, path, gdata);
1532     g_byte_array_unref(gdata);
1533     if (err) {
1534         errno = err;
1535         return false;
1536     }
1537     return true;
1538 }
1539 
1540 static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t,
1541                          unsigned int owner, unsigned int domid,
1542                          unsigned int perms, const char *path)
1543 {
1544     g_autoptr(GByteArray) data = g_byte_array_new();
1545     GList *perms_list = NULL;
1546     int err;
1547 
1548     /* mkdir semantics: create the node only if it doesn't already exist */
1549     err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
1550     if (err == ENOENT) {
1551         err = xs_impl_write(h->impl, DOMID_QEMU, t, path, data);
1552     }
1553     if (err) {
1554         errno = err;
1555         return false;
1556     }
1557 
1558     perms_list = g_list_append(perms_list,
1559                                xs_perm_as_string(XS_PERM_NONE, owner));
1560     perms_list = g_list_append(perms_list,
1561                                xs_perm_as_string(perms, domid));
1562 
1563     err = xs_impl_set_perms(h->impl, DOMID_QEMU, t, path, perms_list);
1564     g_list_free_full(perms_list, g_free);
1565     if (err) {
1566         errno = err;
1567         return false;
1568     }
1569     return true;
1570 }
1571 
1572 static bool xs_be_destroy(struct qemu_xs_handle *h, xs_transaction_t t,
1573                           const char *path)
1574 {
1575     int err = xs_impl_rm(h->impl, DOMID_QEMU, t, path);
1576     if (err) {
1577         errno = err;
1578         return false;
1579     }
1580     return true;
1581 }
1582 
1583 static void be_watch_bh(void *_h)
1584 {
1585     struct qemu_xs_handle *h = _h;
1586     GList *l;
1587 
1588     for (l = h->watches; l; l = l->next) {
1589         struct qemu_xs_watch *w = l->data;
1590 
1591         while (w->events) {
1592             struct watch_event *ev = w->events->data;
1593 
1594             w->fn(w->opaque, ev->path);
1595 
1596             w->events = g_list_remove(w->events, ev);
1597             free_watch_event(ev);
1598         }
1599     }
1600 }
1601 
1602 static void xs_be_watch_cb(void *opaque, const char *path, const char *token)
1603 {
1604     struct watch_event *ev = g_new0(struct watch_event, 1);
1605     struct qemu_xs_watch *w = opaque;
1606 
1607     /* We don't care about the token */
1608     ev->path = g_strdup(path);
1609     w->events = g_list_append(w->events, ev);
1610 
1611     qemu_bh_schedule(w->h->watch_bh);
1612 }
1613 
1614 static struct qemu_xs_watch *xs_be_watch(struct qemu_xs_handle *h,
1615                                          const char *path, xs_watch_fn fn,
1616                                          void *opaque)
1617 {
1618     struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1);
1619     int err;
1620 
1621     w->h = h;
1622     w->fn = fn;
1623     w->opaque = opaque;
1624 
1625     err = xs_impl_watch(h->impl, DOMID_QEMU, path, NULL, xs_be_watch_cb, w);
1626     if (err) {
1627         errno = err;
1628         g_free(w);
1629         return NULL;
1630     }
1631 
1632     w->path = g_strdup(path);
1633     h->watches = g_list_append(h->watches, w);
1634     return w;
1635 }
1636 
1637 static void xs_be_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
1638 {
1639     xs_impl_unwatch(h->impl, DOMID_QEMU, w->path, NULL, xs_be_watch_cb, w);
1640 
1641     h->watches = g_list_remove(h->watches, w);
1642     g_list_free_full(w->events, (GDestroyNotify)free_watch_event);
1643     g_free(w->path);
1644     g_free(w);
1645 }
1646 
1647 static xs_transaction_t xs_be_transaction_start(struct qemu_xs_handle *h)
1648 {
1649     unsigned int new_tx = XBT_NULL;
1650     int err = xs_impl_transaction_start(h->impl, DOMID_QEMU, &new_tx);
1651     if (err) {
1652         errno = err;
1653         return XBT_NULL;
1654     }
1655     return new_tx;
1656 }
1657 
1658 static bool xs_be_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t,
1659                                   bool abort)
1660 {
1661     int err = xs_impl_transaction_end(h->impl, DOMID_QEMU, t, !abort);
1662     if (err) {
1663         errno = err;
1664         return false;
1665     }
1666     return true;
1667 }
1668 
1669 static struct qemu_xs_handle *xs_be_open(void)
1670 {
1671     XenXenstoreState *s = xen_xenstore_singleton;
1672     struct qemu_xs_handle *h;
1673 
1674     if (!s || !s->impl) {
1675         errno = ENOSYS;
1676         return NULL;
1677     }
1678 
1679     h = g_new0(struct qemu_xs_handle, 1);
1680     h->impl = s->impl;
1681 
1682     h->watch_bh = aio_bh_new(qemu_get_aio_context(), be_watch_bh, h);
1683 
1684     return h;
1685 }
1686 
1687 static void xs_be_close(struct qemu_xs_handle *h)
1688 {
1689     while (h->watches) {
1690         struct qemu_xs_watch *w = h->watches->data;
1691         xs_be_unwatch(h, w);
1692     }
1693 
1694     qemu_bh_delete(h->watch_bh);
1695     g_free(h);
1696 }
1697 
1698 static struct xenstore_backend_ops emu_xenstore_backend_ops = {
1699     .open = xs_be_open,
1700     .close = xs_be_close,
1701     .get_domain_path = xs_be_get_domain_path,
1702     .directory = xs_be_directory,
1703     .read = xs_be_read,
1704     .write = xs_be_write,
1705     .create = xs_be_create,
1706     .destroy = xs_be_destroy,
1707     .watch = xs_be_watch,
1708     .unwatch = xs_be_unwatch,
1709     .transaction_start = xs_be_transaction_start,
1710     .transaction_end = xs_be_transaction_end,
1711 };
1712