/*	$OpenBSD: xenstore.c,v 1.50 2024/05/24 10:05:55 jsg Exp $	*/

/*
 * Copyright (c) 2015 Mike Belopuhov
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/task.h>

#include <machine/bus.h>

#include <uvm/uvm_extern.h>

#include <dev/pv/pvvar.h>
#include <dev/pv/xenreg.h>
#include <dev/pv/xenvar.h>

/* #define XS_DEBUG */

#ifdef XS_DEBUG
#define DPRINTF(x...)		printf(x)
#else
#define DPRINTF(x...)
#endif

/*
 * The XenStore interface is a simple storage system that serves as a
 * means of communicating state and configuration data between the Xen
 * Domain 0 and the various guest domains.  All configuration data,
 * other than the small amount of essential information required during
 * the early boot of a Xen-aware guest, is managed using the XenStore.
 *
 * The XenStore is ASCII string based and has a structure and semantics
 * similar to a filesystem: directories contain files or further
 * directories, with the depth of the hierarchy limited only by the
 * XenStore's maximum path length.
 *
 * The communication channel between the XenStore service and other
 * domains consists of two guest-specific ring buffers in a shared
 * memory area, one for each direction.  The grant table references
 * for this shared memory are given to the guest via HVM hypercalls.
 *
 * The XenStore communication relies on an event channel and thus
 * interrupts.  Several Xen services depend on the XenStore, most
 * notably the XenBus used to discover and manage Xen devices.
 */
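
/*
 * To illustrate the layout (example paths only, not an exhaustive
 * list): a guest typically finds its device configuration in nodes
 * under "device/", e.g. "device/vif/0/mac", while the toolstack
 * requests a shutdown by writing to "control/shutdown", a node the
 * guest observes via a watch.
 */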

const struct {
	const char		*xse_errstr;
	int			 xse_errnum;
} xs_errors[] = {
	{ "EINVAL",	EINVAL },
	{ "EACCES",	EACCES },
	{ "EEXIST",	EEXIST },
	{ "EISDIR",	EISDIR },
	{ "ENOENT",	ENOENT },
	{ "ENOMEM",	ENOMEM },
	{ "ENOSPC",	ENOSPC },
	{ "EIO",	EIO },
	{ "ENOTEMPTY",	ENOTEMPTY },
	{ "ENOSYS",	ENOSYS },
	{ "EROFS",	EROFS },
	{ "EBUSY",	EBUSY },
	{ "EAGAIN",	EAGAIN },
	{ "EISCONN",	EISCONN },
	{ NULL,		-1 },
};

struct xs_msghdr {
	/* Message type */
	uint32_t		 xmh_type;
	/* Request identifier, echoed in daemon's response. */
	uint32_t		 xmh_rid;
	/* Transaction id (0 if not related to a transaction). */
	uint32_t		 xmh_tid;
	/* Length of data following this. */
	uint32_t		 xmh_len;
	/* Generally followed by nul-terminated string(s). */
} __packed;

/*
 * A minimum output buffer size needed to store an error string.
 */
#define XS_ERR_PAYLOAD		16

/*
 * Although the Xen source code implies that the limit is 4k,
 * in practice it turns out that we can only send 2k bytes of
 * payload before receiving an ENOSPC.  We set it to an even
 * smaller value, however, because there's no real need to use
 * large buffers for anything.
 */
#define XS_MAX_PAYLOAD		1024

struct xs_msg {
	struct xs_msghdr	 xsm_hdr;
	uint32_t		 xsm_read;
	uint32_t		 xsm_dlen;
	int			 xsm_error;
	uint8_t			*xsm_data;
	TAILQ_ENTRY(xs_msg)	 xsm_link;
};
TAILQ_HEAD(xs_msgq, xs_msg);

#define XS_RING_SIZE		1024

struct xs_ring {
	uint8_t			xsr_req[XS_RING_SIZE];
	uint8_t			xsr_rsp[XS_RING_SIZE];
	uint32_t		xsr_req_cons;
	uint32_t		xsr_req_prod;
	uint32_t		xsr_rsp_cons;
	uint32_t		xsr_rsp_prod;
} __packed;
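
/*
 * The producer and consumer indices are free-running 32-bit counters:
 * they are never reduced modulo the ring size when stored, only when
 * used to index the data arrays, e.g. xsr_req[prod & (XS_RING_SIZE - 1)]
 * in xs_ring_put() below.  Under the single-producer/single-consumer
 * discipline used here this yields the usual invariants:
 *
 *	used  = prod - cons;		0 <= used <= XS_RING_SIZE
 *	avail = XS_RING_SIZE - used;	(writer side of a ring)
 *
 * Unsigned wraparound keeps the subtraction correct even after the
 * counters overflow.
 */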

#define XST_DELAY		1	/* in seconds */

#define XSW_TOKLEN		(sizeof(void *) * 2 + 1)

struct xs_watch {
	TAILQ_ENTRY(xs_watch)	 xsw_entry;
	uint8_t			 xsw_token[XSW_TOKLEN];
	struct task		*xsw_task;
};

/*
 * Container for all XenStore related state.
 */
struct xs_softc {
	struct xen_softc	*xs_sc;

	evtchn_port_t		 xs_port;
	xen_intr_handle_t	 xs_ih;

	struct xs_ring		*xs_ring;

	struct xs_msg		 xs_msgs[10];
	struct xs_msg		*xs_rmsg;

	struct xs_msgq		 xs_free;
	struct xs_msgq		 xs_reqs;
	struct xs_msgq		 xs_rsps;

	volatile uint		 xs_rid;

	const char		*xs_wchan;
	const char		*xs_rchan;

	struct mutex		 xs_reqlck;	/* request queue mutex */
	struct mutex		 xs_rsplck;	/* response queue mutex */
	struct mutex		 xs_frqlck;	/* free queue mutex */

	TAILQ_HEAD(, xs_watch)	 xs_watches;
	struct mutex		 xs_watchlck;
	struct xs_msg		 xs_emsg;
	struct taskq		*xs_watchtq;

	struct rwlock		 xs_rnglck;
};

struct xs_msg *
	xs_get_msg(struct xs_softc *, int);
void	xs_put_msg(struct xs_softc *, struct xs_msg *);
int	xs_ring_get(struct xs_softc *, void *, size_t);
int	xs_ring_put(struct xs_softc *, void *, size_t);
void	xs_intr(void *);
void	xs_poll(struct xs_softc *, int);
int	xs_output(struct xs_transaction *, uint8_t *, int);
int	xs_start(struct xs_transaction *, struct xs_msg *, struct iovec *, int);
struct xs_msg *
	xs_reply(struct xs_transaction *, uint);
int	xs_parse(struct xs_transaction *, struct xs_msg *, struct iovec **,
	    int *);
int	xs_event(struct xs_softc *, struct xs_msg *);

int
xs_attach(struct xen_softc *sc)
{
	struct xen_hvm_param xhv;
	struct xs_softc *xs;
	paddr_t pa;
	int i;

	if ((xs = malloc(sizeof(*xs), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) {
		printf(": failed to allocate xenstore softc\n");
		return (-1);
	}
	sc->sc_xs = xs;
	xs->xs_sc = sc;

	/* Fetch event channel port */
	memset(&xhv, 0, sizeof(xhv));
	xhv.domid = DOMID_SELF;
	xhv.index = HVM_PARAM_STORE_EVTCHN;
	if (xen_hypercall(sc, XC_HVM, 2, HVMOP_get_param, &xhv)) {
		printf(": failed to obtain a xenstore event channel\n");
		goto fail_1;
	}
	xs->xs_port = xhv.value;

	printf(", event channel %u\n", xs->xs_port);

	/* Fetch a frame number (PA) of a shared xenstore page */
	memset(&xhv, 0, sizeof(xhv));
	xhv.domid = DOMID_SELF;
	xhv.index = HVM_PARAM_STORE_PFN;
	if (xen_hypercall(sc, XC_HVM, 2, HVMOP_get_param, &xhv))
		goto fail_1;
	pa = ptoa(xhv.value);
	/* Allocate a page of virtual memory */
	xs->xs_ring = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
	if (xs->xs_ring == NULL)
		goto fail_1;
	/* Map in the xenstore page into our KVA */
	pa |= PMAP_NOCACHE;
	pmap_kenter_pa((vaddr_t)xs->xs_ring, pa, PROT_READ | PROT_WRITE);
	pmap_update(pmap_kernel());

	if (xen_intr_establish(xs->xs_port, &xs->xs_ih, 0, xs_intr, xs,
	    sc->sc_dev.dv_xname))
		goto fail_2;

	xs->xs_wchan = "xswrite";
	xs->xs_rchan = "xsread";

	TAILQ_INIT(&xs->xs_free);
	TAILQ_INIT(&xs->xs_reqs);
	TAILQ_INIT(&xs->xs_rsps);
	for (i = 0; i < nitems(xs->xs_msgs); i++)
		TAILQ_INSERT_TAIL(&xs->xs_free, &xs->xs_msgs[i], xsm_link);

	mtx_init(&xs->xs_reqlck, IPL_NET);
	mtx_init(&xs->xs_rsplck, IPL_NET);
	mtx_init(&xs->xs_frqlck, IPL_NET);

	rw_init(&xs->xs_rnglck, "xsrnglck");

	xs->xs_watchtq = taskq_create("xenwatch", 1, IPL_NET, 0);

	mtx_init(&xs->xs_watchlck, IPL_NET);
	TAILQ_INIT(&xs->xs_watches);

	xs->xs_emsg.xsm_data = malloc(XS_MAX_PAYLOAD, M_DEVBUF,
	    M_ZERO | M_NOWAIT);
	if (xs->xs_emsg.xsm_data == NULL)
		goto fail_2;
	xs->xs_emsg.xsm_dlen = XS_MAX_PAYLOAD;

	return (0);

 fail_2:
	pmap_kremove((vaddr_t)xs->xs_ring, PAGE_SIZE);
	pmap_update(pmap_kernel());
	km_free(xs->xs_ring, PAGE_SIZE, &kv_any, &kp_none);
	xs->xs_ring = NULL;
 fail_1:
	free(xs, sizeof(*xs), M_DEVBUF);
	sc->sc_xs = NULL;
	return (-1);
}

struct xs_msg *
xs_get_msg(struct xs_softc *xs, int waitok)
{
	static const char *chan = "xsalloc";
	struct xs_msg *xsm;

	mtx_enter(&xs->xs_frqlck);
	for (;;) {
		xsm = TAILQ_FIRST(&xs->xs_free);
		if (xsm != NULL) {
			TAILQ_REMOVE(&xs->xs_free, xsm, xsm_link);
			break;
		}
		if (!waitok) {
			mtx_leave(&xs->xs_frqlck);
			delay(XST_DELAY * 1000 >> 2);
			mtx_enter(&xs->xs_frqlck);
		} else
			msleep_nsec(chan, &xs->xs_frqlck, PRIBIO, chan,
			    SEC_TO_NSEC(XST_DELAY) >> 2);
	}
	mtx_leave(&xs->xs_frqlck);
	return (xsm);
}

void
xs_put_msg(struct xs_softc *xs, struct xs_msg *xsm)
{
	memset(xsm, 0, sizeof(*xsm));
	mtx_enter(&xs->xs_frqlck);
	TAILQ_INSERT_TAIL(&xs->xs_free, xsm, xsm_link);
	mtx_leave(&xs->xs_frqlck);
}

int
xs_geterror(struct xs_msg *xsm)
{
	int i;

	/* Stop at the NULL sentinel to avoid passing NULL to strcmp() */
	for (i = 0; xs_errors[i].xse_errstr != NULL; i++)
		if (strcmp(xs_errors[i].xse_errstr, xsm->xsm_data) == 0)
			return (xs_errors[i].xse_errnum);
	return (EOPNOTSUPP);
}
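
/*
 * XenStore reports failures as the ASCII name of an errno value
 * ("ENOENT", "EACCES", ...) in the reply payload rather than as a
 * binary code; xs_geterror() maps such a string back onto the local
 * errno number via the xs_errors[] table above.
 */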

static inline uint32_t
xs_ring_avail(struct xs_ring *xsr, int req)
{
	uint32_t cons = req ? xsr->xsr_req_cons : xsr->xsr_rsp_cons;
	uint32_t prod = req ? xsr->xsr_req_prod : xsr->xsr_rsp_prod;

	KASSERT(prod - cons <= XS_RING_SIZE);
	return (req ? XS_RING_SIZE - (prod - cons) : prod - cons);
}

void
xs_poll(struct xs_softc *xs, int nosleep)
{
	int s;

	if (nosleep) {
		delay(XST_DELAY * 1000 >> 2);
		s = splnet();
		xs_intr(xs);
		splx(s);
	} else {
		tsleep_nsec(xs->xs_wchan, PRIBIO, xs->xs_wchan,
		    SEC_TO_NSEC(XST_DELAY) >> 2);
	}
}

int
xs_output(struct xs_transaction *xst, uint8_t *bp, int len)
{
	struct xs_softc *xs = xst->xst_cookie;
	int chunk;

	while (len > 0) {
		chunk = xs_ring_put(xs, bp, MIN(len, XS_RING_SIZE));
		if (chunk < 0)
			return (-1);
		if (chunk > 0) {
			len -= chunk;
			bp += chunk;
			if (xs_ring_avail(xs->xs_ring, 1) > 0)
				continue;
		}
		/* Squeaky wheel gets the kick */
		xen_intr_signal(xs->xs_ih);
		/*
		 * Either chunk == 0 and we need to wait for the
		 * hypervisor to consume what has already been
		 * written, or we have managed to fill the ring
		 * and must wait for it to collect the data.
		 */
		while (xs->xs_ring->xsr_req_prod != xs->xs_ring->xsr_req_cons)
			xs_poll(xs, 1);
	}
	return (0);
}

int
xs_start(struct xs_transaction *xst, struct xs_msg *xsm, struct iovec *iov,
    int iov_cnt)
{
	struct xs_softc *xs = xst->xst_cookie;
	int i;

	rw_enter_write(&xs->xs_rnglck);

	/* Header */
	if (xs_output(xst, (uint8_t *)&xsm->xsm_hdr,
	    sizeof(xsm->xsm_hdr)) == -1) {
		printf("%s: failed to write the header\n", __func__);
		rw_exit_write(&xs->xs_rnglck);
		return (-1);
	}

	/* Data loop */
	for (i = 0; i < iov_cnt; i++) {
		if (xs_output(xst, iov[i].iov_base, iov[i].iov_len) == -1) {
			printf("%s: failed on iovec #%d len %lu\n", __func__,
			    i, iov[i].iov_len);
			rw_exit_write(&xs->xs_rnglck);
			return (-1);
		}
	}

	mtx_enter(&xs->xs_reqlck);
	TAILQ_INSERT_TAIL(&xs->xs_reqs, xsm, xsm_link);
	mtx_leave(&xs->xs_reqlck);

	xen_intr_signal(xs->xs_ih);

	rw_exit_write(&xs->xs_rnglck);

	return (0);
}

struct xs_msg *
xs_reply(struct xs_transaction *xst, uint rid)
{
	struct xs_softc *xs = xst->xst_cookie;
	struct xs_msg *xsm;
	int s;

	mtx_enter(&xs->xs_rsplck);
	for (;;) {
		TAILQ_FOREACH(xsm, &xs->xs_rsps, xsm_link) {
			if (xsm->xsm_hdr.xmh_tid == xst->xst_id &&
			    xsm->xsm_hdr.xmh_rid == rid)
				break;
		}
		if (xsm != NULL) {
			TAILQ_REMOVE(&xs->xs_rsps, xsm, xsm_link);
			break;
		}
		if (cold) {
			mtx_leave(&xs->xs_rsplck);
			delay(XST_DELAY * 1000 >> 2);
			s = splnet();
			xs_intr(xs);
			splx(s);
			mtx_enter(&xs->xs_rsplck);
		} else
			msleep_nsec(xs->xs_rchan, &xs->xs_rsplck, PRIBIO,
			    xs->xs_rchan, SEC_TO_NSEC(XST_DELAY) >> 2);
	}
	mtx_leave(&xs->xs_rsplck);
	return (xsm);
}

int
xs_ring_put(struct xs_softc *xs, void *src, size_t size)
{
	struct xs_ring *xsr = xs->xs_ring;
	uint32_t prod = xsr->xsr_req_prod & (XS_RING_SIZE - 1);
	uint32_t avail = xs_ring_avail(xsr, 1);
	size_t left;

	if (size > XS_RING_SIZE)
		return (-1);
	if (avail == 0)
		return (0);

	/* Bound the size by the number of available slots */
	size = MIN(size, avail);
	/* How many contiguous bytes can we memcpy... */
	left = XS_RING_SIZE - prod;
	/* ...bounded by how much we need to write? */
	left = MIN(left, size);

	memcpy(&xsr->xsr_req[prod], src, left);
	memcpy(&xsr->xsr_req[0], (caddr_t)src + left, size - left);
	virtio_membar_sync();
	xsr->xsr_req_prod += size;
	return (size);
}

int
xs_ring_get(struct xs_softc *xs, void *dst, size_t size)
{
	struct xs_ring *xsr = xs->xs_ring;
	uint32_t cons = xsr->xsr_rsp_cons & (XS_RING_SIZE - 1);
	uint32_t avail = xs_ring_avail(xsr, 0);
	size_t left;

	if (size > XS_RING_SIZE)
		return (-1);
	if (avail == 0)
		return (0);

	/* Bound the size by the number of available slots */
	size = MIN(size, avail);
	/* How many contiguous bytes can we memcpy... */
	left = XS_RING_SIZE - cons;
	/* ...bounded by how much we need to read? */
	left = MIN(left, size);

	memcpy(dst, &xsr->xsr_rsp[cons], left);
	memcpy((caddr_t)dst + left, &xsr->xsr_rsp[0], size - left);
	virtio_membar_sync();
	xsr->xsr_rsp_cons += size;
	return (size);
}
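
/*
 * Together xs_ring_put() and xs_ring_get() implement a classic
 * single-producer/single-consumer byte ring: each transfer is split
 * into at most two memcpy() calls to handle wraparound at the end of
 * the array, and virtio_membar_sync() orders the data copy before the
 * index update so that the peer never observes an index covering bytes
 * that are still in flight.
 */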

void
xs_intr(void *arg)
{
	struct xs_softc *xs = arg;
	struct xs_ring *xsr = xs->xs_ring;
	struct xen_softc *sc = xs->xs_sc;
	struct xs_msg *xsm = xs->xs_rmsg;
	struct xs_msghdr xmh;
	uint32_t avail;
	int len;

	virtio_membar_sync();

	if (xsr->xsr_rsp_cons == xsr->xsr_rsp_prod)
		return;

	avail = xs_ring_avail(xsr, 0);

	/* Response processing */

 again:
	if (xs->xs_rmsg == NULL) {
		if (avail < sizeof(xmh)) {
			DPRINTF("%s: incomplete header: %u\n",
			    sc->sc_dev.dv_xname, avail);
			goto out;
		}
		avail -= sizeof(xmh);

		if ((len = xs_ring_get(xs, &xmh, sizeof(xmh))) != sizeof(xmh)) {
			printf("%s: message too short: %d\n",
			    sc->sc_dev.dv_xname, len);
			goto out;
		}

		if (xmh.xmh_type == XS_EVENT) {
			xsm = &xs->xs_emsg;
			xsm->xsm_read = 0;
		} else {
			mtx_enter(&xs->xs_reqlck);
			TAILQ_FOREACH(xsm, &xs->xs_reqs, xsm_link) {
				if (xsm->xsm_hdr.xmh_rid == xmh.xmh_rid) {
					TAILQ_REMOVE(&xs->xs_reqs, xsm,
					    xsm_link);
					break;
				}
			}
			mtx_leave(&xs->xs_reqlck);
			if (xsm == NULL) {
				printf("%s: unexpected message id %u\n",
				    sc->sc_dev.dv_xname, xmh.xmh_rid);
				goto out;
			}
		}
		memcpy(&xsm->xsm_hdr, &xmh, sizeof(xmh));
		xs->xs_rmsg = xsm;
	}

	if (xsm->xsm_hdr.xmh_len > xsm->xsm_dlen)
		xsm->xsm_error = EMSGSIZE;

	len = MIN(xsm->xsm_hdr.xmh_len - xsm->xsm_read, avail);
	if (len) {
		/* Get data if reply is not empty */
		if ((len = xs_ring_get(xs,
		    &xsm->xsm_data[xsm->xsm_read], len)) <= 0) {
			printf("%s: read failure %d\n", sc->sc_dev.dv_xname,
			    len);
			goto out;
		}
		xsm->xsm_read += len;
	}

	/* Notify reader that we've managed to read the whole message */
	if (xsm->xsm_read == xsm->xsm_hdr.xmh_len) {
		xs->xs_rmsg = NULL;
		if (xsm->xsm_hdr.xmh_type == XS_EVENT) {
			xs_event(xs, xsm);
		} else {
			mtx_enter(&xs->xs_rsplck);
			TAILQ_INSERT_TAIL(&xs->xs_rsps, xsm, xsm_link);
			mtx_leave(&xs->xs_rsplck);
			wakeup(xs->xs_rchan);
		}
	}

	if ((avail = xs_ring_avail(xsr, 0)) > 0)
		goto again;

 out:
	/* Wakeup sleeping writes (if any) */
	wakeup(xs->xs_wchan);
	xen_intr_signal(xs->xs_ih);
}

static inline int
xs_get_buf(struct xs_transaction *xst, struct xs_msg *xsm, int len)
{
	unsigned char *buf;

	buf = malloc(len, M_DEVBUF, M_ZERO | (cold ? M_NOWAIT : M_WAITOK));
	if (buf == NULL)
		return (-1);
	xsm->xsm_dlen = len;
	xsm->xsm_data = buf;
	return (0);
}

static inline void
xs_put_buf(struct xs_transaction *xst, struct xs_msg *xsm)
{
	free(xsm->xsm_data, M_DEVBUF, xsm->xsm_dlen);
	xsm->xsm_data = NULL;
}

void
xs_resfree(struct xs_transaction *xst, struct iovec *iov, int iov_cnt)
{
	int i;

	for (i = 0; i < iov_cnt; i++)
		free(iov[i].iov_base, M_DEVBUF, iov[i].iov_len);
	free(iov, M_DEVBUF, sizeof(struct iovec) * iov_cnt);
}

int
xs_parse(struct xs_transaction *xst, struct xs_msg *xsm, struct iovec **iov,
    int *iov_cnt)
{
	char *bp, *cp;
	uint32_t dlen;
	int i, flags;

	/* If the response size is zero, we return an empty string */
	dlen = MAX(xsm->xsm_hdr.xmh_len, 1);
	flags = M_ZERO | (cold ? M_NOWAIT : M_WAITOK);

	*iov_cnt = 0;
	/* Make sure that the data is NUL terminated */
	if (xsm->xsm_data[dlen - 1] != '\0') {
		/*
		 * The XS_READ operation always returns length without
		 * the trailing NUL so we have to adjust the length.
		 */
		dlen = MIN(dlen + 1, xsm->xsm_dlen);
		xsm->xsm_data[dlen - 1] = '\0';
	}
	for (i = 0; i < dlen; i++)
		if (xsm->xsm_data[i] == '\0')
			(*iov_cnt)++;
	*iov = mallocarray(*iov_cnt, sizeof(struct iovec), M_DEVBUF, flags);
	if (*iov == NULL)
		goto cleanup;
	bp = xsm->xsm_data;
	for (i = 0; i < *iov_cnt; i++) {
		cp = bp;
		while (cp - (caddr_t)xsm->xsm_data < dlen && *cp != '\0')
			cp++;
		(*iov)[i].iov_len = cp - bp + 1;
		(*iov)[i].iov_base = malloc((*iov)[i].iov_len, M_DEVBUF, flags);
		if (!(*iov)[i].iov_base) {
			xs_resfree(xst, *iov, *iov_cnt);
			goto cleanup;
		}
		memcpy((*iov)[i].iov_base, bp, (*iov)[i].iov_len);
		bp = ++cp;
	}
	return (0);

 cleanup:
	*iov = NULL;
	*iov_cnt = 0;
	return (ENOMEM);
}

int
xs_event(struct xs_softc *xs, struct xs_msg *xsm)
{
	struct xs_watch *xsw;
	char *token = NULL;
	int i;

	for (i = 0; i < xsm->xsm_read; i++) {
		if (xsm->xsm_data[i] == '\0') {
			token = &xsm->xsm_data[i+1];
			break;
		}
	}
	if (token == NULL) {
		printf("%s: event on \"%s\" without token\n",
		    xs->xs_sc->sc_dev.dv_xname, xsm->xsm_data);
		return (-1);
	}

	mtx_enter(&xs->xs_watchlck);
	TAILQ_FOREACH(xsw, &xs->xs_watches, xsw_entry) {
		if (strcmp(xsw->xsw_token, token))
			continue;
		mtx_leave(&xs->xs_watchlck);
		task_add(xs->xs_watchtq, xsw->xsw_task);
		return (0);
	}
	mtx_leave(&xs->xs_watchlck);

	printf("%s: no watchers for node \"%s\"\n",
	    xs->xs_sc->sc_dev.dv_xname, xsm->xsm_data);
	return (-1);
}
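
/*
 * xs_cmd() below is the single entry point for issuing XenStore
 * requests.  A minimal sketch of a read, modeled on xs_getprop()
 * further down (the node name is illustrative only):
 *
 *	struct xs_transaction xst;
 *	struct iovec *iovp = NULL;
 *	int error, iov_cnt = 0;
 *
 *	memset(&xst, 0, sizeof(xst));
 *	xst.xst_id = 0;			// no enclosing transaction
 *	xst.xst_cookie = sc->sc_xs;
 *	error = xs_cmd(&xst, XS_READ, "device/vif/0/mac", &iovp, &iov_cnt);
 *	if (error == 0) {
 *		// iovp[0..iov_cnt-1] hold NUL-terminated result strings
 *		xs_resfree(&xst, iovp, iov_cnt);
 *	}
 */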

int
xs_cmd(struct xs_transaction *xst, int cmd, const char *path,
    struct iovec **iov, int *iov_cnt)
{
	struct xs_softc *xs = xst->xst_cookie;
	struct xs_msg *xsm;
	struct iovec ov[10];	/* output vector */
	int datalen = XS_ERR_PAYLOAD;
	int ov_cnt = 0;
	enum { READ, WRITE } mode = READ;
	int i, error = 0;

	if (cmd >= XS_MAX)
		return (EINVAL);

	switch (cmd) {
	case XS_TOPEN:
		ov[0].iov_base = "";
		ov[0].iov_len = 1;
		ov_cnt++;
		break;
	case XS_TCLOSE:
	case XS_RM:
	case XS_WATCH:
	case XS_WRITE:
		mode = WRITE;
		/* FALLTHROUGH */
	default:
		if (mode == READ)
			datalen = XS_MAX_PAYLOAD;
		break;
	}

	if (path) {
		ov[ov_cnt].iov_base = (void *)path;
		ov[ov_cnt++].iov_len = strlen(path) + 1; /* +NUL */
	}

	if (mode == WRITE && iov && iov_cnt && *iov_cnt > 0) {
		for (i = 0; i < *iov_cnt && ov_cnt < nitems(ov);
		    i++, ov_cnt++) {
			ov[ov_cnt].iov_base = (*iov)[i].iov_base;
			ov[ov_cnt].iov_len = (*iov)[i].iov_len;
		}
	}

	xsm = xs_get_msg(xs, !cold);

	if (xs_get_buf(xst, xsm, datalen)) {
		xs_put_msg(xs, xsm);
		return (ENOMEM);
	}

	xsm->xsm_hdr.xmh_tid = xst->xst_id;
	xsm->xsm_hdr.xmh_type = cmd;
	xsm->xsm_hdr.xmh_rid = atomic_inc_int_nv(&xs->xs_rid);

	for (i = 0; i < ov_cnt; i++)
		xsm->xsm_hdr.xmh_len += ov[i].iov_len;

	if (xsm->xsm_hdr.xmh_len > XS_MAX_PAYLOAD) {
		printf("%s: message type %d with payload above the limit\n",
		    xs->xs_sc->sc_dev.dv_xname, cmd);
		xs_put_buf(xst, xsm);
		xs_put_msg(xs, xsm);
		return (EIO);
	}

	if (xs_start(xst, xsm, ov, ov_cnt)) {
		printf("%s: message type %d transmission failed\n",
		    xs->xs_sc->sc_dev.dv_xname, cmd);
		xs_put_buf(xst, xsm);
		xs_put_msg(xs, xsm);
		return (EIO);
	}

	xsm = xs_reply(xst, xsm->xsm_hdr.xmh_rid);

	if (xsm->xsm_hdr.xmh_type == XS_ERROR) {
		error = xs_geterror(xsm);
		DPRINTF("%s: xenstore request %d \"%s\" error %s\n",
		    xs->xs_sc->sc_dev.dv_xname, cmd, path, xsm->xsm_data);
	} else if (xsm->xsm_error != 0)
		error = xsm->xsm_error;
	else if (mode == READ) {
		KASSERT(iov && iov_cnt);
		error = xs_parse(xst, xsm, iov, iov_cnt);
	}
#ifdef XS_DEBUG
	else
		if (strcmp(xsm->xsm_data, "OK"))
			printf("%s: xenstore request %d failed: %s\n",
			    xs->xs_sc->sc_dev.dv_xname, cmd, xsm->xsm_data);
#endif

	xs_put_buf(xst, xsm);
	xs_put_msg(xs, xsm);

	return (error);
}

int
xs_watch(void *xsc, const char *path, const char *property, struct task *task,
    void (*cb)(void *), void *arg)
{
	struct xen_softc *sc = xsc;
	struct xs_softc *xs = sc->sc_xs;
	struct xs_transaction xst;
	struct xs_watch *xsw;
	struct iovec iov, *iovp = &iov;
	char key[256];
	int error, iov_cnt, ret;

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	xsw = malloc(sizeof(*xsw), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (xsw == NULL)
		return (-1);

	task_set(task, cb, arg);
	xsw->xsw_task = task;

	snprintf(xsw->xsw_token, sizeof(xsw->xsw_token), "%0lx",
	    (unsigned long)xsw);

	if (path)
		ret = snprintf(key, sizeof(key), "%s/%s", path, property);
	else
		ret = snprintf(key, sizeof(key), "%s", property);
	if (ret == -1 || ret >= sizeof(key)) {
		free(xsw, M_DEVBUF, sizeof(*xsw));
		return (EINVAL);
	}

	iov.iov_base = xsw->xsw_token;
	iov.iov_len = sizeof(xsw->xsw_token);
	iov_cnt = 1;

	/*
	 * xs_watches must be prepared pre-emptively because a xenstore
	 * event is raised immediately after a watch is established.
	 */
	mtx_enter(&xs->xs_watchlck);
	TAILQ_INSERT_TAIL(&xs->xs_watches, xsw, xsw_entry);
	mtx_leave(&xs->xs_watchlck);

	if ((error = xs_cmd(&xst, XS_WATCH, key, &iovp, &iov_cnt)) != 0) {
		mtx_enter(&xs->xs_watchlck);
		TAILQ_REMOVE(&xs->xs_watches, xsw, xsw_entry);
		mtx_leave(&xs->xs_watchlck);
		free(xsw, M_DEVBUF, sizeof(*xsw));
		return (error);
	}

	return (0);
}
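
/*
 * A sketch of registering a watch; the callback, node path, and task
 * variable here are hypothetical and purely for illustration:
 *
 *	void
 *	mydev_changed(void *arg)
 *	{
 *		// runs on the "xenwatch" taskq after the node changes
 *	}
 *
 *	static struct task mydev_task;
 *
 *	if (xs_watch(sc, "backend/vif/0", "state", &mydev_task,
 *	    mydev_changed, sc))
 *		printf("failed to establish a xenstore watch\n");
 */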

static unsigned long long
atoull(const char *cp, int *error)
{
	unsigned long long res, cutoff;
	int ch;
	int cutlim;

	res = 0;
	cutoff = ULLONG_MAX / (unsigned long long)10;
	cutlim = ULLONG_MAX % (unsigned long long)10;

	do {
		if (*cp < '0' || *cp > '9') {
			*error = EINVAL;
			return (res);
		}
		ch = *cp - '0';
		if (res > cutoff || (res == cutoff && ch > cutlim)) {
			*error = ERANGE;
			return (res);
		}
		res *= 10;
		res += ch;
	} while (*(++cp) != '\0');

	*error = 0;
	return (res);
}

int
xs_getnum(void *xsc, const char *path, const char *property,
    unsigned long long *val)
{
	char *buf;
	int error = 0;

	buf = malloc(XS_MAX_PAYLOAD, M_DEVBUF, M_ZERO |
	    (cold ? M_NOWAIT : M_WAITOK));
	if (buf == NULL)
		return (ENOMEM);

	error = xs_getprop(xsc, path, property, buf, XS_MAX_PAYLOAD);
	if (error)
		goto out;

	*val = atoull(buf, &error);
	if (error)
		goto out;

 out:
	free(buf, M_DEVBUF, XS_MAX_PAYLOAD);
	return (error);
}

int
xs_setnum(void *xsc, const char *path, const char *property,
    unsigned long long val)
{
	char buf[32];
	int ret;

	ret = snprintf(buf, sizeof(buf), "%llu", val);
	if (ret == -1 || ret >= sizeof(buf))
		return (ERANGE);

	return (xs_setprop(xsc, path, property, buf, strlen(buf)));
}

int
xs_getprop(void *xsc, const char *path, const char *property, char *value,
    int size)
{
	struct xen_softc *sc = xsc;
	struct xs_transaction xst;
	struct iovec *iovp = NULL;
	char key[256];
	int error, ret, iov_cnt = 0;

	if (!property)
		return (EINVAL);

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	if (path)
		ret = snprintf(key, sizeof(key), "%s/%s", path, property);
	else
		ret = snprintf(key, sizeof(key), "%s", property);
	if (ret == -1 || ret >= sizeof(key))
		return (EINVAL);

	if ((error = xs_cmd(&xst, XS_READ, key, &iovp, &iov_cnt)) != 0)
		return (error);

	if (iov_cnt > 0)
		strlcpy(value, (char *)iovp->iov_base, size);

	xs_resfree(&xst, iovp, iov_cnt);

	return (0);
}

int
xs_setprop(void *xsc, const char *path, const char *property, char *value,
    int size)
{
	struct xen_softc *sc = xsc;
	struct xs_transaction xst;
	struct iovec iov, *iovp = &iov;
	char key[256];
	int error, ret, iov_cnt = 0;

	if (!property)
		return (EINVAL);

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	if (path)
		ret = snprintf(key, sizeof(key), "%s/%s", path, property);
	else
		ret = snprintf(key, sizeof(key), "%s", property);
	if (ret == -1 || ret >= sizeof(key))
		return (EINVAL);

	iov.iov_base = value;
	iov.iov_len = size;
	iov_cnt = 1;

	error = xs_cmd(&xst, XS_WRITE, key, &iovp, &iov_cnt);

	return (error);
}

int
xs_cmpprop(void *xsc, const char *path, const char *property, const char *value,
    int *result)
{
	struct xen_softc *sc = xsc;
	struct xs_transaction xst;
	struct iovec *iovp = NULL;
	char key[256];
	int error, ret, iov_cnt = 0;

	if (!property)
		return (EINVAL);

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	if (path)
		ret = snprintf(key, sizeof(key), "%s/%s", path, property);
	else
		ret = snprintf(key, sizeof(key), "%s", property);
	if (ret == -1 || ret >= sizeof(key))
		return (EINVAL);

	if ((error = xs_cmd(&xst, XS_READ, key, &iovp, &iov_cnt)) != 0)
		return (error);

	*result = strcmp(value, (char *)iovp->iov_base);

	xs_resfree(&xst, iovp, iov_cnt);

	return (0);
}

int
xs_await_transition(void *xsc, const char *path, const char *property,
    const char *value, int timo)
{
	struct xen_softc *sc = xsc;
	int error, res;

	do {
		error = xs_cmpprop(xsc, path, property, value, &res);
		if (error)
			return (error);
		if (timo && --timo == 0)
			return (ETIMEDOUT);
		xs_poll(sc->sc_xs, cold);
	} while (res != 0);

	return (0);
}

int
xs_kvop(void *xsc, int op, char *key, char *value, size_t valuelen)
{
	struct xen_softc *sc = xsc;
	struct xs_transaction xst;
	struct iovec iov, *iovp = &iov;
	int error = 0, iov_cnt = 0, cmd, i;

	switch (op) {
	case PVBUS_KVWRITE:
		cmd = XS_WRITE;
		iov.iov_base = value;
		iov.iov_len = strlen(value);
		iov_cnt = 1;
		break;
	case PVBUS_KVREAD:
		cmd = XS_READ;
		break;
	case PVBUS_KVLS:
		cmd = XS_LIST;
		break;
	default:
		return (EOPNOTSUPP);
	}

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	if ((error = xs_cmd(&xst, cmd, key, &iovp, &iov_cnt)) != 0)
		return (error);

	memset(value, 0, valuelen);

	switch (cmd) {
	case XS_READ:
		if (iov_cnt == 1 && iovp[0].iov_len == 1) {
			xs_resfree(&xst, iovp, iov_cnt);

			/*
			 * We cannot distinguish if the returned value is
			 * a directory or a file in the xenstore. The only
			 * indication is that the read value of a directory
			 * returns an empty string (single nul byte),
			 * so try to get the directory list in this case.
			 */
			return (xs_kvop(xsc, PVBUS_KVLS, key, value, valuelen));
		}
		/* FALLTHROUGH */
	case XS_LIST:
		for (i = 0; i < iov_cnt; i++) {
			if (i > 0 && strlcat(value, "\n", valuelen) >=
			    valuelen) {
				error = ERANGE;
				break;
			}
			if (strlcat(value, iovp[i].iov_base,
			    valuelen) >= valuelen) {
				error = ERANGE;
				break;
			}
		}
		xs_resfree(&xst, iovp, iov_cnt);
		break;
	default:
		break;
	}

	return (error);
}