/*	$OpenBSD: xenstore.c,v 1.50 2024/05/24 10:05:55 jsg Exp $	*/

/*
 * Copyright (c) 2015 Mike Belopuhov
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/task.h>

#include <machine/bus.h>

#include <uvm/uvm_extern.h>

#include <dev/pv/pvvar.h>
#include <dev/pv/xenreg.h>
#include <dev/pv/xenvar.h>

/* #define XS_DEBUG */

#ifdef XS_DEBUG
#define DPRINTF(x...)		printf(x)
#else
#define DPRINTF(x...)
#endif
/*
 * The XenStore interface is a simple storage system that is a means of
 * communicating state and configuration data between the Xen Domain 0
 * and the various guest domains.  All configuration data, other than
 * the small amount of essential information required during the early
 * boot of a Xen-aware guest, is managed using the XenStore.
 *
 * The XenStore is ASCII string based, and has a structure and semantics
 * similar to a filesystem.  There are files and directories that are
 * able to contain files or other directories.  The depth of the hierarchy
 * is only limited by the XenStore's maximum path length.
 *
 * The communication channel between the XenStore service and other
 * domains is via two, guest specific, ring buffers in a shared memory
 * area.  One ring buffer is used for communicating in each direction.
 * The grant table references for this shared memory are given to the
 * guest via HVM hypercalls.
 *
 * The XenStore communication relies on an event channel and thus
 * interrupts.  Several Xen services depend on the XenStore, most
 * notably the XenBus used to discover and manage Xen devices.
 */
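
/*
 * Illustrative sketch (not compiled; xs_example_read and the node path
 * are made-up examples): a consumer reads a node with xs_getprop(),
 * defined below, which looks the value up by its filesystem-like path.
 */
#if 0
void
xs_example_read(struct xen_softc *sc)
{
	char mac[32];

	/* Read the node "device/vif/0/mac" into mac[]. */
	if (xs_getprop(sc, "device/vif/0", "mac", mac, sizeof(mac)) == 0)
		printf("mac address: %s\n", mac);
}
#endif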

const struct {
	const char		*xse_errstr;
	int			 xse_errnum;
} xs_errors[] = {
	{ "EINVAL",	EINVAL },
	{ "EACCES",	EACCES },
	{ "EEXIST",	EEXIST },
	{ "EISDIR",	EISDIR },
	{ "ENOENT",	ENOENT },
	{ "ENOMEM",	ENOMEM },
	{ "ENOSPC",	ENOSPC },
	{ "EIO",	EIO },
	{ "ENOTEMPTY",	ENOTEMPTY },
	{ "ENOSYS",	ENOSYS },
	{ "EROFS",	EROFS },
	{ "EBUSY",	EBUSY },
	{ "EAGAIN",	EAGAIN },
	{ "EISCONN",	EISCONN },
	{ NULL,		-1 },
};

struct xs_msghdr {
	/* Message type */
	uint32_t		 xmh_type;
	/* Request identifier, echoed in daemon's response.  */
	uint32_t		 xmh_rid;
	/* Transaction id (0 if not related to a transaction). */
	uint32_t		 xmh_tid;
	/* Length of data following this. */
	uint32_t		 xmh_len;
	/* Generally followed by nul-terminated string(s). */
} __packed;
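
/*
 * Wire format example (illustrative): an XS_READ request for the node
 * "control/shutdown" is a 16-byte xs_msghdr with xmh_type set to
 * XS_READ and xmh_len set to 17 (the path plus its terminating NUL),
 * immediately followed by those 17 payload bytes in the request ring.
 */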

/*
 * The minimum output buffer size needed to store an error string.
 */
#define XS_ERR_PAYLOAD		16

/*
 * Although the Xen source code implies that the limit is 4k,
 * in practice it turns out that we can only send 2k bytes of
 * payload before receiving an ENOSPC.  We set it to an even
 * smaller value, however, because there's no real need to use
 * large buffers for anything.
 */
#define XS_MAX_PAYLOAD		1024

struct xs_msg {
	struct xs_msghdr	 xsm_hdr;
	uint32_t		 xsm_read;
	uint32_t		 xsm_dlen;
	int			 xsm_error;
	uint8_t			*xsm_data;
	TAILQ_ENTRY(xs_msg)	 xsm_link;
};
TAILQ_HEAD(xs_msgq, xs_msg);

#define XS_RING_SIZE		1024

struct xs_ring {
	uint8_t			xsr_req[XS_RING_SIZE];
	uint8_t			xsr_rsp[XS_RING_SIZE];
	uint32_t		xsr_req_cons;
	uint32_t		xsr_req_prod;
	uint32_t		xsr_rsp_cons;
	uint32_t		xsr_rsp_prod;
} __packed;

#define XST_DELAY		1	/* in seconds */

#define XSW_TOKLEN		(sizeof(void *) * 2 + 1)

struct xs_watch {
	TAILQ_ENTRY(xs_watch)	 xsw_entry;
	uint8_t			 xsw_token[XSW_TOKLEN];
	struct task		*xsw_task;
};

/*
 * Container for all XenStore related state.
 */
struct xs_softc {
	struct xen_softc	*xs_sc;

	evtchn_port_t		 xs_port;
	xen_intr_handle_t	 xs_ih;

	struct xs_ring		*xs_ring;

	struct xs_msg		 xs_msgs[10];
	struct xs_msg		*xs_rmsg;

	struct xs_msgq		 xs_free;
	struct xs_msgq		 xs_reqs;
	struct xs_msgq		 xs_rsps;

	volatile uint		 xs_rid;

	const char		*xs_wchan;
	const char		*xs_rchan;

	struct mutex		 xs_reqlck;	/* request queue mutex */
	struct mutex		 xs_rsplck;	/* response queue mutex */
	struct mutex		 xs_frqlck;	/* free queue mutex */

	TAILQ_HEAD(, xs_watch)	 xs_watches;
	struct mutex		 xs_watchlck;
	struct xs_msg		 xs_emsg;
	struct taskq		*xs_watchtq;

	struct rwlock		 xs_rnglck;
};
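
/*
 * Message lifecycle (see xs_cmd() below): a message is taken off the
 * xs_free queue, moved onto xs_reqs once its bytes have been written
 * into the request ring, migrated to xs_rsps by xs_intr() when the
 * matching response arrives, and finally returned to xs_free by
 * xs_put_msg().
 */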

struct xs_msg *
	xs_get_msg(struct xs_softc *, int);
void	xs_put_msg(struct xs_softc *, struct xs_msg *);
int	xs_ring_get(struct xs_softc *, void *, size_t);
int	xs_ring_put(struct xs_softc *, void *, size_t);
void	xs_intr(void *);
void	xs_poll(struct xs_softc *, int);
int	xs_output(struct xs_transaction *, uint8_t *, int);
int	xs_start(struct xs_transaction *, struct xs_msg *, struct iovec *, int);
struct xs_msg *
	xs_reply(struct xs_transaction *, uint);
int	xs_parse(struct xs_transaction *, struct xs_msg *, struct iovec **,
	    int *);
int	xs_event(struct xs_softc *, struct xs_msg *);

int
xs_attach(struct xen_softc *sc)
{
	struct xen_hvm_param xhv;
	struct xs_softc *xs;
	paddr_t pa;
	int i;

	if ((xs = malloc(sizeof(*xs), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) {
		printf(": failed to allocate xenstore softc\n");
		return (-1);
	}
	sc->sc_xs = xs;
	xs->xs_sc = sc;

	/* Fetch event channel port */
	memset(&xhv, 0, sizeof(xhv));
	xhv.domid = DOMID_SELF;
	xhv.index = HVM_PARAM_STORE_EVTCHN;
	if (xen_hypercall(sc, XC_HVM, 2, HVMOP_get_param, &xhv)) {
		printf(": failed to obtain a xenstore event channel\n");
		goto fail_1;
	}
	xs->xs_port = xhv.value;

	printf(", event channel %u\n", xs->xs_port);

	/* Fetch a frame number (PA) of a shared xenstore page */
	memset(&xhv, 0, sizeof(xhv));
	xhv.domid = DOMID_SELF;
	xhv.index = HVM_PARAM_STORE_PFN;
	if (xen_hypercall(sc, XC_HVM, 2, HVMOP_get_param, &xhv))
		goto fail_1;
	pa = ptoa(xhv.value);
	/* Allocate a page of virtual memory */
	xs->xs_ring = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
	if (xs->xs_ring == NULL)
		goto fail_1;
	/* Map in the xenstore page into our KVA */
	pa |= PMAP_NOCACHE;
	pmap_kenter_pa((vaddr_t)xs->xs_ring, pa, PROT_READ | PROT_WRITE);
	pmap_update(pmap_kernel());

	if (xen_intr_establish(xs->xs_port, &xs->xs_ih, 0, xs_intr, xs,
	    sc->sc_dev.dv_xname))
		goto fail_2;

	xs->xs_wchan = "xswrite";
	xs->xs_rchan = "xsread";

	TAILQ_INIT(&xs->xs_free);
	TAILQ_INIT(&xs->xs_reqs);
	TAILQ_INIT(&xs->xs_rsps);
	for (i = 0; i < nitems(xs->xs_msgs); i++)
		TAILQ_INSERT_TAIL(&xs->xs_free, &xs->xs_msgs[i], xsm_link);

	mtx_init(&xs->xs_reqlck, IPL_NET);
	mtx_init(&xs->xs_rsplck, IPL_NET);
	mtx_init(&xs->xs_frqlck, IPL_NET);

	rw_init(&xs->xs_rnglck, "xsrnglck");

	xs->xs_watchtq = taskq_create("xenwatch", 1, IPL_NET, 0);

	mtx_init(&xs->xs_watchlck, IPL_NET);
	TAILQ_INIT(&xs->xs_watches);

	xs->xs_emsg.xsm_data = malloc(XS_MAX_PAYLOAD, M_DEVBUF,
	    M_ZERO | M_NOWAIT);
	if (xs->xs_emsg.xsm_data == NULL)
		goto fail_2;
	xs->xs_emsg.xsm_dlen = XS_MAX_PAYLOAD;

	return (0);

 fail_2:
	pmap_kremove((vaddr_t)xs->xs_ring, PAGE_SIZE);
	pmap_update(pmap_kernel());
	km_free(xs->xs_ring, PAGE_SIZE, &kv_any, &kp_none);
	xs->xs_ring = NULL;
 fail_1:
	free(xs, sizeof(*xs), M_DEVBUF);
	sc->sc_xs = NULL;
	return (-1);
}

struct xs_msg *
xs_get_msg(struct xs_softc *xs, int waitok)
{
	static const char *chan = "xsalloc";
	struct xs_msg *xsm;

	mtx_enter(&xs->xs_frqlck);
	for (;;) {
		xsm = TAILQ_FIRST(&xs->xs_free);
		if (xsm != NULL) {
			TAILQ_REMOVE(&xs->xs_free, xsm, xsm_link);
			break;
		}
		if (!waitok) {
			mtx_leave(&xs->xs_frqlck);
			delay(XST_DELAY * 1000 >> 2);
			mtx_enter(&xs->xs_frqlck);
		} else
			msleep_nsec(chan, &xs->xs_frqlck, PRIBIO, chan,
			    SEC_TO_NSEC(XST_DELAY) >> 2);
	}
	mtx_leave(&xs->xs_frqlck);
	return (xsm);
}

void
xs_put_msg(struct xs_softc *xs, struct xs_msg *xsm)
{
	memset(xsm, 0, sizeof(*xsm));
	mtx_enter(&xs->xs_frqlck);
	TAILQ_INSERT_TAIL(&xs->xs_free, xsm, xsm_link);
	mtx_leave(&xs->xs_frqlck);
}

int
xs_geterror(struct xs_msg *xsm)
{
	int i;

	/* Stop at the NULL sentinel to avoid strcmp() against it */
	for (i = 0; xs_errors[i].xse_errstr != NULL; i++)
		if (strcmp(xs_errors[i].xse_errstr, xsm->xsm_data) == 0)
			return (xs_errors[i].xse_errnum);
	return (EOPNOTSUPP);
}

static inline uint32_t
xs_ring_avail(struct xs_ring *xsr, int req)
{
	uint32_t cons = req ? xsr->xsr_req_cons : xsr->xsr_rsp_cons;
	uint32_t prod = req ? xsr->xsr_req_prod : xsr->xsr_rsp_prod;

	KASSERT(prod - cons <= XS_RING_SIZE);
	return (req ? XS_RING_SIZE - (prod - cons) : prod - cons);
}
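
/*
 * Note that the ring indices are free-running 32-bit counters that are
 * only masked with (XS_RING_SIZE - 1) on access, so unsigned wraparound
 * keeps "prod - cons" correct.  For example, with cons == 0xfffffff0
 * and prod == 0x00000010 the subtraction yields 0x20: 32 bytes are in
 * flight, leaving 992 of the 1024-byte request ring available.
 */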

void
xs_poll(struct xs_softc *xs, int nosleep)
{
	int s;

	if (nosleep) {
		delay(XST_DELAY * 1000 >> 2);
		s = splnet();
		xs_intr(xs);
		splx(s);
	} else {
		tsleep_nsec(xs->xs_wchan, PRIBIO, xs->xs_wchan,
		    SEC_TO_NSEC(XST_DELAY) >> 2);
	}
}

int
xs_output(struct xs_transaction *xst, uint8_t *bp, int len)
{
	struct xs_softc *xs = xst->xst_cookie;
	int chunk;

	while (len > 0) {
		chunk = xs_ring_put(xs, bp, MIN(len, XS_RING_SIZE));
		if (chunk < 0)
			return (-1);
		if (chunk > 0) {
			len -= chunk;
			bp += chunk;
			if (xs_ring_avail(xs->xs_ring, 1) > 0)
				continue;
		}
		/* Squeaky wheel gets the kick */
		xen_intr_signal(xs->xs_ih);
		/*
		 * Either chunk == 0 and we need to wait for the
		 * hypervisor to consume what has already been
		 * written, or we have managed to fill the ring
		 * and must wait for it to collect the data.
		 */
		while (xs->xs_ring->xsr_req_prod != xs->xs_ring->xsr_req_cons)
			xs_poll(xs, 1);
	}
	return (0);
}

int
xs_start(struct xs_transaction *xst, struct xs_msg *xsm, struct iovec *iov,
    int iov_cnt)
{
	struct xs_softc *xs = xst->xst_cookie;
	int i;

	rw_enter_write(&xs->xs_rnglck);

	/* Header */
	if (xs_output(xst, (uint8_t *)&xsm->xsm_hdr,
	    sizeof(xsm->xsm_hdr)) == -1) {
		printf("%s: failed to write the header\n", __func__);
		rw_exit_write(&xs->xs_rnglck);
		return (-1);
	}

	/* Data loop */
	for (i = 0; i < iov_cnt; i++) {
		if (xs_output(xst, iov[i].iov_base, iov[i].iov_len) == -1) {
			printf("%s: failed on iovec #%d len %lu\n", __func__,
			    i, iov[i].iov_len);
			rw_exit_write(&xs->xs_rnglck);
			return (-1);
		}
	}

	mtx_enter(&xs->xs_reqlck);
	TAILQ_INSERT_TAIL(&xs->xs_reqs, xsm, xsm_link);
	mtx_leave(&xs->xs_reqlck);

	xen_intr_signal(xs->xs_ih);

	rw_exit_write(&xs->xs_rnglck);

	return (0);
}

struct xs_msg *
xs_reply(struct xs_transaction *xst, uint rid)
{
	struct xs_softc *xs = xst->xst_cookie;
	struct xs_msg *xsm;
	int s;

	mtx_enter(&xs->xs_rsplck);
	for (;;) {
		TAILQ_FOREACH(xsm, &xs->xs_rsps, xsm_link) {
			if (xsm->xsm_hdr.xmh_tid == xst->xst_id &&
			    xsm->xsm_hdr.xmh_rid == rid)
				break;
		}
		if (xsm != NULL) {
			TAILQ_REMOVE(&xs->xs_rsps, xsm, xsm_link);
			break;
		}
		if (cold) {
			mtx_leave(&xs->xs_rsplck);
			delay(XST_DELAY * 1000 >> 2);
			s = splnet();
			xs_intr(xs);
			splx(s);
			mtx_enter(&xs->xs_rsplck);
		} else
			msleep_nsec(xs->xs_rchan, &xs->xs_rsplck, PRIBIO,
			    xs->xs_rchan, SEC_TO_NSEC(XST_DELAY) >> 2);
	}
	mtx_leave(&xs->xs_rsplck);
	return (xsm);
}

int
xs_ring_put(struct xs_softc *xs, void *src, size_t size)
{
	struct xs_ring *xsr = xs->xs_ring;
	uint32_t prod = xsr->xsr_req_prod & (XS_RING_SIZE - 1);
	uint32_t avail = xs_ring_avail(xsr, 1);
	size_t left;

	if (size > XS_RING_SIZE)
		return (-1);
	if (avail == 0)
		return (0);

	/* Bound the size by the number of available slots */
	size = MIN(size, avail);
	/* How many contiguous bytes can we memcpy... */
	left = XS_RING_SIZE - prod;
	/* ...bounded by how much we need to write? */
	left = MIN(left, size);

	memcpy(&xsr->xsr_req[prod], src, left);
	memcpy(&xsr->xsr_req[0], (caddr_t)src + left, size - left);
	virtio_membar_sync();
	xsr->xsr_req_prod += size;
	return (size);
}

int
xs_ring_get(struct xs_softc *xs, void *dst, size_t size)
{
	struct xs_ring *xsr = xs->xs_ring;
	uint32_t cons = xsr->xsr_rsp_cons & (XS_RING_SIZE - 1);
	uint32_t avail = xs_ring_avail(xsr, 0);
	size_t left;

	if (size > XS_RING_SIZE)
		return (-1);
	if (avail == 0)
		return (0);

	/* Bound the size by the number of available slots */
	size = MIN(size, avail);
	/* How many contiguous bytes can we memcpy... */
	left = XS_RING_SIZE - cons;
	/* ...bounded by how much we need to read? */
	left = MIN(left, size);

	memcpy(dst, &xsr->xsr_rsp[cons], left);
	memcpy((caddr_t)dst + left, &xsr->xsr_rsp[0], size - left);
	virtio_membar_sync();
	xsr->xsr_rsp_cons += size;
	return (size);
}
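
/*
 * A concrete example of the split copy above (illustrative): with
 * xsr_rsp_cons == 1020 and a 16-byte read, "left" is 4, so the first
 * memcpy() takes bytes 1020-1023 from the end of the ring and the
 * second takes the remaining 12 bytes from the start.  Without a
 * wraparound "left" equals "size" and the second memcpy() is a no-op.
 */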

void
xs_intr(void *arg)
{
	struct xs_softc *xs = arg;
	struct xs_ring *xsr = xs->xs_ring;
	struct xen_softc *sc = xs->xs_sc;
	struct xs_msg *xsm = xs->xs_rmsg;
	struct xs_msghdr xmh;
	uint32_t avail;
	int len;

	virtio_membar_sync();

	if (xsr->xsr_rsp_cons == xsr->xsr_rsp_prod)
		return;

	avail = xs_ring_avail(xsr, 0);

	/* Response processing */

 again:
	if (xs->xs_rmsg == NULL) {
		if (avail < sizeof(xmh)) {
			DPRINTF("%s: incomplete header: %u\n",
			    sc->sc_dev.dv_xname, avail);
			goto out;
		}
		avail -= sizeof(xmh);

		if ((len = xs_ring_get(xs, &xmh, sizeof(xmh))) != sizeof(xmh)) {
			printf("%s: message too short: %d\n",
			    sc->sc_dev.dv_xname, len);
			goto out;
		}

		if (xmh.xmh_type == XS_EVENT) {
			xsm = &xs->xs_emsg;
			xsm->xsm_read = 0;
		} else {
			mtx_enter(&xs->xs_reqlck);
			TAILQ_FOREACH(xsm, &xs->xs_reqs, xsm_link) {
				if (xsm->xsm_hdr.xmh_rid == xmh.xmh_rid) {
					TAILQ_REMOVE(&xs->xs_reqs, xsm,
					    xsm_link);
					break;
				}
			}
			mtx_leave(&xs->xs_reqlck);
			if (xsm == NULL) {
				printf("%s: unexpected message id %u\n",
				    sc->sc_dev.dv_xname, xmh.xmh_rid);
				goto out;
			}
		}
		memcpy(&xsm->xsm_hdr, &xmh, sizeof(xmh));
		xs->xs_rmsg = xsm;
	}

	if (xsm->xsm_hdr.xmh_len > xsm->xsm_dlen)
		xsm->xsm_error = EMSGSIZE;

	len = MIN(xsm->xsm_hdr.xmh_len - xsm->xsm_read, avail);
	if (len) {
		/* Get data if reply is not empty */
		if ((len = xs_ring_get(xs,
		    &xsm->xsm_data[xsm->xsm_read], len)) <= 0) {
			printf("%s: read failure %d\n", sc->sc_dev.dv_xname,
			    len);
			goto out;
		}
		xsm->xsm_read += len;
	}

	/* Notify reader that we've managed to read the whole message */
	if (xsm->xsm_read == xsm->xsm_hdr.xmh_len) {
		xs->xs_rmsg = NULL;
		if (xsm->xsm_hdr.xmh_type == XS_EVENT) {
			xs_event(xs, xsm);
		} else {
			mtx_enter(&xs->xs_rsplck);
			TAILQ_INSERT_TAIL(&xs->xs_rsps, xsm, xsm_link);
			mtx_leave(&xs->xs_rsplck);
			wakeup(xs->xs_rchan);
		}
	}

	if ((avail = xs_ring_avail(xsr, 0)) > 0)
		goto again;

 out:
	/* Wakeup sleeping writes (if any) */
	wakeup(xs->xs_wchan);
	xen_intr_signal(xs->xs_ih);
}

static inline int
xs_get_buf(struct xs_transaction *xst, struct xs_msg *xsm, int len)
{
	unsigned char *buf;

	buf = malloc(len, M_DEVBUF, M_ZERO | (cold ? M_NOWAIT : M_WAITOK));
	if (buf == NULL)
		return (-1);
	xsm->xsm_dlen = len;
	xsm->xsm_data = buf;
	return (0);
}

static inline void
xs_put_buf(struct xs_transaction *xst, struct xs_msg *xsm)
{
	free(xsm->xsm_data, M_DEVBUF, xsm->xsm_dlen);
	xsm->xsm_data = NULL;
}

void
xs_resfree(struct xs_transaction *xst, struct iovec *iov, int iov_cnt)
{
	int i;

	for (i = 0; i < iov_cnt; i++)
		free(iov[i].iov_base, M_DEVBUF, iov[i].iov_len);
	free(iov, M_DEVBUF, sizeof(struct iovec) * iov_cnt);
}

int
xs_parse(struct xs_transaction *xst, struct xs_msg *xsm, struct iovec **iov,
    int *iov_cnt)
{
	char *bp, *cp;
	uint32_t dlen;
	int i, flags;

	/* If the response size is zero, we return an empty string */
	dlen = MAX(xsm->xsm_hdr.xmh_len, 1);
	flags = M_ZERO | (cold ? M_NOWAIT : M_WAITOK);

	*iov_cnt = 0;
	/* Make sure that the data is NUL terminated */
	if (xsm->xsm_data[dlen - 1] != '\0') {
		/*
		 * The XS_READ operation always returns the length
		 * without the trailing NUL, so we have to adjust
		 * the length.
		 */
		dlen = MIN(dlen + 1, xsm->xsm_dlen);
		xsm->xsm_data[dlen - 1] = '\0';
	}
	for (i = 0; i < dlen; i++)
		if (xsm->xsm_data[i] == '\0')
			(*iov_cnt)++;
	*iov = mallocarray(*iov_cnt, sizeof(struct iovec), M_DEVBUF, flags);
	if (*iov == NULL)
		goto cleanup;
	bp = xsm->xsm_data;
	for (i = 0; i < *iov_cnt; i++) {
		cp = bp;
		while (cp - (caddr_t)xsm->xsm_data < dlen && *cp != '\0')
			cp++;
		(*iov)[i].iov_len = cp - bp + 1;
		(*iov)[i].iov_base = malloc((*iov)[i].iov_len, M_DEVBUF, flags);
		if (!(*iov)[i].iov_base) {
			xs_resfree(xst, *iov, *iov_cnt);
			goto cleanup;
		}
		memcpy((*iov)[i].iov_base, bp, (*iov)[i].iov_len);
		bp = ++cp;
	}
	return (0);

 cleanup:
	*iov = NULL;
	*iov_cnt = 0;
	return (ENOMEM);
}
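
/*
 * For example (illustrative): a response payload of "3\0net\0vif\0"
 * with xmh_len == 10 is split by xs_parse() into three iovecs holding
 * the NUL-terminated strings "3", "net" and "vif".
 */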

int
xs_event(struct xs_softc *xs, struct xs_msg *xsm)
{
	struct xs_watch *xsw;
	char *token = NULL;
	int i;

	for (i = 0; i < xsm->xsm_read; i++) {
		if (xsm->xsm_data[i] == '\0') {
			token = &xsm->xsm_data[i+1];
			break;
		}
	}
	if (token == NULL) {
		printf("%s: event on \"%s\" without token\n",
		    xs->xs_sc->sc_dev.dv_xname, xsm->xsm_data);
		return (-1);
	}

	mtx_enter(&xs->xs_watchlck);
	TAILQ_FOREACH(xsw, &xs->xs_watches, xsw_entry) {
		if (strcmp(xsw->xsw_token, token))
			continue;
		mtx_leave(&xs->xs_watchlck);
		task_add(xs->xs_watchtq, xsw->xsw_task);
		return (0);
	}
	mtx_leave(&xs->xs_watchlck);

	printf("%s: no watchers for node \"%s\"\n",
	    xs->xs_sc->sc_dev.dv_xname, xsm->xsm_data);
	return (-1);
}

int
xs_cmd(struct xs_transaction *xst, int cmd, const char *path,
    struct iovec **iov, int *iov_cnt)
{
	struct xs_softc *xs = xst->xst_cookie;
	struct xs_msg *xsm;
	struct iovec ov[10];	/* output vector */
	int datalen = XS_ERR_PAYLOAD;
	int ov_cnt = 0;
	enum { READ, WRITE } mode = READ;
	int i, error = 0;

	if (cmd >= XS_MAX)
		return (EINVAL);

	switch (cmd) {
	case XS_TOPEN:
		ov[0].iov_base = "";
		ov[0].iov_len = 1;
		ov_cnt++;
		break;
	case XS_TCLOSE:
	case XS_RM:
	case XS_WATCH:
	case XS_WRITE:
		mode = WRITE;
		/* FALLTHROUGH */
	default:
		if (mode == READ)
			datalen = XS_MAX_PAYLOAD;
		break;
	}

	if (path) {
		ov[ov_cnt].iov_base = (void *)path;
		ov[ov_cnt++].iov_len = strlen(path) + 1; /* +NUL */
	}

	if (mode == WRITE && iov && iov_cnt && *iov_cnt > 0) {
		for (i = 0; i < *iov_cnt && ov_cnt < nitems(ov);
		     i++, ov_cnt++) {
			ov[ov_cnt].iov_base = (*iov)[i].iov_base;
			ov[ov_cnt].iov_len = (*iov)[i].iov_len;
		}
	}

	xsm = xs_get_msg(xs, !cold);

	if (xs_get_buf(xst, xsm, datalen)) {
		xs_put_msg(xs, xsm);
		return (ENOMEM);
	}

	xsm->xsm_hdr.xmh_tid = xst->xst_id;
	xsm->xsm_hdr.xmh_type = cmd;
	xsm->xsm_hdr.xmh_rid = atomic_inc_int_nv(&xs->xs_rid);

	for (i = 0; i < ov_cnt; i++)
		xsm->xsm_hdr.xmh_len += ov[i].iov_len;

	if (xsm->xsm_hdr.xmh_len > XS_MAX_PAYLOAD) {
		printf("%s: message type %d with payload above the limit\n",
		    xs->xs_sc->sc_dev.dv_xname, cmd);
		xs_put_buf(xst, xsm);
		xs_put_msg(xs, xsm);
		return (EIO);
	}

	if (xs_start(xst, xsm, ov, ov_cnt)) {
		printf("%s: message type %d transmission failed\n",
		    xs->xs_sc->sc_dev.dv_xname, cmd);
		xs_put_buf(xst, xsm);
		xs_put_msg(xs, xsm);
		return (EIO);
	}

	xsm = xs_reply(xst, xsm->xsm_hdr.xmh_rid);

	if (xsm->xsm_hdr.xmh_type == XS_ERROR) {
		error = xs_geterror(xsm);
		DPRINTF("%s: xenstore request %d \"%s\" error %s\n",
		    xs->xs_sc->sc_dev.dv_xname, cmd, path, xsm->xsm_data);
	} else if (xsm->xsm_error != 0)
		error = xsm->xsm_error;
	else if (mode == READ) {
		KASSERT(iov && iov_cnt);
		error = xs_parse(xst, xsm, iov, iov_cnt);
	}
#ifdef XS_DEBUG
	else
		if (strcmp(xsm->xsm_data, "OK"))
			printf("%s: xenstore request %d failed: %s\n",
			    xs->xs_sc->sc_dev.dv_xname, cmd, xsm->xsm_data);
#endif

	xs_put_buf(xst, xsm);
	xs_put_msg(xs, xsm);

	return (error);
}
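
/*
 * In short, every request above follows the same sequence: take a
 * message and a payload buffer, fill in the header, push the header
 * and iovecs through xs_start(), block in xs_reply() until xs_intr()
 * queues the matching response, then translate an XS_ERROR string via
 * xs_geterror() or hand the payload to xs_parse().
 */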

int
xs_watch(void *xsc, const char *path, const char *property, struct task *task,
    void (*cb)(void *), void *arg)
{
	struct xen_softc *sc = xsc;
	struct xs_softc *xs = sc->sc_xs;
	struct xs_transaction xst;
	struct xs_watch *xsw;
	struct iovec iov, *iovp = &iov;
	char key[256];
	int error, iov_cnt, ret;

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	xsw = malloc(sizeof(*xsw), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (xsw == NULL)
		return (-1);

	task_set(task, cb, arg);
	xsw->xsw_task = task;

	snprintf(xsw->xsw_token, sizeof(xsw->xsw_token), "%0lx",
	    (unsigned long)xsw);

	if (path)
		ret = snprintf(key, sizeof(key), "%s/%s", path, property);
	else
		ret = snprintf(key, sizeof(key), "%s", property);
	if (ret == -1 || ret >= sizeof(key)) {
		free(xsw, M_DEVBUF, sizeof(*xsw));
		return (EINVAL);
	}

	iov.iov_base = xsw->xsw_token;
	iov.iov_len = sizeof(xsw->xsw_token);
	iov_cnt = 1;

	/*
	 * xs_watches must be prepared pre-emptively because a xenstore
	 * event is raised immediately after a watch is established.
	 */
	mtx_enter(&xs->xs_watchlck);
	TAILQ_INSERT_TAIL(&xs->xs_watches, xsw, xsw_entry);
	mtx_leave(&xs->xs_watchlck);

	if ((error = xs_cmd(&xst, XS_WATCH, key, &iovp, &iov_cnt)) != 0) {
		mtx_enter(&xs->xs_watchlck);
		TAILQ_REMOVE(&xs->xs_watches, xsw, xsw_entry);
		mtx_leave(&xs->xs_watchlck);
		free(xsw, M_DEVBUF, sizeof(*xsw));
		return (error);
	}

	return (0);
}
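
/*
 * Illustrative sketch (not compiled; example_cb, example_task and the
 * watched node are made-up examples): registering a watch so that a
 * callback runs on the "xenwatch" taskq whenever the node changes.
 */
#if 0
void	example_cb(void *);
struct task example_task;

void
xs_example_watch(struct xen_softc *sc, void *arg)
{
	/* Fire example_cb(arg) on every change of control/shutdown. */
	if (xs_watch(sc, "control", "shutdown", &example_task,
	    example_cb, arg))
		printf("failed to set up the watch\n");
}
#endif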

static unsigned long long
atoull(const char *cp, int *error)
{
	unsigned long long res, cutoff;
	int ch;
	int cutlim;

	res = 0;
	cutoff = ULLONG_MAX / (unsigned long long)10;
	cutlim = ULLONG_MAX % (unsigned long long)10;

	do {
		if (*cp < '0' || *cp > '9') {
			*error = EINVAL;
			return (res);
		}
		ch = *cp - '0';
		if (res > cutoff || (res == cutoff && ch > cutlim)) {
			*error = ERANGE;
			return (res);
		}
		res *= 10;
		res += ch;
	} while (*(++cp) != '\0');

	*error = 0;
	return (res);
}
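
/*
 * The overflow check above is the usual strtoull(3) idiom: with 64-bit
 * unsigned long long, cutoff is ULLONG_MAX / 10 == 1844674407370955161
 * and cutlim is ULLONG_MAX % 10 == 5, so a digit that would push the
 * accumulator past 18446744073709551615 returns ERANGE instead of
 * silently wrapping.
 */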

int
xs_getnum(void *xsc, const char *path, const char *property,
    unsigned long long *val)
{
	char *buf;
	int error = 0;

	buf = malloc(XS_MAX_PAYLOAD, M_DEVBUF, M_ZERO |
	    (cold ? M_NOWAIT : M_WAITOK));
	if (buf == NULL)
		return (ENOMEM);

	error = xs_getprop(xsc, path, property, buf, XS_MAX_PAYLOAD);
	if (error)
		goto out;

	*val = atoull(buf, &error);
	if (error)
		goto out;

 out:
	free(buf, M_DEVBUF, XS_MAX_PAYLOAD);
	return (error);
}

int
xs_setnum(void *xsc, const char *path, const char *property,
    unsigned long long val)
{
	char buf[32];
	int ret;

	ret = snprintf(buf, sizeof(buf), "%llu", val);
	if (ret == -1 || ret >= sizeof(buf))
		return (ERANGE);

	return (xs_setprop(xsc, path, property, buf, strlen(buf)));
}

int
xs_getprop(void *xsc, const char *path, const char *property, char *value,
    int size)
{
	struct xen_softc *sc = xsc;
	struct xs_transaction xst;
	struct iovec *iovp = NULL;
	char key[256];
	int error, ret, iov_cnt = 0;

	if (!property)
		return (EINVAL);

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	if (path)
		ret = snprintf(key, sizeof(key), "%s/%s", path, property);
	else
		ret = snprintf(key, sizeof(key), "%s", property);
	if (ret == -1 || ret >= sizeof(key))
		return (EINVAL);

	if ((error = xs_cmd(&xst, XS_READ, key, &iovp, &iov_cnt)) != 0)
		return (error);

	if (iov_cnt > 0)
		strlcpy(value, (char *)iovp->iov_base, size);

	xs_resfree(&xst, iovp, iov_cnt);

	return (0);
}

int
xs_setprop(void *xsc, const char *path, const char *property, char *value,
    int size)
{
	struct xen_softc *sc = xsc;
	struct xs_transaction xst;
	struct iovec iov, *iovp = &iov;
	char key[256];
	int error, ret, iov_cnt = 0;

	if (!property)
		return (EINVAL);

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	if (path)
		ret = snprintf(key, sizeof(key), "%s/%s", path, property);
	else
		ret = snprintf(key, sizeof(key), "%s", property);
	if (ret == -1 || ret >= sizeof(key))
		return (EINVAL);

	iov.iov_base = value;
	iov.iov_len = size;
	iov_cnt = 1;

	error = xs_cmd(&xst, XS_WRITE, key, &iovp, &iov_cnt);

	return (error);
}

int
xs_cmpprop(void *xsc, const char *path, const char *property, const char *value,
    int *result)
{
	struct xen_softc *sc = xsc;
	struct xs_transaction xst;
	struct iovec *iovp = NULL;
	char key[256];
	int error, ret, iov_cnt = 0;

	if (!property)
		return (EINVAL);

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	if (path)
		ret = snprintf(key, sizeof(key), "%s/%s", path, property);
	else
		ret = snprintf(key, sizeof(key), "%s", property);
	if (ret == -1 || ret >= sizeof(key))
		return (EINVAL);

	if ((error = xs_cmd(&xst, XS_READ, key, &iovp, &iov_cnt)) != 0)
		return (error);

	*result = strcmp(value, (char *)iovp->iov_base);

	xs_resfree(&xst, iovp, iov_cnt);

	return (0);
}

int
xs_await_transition(void *xsc, const char *path, const char *property,
    const char *value, int timo)
{
	struct xen_softc *sc = xsc;
	int error, res;

	do {
		error = xs_cmpprop(xsc, path, property, value, &res);
		if (error)
			return (error);
		if (timo && --timo == 0)
			return (ETIMEDOUT);
		xs_poll(sc->sc_xs, cold);
	} while (res != 0);

	return (0);
}
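
/*
 * Illustrative sketch (not compiled; the backend path and the timeout
 * are made-up examples): waiting until a backend flips its "state"
 * node to XenbusStateConnected ("4").  A timo of 0 waits forever.
 */
#if 0
int
xs_example_wait_connected(struct xen_softc *sc, const char *backend)
{
	/* Poll <backend>/state until it reads "4" or timo expires. */
	return (xs_await_transition(sc, backend, "state", "4", 100));
}
#endif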

int
xs_kvop(void *xsc, int op, char *key, char *value, size_t valuelen)
{
	struct xen_softc *sc = xsc;
	struct xs_transaction xst;
	struct iovec iov, *iovp = &iov;
	int error = 0, iov_cnt = 0, cmd, i;

	switch (op) {
	case PVBUS_KVWRITE:
		cmd = XS_WRITE;
		iov.iov_base = value;
		iov.iov_len = strlen(value);
		iov_cnt = 1;
		break;
	case PVBUS_KVREAD:
		cmd = XS_READ;
		break;
	case PVBUS_KVLS:
		cmd = XS_LIST;
		break;
	default:
		return (EOPNOTSUPP);
	}

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	if ((error = xs_cmd(&xst, cmd, key, &iovp, &iov_cnt)) != 0)
		return (error);

	memset(value, 0, valuelen);

	switch (cmd) {
	case XS_READ:
		if (iov_cnt == 1 && iovp[0].iov_len == 1) {
			xs_resfree(&xst, iovp, iov_cnt);

			/*
			 * We cannot distinguish whether the returned
			 * value is a directory or a file in the
			 * xenstore.  The only indication is that
			 * reading a directory returns an empty string
			 * (a single NUL byte), so try to get the
			 * directory listing in that case.
			 */
			return (xs_kvop(xsc, PVBUS_KVLS, key, value, valuelen));
		}
		/* FALLTHROUGH */
	case XS_LIST:
		for (i = 0; i < iov_cnt; i++) {
			if (i > 0 && strlcat(value, "\n", valuelen) >=
			    valuelen) {
				error = ERANGE;
				break;
			}
			if (strlcat(value, iovp[i].iov_base,
			    valuelen) >= valuelen) {
				error = ERANGE;
				break;
			}
		}
		xs_resfree(&xst, iovp, iov_cnt);
		break;
	default:
		break;
	}

	return (error);
}
1136