/*	$OpenBSD: xenstore.c,v 1.45 2020/01/11 21:30:00 cheloha Exp $	*/

/*
 * Copyright (c) 2015 Mike Belopuhov
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/ioctl.h>
#include <sys/task.h>

#include <machine/bus.h>

#include <uvm/uvm_extern.h>

#include <dev/pv/pvvar.h>
#include <dev/pv/xenreg.h>
#include <dev/pv/xenvar.h>

/* #define XS_DEBUG */

#ifdef XS_DEBUG
#define DPRINTF(x...)		printf(x)
#else
#define DPRINTF(x...)
#endif

/*
 * The XenStore interface is a simple storage system that serves as a
 * means of communicating state and configuration data between the Xen
 * Domain 0 and the various guest domains.  All configuration data,
 * other than the small amount of essential information required
 * during the early boot of a Xen-aware guest, is managed through the
 * XenStore.
 *
 * The XenStore is ASCII string based and has a structure and
 * semantics similar to a filesystem: directories may contain files or
 * further directories, and the depth of the hierarchy is limited only
 * by the XenStore's maximum path length.
 *
 * Communication between the XenStore service and other domains goes
 * through two guest-specific ring buffers in a shared memory area,
 * one ring buffer per direction.  The location of this shared memory
 * is obtained by the guest via HVM hypercalls.
 *
 * XenStore communication relies on an event channel and thus on
 * interrupts.  Several Xen services depend on the XenStore, most
 * notably the XenBus used to discover and manage Xen devices.
 */
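
/*
 * Example usage (illustrative only, the node names are made up):
 * a Xen-aware driver typically reads its configuration with
 * xs_getprop() and registers for change notifications with
 * xs_watch(), both implemented below:
 *
 *	char val[32];
 *
 *	if (xs_getprop(sc, "device/vif/0", "backend", val,
 *	    sizeof(val)) == 0)
 *		printf("backend at %s\n", val);
 */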

const struct {
	const char		*xse_errstr;
	int			 xse_errnum;
} xs_errors[] = {
	{ "EINVAL",	EINVAL },
	{ "EACCES",	EACCES },
	{ "EEXIST",	EEXIST },
	{ "EISDIR",	EISDIR },
	{ "ENOENT",	ENOENT },
	{ "ENOMEM",	ENOMEM },
	{ "ENOSPC",	ENOSPC },
	{ "EIO",	EIO },
	{ "ENOTEMPTY",	ENOTEMPTY },
	{ "ENOSYS",	ENOSYS },
	{ "EROFS",	EROFS },
	{ "EBUSY",	EBUSY },
	{ "EAGAIN",	EAGAIN },
	{ "EISCONN",	EISCONN },
	{ NULL,		-1 },
};

struct xs_msghdr {
	/* Message type */
	uint32_t		 xmh_type;
	/* Request identifier, echoed in daemon's response. */
	uint32_t		 xmh_rid;
	/* Transaction id (0 if not related to a transaction). */
	uint32_t		 xmh_tid;
	/* Length of data following this. */
	uint32_t		 xmh_len;
	/* Generally followed by nul-terminated string(s). */
} __packed;

/*
 * The minimum output buffer size needed to store an error string.
 */
#define XS_ERR_PAYLOAD		16

/*
 * Although the Xen source code implies that the limit is 4k,
 * in practice it turns out that we can only send 2k bytes of
 * payload before receiving an ENOSPC.  We set it to an even
 * smaller value, however, because there's no real need to use
 * large buffers for anything.
 */
#define XS_MAX_PAYLOAD		1024

struct xs_msg {
	struct xs_msghdr	 xsm_hdr;
	uint32_t		 xsm_read;
	uint32_t		 xsm_dlen;
	uint8_t			*xsm_data;
	TAILQ_ENTRY(xs_msg)	 xsm_link;
};
TAILQ_HEAD(xs_msgq, xs_msg);

#define XS_RING_SIZE		1024

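/*
 * Request and response rings share a single page.  The producer and
 * consumer indices are free-running counters reduced modulo
 * XS_RING_SIZE on access.
 */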
struct xs_ring {
	uint8_t			xsr_req[XS_RING_SIZE];
	uint8_t			xsr_rsp[XS_RING_SIZE];
	uint32_t		xsr_req_cons;
	uint32_t		xsr_req_prod;
	uint32_t		xsr_rsp_cons;
	uint32_t		xsr_rsp_prod;
} __packed;

#define XST_DELAY		1	/* in seconds */

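/* Token length: a pointer in hex (two characters per byte) plus NUL */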
#define XSW_TOKLEN		(sizeof(void *) * 2 + 1)

struct xs_watch {
	TAILQ_ENTRY(xs_watch)	 xsw_entry;
	uint8_t			 xsw_token[XSW_TOKLEN];
	struct task		*xsw_task;
};

/*
 * Container for all XenStore-related state.
 */
struct xs_softc {
	struct xen_softc	*xs_sc;

	evtchn_port_t		 xs_port;
	xen_intr_handle_t	 xs_ih;

	struct xs_ring		*xs_ring;

	struct xs_msg		 xs_msgs[10];
	struct xs_msg		*xs_rmsg;

	struct xs_msgq		 xs_free;
	struct xs_msgq		 xs_reqs;
	struct xs_msgq		 xs_rsps;

	volatile uint		 xs_rid;

	const char		*xs_wchan;
	const char		*xs_rchan;

	struct mutex		 xs_reqlck;	/* request queue mutex */
	struct mutex		 xs_rsplck;	/* response queue mutex */
	struct mutex		 xs_frqlck;	/* free queue mutex */

	TAILQ_HEAD(, xs_watch)	 xs_watches;
	struct mutex		 xs_watchlck;
	struct xs_msg		 xs_emsg;
	struct taskq		*xs_watchtq;

	struct rwlock		 xs_rnglck;
};

struct xs_msg *
	xs_get_msg(struct xs_softc *, int);
void	xs_put_msg(struct xs_softc *, struct xs_msg *);
int	xs_ring_get(struct xs_softc *, void *, size_t);
int	xs_ring_put(struct xs_softc *, void *, size_t);
void	xs_intr(void *);
void	xs_poll(struct xs_softc *, int);
int	xs_output(struct xs_transaction *, uint8_t *, int);
int	xs_start(struct xs_transaction *, struct xs_msg *, struct iovec *, int);
struct xs_msg *
	xs_reply(struct xs_transaction *, uint);
int	xs_parse(struct xs_transaction *, struct xs_msg *, struct iovec **,
	    int *);
int	xs_event(struct xs_softc *, struct xs_msg *);

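/*
 * Look up the event channel and shared page assigned to the XenStore
 * by the hypervisor, map the page, establish the interrupt handler
 * and initialize the message pool and queues.
 */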
int
xs_attach(struct xen_softc *sc)
{
	struct xen_hvm_param xhv;
	struct xs_softc *xs;
	paddr_t pa;
	int i;

	if ((xs = malloc(sizeof(*xs), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) {
		printf(": failed to allocate xenstore softc\n");
		return (-1);
	}
	sc->sc_xs = xs;
	xs->xs_sc = sc;

	/* Fetch event channel port */
	memset(&xhv, 0, sizeof(xhv));
	xhv.domid = DOMID_SELF;
	xhv.index = HVM_PARAM_STORE_EVTCHN;
	if (xen_hypercall(sc, XC_HVM, 2, HVMOP_get_param, &xhv)) {
		printf(": failed to obtain a xenstore event channel\n");
		goto fail_1;
	}
	xs->xs_port = xhv.value;

	printf(", event channel %u\n", xs->xs_port);

	/* Fetch a frame number (PA) of a shared xenstore page */
	memset(&xhv, 0, sizeof(xhv));
	xhv.domid = DOMID_SELF;
	xhv.index = HVM_PARAM_STORE_PFN;
	if (xen_hypercall(sc, XC_HVM, 2, HVMOP_get_param, &xhv))
		goto fail_1;
	pa = ptoa(xhv.value);
	/* Allocate a page of virtual memory */
	xs->xs_ring = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
	if (xs->xs_ring == NULL)
		goto fail_1;
	/* Map the xenstore page into our KVA */
	pa |= PMAP_NOCACHE;
	pmap_kenter_pa((vaddr_t)xs->xs_ring, pa, PROT_READ | PROT_WRITE);
	pmap_update(pmap_kernel());

	if (xen_intr_establish(xs->xs_port, &xs->xs_ih, 0, xs_intr, xs,
	    sc->sc_dev.dv_xname))
		goto fail_2;

	xs->xs_wchan = "xswrite";
	xs->xs_rchan = "xsread";

	TAILQ_INIT(&xs->xs_free);
	TAILQ_INIT(&xs->xs_reqs);
	TAILQ_INIT(&xs->xs_rsps);
	for (i = 0; i < nitems(xs->xs_msgs); i++)
		TAILQ_INSERT_TAIL(&xs->xs_free, &xs->xs_msgs[i], xsm_link);

	mtx_init(&xs->xs_reqlck, IPL_NET);
	mtx_init(&xs->xs_rsplck, IPL_NET);
	mtx_init(&xs->xs_frqlck, IPL_NET);

	rw_init(&xs->xs_rnglck, "xsrnglck");

	xs->xs_watchtq = taskq_create("xenwatch", 1, IPL_NET, 0);

	mtx_init(&xs->xs_watchlck, IPL_NET);
	TAILQ_INIT(&xs->xs_watches);

	xs->xs_emsg.xsm_data = malloc(XS_MAX_PAYLOAD, M_DEVBUF,
	    M_ZERO | M_NOWAIT);
	if (xs->xs_emsg.xsm_data == NULL)
		goto fail_2;
	xs->xs_emsg.xsm_dlen = XS_MAX_PAYLOAD;

	return (0);

 fail_2:
	pmap_kremove((vaddr_t)xs->xs_ring, PAGE_SIZE);
	pmap_update(pmap_kernel());
	km_free(xs->xs_ring, PAGE_SIZE, &kv_any, &kp_none);
	xs->xs_ring = NULL;
 fail_1:
	free(xs, sizeof(*xs), M_DEVBUF);
	sc->sc_xs = NULL;
	return (-1);
}

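/*
 * Obtain a message descriptor from the static pool, retrying every
 * quarter of XST_DELAY until one is freed via xs_put_msg(): spinning
 * when sleeping is not allowed, sleeping otherwise.
 */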
struct xs_msg *
xs_get_msg(struct xs_softc *xs, int waitok)
{
	static const char *chan = "xsalloc";
	struct xs_msg *xsm;

	mtx_enter(&xs->xs_frqlck);
	for (;;) {
		xsm = TAILQ_FIRST(&xs->xs_free);
		if (xsm != NULL) {
			TAILQ_REMOVE(&xs->xs_free, xsm, xsm_link);
			break;
		}
		if (!waitok) {
			mtx_leave(&xs->xs_frqlck);
			delay(XST_DELAY * 1000 >> 2);
			mtx_enter(&xs->xs_frqlck);
		} else
			msleep_nsec(chan, &xs->xs_frqlck, PRIBIO, chan,
			    SEC_TO_NSEC(XST_DELAY) >> 2);
	}
	mtx_leave(&xs->xs_frqlck);
	return (xsm);
}

void
xs_put_msg(struct xs_softc *xs, struct xs_msg *xsm)
{
	memset(xsm, 0, sizeof(*xsm));
	mtx_enter(&xs->xs_frqlck);
	TAILQ_INSERT_TAIL(&xs->xs_free, xsm, xsm_link);
	mtx_leave(&xs->xs_frqlck);
}

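/* Translate an error string returned by the XenStore into an errno value. */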
int
xs_geterror(struct xs_msg *xsm)
{
	int i;

	/* Stop at the NULL sentinel to avoid passing NULL to strcmp() */
	for (i = 0; xs_errors[i].xse_errstr != NULL; i++)
		if (strcmp(xs_errors[i].xse_errstr, xsm->xsm_data) == 0)
			return (xs_errors[i].xse_errnum);
	return (EOPNOTSUPP);
}

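/*
 * Bytes that can still be written to the request ring (req != 0) or
 * read from the response ring (req == 0).
 */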
static inline uint32_t
xs_ring_avail(struct xs_ring *xsr, int req)
{
	uint32_t cons = req ? xsr->xsr_req_cons : xsr->xsr_rsp_cons;
	uint32_t prod = req ? xsr->xsr_req_prod : xsr->xsr_rsp_prod;

	KASSERT(prod - cons <= XS_RING_SIZE);
	return (req ? XS_RING_SIZE - (prod - cons) : prod - cons);
}

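/*
 * Wait for ring activity: busy-wait and run the interrupt handler by
 * hand when sleeping is not allowed, otherwise sleep for a quarter
 * of XST_DELAY and rely on xs_intr() waking us up.
 */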
void
xs_poll(struct xs_softc *xs, int nosleep)
{
	int s;

	if (nosleep) {
		delay(XST_DELAY * 1000 >> 2);
		s = splnet();
		xs_intr(xs);
		splx(s);
	} else {
		tsleep_nsec(xs->xs_wchan, PRIBIO, xs->xs_wchan,
		    SEC_TO_NSEC(XST_DELAY) >> 2);
	}
}

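/*
 * Copy a buffer into the request ring in as many chunks as it takes,
 * prodding the hypervisor through the event channel whenever the
 * ring needs to be drained.
 */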
int
xs_output(struct xs_transaction *xst, uint8_t *bp, int len)
{
	struct xs_softc *xs = xst->xst_cookie;
	int chunk;

	while (len > 0) {
		chunk = xs_ring_put(xs, bp, MIN(len, XS_RING_SIZE));
		if (chunk < 0)
			return (-1);
		if (chunk > 0) {
			len -= chunk;
			bp += chunk;
			if (xs_ring_avail(xs->xs_ring, 1) > 0)
				continue;
		}
		/* Squeaky wheel gets the kick */
		xen_intr_signal(xs->xs_ih);
		/*
		 * Either chunk == 0 and we need to wait for the
		 * hypervisor to consume what has already been
		 * written, or we have managed to fill the ring
		 * and must wait for it to collect the data.
		 */
		while (xs->xs_ring->xsr_req_prod != xs->xs_ring->xsr_req_cons)
			xs_poll(xs, 1);
	}
	return (0);
}

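/*
 * Write the message header and payload to the ring while holding the
 * ring lock to keep requests of concurrent writers contiguous, then
 * queue the message on the request queue and notify the other end.
 */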
int
xs_start(struct xs_transaction *xst, struct xs_msg *xsm, struct iovec *iov,
    int iov_cnt)
{
	struct xs_softc *xs = xst->xst_cookie;
	int i;

	rw_enter_write(&xs->xs_rnglck);

	/* Header */
	if (xs_output(xst, (uint8_t *)&xsm->xsm_hdr,
	    sizeof(xsm->xsm_hdr)) == -1) {
		printf("%s: failed to write the header\n", __func__);
		rw_exit_write(&xs->xs_rnglck);
		return (-1);
	}

	/* Data loop */
	for (i = 0; i < iov_cnt; i++) {
		if (xs_output(xst, iov[i].iov_base, iov[i].iov_len) == -1) {
			printf("%s: failed on iovec #%d len %lu\n", __func__,
			    i, iov[i].iov_len);
			rw_exit_write(&xs->xs_rnglck);
			return (-1);
		}
	}

	mtx_enter(&xs->xs_reqlck);
	TAILQ_INSERT_TAIL(&xs->xs_reqs, xsm, xsm_link);
	mtx_leave(&xs->xs_reqlck);

	xen_intr_signal(xs->xs_ih);

	rw_exit_write(&xs->xs_rnglck);

	return (0);
}

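/*
 * Wait for a response carrying the given request id and our
 * transaction id.  During a cold boot interrupts are not delivered,
 * so run the interrupt handler by hand between delays.
 */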
struct xs_msg *
xs_reply(struct xs_transaction *xst, uint rid)
{
	struct xs_softc *xs = xst->xst_cookie;
	struct xs_msg *xsm;
	int s;

	mtx_enter(&xs->xs_rsplck);
	for (;;) {
		TAILQ_FOREACH(xsm, &xs->xs_rsps, xsm_link) {
			if (xsm->xsm_hdr.xmh_tid == xst->xst_id &&
			    xsm->xsm_hdr.xmh_rid == rid)
				break;
		}
		if (xsm != NULL) {
			TAILQ_REMOVE(&xs->xs_rsps, xsm, xsm_link);
			break;
		}
		if (cold) {
			mtx_leave(&xs->xs_rsplck);
			delay(XST_DELAY * 1000 >> 2);
			s = splnet();
			xs_intr(xs);
			splx(s);
			mtx_enter(&xs->xs_rsplck);
		} else
			msleep_nsec(xs->xs_rchan, &xs->xs_rsplck, PRIBIO,
			    xs->xs_rchan, SEC_TO_NSEC(XST_DELAY) >> 2);
	}
	mtx_leave(&xs->xs_rsplck);
	return (xsm);
}

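/*
 * Copy up to size bytes into the request ring, splitting the copy in
 * two where the ring wraps around.  Returns the number of bytes
 * written, 0 if the ring is full or -1 if the request can never fit.
 */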
int
xs_ring_put(struct xs_softc *xs, void *src, size_t size)
{
	struct xs_ring *xsr = xs->xs_ring;
	uint32_t prod = xsr->xsr_req_prod & (XS_RING_SIZE - 1);
	uint32_t avail = xs_ring_avail(xsr, 1);
	size_t left;

	if (size > XS_RING_SIZE)
		return (-1);
	if (avail == 0)
		return (0);

	/* Bound the size by the number of available slots */
	size = MIN(size, avail);
	/* How many contiguous bytes can we memcpy... */
	left = XS_RING_SIZE - prod;
	/* ...bounded by how much we need to write? */
	left = MIN(left, size);

	memcpy(&xsr->xsr_req[prod], src, left);
	memcpy(&xsr->xsr_req[0], (caddr_t)src + left, size - left);
	virtio_membar_sync();
	xsr->xsr_req_prod += size;
	return (size);
}

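/*
 * Counterpart of xs_ring_put() for the response ring: copy out up to
 * size bytes, handling the wraparound, and advance the consumer
 * index.
 */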
int
xs_ring_get(struct xs_softc *xs, void *dst, size_t size)
{
	struct xs_ring *xsr = xs->xs_ring;
	uint32_t cons = xsr->xsr_rsp_cons & (XS_RING_SIZE - 1);
	uint32_t avail = xs_ring_avail(xsr, 0);
	size_t left;

	if (size > XS_RING_SIZE)
		return (-1);
	if (avail == 0)
		return (0);

	/* Bound the size by the number of available slots */
	size = MIN(size, avail);
	/* How many contiguous bytes can we memcpy... */
	left = XS_RING_SIZE - cons;
	/* ...bounded by how much we need to read? */
	left = MIN(left, size);

	memcpy(dst, &xsr->xsr_rsp[cons], left);
	memcpy((caddr_t)dst + left, &xsr->xsr_rsp[0], size - left);
	virtio_membar_sync();
	xsr->xsr_rsp_cons += size;
	return (size);
}

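/*
 * Interrupt handler: assemble incoming messages from the response
 * ring, a header followed by its payload, possibly across several
 * invocations.  Events are dispatched to their watchers right away;
 * replies are matched to queued requests and handed to xs_reply().
 */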
void
xs_intr(void *arg)
{
	struct xs_softc *xs = arg;
	struct xs_ring *xsr = xs->xs_ring;
	struct xen_softc *sc = xs->xs_sc;
	struct xs_msg *xsm = xs->xs_rmsg;
	struct xs_msghdr xmh;
	uint32_t avail;
	int len;

	virtio_membar_sync();

	if (xsr->xsr_rsp_cons == xsr->xsr_rsp_prod)
		return;

	avail = xs_ring_avail(xsr, 0);

	/* Response processing */

 again:
	if (xs->xs_rmsg == NULL) {
		if (avail < sizeof(xmh)) {
			DPRINTF("%s: incomplete header: %u\n",
			    sc->sc_dev.dv_xname, avail);
			goto out;
		}
		avail -= sizeof(xmh);

		if ((len = xs_ring_get(xs, &xmh, sizeof(xmh))) != sizeof(xmh)) {
			printf("%s: message too short: %d\n",
			    sc->sc_dev.dv_xname, len);
			goto out;
		}

		if (xmh.xmh_type == XS_EVENT) {
			xsm = &xs->xs_emsg;
			xsm->xsm_read = 0;
		} else {
			mtx_enter(&xs->xs_reqlck);
			TAILQ_FOREACH(xsm, &xs->xs_reqs, xsm_link) {
				if (xsm->xsm_hdr.xmh_rid == xmh.xmh_rid) {
					TAILQ_REMOVE(&xs->xs_reqs, xsm,
					    xsm_link);
					break;
				}
			}
			mtx_leave(&xs->xs_reqlck);
			if (xsm == NULL) {
				printf("%s: unexpected message id %u\n",
				    sc->sc_dev.dv_xname, xmh.xmh_rid);
				goto out;
			}
		}
		memcpy(&xsm->xsm_hdr, &xmh, sizeof(xmh));
		xs->xs_rmsg = xsm;
	}

	if (xsm->xsm_hdr.xmh_len > xsm->xsm_dlen)
		panic("message too large: %d vs %d for type %d, rid %u",
		    xsm->xsm_hdr.xmh_len, xsm->xsm_dlen, xsm->xsm_hdr.xmh_type,
		    xsm->xsm_hdr.xmh_rid);

	len = MIN(xsm->xsm_hdr.xmh_len - xsm->xsm_read, avail);
	if (len) {
		/* Get data if reply is not empty */
		if ((len = xs_ring_get(xs,
		    &xsm->xsm_data[xsm->xsm_read], len)) <= 0) {
			printf("%s: read failure %d\n", sc->sc_dev.dv_xname,
			    len);
			goto out;
		}
		xsm->xsm_read += len;
	}

	/* Notify reader that we've managed to read the whole message */
	if (xsm->xsm_read == xsm->xsm_hdr.xmh_len) {
		xs->xs_rmsg = NULL;
		if (xsm->xsm_hdr.xmh_type == XS_EVENT) {
			xs_event(xs, xsm);
		} else {
			mtx_enter(&xs->xs_rsplck);
			TAILQ_INSERT_TAIL(&xs->xs_rsps, xsm, xsm_link);
			mtx_leave(&xs->xs_rsplck);
			wakeup(xs->xs_rchan);
		}
	}

	if ((avail = xs_ring_avail(xsr, 0)) > 0)
		goto again;

 out:
	/* Wake up sleeping writers (if any) */
	wakeup(xs->xs_wchan);
	xen_intr_signal(xs->xs_ih);
}

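/* Allocate and release the data buffer attached to a message descriptor. */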
static inline int
xs_get_buf(struct xs_transaction *xst, struct xs_msg *xsm, int len)
{
	unsigned char *buf;

	buf = malloc(len, M_DEVBUF, M_ZERO | (cold ? M_NOWAIT : M_WAITOK));
	if (buf == NULL)
		return (-1);
	xsm->xsm_dlen = len;
	xsm->xsm_data = buf;
	return (0);
}

static inline void
xs_put_buf(struct xs_transaction *xst, struct xs_msg *xsm)
{
	free(xsm->xsm_data, M_DEVBUF, xsm->xsm_dlen);
	xsm->xsm_data = NULL;
}

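/* Release an iovec array produced by xs_parse()/xs_cmd(). */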
void
xs_resfree(struct xs_transaction *xst, struct iovec *iov, int iov_cnt)
{
	int i;

	for (i = 0; i < iov_cnt; i++)
		free(iov[i].iov_base, M_DEVBUF, iov[i].iov_len);
	free(iov, M_DEVBUF, sizeof(struct iovec) * iov_cnt);
}

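/*
 * Split the NUL-separated strings of a response payload into an
 * iovec array with one copied-out string per element.  XS_READ
 * replies come back without a trailing NUL, so one is patched in
 * first.
 */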
int
xs_parse(struct xs_transaction *xst, struct xs_msg *xsm, struct iovec **iov,
    int *iov_cnt)
{
	char *bp, *cp;
	uint32_t dlen;
	int i, flags;

	/* If the response size is zero, we return an empty string */
	dlen = MAX(xsm->xsm_hdr.xmh_len, 1);
	flags = M_ZERO | (cold ? M_NOWAIT : M_WAITOK);

	*iov_cnt = 0;
	/* Make sure that the data is NUL terminated */
	if (xsm->xsm_data[dlen - 1] != '\0') {
		/*
		 * The XS_READ operation always returns length without
		 * the trailing NUL so we have to adjust the length.
		 */
		dlen = MIN(dlen + 1, xsm->xsm_dlen);
		xsm->xsm_data[dlen - 1] = '\0';
	}
	for (i = 0; i < dlen; i++)
		if (xsm->xsm_data[i] == '\0')
			(*iov_cnt)++;
	*iov = mallocarray(*iov_cnt, sizeof(struct iovec), M_DEVBUF, flags);
	if (*iov == NULL)
		goto cleanup;
	bp = xsm->xsm_data;
	for (i = 0; i < *iov_cnt; i++) {
		cp = bp;
		while (cp - (caddr_t)xsm->xsm_data < dlen && *cp != '\0')
			cp++;
		(*iov)[i].iov_len = cp - bp + 1;
		(*iov)[i].iov_base = malloc((*iov)[i].iov_len, M_DEVBUF, flags);
		if (!(*iov)[i].iov_base) {
			xs_resfree(xst, *iov, *iov_cnt);
			goto cleanup;
		}
		memcpy((*iov)[i].iov_base, bp, (*iov)[i].iov_len);
		bp = ++cp;
	}
	return (0);

 cleanup:
	*iov = NULL;
	*iov_cnt = 0;
	return (ENOMEM);
}

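/*
 * Handle an XS_EVENT message: the payload carries the path followed
 * by the watch token; find the watch with a matching token and
 * schedule its task.
 */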
int
xs_event(struct xs_softc *xs, struct xs_msg *xsm)
{
	struct xs_watch *xsw;
	char *token = NULL;
	int i;

	for (i = 0; i < xsm->xsm_read; i++) {
		if (xsm->xsm_data[i] == '\0') {
			token = &xsm->xsm_data[i+1];
			break;
		}
	}
	if (token == NULL) {
		printf("%s: event on \"%s\" without token\n",
		    xs->xs_sc->sc_dev.dv_xname, xsm->xsm_data);
		return (-1);
	}

	mtx_enter(&xs->xs_watchlck);
	TAILQ_FOREACH(xsw, &xs->xs_watches, xsw_entry) {
		if (strcmp(xsw->xsw_token, token))
			continue;
		mtx_leave(&xs->xs_watchlck);
		task_add(xs->xs_watchtq, xsw->xsw_task);
		return (0);
	}
	mtx_leave(&xs->xs_watchlck);

	printf("%s: no watchers for node \"%s\"\n",
	    xs->xs_sc->sc_dev.dv_xname, xsm->xsm_data);
	return (-1);
}

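/*
 * Issue a single XenStore command and wait for the reply.  For
 * read-style commands the parsed response is returned via iov and
 * iov_cnt and must be released with xs_resfree(); for write-style
 * commands iov supplies the payload that follows the path.
 */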
int
xs_cmd(struct xs_transaction *xst, int cmd, const char *path,
    struct iovec **iov, int *iov_cnt)
{
	struct xs_softc *xs = xst->xst_cookie;
	struct xs_msg *xsm;
	struct iovec ov[10];	/* output vector */
	int datalen = XS_ERR_PAYLOAD;
	int ov_cnt = 0;
	enum { READ, WRITE } mode = READ;
	int i, error = 0;

	if (cmd >= XS_MAX)
		return (EINVAL);

	switch (cmd) {
	case XS_TOPEN:
		ov[0].iov_base = "";
		ov[0].iov_len = 1;
		ov_cnt++;
		break;
	case XS_TCLOSE:
	case XS_RM:
	case XS_WATCH:
	case XS_WRITE:
		mode = WRITE;
		/* FALLTHROUGH */
	default:
		if (mode == READ)
			datalen = XS_MAX_PAYLOAD;
		break;
	}

	if (path) {
		ov[ov_cnt].iov_base = (void *)path;
		ov[ov_cnt++].iov_len = strlen(path) + 1; /* +NUL */
	}

	if (mode == WRITE && iov && iov_cnt && *iov_cnt > 0) {
		for (i = 0; i < *iov_cnt && ov_cnt < nitems(ov);
		     i++, ov_cnt++) {
			ov[ov_cnt].iov_base = (*iov)[i].iov_base;
			ov[ov_cnt].iov_len = (*iov)[i].iov_len;
		}
	}

	xsm = xs_get_msg(xs, !cold);

	if (xs_get_buf(xst, xsm, datalen)) {
		xs_put_msg(xs, xsm);
		return (ENOMEM);
	}

	xsm->xsm_hdr.xmh_tid = xst->xst_id;
	xsm->xsm_hdr.xmh_type = cmd;
	xsm->xsm_hdr.xmh_rid = atomic_inc_int_nv(&xs->xs_rid);

	for (i = 0; i < ov_cnt; i++)
		xsm->xsm_hdr.xmh_len += ov[i].iov_len;

	if (xsm->xsm_hdr.xmh_len > XS_MAX_PAYLOAD) {
		printf("%s: message type %d with payload above the limit\n",
		    xs->xs_sc->sc_dev.dv_xname, cmd);
		xs_put_buf(xst, xsm);
		xs_put_msg(xs, xsm);
		return (EIO);
	}

	if (xs_start(xst, xsm, ov, ov_cnt)) {
		printf("%s: message type %d transmission failed\n",
		    xs->xs_sc->sc_dev.dv_xname, cmd);
		xs_put_buf(xst, xsm);
		xs_put_msg(xs, xsm);
		return (EIO);
	}

	xsm = xs_reply(xst, xsm->xsm_hdr.xmh_rid);

	if (xsm->xsm_hdr.xmh_type == XS_ERROR) {
		error = xs_geterror(xsm);
		DPRINTF("%s: xenstore request %d \"%s\" error %s\n",
		    xs->xs_sc->sc_dev.dv_xname, cmd, path, xsm->xsm_data);
	} else if (mode == READ) {
		KASSERT(iov && iov_cnt);
		error = xs_parse(xst, xsm, iov, iov_cnt);
	}
#ifdef XS_DEBUG
	else
		if (strcmp(xsm->xsm_data, "OK"))
			printf("%s: xenstore request %d failed: %s\n",
			    xs->xs_sc->sc_dev.dv_xname, cmd, xsm->xsm_data);
#endif

	xs_put_buf(xst, xsm);
	xs_put_msg(xs, xsm);

	return (error);
}

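/*
 * Register a watch on "path/property".  The supplied task is set up
 * to run the callback out of the xenwatch task queue whenever the
 * node changes; the watch pointer itself, printed in hex, serves as
 * the unique token.
 */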
int
xs_watch(void *xsc, const char *path, const char *property, struct task *task,
    void (*cb)(void *), void *arg)
{
	struct xen_softc *sc = xsc;
	struct xs_softc *xs = sc->sc_xs;
	struct xs_transaction xst;
	struct xs_watch *xsw;
	struct iovec iov, *iovp = &iov;
	char key[256];
	int error, iov_cnt, ret;

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	xsw = malloc(sizeof(*xsw), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (xsw == NULL)
		return (-1);

	task_set(task, cb, arg);
	xsw->xsw_task = task;

	snprintf(xsw->xsw_token, sizeof(xsw->xsw_token), "%0lx",
	    (unsigned long)xsw);

	if (path)
		ret = snprintf(key, sizeof(key), "%s/%s", path, property);
	else
		ret = snprintf(key, sizeof(key), "%s", property);
	if (ret == -1 || ret >= sizeof(key)) {
		free(xsw, M_DEVBUF, sizeof(*xsw));
		return (EINVAL);
	}

	iov.iov_base = xsw->xsw_token;
	iov.iov_len = sizeof(xsw->xsw_token);
	iov_cnt = 1;

	/*
	 * The watch must be on the xs_watches list before the command
	 * is issued because a xenstore event is raised immediately
	 * after a watch is established.
	 */
	mtx_enter(&xs->xs_watchlck);
	TAILQ_INSERT_TAIL(&xs->xs_watches, xsw, xsw_entry);
	mtx_leave(&xs->xs_watchlck);

	if ((error = xs_cmd(&xst, XS_WATCH, key, &iovp, &iov_cnt)) != 0) {
		mtx_enter(&xs->xs_watchlck);
		TAILQ_REMOVE(&xs->xs_watches, xsw, xsw_entry);
		mtx_leave(&xs->xs_watchlck);
		free(xsw, M_DEVBUF, sizeof(*xsw));
		return (error);
	}

	return (0);
}

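/* Parse an unsigned decimal string, detecting overflow, strtoull(3)-style. */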
static unsigned long long
atoull(const char *cp, int *error)
{
	unsigned long long res, cutoff;
	int ch;
	int cutlim;

	res = 0;
	cutoff = ULLONG_MAX / (unsigned long long)10;
	cutlim = ULLONG_MAX % (unsigned long long)10;

	do {
		if (*cp < '0' || *cp > '9') {
			*error = EINVAL;
			return (res);
		}
		ch = *cp - '0';
		if (res > cutoff || (res == cutoff && ch > cutlim)) {
			*error = ERANGE;
			return (res);
		}
		res *= 10;
		res += ch;
	} while (*(++cp) != '\0');

	*error = 0;
	return (res);
}

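/* Read a property and parse it as an unsigned decimal number. */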
int
xs_getnum(void *xsc, const char *path, const char *property,
    unsigned long long *val)
{
	char *buf;
	int error = 0;

	buf = malloc(XS_MAX_PAYLOAD, M_DEVBUF, M_ZERO |
	    (cold ? M_NOWAIT : M_WAITOK));
	if (buf == NULL)
		return (ENOMEM);

	error = xs_getprop(xsc, path, property, buf, XS_MAX_PAYLOAD);
	if (error)
		goto out;

	*val = atoull(buf, &error);

 out:
	free(buf, M_DEVBUF, XS_MAX_PAYLOAD);
	return (error);
}

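/* Write a number to a property as its decimal string representation. */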
int
xs_setnum(void *xsc, const char *path, const char *property,
    unsigned long long val)
{
	char buf[32];
	int ret;

	ret = snprintf(buf, sizeof(buf), "%llu", val);
	if (ret == -1 || ret >= sizeof(buf))
		return (ERANGE);

	return (xs_setprop(xsc, path, property, buf, strlen(buf)));
}

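/*
 * Read a single property into the supplied buffer.  The key is
 * "path/property", or just "property" if no path is given.
 */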
int
xs_getprop(void *xsc, const char *path, const char *property, char *value,
    int size)
{
	struct xen_softc *sc = xsc;
	struct xs_transaction xst;
	struct iovec *iovp = NULL;
	char key[256];
	int error, ret, iov_cnt = 0;

	if (!property)
		return (EINVAL);

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	if (path)
		ret = snprintf(key, sizeof(key), "%s/%s", path, property);
	else
		ret = snprintf(key, sizeof(key), "%s", property);
	if (ret == -1 || ret >= sizeof(key))
		return (EINVAL);

	if ((error = xs_cmd(&xst, XS_READ, key, &iovp, &iov_cnt)) != 0)
		return (error);

	if (iov_cnt > 0)
		strlcpy(value, (char *)iovp->iov_base, size);

	xs_resfree(&xst, iovp, iov_cnt);

	return (0);
}

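/* Write a single property: value and size supply the payload. */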
int
xs_setprop(void *xsc, const char *path, const char *property, char *value,
    int size)
{
	struct xen_softc *sc = xsc;
	struct xs_transaction xst;
	struct iovec iov, *iovp = &iov;
	char key[256];
	int error, ret, iov_cnt = 0;

	if (!property)
		return (EINVAL);

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	if (path)
		ret = snprintf(key, sizeof(key), "%s/%s", path, property);
	else
		ret = snprintf(key, sizeof(key), "%s", property);
	if (ret == -1 || ret >= sizeof(key))
		return (EINVAL);

	iov.iov_base = value;
	iov.iov_len = size;
	iov_cnt = 1;

	error = xs_cmd(&xst, XS_WRITE, key, &iovp, &iov_cnt);

	return (error);
}

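/*
 * Compare the current value of a property against the supplied
 * string and return the strcmp(3)-style result via *result.
 */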
int
xs_cmpprop(void *xsc, const char *path, const char *property, const char *value,
    int *result)
{
	struct xen_softc *sc = xsc;
	struct xs_transaction xst;
	struct iovec *iovp = NULL;
	char key[256];
	int error, ret, iov_cnt = 0;

	if (!property)
		return (EINVAL);

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	if (path)
		ret = snprintf(key, sizeof(key), "%s/%s", path, property);
	else
		ret = snprintf(key, sizeof(key), "%s", property);
	if (ret == -1 || ret >= sizeof(key))
		return (EINVAL);

	if ((error = xs_cmd(&xst, XS_READ, key, &iovp, &iov_cnt)) != 0)
		return (error);

	*result = strcmp(value, (char *)iovp->iov_base);

	xs_resfree(&xst, iovp, iov_cnt);

	return (0);
}

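/*
 * Poll a property until it assumes the given value.  A timo of 0
 * means wait indefinitely; otherwise give up after timo polling
 * intervals and return ETIMEDOUT.
 */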
int
xs_await_transition(void *xsc, const char *path, const char *property,
    const char *value, int timo)
{
	struct xen_softc *sc = xsc;
	int error, res;

	do {
		error = xs_cmpprop(xsc, path, property, value, &res);
		if (error)
			return (error);
		if (timo && --timo == 0)
			return (ETIMEDOUT);
		xs_poll(sc->sc_xs, cold);
	} while (res != 0);

	return (0);
}

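/*
 * Backend for the pvbus(4) key/value interface: translate KVREAD,
 * KVWRITE and KVLS operations into XenStore commands.  A read
 * returning a single empty string is assumed to name a directory
 * and is retried as a list operation.
 */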
int
xs_kvop(void *xsc, int op, char *key, char *value, size_t valuelen)
{
	struct xen_softc *sc = xsc;
	struct xs_transaction xst;
	struct iovec iov, *iovp = &iov;
	int error = 0, iov_cnt = 0, cmd, i;

	switch (op) {
	case PVBUS_KVWRITE:
		cmd = XS_WRITE;
		iov.iov_base = value;
		iov.iov_len = strlen(value);
		iov_cnt = 1;
		break;
	case PVBUS_KVREAD:
		cmd = XS_READ;
		break;
	case PVBUS_KVLS:
		cmd = XS_LIST;
		break;
	default:
		return (EOPNOTSUPP);
	}

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	if ((error = xs_cmd(&xst, cmd, key, &iovp, &iov_cnt)) != 0)
		return (error);

	memset(value, 0, valuelen);

	switch (cmd) {
	case XS_READ:
		if (iov_cnt == 1 && iovp[0].iov_len == 1) {
			xs_resfree(&xst, iovp, iov_cnt);

			/*
			 * We cannot distinguish whether the returned value
			 * is a directory or a file in the xenstore.  The
			 * only indication is that reading a directory
			 * returns an empty string (a single NUL byte),
			 * so try to get the directory listing in this case.
			 */
			return (xs_kvop(xsc, PVBUS_KVLS, key, value, valuelen));
		}
		/* FALLTHROUGH */
	case XS_LIST:
		for (i = 0; i < iov_cnt; i++) {
			if (i && strlcat(value, "\n", valuelen) >= valuelen)
				break;
			if (strlcat(value, iovp[i].iov_base,
			    valuelen) >= valuelen)
				break;
		}
		xs_resfree(&xst, iovp, iov_cnt);
		break;
	default:
		break;
	}

	return (0);
}