xref: /freebsd/sys/dev/xen/xenstore/xenstore.c (revision 685dc743)
1 /******************************************************************************
2  * xenstore.c
3  *
4  * Low-level kernel interface to the XenStore.
5  *
6  * Copyright (C) 2005 Rusty Russell, IBM Corporation
7  * Copyright (C) 2009,2010 Spectra Logic Corporation
8  *
9  * This file may be distributed separately from the Linux kernel, or
10  * incorporated into other software packages, subject to the following license:
11  *
12  * Permission is hereby granted, free of charge, to any person obtaining a copy
13  * of this source file (the "Software"), to deal in the Software without
14  * restriction, including without limitation the rights to use, copy, modify,
15  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16  * and to permit persons to whom the Software is furnished to do so, subject to
17  * the following conditions:
18  *
19  * The above copyright notice and this permission notice shall be included in
20  * all copies or substantial portions of the Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28  * IN THE SOFTWARE.
29  */
30 
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/bus.h>
34 #include <sys/kernel.h>
35 #include <sys/lock.h>
36 #include <sys/module.h>
37 #include <sys/mutex.h>
38 #include <sys/sx.h>
39 #include <sys/syslog.h>
40 #include <sys/malloc.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/kthread.h>
44 #include <sys/sbuf.h>
45 #include <sys/sysctl.h>
46 #include <sys/uio.h>
47 #include <sys/unistd.h>
48 #include <sys/queue.h>
49 #include <sys/taskqueue.h>
50 
51 #include <machine/stdarg.h>
52 
53 #include <xen/xen-os.h>
54 #include <xen/hypervisor.h>
55 #include <xen/xen_intr.h>
56 
57 #include <contrib/xen/hvm/params.h>
58 #include <xen/hvm.h>
59 
60 #include <xen/xenstore/xenstorevar.h>
61 #include <xen/xenstore/xenstore_internal.h>
62 
63 #include <vm/vm.h>
64 #include <vm/pmap.h>
65 
66 /**
67  * \file xenstore.c
68  * \brief XenStore interface
69  *
70  * The XenStore interface is a simple storage system that is a means of
71  * communicating state and configuration data between the Xen Domain 0
72  * and the various guest domains.  All configuration data other than
73  * a small amount of essential information required during the early
74  * boot process of launching a Xen aware guest, is managed using the
75  * XenStore.
76  *
77  * The XenStore is ASCII string based, and has a structure and semantics
78  * similar to a filesystem.  There are files and directories, the directories
79  * able to contain files or other directories.  The depth of the hierarchy
80  * is only limited by the XenStore's maximum path length.
81  *
82  * The communication channel between the XenStore service and other
83  * domains is via two, guest specific, ring buffers in a shared memory
84  * area.  One ring buffer is used for communicating in each direction.
85  * The grant table references for this shared memory are given to the
86  * guest either via the xen_start_info structure for a fully para-
87  * virtualized guest, or via HVM hypercalls for a hardware virtualized
88  * guest.
89  *
90  * The XenStore communication relies on an event channel and thus
91  * interrupts.  For this reason, the attachment of the XenStore
92  * relies on an interrupt driven configuration hook to hold off
93  * boot processing until communication with the XenStore service
94  * can be established.
95  *
96  * Several Xen services depend on the XenStore, most notably the
97  * XenBus used to discover and manage Xen devices.  These services
98  * are implemented as NewBus child attachments to a bus exported
99  * by this XenStore driver.
100  */
101 
102 static struct xs_watch *find_watch(const char *token);
103 
104 MALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results");
105 
106 /**
107  * Pointer to shared memory communication structures allowing us
108  * to communicate with the XenStore service.
109  *
110  * When operating in full PV mode, this pointer is set early in kernel
111  * startup from within xen_machdep.c.  In HVM mode, we use hypercalls
112  * to get the guest frame number for the shared page and then map it
113  * into kva.  See xs_init() for details.
114  */
115 static struct xenstore_domain_interface *xen_store;
116 
117 /*-------------------------- Private Data Structures ------------------------*/
118 
119 /**
120  * Structure capturing messages received from the XenStore service.
121  */
122 struct xs_stored_msg {
123 	TAILQ_ENTRY(xs_stored_msg) list;
124 
125 	struct xsd_sockmsg hdr;
126 
127 	union {
128 		/* Queued replies. */
129 		struct {
130 			char *body;
131 		} reply;
132 
133 		/* Queued watch events. */
134 		struct {
135 			struct xs_watch *handle;
136 			const char **vec;
137 			u_int vec_size;
138 		} watch;
139 	} u;
140 };
141 TAILQ_HEAD(xs_stored_msg_list, xs_stored_msg);
142 
143 /**
144  * Container for all XenStore related state.
145  */
146 struct xs_softc {
147 	/** Newbus device for the XenStore. */
148 	device_t xs_dev;
149 
150 	/**
151 	 * Lock serializing access to ring producer/consumer
152 	 * indexes.  Use of this lock guarantees that wakeups
153 	 * of blocking readers/writers are not missed due to
154 	 * races with the XenStore service.
155 	 */
156 	struct mtx ring_lock;
157 
158 	/*
159 	 * Mutex used to insure exclusive access to the outgoing
160 	 * communication ring.  We use a lock type that can be
161 	 * held while sleeping so that xs_write() can block waiting
162 	 * for space in the ring to free up, without allowing another
163 	 * writer to come in and corrupt a partial message write.
164 	 */
165 	struct sx request_mutex;
166 
167 	/**
168 	 * A list of replies to our requests.
169 	 *
170 	 * The reply list is filled by xs_rcv_thread().  It
171 	 * is consumed by the context that issued the request
172 	 * to which a reply is made.  The requester blocks in
173 	 * xs_read_reply().
174 	 *
175 	 * /note Only one requesting context can be active at a time.
176 	 *       This is guaranteed by the request_mutex and insures
177 	 *	 that the requester sees replies matching the order
178 	 *	 of its requests.
179 	 */
180 	struct xs_stored_msg_list reply_list;
181 
182 	/** Lock protecting the reply list. */
183 	struct mtx reply_lock;
184 
185 	/**
186 	 * List of registered watches.
187 	 */
188 	struct xs_watch_list  registered_watches;
189 
190 	/** Lock protecting the registered watches list. */
191 	struct mtx registered_watches_lock;
192 
193 	/**
194 	 * List of pending watch callback events.
195 	 */
196 	struct xs_stored_msg_list watch_events;
197 
198 	/** Lock protecting the watch calback list. */
199 	struct mtx watch_events_lock;
200 
201 	/**
202 	 * The processid of the xenwatch thread.
203 	 */
204 	pid_t xenwatch_pid;
205 
206 	/**
207 	 * Sleepable mutex used to gate the execution of XenStore
208 	 * watch event callbacks.
209 	 *
210 	 * xenwatch_thread holds an exclusive lock on this mutex
211 	 * while delivering event callbacks, and xenstore_unregister_watch()
212 	 * uses an exclusive lock of this mutex to guarantee that no
213 	 * callbacks of the just unregistered watch are pending
214 	 * before returning to its caller.
215 	 */
216 	struct sx xenwatch_mutex;
217 
218 	/**
219 	 * The HVM guest pseudo-physical frame number.  This is Xen's mapping
220 	 * of the true machine frame number into our "physical address space".
221 	 */
222 	unsigned long gpfn;
223 
224 	/**
225 	 * The event channel for communicating with the
226 	 * XenStore service.
227 	 */
228 	int evtchn;
229 
230 	/** Handle for XenStore interrupts. */
231 	xen_intr_handle_t xen_intr_handle;
232 
233 	/**
234 	 * Interrupt driven config hook allowing us to defer
235 	 * attaching children until interrupts (and thus communication
236 	 * with the XenStore service) are available.
237 	 */
238 	struct intr_config_hook xs_attachcb;
239 
240 	/**
241 	 * Xenstore is a user-space process that usually runs in Dom0,
242 	 * so if this domain is booting as Dom0, xenstore wont we accessible,
243 	 * and we have to defer the initialization of xenstore related
244 	 * devices to later (when xenstore is started).
245 	 */
246 	bool initialized;
247 
248 	/**
249 	 * Task to run when xenstore is initialized (Dom0 only), will
250 	 * take care of attaching xenstore related devices.
251 	 */
252 	struct task xs_late_init;
253 };
254 
255 /*-------------------------------- Global Data ------------------------------*/
256 static struct xs_softc xs;
257 
258 /*------------------------- Private Utility Functions -----------------------*/
259 
/**
 * Walk a buffer of back-to-back NUL terminated strings, counting them
 * and, when requested, recording where each one starts.
 *
 * The walk relies on the final string being NUL terminated within the
 * buffer; the caller is responsible for guaranteeing that.
 *
 * \param strings  Start of the contiguous buffer of strings.
 * \param dest	   Array receiving a pointer to each string, or NULL to
 *		   only count.  Must have room for every string found.
 * \param len	   Size in bytes of the buffer addressed by strings.
 *
 * \return  The number of strings found in the buffer.
 */
static u_int
extract_strings(const char *strings, const char **dest, u_int len)
{
	const char *cur, *end;
	u_int count;

	cur = strings;
	end = strings + len;
	count = 0;
	while (cur < end) {
		if (dest != NULL)
			dest[count] = cur;
		count++;

		/* Step over this string and its terminator. */
		cur += strlen(cur) + 1;
	}

	return (count);
}
284 
285 /**
286  * Convert a contiguous buffer containing a series of NUL terminated
287  * strings into an array of pointers to strings.
288  *
289  * The returned pointer references the array of string pointers which
290  * is followed by the storage for the string data.  It is the client's
291  * responsibility to free this storage.
292  *
293  * The storage addressed by strings is free'd prior to split returning.
294  *
295  * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
296  * \param len	   The length of the buffer pointed to by strings.
297  * \param num	   The number of strings found and returned in the strings
298  *                 array.
299  *
300  * \return  An array of pointers to the strings found in the input buffer.
301  */
302 static const char **
303 split(char *strings, u_int len, u_int *num)
304 {
305 	const char **ret;
306 
307 	/* Protect against unterminated buffers. */
308 	if (len > 0)
309 		strings[len - 1] = '\0';
310 
311 	/* Count the strings. */
312 	*num = extract_strings(strings, /*dest*/NULL, len);
313 
314 	/* Transfer to one big alloc for easy freeing by the caller. */
315 	ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK);
316 	memcpy(&ret[*num], strings, len);
317 	free(strings, M_XENSTORE);
318 
319 	/* Extract pointers to newly allocated array. */
320 	strings = (char *)&ret[*num];
321 	(void)extract_strings(strings, /*dest*/ret, len);
322 
323 	return (ret);
324 }
325 
326 /*------------------------- Public Utility Functions -------------------------*/
327 /*------- API comments for these methods can be found in xenstorevar.h -------*/
/*
 * Build "dir/name" in a freshly created, finished auto-sized sbuf.  When
 * name is the empty string the result is just dir, with no trailing '/'.
 */
struct sbuf *
xs_join(const char *dir, const char *name)
{
	struct sbuf *path;

	path = sbuf_new_auto();
	sbuf_cat(path, dir);
	if (*name != '\0') {
		sbuf_putc(path, '/');
		sbuf_cat(path, name);
	}
	sbuf_finish(path);

	return (path);
}
343 
344 /*-------------------- Low Level Communication Management --------------------*/
345 /**
346  * Interrupt handler for the XenStore event channel.
347  *
348  * XenStore reads and writes block on "xen_store" for buffer
349  * space.  Wakeup any blocking operations when the XenStore
350  * service has modified the queues.
351  */
352 static void
353 xs_intr(void * arg __unused /*__attribute__((unused))*/)
354 {
355 
356 	/* If xenstore has not been initialized, initialize it now */
357 	if (!xs.initialized) {
358 		xs.initialized = true;
359 		/*
360 		 * Since this task is probing and attaching devices we
361 		 * have to hold the Giant lock.
362 		 */
363 		taskqueue_enqueue(taskqueue_swi_giant, &xs.xs_late_init);
364 	}
365 
366 	/*
367 	 * Hold ring lock across wakeup so that clients
368 	 * cannot miss a wakeup.
369 	 */
370 	mtx_lock(&xs.ring_lock);
371 	wakeup(xen_store);
372 	mtx_unlock(&xs.ring_lock);
373 }
374 
375 /**
376  * Verify that the indexes for a ring are valid.
377  *
378  * The difference between the producer and consumer cannot
379  * exceed the size of the ring.
380  *
381  * \param cons  The consumer index for the ring to test.
382  * \param prod  The producer index for the ring to test.
383  *
384  * \retval 1  If indexes are in range.
385  * \retval 0  If the indexes are out of range.
386  */
387 static int
388 xs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
389 {
390 
391 	return ((prod - cons) <= XENSTORE_RING_SIZE);
392 }
393 
394 /**
395  * Return a pointer to, and the length of, the contiguous
396  * free region available for output in a ring buffer.
397  *
398  * \param cons  The consumer index for the ring.
399  * \param prod  The producer index for the ring.
400  * \param buf   The base address of the ring's storage.
401  * \param len   The amount of contiguous storage available.
402  *
403  * \return  A pointer to the start location of the free region.
404  */
405 static void *
406 xs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
407     char *buf, uint32_t *len)
408 {
409 
410 	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
411 	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
412 		*len = XENSTORE_RING_SIZE - (prod - cons);
413 	return (buf + MASK_XENSTORE_IDX(prod));
414 }
415 
416 /**
417  * Return a pointer to, and the length of, the contiguous
418  * data available to read from a ring buffer.
419  *
420  * \param cons  The consumer index for the ring.
421  * \param prod  The producer index for the ring.
422  * \param buf   The base address of the ring's storage.
423  * \param len   The amount of contiguous data available to read.
424  *
425  * \return  A pointer to the start location of the available data.
426  */
427 static const void *
428 xs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
429     const char *buf, uint32_t *len)
430 {
431 
432 	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
433 	if ((prod - cons) < *len)
434 		*len = prod - cons;
435 	return (buf + MASK_XENSTORE_IDX(cons));
436 }
437 
438 /**
439  * Transmit data to the XenStore service.
440  *
441  * \param tdata  A pointer to the contiguous data to send.
442  * \param len    The amount of data to send.
443  *
444  * \return  On success 0, otherwise an errno value indicating the
445  *          cause of failure.
446  *
447  * \invariant  Called from thread context.
448  * \invariant  The buffer pointed to by tdata is at least len bytes
449  *             in length.
450  * \invariant  xs.request_mutex exclusively locked.
451  */
452 static int
453 xs_write_store(const void *tdata, unsigned len)
454 {
455 	XENSTORE_RING_IDX cons, prod;
456 	const char *data = (const char *)tdata;
457 	int error;
458 
459 	sx_assert(&xs.request_mutex, SX_XLOCKED);
460 	while (len != 0) {
461 		void *dst;
462 		u_int avail;
463 
464 		/* Hold lock so we can't miss wakeups should we block. */
465 		mtx_lock(&xs.ring_lock);
466 		cons = xen_store->req_cons;
467 		prod = xen_store->req_prod;
468 		if ((prod - cons) == XENSTORE_RING_SIZE) {
469 			/*
470 			 * Output ring is full. Wait for a ring event.
471 			 *
472 			 * Note that the events from both queues
473 			 * are combined, so being woken does not
474 			 * guarantee that data exist in the read
475 			 * ring.
476 			 *
477 			 * To simplify error recovery and the retry,
478 			 * we specify PDROP so our lock is *not* held
479 			 * when msleep returns.
480 			 */
481 			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
482 			     "xbwrite", /*timeout*/0);
483 			if (error && error != EWOULDBLOCK)
484 				return (error);
485 
486 			/* Try again. */
487 			continue;
488 		}
489 		mtx_unlock(&xs.ring_lock);
490 
491 		/* Verify queue sanity. */
492 		if (!xs_check_indexes(cons, prod)) {
493 			xen_store->req_cons = xen_store->req_prod = 0;
494 			return (EIO);
495 		}
496 
497 		dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail);
498 		if (avail > len)
499 			avail = len;
500 
501 		memcpy(dst, data, avail);
502 		data += avail;
503 		len -= avail;
504 
505 		/*
506 		 * The store to the producer index, which indicates
507 		 * to the other side that new data has arrived, must
508 		 * be visible only after our copy of the data into the
509 		 * ring has completed.
510 		 */
511 		wmb();
512 		xen_store->req_prod += avail;
513 
514 		/*
515 		 * xen_intr_signal() implies mb(). The other side will see
516 		 * the change to req_prod at the time of the interrupt.
517 		 */
518 		xen_intr_signal(xs.xen_intr_handle);
519 	}
520 
521 	return (0);
522 }
523 
524 /**
525  * Receive data from the XenStore service.
526  *
527  * \param tdata  A pointer to the contiguous buffer to receive the data.
528  * \param len    The amount of data to receive.
529  *
530  * \return  On success 0, otherwise an errno value indicating the
531  *          cause of failure.
532  *
533  * \invariant  Called from thread context.
534  * \invariant  The buffer pointed to by tdata is at least len bytes
535  *             in length.
536  *
537  * \note xs_read does not perform any internal locking to guarantee
538  *       serial access to the incoming ring buffer.  However, there
539  *	 is only one context processing reads: xs_rcv_thread().
540  */
541 static int
542 xs_read_store(void *tdata, unsigned len)
543 {
544 	XENSTORE_RING_IDX cons, prod;
545 	char *data = (char *)tdata;
546 	int error;
547 
548 	while (len != 0) {
549 		u_int avail;
550 		const char *src;
551 
552 		/* Hold lock so we can't miss wakeups should we block. */
553 		mtx_lock(&xs.ring_lock);
554 		cons = xen_store->rsp_cons;
555 		prod = xen_store->rsp_prod;
556 		if (cons == prod) {
557 			/*
558 			 * Nothing to read. Wait for a ring event.
559 			 *
560 			 * Note that the events from both queues
561 			 * are combined, so being woken does not
562 			 * guarantee that data exist in the read
563 			 * ring.
564 			 *
565 			 * To simplify error recovery and the retry,
566 			 * we specify PDROP so our lock is *not* held
567 			 * when msleep returns.
568 			 */
569 			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
570 			    "xbread", /*timeout*/0);
571 			if (error && error != EWOULDBLOCK)
572 				return (error);
573 			continue;
574 		}
575 		mtx_unlock(&xs.ring_lock);
576 
577 		/* Verify queue sanity. */
578 		if (!xs_check_indexes(cons, prod)) {
579 			xen_store->rsp_cons = xen_store->rsp_prod = 0;
580 			return (EIO);
581 		}
582 
583 		src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail);
584 		if (avail > len)
585 			avail = len;
586 
587 		/*
588 		 * Insure the data we read is related to the indexes
589 		 * we read above.
590 		 */
591 		rmb();
592 
593 		memcpy(data, src, avail);
594 		data += avail;
595 		len -= avail;
596 
597 		/*
598 		 * Insure that the producer of this ring does not see
599 		 * the ring space as free until after we have copied it
600 		 * out.
601 		 */
602 		mb();
603 		xen_store->rsp_cons += avail;
604 
605 		/*
606 		 * xen_intr_signal() implies mb(). The producer will see
607 		 * the updated consumer index when the event is delivered.
608 		 */
609 		xen_intr_signal(xs.xen_intr_handle);
610 	}
611 
612 	return (0);
613 }
614 
615 /*----------------------- Received Message Processing ------------------------*/
616 /**
617  * Block reading the next message from the XenStore service and
618  * process the result.
619  *
620  * \param type  The returned type of the XenStore message received.
621  *
622  * \return  0 on success.  Otherwise an errno value indicating the
623  *          type of failure encountered.
624  */
625 static int
626 xs_process_msg(enum xsd_sockmsg_type *type)
627 {
628 	struct xs_stored_msg *msg;
629 	char *body;
630 	int error;
631 
632 	msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK);
633 	error = xs_read_store(&msg->hdr, sizeof(msg->hdr));
634 	if (error) {
635 		free(msg, M_XENSTORE);
636 		return (error);
637 	}
638 
639 	body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK);
640 	error = xs_read_store(body, msg->hdr.len);
641 	if (error) {
642 		free(body, M_XENSTORE);
643 		free(msg, M_XENSTORE);
644 		return (error);
645 	}
646 	body[msg->hdr.len] = '\0';
647 
648 	*type = msg->hdr.type;
649 	if (msg->hdr.type == XS_WATCH_EVENT) {
650 		msg->u.watch.vec = split(body, msg->hdr.len,
651 		    &msg->u.watch.vec_size);
652 
653 		mtx_lock(&xs.registered_watches_lock);
654 		msg->u.watch.handle = find_watch(
655 		    msg->u.watch.vec[XS_WATCH_TOKEN]);
656 		mtx_lock(&xs.watch_events_lock);
657 		if (msg->u.watch.handle != NULL &&
658 		    (!msg->u.watch.handle->max_pending ||
659 		    msg->u.watch.handle->pending <
660 		    msg->u.watch.handle->max_pending)) {
661 			msg->u.watch.handle->pending++;
662 			TAILQ_INSERT_TAIL(&xs.watch_events, msg, list);
663 			wakeup(&xs.watch_events);
664 			mtx_unlock(&xs.watch_events_lock);
665 		} else {
666 			mtx_unlock(&xs.watch_events_lock);
667 			free(msg->u.watch.vec, M_XENSTORE);
668 			free(msg, M_XENSTORE);
669 		}
670 		mtx_unlock(&xs.registered_watches_lock);
671 	} else {
672 		msg->u.reply.body = body;
673 		mtx_lock(&xs.reply_lock);
674 		TAILQ_INSERT_TAIL(&xs.reply_list, msg, list);
675 		wakeup(&xs.reply_list);
676 		mtx_unlock(&xs.reply_lock);
677 	}
678 
679 	return (0);
680 }
681 
682 /**
683  * Thread body of the XenStore receive thread.
684  *
685  * This thread blocks waiting for data from the XenStore service
686  * and processes and received messages.
687  */
688 static void
689 xs_rcv_thread(void *arg __unused)
690 {
691 	int error;
692 	enum xsd_sockmsg_type type;
693 
694 	for (;;) {
695 		error = xs_process_msg(&type);
696 		if (error)
697 			printf("XENSTORE error %d while reading message\n",
698 			    error);
699 	}
700 }
701 
702 /*---------------- XenStore Message Request/Reply Processing -----------------*/
703 #define xsd_error_count	(sizeof(xsd_errors) / sizeof(xsd_errors[0]))
704 
705 /**
706  * Convert a XenStore error string into an errno number.
707  *
708  * \param errorstring  The error string to convert.
709  *
710  * \return  The errno best matching the input string.
711  *
712  * \note Unknown error strings are converted to EINVAL.
713  */
714 static int
715 xs_get_error(const char *errorstring)
716 {
717 	u_int i;
718 
719 	for (i = 0; i < xsd_error_count; i++) {
720 		if (!strcmp(errorstring, xsd_errors[i].errstring))
721 			return (xsd_errors[i].errnum);
722 	}
723 	log(LOG_WARNING, "XENSTORE xen store gave: unknown error %s",
724 	    errorstring);
725 	return (EINVAL);
726 }
727 
728 /**
729  * Block waiting for a reply to a message request.
730  *
731  * \param type	  The returned type of the reply.
732  * \param len	  The returned body length of the reply.
733  * \param result  The returned body of the reply.
734  *
735  * \return  0 on success.  Otherwise an errno indicating the
736  *          cause of failure.
737  */
738 static int
739 xs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result)
740 {
741 	struct xs_stored_msg *msg;
742 	char *body;
743 	int error;
744 
745 	mtx_lock(&xs.reply_lock);
746 	while (TAILQ_EMPTY(&xs.reply_list)) {
747 		error = mtx_sleep(&xs.reply_list, &xs.reply_lock, 0, "xswait",
748 		    hz/10);
749 		if (error && error != EWOULDBLOCK) {
750 			mtx_unlock(&xs.reply_lock);
751 			return (error);
752 		}
753 	}
754 	msg = TAILQ_FIRST(&xs.reply_list);
755 	TAILQ_REMOVE(&xs.reply_list, msg, list);
756 	mtx_unlock(&xs.reply_lock);
757 
758 	*type = msg->hdr.type;
759 	if (len)
760 		*len = msg->hdr.len;
761 	body = msg->u.reply.body;
762 
763 	free(msg, M_XENSTORE);
764 	*result = body;
765 	return (0);
766 }
767 
768 /**
769  * Pass-thru interface for XenStore access by userland processes
770  * via the XenStore device.
771  *
772  * Reply type and length data are returned by overwriting these
773  * fields in the passed in request message.
774  *
775  * \param msg	  A properly formatted message to transmit to
776  *		  the XenStore service.
777  * \param result  The returned body of the reply.
778  *
779  * \return  0 on success.  Otherwise an errno indicating the cause
780  *          of failure.
781  *
782  * \note The returned result is provided in malloced storage and thus
783  *       must be free'd by the caller with 'free(result, M_XENSTORE);
784  */
785 int
786 xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
787 {
788 	int error;
789 
790 	sx_xlock(&xs.request_mutex);
791 	if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0)
792 		error = xs_read_reply(&msg->type, &msg->len, result);
793 	sx_xunlock(&xs.request_mutex);
794 
795 	return (error);
796 }
797 
798 /**
799  * Send a message with an optionally muti-part body to the XenStore service.
800  *
801  * \param t              The transaction to use for this request.
802  * \param request_type   The type of message to send.
803  * \param iovec          Pointers to the body sections of the request.
804  * \param num_vecs       The number of body sections in the request.
805  * \param len            The returned length of the reply.
806  * \param result         The returned body of the reply.
807  *
808  * \return  0 on success.  Otherwise an errno indicating
809  *          the cause of failure.
810  *
811  * \note The returned result is provided in malloced storage and thus
812  *       must be free'd by the caller with 'free(*result, M_XENSTORE);
813  */
814 static int
815 xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type,
816     const struct iovec *iovec, u_int num_vecs, u_int *len, void **result)
817 {
818 	struct xsd_sockmsg msg;
819 	void *ret = NULL;
820 	u_int i;
821 	int error;
822 
823 	msg.tx_id = t.id;
824 	msg.req_id = 0;
825 	msg.type = request_type;
826 	msg.len = 0;
827 	for (i = 0; i < num_vecs; i++)
828 		msg.len += iovec[i].iov_len;
829 
830 	sx_xlock(&xs.request_mutex);
831 	error = xs_write_store(&msg, sizeof(msg));
832 	if (error) {
833 		printf("xs_talkv failed %d\n", error);
834 		goto error_lock_held;
835 	}
836 
837 	for (i = 0; i < num_vecs; i++) {
838 		error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len);
839 		if (error) {
840 			printf("xs_talkv failed %d\n", error);
841 			goto error_lock_held;
842 		}
843 	}
844 
845 	error = xs_read_reply(&msg.type, len, &ret);
846 
847 error_lock_held:
848 	sx_xunlock(&xs.request_mutex);
849 	if (error)
850 		return (error);
851 
852 	if (msg.type == XS_ERROR) {
853 		error = xs_get_error(ret);
854 		free(ret, M_XENSTORE);
855 		return (error);
856 	}
857 
858 	/* Reply is either error or an echo of our request message type. */
859 	KASSERT(msg.type == request_type, ("bad xenstore message type"));
860 
861 	if (result)
862 		*result = ret;
863 	else
864 		free(ret, M_XENSTORE);
865 
866 	return (0);
867 }
868 
869 /**
870  * Wrapper for xs_talkv allowing easy transmission of a message with
871  * a single, contiguous, message body.
872  *
873  * \param t              The transaction to use for this request.
874  * \param request_type   The type of message to send.
875  * \param body           The body of the request.
876  * \param len            The returned length of the reply.
877  * \param result         The returned body of the reply.
878  *
879  * \return  0 on success.  Otherwise an errno indicating
880  *          the cause of failure.
881  *
882  * \note The returned result is provided in malloced storage and thus
883  *       must be free'd by the caller with 'free(*result, M_XENSTORE);
884  */
885 static int
886 xs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type,
887     const char *body, u_int *len, void **result)
888 {
889 	struct iovec iovec;
890 
891 	iovec.iov_base = (void *)(uintptr_t)body;
892 	iovec.iov_len = strlen(body) + 1;
893 
894 	return (xs_talkv(t, request_type, &iovec, 1, len, result));
895 }
896 
897 /*------------------------- XenStore Watch Support ---------------------------*/
898 /**
899  * Transmit a watch request to the XenStore service.
900  *
901  * \param path    The path in the XenStore to watch.
902  * \param tocken  A unique identifier for this watch.
903  *
904  * \return  0 on success.  Otherwise an errno indicating the
905  *          cause of failure.
906  */
907 static int
908 xs_watch(const char *path, const char *token)
909 {
910 	struct iovec iov[2];
911 
912 	iov[0].iov_base = (void *)(uintptr_t) path;
913 	iov[0].iov_len = strlen(path) + 1;
914 	iov[1].iov_base = (void *)(uintptr_t) token;
915 	iov[1].iov_len = strlen(token) + 1;
916 
917 	return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL));
918 }
919 
920 /**
921  * Transmit an uwatch request to the XenStore service.
922  *
923  * \param path    The path in the XenStore to watch.
924  * \param tocken  A unique identifier for this watch.
925  *
926  * \return  0 on success.  Otherwise an errno indicating the
927  *          cause of failure.
928  */
929 static int
930 xs_unwatch(const char *path, const char *token)
931 {
932 	struct iovec iov[2];
933 
934 	iov[0].iov_base = (void *)(uintptr_t) path;
935 	iov[0].iov_len = strlen(path) + 1;
936 	iov[1].iov_base = (void *)(uintptr_t) token;
937 	iov[1].iov_len = strlen(token) + 1;
938 
939 	return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL));
940 }
941 
942 /**
943  * Convert from watch token (unique identifier) to the associated
944  * internal tracking structure for this watch.
945  *
946  * \param tocken  The unique identifier for the watch to find.
947  *
948  * \return  A pointer to the found watch structure or NULL.
949  */
950 static struct xs_watch *
951 find_watch(const char *token)
952 {
953 	struct xs_watch *i, *cmp;
954 
955 	cmp = (void *)strtoul(token, NULL, 16);
956 
957 	LIST_FOREACH(i, &xs.registered_watches, list)
958 		if (i == cmp)
959 			return (i);
960 
961 	return (NULL);
962 }
963 
964 /**
965  * Thread body of the XenStore watch event dispatch thread.
966  */
967 static void
968 xenwatch_thread(void *unused)
969 {
970 	struct xs_stored_msg *msg;
971 
972 	for (;;) {
973 		mtx_lock(&xs.watch_events_lock);
974 		while (TAILQ_EMPTY(&xs.watch_events))
975 			mtx_sleep(&xs.watch_events,
976 			    &xs.watch_events_lock,
977 			    PWAIT | PCATCH, "waitev", hz/10);
978 
979 		mtx_unlock(&xs.watch_events_lock);
980 		sx_xlock(&xs.xenwatch_mutex);
981 
982 		mtx_lock(&xs.watch_events_lock);
983 		msg = TAILQ_FIRST(&xs.watch_events);
984 		if (msg) {
985 			TAILQ_REMOVE(&xs.watch_events, msg, list);
986 			msg->u.watch.handle->pending--;
987 		}
988 		mtx_unlock(&xs.watch_events_lock);
989 
990 		if (msg != NULL) {
991 			/*
992 			 * XXX There are messages coming in with a NULL
993 			 * XXX callback.  This deserves further investigation;
994 			 * XXX the workaround here simply prevents the kernel
995 			 * XXX from panic'ing on startup.
996 			 */
997 			if (msg->u.watch.handle->callback != NULL)
998 				msg->u.watch.handle->callback(
999 					msg->u.watch.handle,
1000 					(const char **)msg->u.watch.vec,
1001 					msg->u.watch.vec_size);
1002 			free(msg->u.watch.vec, M_XENSTORE);
1003 			free(msg, M_XENSTORE);
1004 		}
1005 
1006 		sx_xunlock(&xs.xenwatch_mutex);
1007 	}
1008 }
1009 
1010 /*----------- XenStore Configuration, Initialization, and Control ------------*/
1011 /**
1012  * Setup communication channels with the XenStore service.
1013  *
1014  * \return  On success, 0. Otherwise an errno value indicating the
1015  *          type of failure.
1016  */
1017 static int
1018 xs_init_comms(void)
1019 {
1020 	int error;
1021 
1022 	if (xen_store->rsp_prod != xen_store->rsp_cons) {
1023 		log(LOG_WARNING, "XENSTORE response ring is not quiescent "
1024 		    "(%08x:%08x): fixing up\n",
1025 		    xen_store->rsp_cons, xen_store->rsp_prod);
1026 		xen_store->rsp_cons = xen_store->rsp_prod;
1027 	}
1028 
1029 	xen_intr_unbind(&xs.xen_intr_handle);
1030 
1031 	error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn,
1032 	    /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE,
1033 	    &xs.xen_intr_handle);
1034 	if (error) {
1035 		log(LOG_WARNING, "XENSTORE request irq failed %i\n", error);
1036 		return (error);
1037 	}
1038 
1039 	return (0);
1040 }
1041 
1042 /*------------------ Private Device Attachment Functions  --------------------*/
1043 static void
1044 xs_identify(driver_t *driver, device_t parent)
1045 {
1046 
1047 	BUS_ADD_CHILD(parent, 0, "xenstore", 0);
1048 }
1049 
1050 /**
1051  * Probe for the existence of the XenStore.
1052  *
1053  * \param dev
1054  */
1055 static int
1056 xs_probe(device_t dev)
1057 {
1058 	/*
1059 	 * We are either operating within a PV kernel or being probed
1060 	 * as the child of the successfully attached xenpci device.
1061 	 * Thus we are in a Xen environment and there will be a XenStore.
1062 	 * Unconditionally return success.
1063 	 */
1064 	device_set_desc(dev, "XenStore");
1065 	return (BUS_PROBE_NOWILDCARD);
1066 }
1067 
1068 static void
1069 xs_attach_deferred(void *arg)
1070 {
1071 
1072 	bus_generic_probe(xs.xs_dev);
1073 	bus_generic_attach(xs.xs_dev);
1074 
1075 	config_intrhook_disestablish(&xs.xs_attachcb);
1076 }
1077 
1078 static void
1079 xs_attach_late(void *arg, int pending)
1080 {
1081 
1082 	KASSERT((pending == 1), ("xs late attach queued several times"));
1083 	bus_generic_probe(xs.xs_dev);
1084 	bus_generic_attach(xs.xs_dev);
1085 }
1086 
1087 /**
1088  * Attach to the XenStore.
1089  *
1090  * This routine also prepares for the probe/attach of drivers that rely
1091  * on the XenStore.
1092  */
1093 static int
1094 xs_attach(device_t dev)
1095 {
1096 	int error;
1097 
1098 	/* Allow us to get device_t from softc and vice-versa. */
1099 	xs.xs_dev = dev;
1100 	device_set_softc(dev, &xs);
1101 
1102 	/* Initialize the interface to xenstore. */
1103 	struct proc *p;
1104 
1105 	xs.initialized = false;
1106 	xs.evtchn = xen_get_xenstore_evtchn();
1107 	if (xs.evtchn == 0) {
1108 		struct evtchn_alloc_unbound alloc_unbound;
1109 
1110 		/* Allocate a local event channel for xenstore */
1111 		alloc_unbound.dom = DOMID_SELF;
1112 		alloc_unbound.remote_dom = DOMID_SELF;
1113 		error = HYPERVISOR_event_channel_op(
1114 		    EVTCHNOP_alloc_unbound, &alloc_unbound);
1115 		if (error != 0)
1116 			panic(
1117 			   "unable to alloc event channel for Dom0: %d",
1118 			    error);
1119 
1120 		xs.evtchn = alloc_unbound.port;
1121 
1122 		/* Allocate memory for the xs shared ring */
1123 		xen_store = malloc(PAGE_SIZE, M_XENSTORE, M_WAITOK | M_ZERO);
1124 		xs.gpfn = atop(pmap_kextract((vm_offset_t)xen_store));
1125 	} else {
1126 		xs.gpfn = xen_get_xenstore_mfn();
1127 		xen_store = pmap_mapdev_attr(ptoa(xs.gpfn), PAGE_SIZE,
1128 		    VM_MEMATTR_XEN);
1129 		xs.initialized = true;
1130 	}
1131 
1132 	TAILQ_INIT(&xs.reply_list);
1133 	TAILQ_INIT(&xs.watch_events);
1134 
1135 	mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF);
1136 	mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF);
1137 	sx_init(&xs.xenwatch_mutex, "xenwatch");
1138 	sx_init(&xs.request_mutex, "xenstore request");
1139 	mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF);
1140 	mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF);
1141 
1142 	/* Initialize the shared memory rings to talk to xenstored */
1143 	error = xs_init_comms();
1144 	if (error)
1145 		return (error);
1146 
1147 	error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID,
1148 	    0, "xenwatch");
1149 	if (error)
1150 		return (error);
1151 	xs.xenwatch_pid = p->p_pid;
1152 
1153 	error = kproc_create(xs_rcv_thread, NULL, NULL,
1154 	    RFHIGHPID, 0, "xenstore_rcv");
1155 
1156 	xs.xs_attachcb.ich_func = xs_attach_deferred;
1157 	xs.xs_attachcb.ich_arg = NULL;
1158 	if (xs.initialized) {
1159 		config_intrhook_establish(&xs.xs_attachcb);
1160 	} else {
1161 		TASK_INIT(&xs.xs_late_init, 0, xs_attach_late, NULL);
1162 	}
1163 
1164 	return (error);
1165 }
1166 
1167 /**
1168  * Prepare for suspension of this VM by halting XenStore access after
1169  * all transactions and individual requests have completed.
1170  */
1171 static int
1172 xs_suspend(device_t dev)
1173 {
1174 	int error;
1175 
1176 	/* Suspend child Xen devices. */
1177 	error = bus_generic_suspend(dev);
1178 	if (error != 0)
1179 		return (error);
1180 
1181 	sx_xlock(&xs.request_mutex);
1182 
1183 	return (0);
1184 }
1185 
1186 /**
1187  * Resume XenStore operations after this VM is resumed.
1188  */
1189 static int
1190 xs_resume(device_t dev __unused)
1191 {
1192 	struct xs_watch *watch;
1193 	char token[sizeof(watch) * 2 + 1];
1194 
1195 	xs_init_comms();
1196 
1197 	sx_xunlock(&xs.request_mutex);
1198 
1199 	/*
1200 	 * NB: since xenstore childs have not been resumed yet, there's
1201 	 * no need to hold any watch mutex. Having clients try to add or
1202 	 * remove watches at this point (before xenstore is resumed) is
1203 	 * clearly a violantion of the resume order.
1204 	 */
1205 	LIST_FOREACH(watch, &xs.registered_watches, list) {
1206 		sprintf(token, "%lX", (long)watch);
1207 		xs_watch(watch->node, token);
1208 	}
1209 
1210 	/* Resume child Xen devices. */
1211 	bus_generic_resume(dev);
1212 
1213 	return (0);
1214 }
1215 
1216 /*-------------------- Private Device Attachment Data  -----------------------*/
/*
 * Method table for the xenstore driver.  Identify, probe, attach,
 * suspend and resume are implemented by the xs_* functions in this
 * file; every other entry uses the generic bus implementation.
 */
static device_method_t xenstore_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	xs_identify),
	DEVMETHOD(device_probe,         xs_probe),
	DEVMETHOD(device_attach,        xs_attach),
	DEVMETHOD(device_detach,        bus_generic_detach),
	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
	DEVMETHOD(device_suspend,       xs_suspend),
	DEVMETHOD(device_resume,        xs_resume),

	/* Bus interface */
	DEVMETHOD(bus_add_child,        bus_generic_add_child),
	DEVMETHOD(bus_alloc_resource,   bus_generic_alloc_resource),
	DEVMETHOD(bus_release_resource, bus_generic_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),

	DEVMETHOD_END
};

/*
 * The softc size is given as 0 here; xs_attach() installs the static
 * xs structure as the softc with device_set_softc().
 */
DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0);

/* Register the driver; "xenpv" is presumably the parent bus - confirm. */
DRIVER_MODULE(xenstore, xenpv, xenstore_driver, 0, 0);
1240 
1241 /*------------------------------- Sysctl Data --------------------------------*/
1242 /* XXX Shouldn't the node be somewhere else? */
1243 SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1244     "Xen");
1245 SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, "");
1246 SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, "");
1247 
1248 /*-------------------------------- Public API --------------------------------*/
1249 /*------- API comments for these methods can be found in xenstorevar.h -------*/
1250 bool
1251 xs_initialized(void)
1252 {
1253 
1254 	return (xs.initialized);
1255 }
1256 
1257 evtchn_port_t
1258 xs_evtchn(void)
1259 {
1260 
1261     return (xs.evtchn);
1262 }
1263 
1264 vm_paddr_t
1265 xs_address(void)
1266 {
1267 
1268     return (ptoa(xs.gpfn));
1269 }
1270 
1271 int
1272 xs_directory(struct xs_transaction t, const char *dir, const char *node,
1273     u_int *num, const char ***result)
1274 {
1275 	struct sbuf *path;
1276 	char *strings;
1277 	u_int len = 0;
1278 	int error;
1279 
1280 	path = xs_join(dir, node);
1281 	error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len,
1282 	    (void **)&strings);
1283 	sbuf_delete(path);
1284 	if (error)
1285 		return (error);
1286 
1287 	*result = split(strings, len, num);
1288 
1289 	return (0);
1290 }
1291 
1292 int
1293 xs_exists(struct xs_transaction t, const char *dir, const char *node)
1294 {
1295 	const char **d;
1296 	int error, dir_n;
1297 
1298 	error = xs_directory(t, dir, node, &dir_n, &d);
1299 	if (error)
1300 		return (0);
1301 	free(d, M_XENSTORE);
1302 	return (1);
1303 }
1304 
1305 int
1306 xs_read(struct xs_transaction t, const char *dir, const char *node,
1307     u_int *len, void **result)
1308 {
1309 	struct sbuf *path;
1310 	void *ret;
1311 	int error;
1312 
1313 	path = xs_join(dir, node);
1314 	error = xs_single(t, XS_READ, sbuf_data(path), len, &ret);
1315 	sbuf_delete(path);
1316 	if (error)
1317 		return (error);
1318 	*result = ret;
1319 	return (0);
1320 }
1321 
1322 int
1323 xs_write(struct xs_transaction t, const char *dir, const char *node,
1324     const char *string)
1325 {
1326 	struct sbuf *path;
1327 	struct iovec iovec[2];
1328 	int error;
1329 
1330 	path = xs_join(dir, node);
1331 
1332 	iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path);
1333 	iovec[0].iov_len = sbuf_len(path) + 1;
1334 	iovec[1].iov_base = (void *)(uintptr_t) string;
1335 	iovec[1].iov_len = strlen(string);
1336 
1337 	error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
1338 	sbuf_delete(path);
1339 
1340 	return (error);
1341 }
1342 
1343 int
1344 xs_mkdir(struct xs_transaction t, const char *dir, const char *node)
1345 {
1346 	struct sbuf *path;
1347 	int ret;
1348 
1349 	path = xs_join(dir, node);
1350 	ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL);
1351 	sbuf_delete(path);
1352 
1353 	return (ret);
1354 }
1355 
1356 int
1357 xs_rm(struct xs_transaction t, const char *dir, const char *node)
1358 {
1359 	struct sbuf *path;
1360 	int ret;
1361 
1362 	path = xs_join(dir, node);
1363 	ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL);
1364 	sbuf_delete(path);
1365 
1366 	return (ret);
1367 }
1368 
1369 int
1370 xs_rm_tree(struct xs_transaction xbt, const char *base, const char *node)
1371 {
1372 	struct xs_transaction local_xbt;
1373 	struct sbuf *root_path_sbuf;
1374 	struct sbuf *cur_path_sbuf;
1375 	char *root_path;
1376 	char *cur_path;
1377 	const char **dir;
1378 	int error;
1379 
1380 retry:
1381 	root_path_sbuf = xs_join(base, node);
1382 	cur_path_sbuf  = xs_join(base, node);
1383 	root_path      = sbuf_data(root_path_sbuf);
1384 	cur_path       = sbuf_data(cur_path_sbuf);
1385 	dir            = NULL;
1386 	local_xbt.id   = 0;
1387 
1388 	if (xbt.id == 0) {
1389 		error = xs_transaction_start(&local_xbt);
1390 		if (error != 0)
1391 			goto out;
1392 		xbt = local_xbt;
1393 	}
1394 
1395 	while (1) {
1396 		u_int count;
1397 		u_int i;
1398 
1399 		error = xs_directory(xbt, cur_path, "", &count, &dir);
1400 		if (error)
1401 			goto out;
1402 
1403 		for (i = 0; i < count; i++) {
1404 			error = xs_rm(xbt, cur_path, dir[i]);
1405 			if (error == ENOTEMPTY) {
1406 				struct sbuf *push_dir;
1407 
1408 				/*
1409 				 * Descend to clear out this sub directory.
1410 				 * We'll return to cur_dir once push_dir
1411 				 * is empty.
1412 				 */
1413 				push_dir = xs_join(cur_path, dir[i]);
1414 				sbuf_delete(cur_path_sbuf);
1415 				cur_path_sbuf = push_dir;
1416 				cur_path = sbuf_data(cur_path_sbuf);
1417 				break;
1418 			} else if (error != 0) {
1419 				goto out;
1420 			}
1421 		}
1422 
1423 		free(dir, M_XENSTORE);
1424 		dir = NULL;
1425 
1426 		if (i == count) {
1427 			char *last_slash;
1428 
1429 			/* Directory is empty.  It is now safe to remove. */
1430 			error = xs_rm(xbt, cur_path, "");
1431 			if (error != 0)
1432 				goto out;
1433 
1434 			if (!strcmp(cur_path, root_path))
1435 				break;
1436 
1437 			/* Return to processing the parent directory. */
1438 			last_slash = strrchr(cur_path, '/');
1439 			KASSERT(last_slash != NULL,
1440 				("xs_rm_tree: mangled path %s", cur_path));
1441 			*last_slash = '\0';
1442 		}
1443 	}
1444 
1445 out:
1446 	sbuf_delete(cur_path_sbuf);
1447 	sbuf_delete(root_path_sbuf);
1448 	if (dir != NULL)
1449 		free(dir, M_XENSTORE);
1450 
1451 	if (local_xbt.id != 0) {
1452 		int terror;
1453 
1454 		terror = xs_transaction_end(local_xbt, /*abort*/error != 0);
1455 		xbt.id = 0;
1456 		if (terror == EAGAIN && error == 0)
1457 			goto retry;
1458 	}
1459 	return (error);
1460 }
1461 
1462 int
1463 xs_transaction_start(struct xs_transaction *t)
1464 {
1465 	char *id_str;
1466 	int error;
1467 
1468 	error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL,
1469 	    (void **)&id_str);
1470 	if (error == 0) {
1471 		t->id = strtoul(id_str, NULL, 0);
1472 		free(id_str, M_XENSTORE);
1473 	}
1474 	return (error);
1475 }
1476 
1477 int
1478 xs_transaction_end(struct xs_transaction t, int abort)
1479 {
1480 	char abortstr[2];
1481 
1482 	if (abort)
1483 		strcpy(abortstr, "F");
1484 	else
1485 		strcpy(abortstr, "T");
1486 
1487 	return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL));
1488 }
1489 
1490 int
1491 xs_scanf(struct xs_transaction t, const char *dir, const char *node,
1492      int *scancountp, const char *fmt, ...)
1493 {
1494 	va_list ap;
1495 	int error, ns;
1496 	char *val;
1497 
1498 	error = xs_read(t, dir, node, NULL, (void **) &val);
1499 	if (error)
1500 		return (error);
1501 
1502 	va_start(ap, fmt);
1503 	ns = vsscanf(val, fmt, ap);
1504 	va_end(ap);
1505 	free(val, M_XENSTORE);
1506 	/* Distinctive errno. */
1507 	if (ns == 0)
1508 		return (ERANGE);
1509 	if (scancountp)
1510 		*scancountp = ns;
1511 	return (0);
1512 }
1513 
1514 int
1515 xs_vprintf(struct xs_transaction t,
1516     const char *dir, const char *node, const char *fmt, va_list ap)
1517 {
1518 	struct sbuf *sb;
1519 	int error;
1520 
1521 	sb = sbuf_new_auto();
1522 	sbuf_vprintf(sb, fmt, ap);
1523 	sbuf_finish(sb);
1524 	error = xs_write(t, dir, node, sbuf_data(sb));
1525 	sbuf_delete(sb);
1526 
1527 	return (error);
1528 }
1529 
1530 int
1531 xs_printf(struct xs_transaction t, const char *dir, const char *node,
1532      const char *fmt, ...)
1533 {
1534 	va_list ap;
1535 	int error;
1536 
1537 	va_start(ap, fmt);
1538 	error = xs_vprintf(t, dir, node, fmt, ap);
1539 	va_end(ap);
1540 
1541 	return (error);
1542 }
1543 
1544 int
1545 xs_gather(struct xs_transaction t, const char *dir, ...)
1546 {
1547 	va_list ap;
1548 	const char *name;
1549 	int error;
1550 
1551 	va_start(ap, dir);
1552 	error = 0;
1553 	while (error == 0 && (name = va_arg(ap, char *)) != NULL) {
1554 		const char *fmt = va_arg(ap, char *);
1555 		void *result = va_arg(ap, void *);
1556 		char *p;
1557 
1558 		error = xs_read(t, dir, name, NULL, (void **) &p);
1559 		if (error)
1560 			break;
1561 
1562 		if (fmt) {
1563 			if (sscanf(p, fmt, result) == 0)
1564 				error = EINVAL;
1565 			free(p, M_XENSTORE);
1566 		} else
1567 			*(char **)result = p;
1568 	}
1569 	va_end(ap);
1570 
1571 	return (error);
1572 }
1573 
1574 int
1575 xs_register_watch(struct xs_watch *watch)
1576 {
1577 	/* Pointer in ascii is the token. */
1578 	char token[sizeof(watch) * 2 + 1];
1579 	int error;
1580 
1581 	watch->pending = 0;
1582 	sprintf(token, "%lX", (long)watch);
1583 
1584 	mtx_lock(&xs.registered_watches_lock);
1585 	KASSERT(find_watch(token) == NULL, ("watch already registered"));
1586 	LIST_INSERT_HEAD(&xs.registered_watches, watch, list);
1587 	mtx_unlock(&xs.registered_watches_lock);
1588 
1589 	error = xs_watch(watch->node, token);
1590 
1591 	/* Ignore errors due to multiple registration. */
1592 	if (error == EEXIST)
1593 		error = 0;
1594 
1595 	if (error != 0) {
1596 		mtx_lock(&xs.registered_watches_lock);
1597 		LIST_REMOVE(watch, list);
1598 		mtx_unlock(&xs.registered_watches_lock);
1599 	}
1600 
1601 	return (error);
1602 }
1603 
1604 void
1605 xs_unregister_watch(struct xs_watch *watch)
1606 {
1607 	struct xs_stored_msg *msg, *tmp;
1608 	char token[sizeof(watch) * 2 + 1];
1609 	int error;
1610 
1611 	sprintf(token, "%lX", (long)watch);
1612 
1613 	mtx_lock(&xs.registered_watches_lock);
1614 	if (find_watch(token) == NULL) {
1615 		mtx_unlock(&xs.registered_watches_lock);
1616 		return;
1617 	}
1618 	LIST_REMOVE(watch, list);
1619 	mtx_unlock(&xs.registered_watches_lock);
1620 
1621 	error = xs_unwatch(watch->node, token);
1622 	if (error)
1623 		log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n",
1624 		    watch->node, error);
1625 
1626 	/* Cancel pending watch events. */
1627 	mtx_lock(&xs.watch_events_lock);
1628 	TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) {
1629 		if (msg->u.watch.handle != watch)
1630 			continue;
1631 		TAILQ_REMOVE(&xs.watch_events, msg, list);
1632 		free(msg->u.watch.vec, M_XENSTORE);
1633 		free(msg, M_XENSTORE);
1634 	}
1635 	mtx_unlock(&xs.watch_events_lock);
1636 
1637 	/* Flush any currently-executing callback, unless we are it. :-) */
1638 	if (curproc->p_pid != xs.xenwatch_pid) {
1639 		sx_xlock(&xs.xenwatch_mutex);
1640 		sx_xunlock(&xs.xenwatch_mutex);
1641 	}
1642 }
1643 
1644 void
1645 xs_lock(void)
1646 {
1647 
1648 	sx_xlock(&xs.request_mutex);
1649 	return;
1650 }
1651 
1652 void
1653 xs_unlock(void)
1654 {
1655 
1656 	sx_xunlock(&xs.request_mutex);
1657 	return;
1658 }
1659