/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Note: This is the backend part of the split PV disk driver. This driver
 * is not a nexus driver, nor is it a leaf driver (block/char/stream driver).
 * Currently it does not create any minor node, so although it runs in the
 * backend domain, it cannot be used directly from within dom0.
 * It simply gets block I/O requests issued by the frontend from a shared page
 * (blkif ring buffer - defined by Xen) between the backend and frontend
 * domains, generates a buf, and pushes it down to the underlying disk target
 * driver via the LDI interface. When the buf is done, this driver generates
 * a response and puts it into the ring buffer to inform the frontend of the
 * status of the I/O request it issued. When a new virtual device entry is
 * added in xenstore, a watch event is sent from Xen to the xvdi framework,
 * which will, in turn, create the devinfo node and try to attach this driver
 * (see xvdi_create_dev). When the frontend peer changes its state to
 * XenbusStateClose, an event is also sent from Xen to the xvdi framework,
 * which will detach and remove this devinfo node (see
 * i_xvdi_oestate_handler). I/O requests read from the ring buffer and events
 * coming from xenstore cannot be trusted. We verify them in xdb_get_buf()
 * and xdb_check_state_transition().
 *
 * Virtual device configuration is read/written from/to the database via
 * xenbus_* interfaces. The driver also uses xvdi_* to interact with the
 * hypervisor. There is an on-going effort to make xvdi_* cover all xenbus_*.
 */
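
/*
 * For illustration only: a rough sketch of the xenstore layout this split
 * driver pair operates on. Exact paths and the <domid>/<devid> values vary
 * by toolstack and configuration; treat this as an assumption, not a spec.
 *
 *   .../backend/vbd/<domid>/<devid>/	written mostly by dom0/this driver:
 *	params		path of the file or device being exported
 *	mode		"r"/"ro" means read-only, otherwise read-write
 *	type		"file" means we front it with a lofi device
 *	sector-size, sectors, info, feature-barrier, instance, state
 *
 *   the frontend's device directory (see xvdi_get_oename()):
 *	ring-ref	grant reference of the shared blkif ring page
 *	event-channel	evtchn port used to kick this driver (xdb_intr)
 */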

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/dditypes.h>
#include <sys/sunddi.h>
#include <sys/list.h>
#include <sys/dkio.h>
#include <sys/cmlb.h>
#include <sys/vtoc.h>
#include <sys/modctl.h>
#include <sys/bootconf.h>
#include <sys/promif.h>
#include <sys/sysmacros.h>
#include <public/io/xenbus.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/gnttab.h>
#include <sys/scsi/generic/inquiry.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <sys/lofi.h>
#include <io/xdf.h>
#include <io/xdb.h>

static xdb_t *xdb_statep;
static int xdb_debug = 0;

#ifdef DEBUG
/*
 * debug aid functions
 */

static void
logva(xdb_t *vdp, uint64_t va)
{
	uint64_t *page_addrs;
	int i;

	page_addrs = vdp->page_addrs;
	for (i = 0; i < XDB_MAX_IO_PAGES; i++) {
		if (page_addrs[i] == va)
			debug_enter("VA remapping found!");
	}

	for (i = 0; i < XDB_MAX_IO_PAGES; i++) {
		if (page_addrs[i] == 0) {
			page_addrs[i] = va;
			break;
		}
	}
	ASSERT(i < XDB_MAX_IO_PAGES);
}

static void
unlogva(xdb_t *vdp, uint64_t va)
{
	uint64_t *page_addrs;
	int i;

	page_addrs = vdp->page_addrs;
	for (i = 0; i < XDB_MAX_IO_PAGES; i++) {
		if (page_addrs[i] == va) {
			page_addrs[i] = 0;
			break;
		}
	}
	ASSERT(i < XDB_MAX_IO_PAGES);
}

static void
xdb_dump_request_oe(blkif_request_t *req)
{
	int i;

	/*
	 * Exploit the public interface definitions for BLKIF_OP_READ
	 * etc.
	 */
	char *op_name[] = { "read", "write", "barrier", "flush" };

	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "op=%s", op_name[req->operation]));
	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "num of segments=%d",
	    req->nr_segments));
	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "handle=%d", req->handle));
	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "id=%llu",
	    (unsigned long long)req->id));
	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "start sector=%llu",
	    (unsigned long long)req->sector_number));
	for (i = 0; i < req->nr_segments; i++) {
		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "gref=%d, first sec=%d, "
		    "last sec=%d", req->seg[i].gref, req->seg[i].first_sect,
		    req->seg[i].last_sect));
	}
}
#endif /* DEBUG */

/*
 * Statistics.
 */
static char *xdb_stats[] = {
	"rd_reqs",
	"wr_reqs",
	"br_reqs",
	"fl_reqs",
	"oo_reqs"
};

static int
xdb_kstat_update(kstat_t *ksp, int flag)
{
	xdb_t *vdp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	vdp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order should match that of the names in
	 * xdb_stats.
	 */
	(knp++)->value.ui64 = vdp->xs_stat_req_reads;
	(knp++)->value.ui64 = vdp->xs_stat_req_writes;
	(knp++)->value.ui64 = vdp->xs_stat_req_barriers;
	(knp++)->value.ui64 = vdp->xs_stat_req_flushes;
	(knp++)->value.ui64 = 0; /* oo_reqs */

	return (0);
}

static boolean_t
xdb_kstat_init(xdb_t *vdp)
{
	int nstat = sizeof (xdb_stats) / sizeof (xdb_stats[0]);
	char **cp = xdb_stats;
	kstat_named_t *knp;

	if ((vdp->xs_kstats = kstat_create("xdb",
	    ddi_get_instance(vdp->xs_dip),
	    "req_statistics", "block", KSTAT_TYPE_NAMED,
	    nstat, 0)) == NULL)
		return (B_FALSE);

	vdp->xs_kstats->ks_private = vdp;
	vdp->xs_kstats->ks_update = xdb_kstat_update;

	knp = vdp->xs_kstats->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
		knp++;
		cp++;
		nstat--;
	}

	kstat_install(vdp->xs_kstats);

	return (B_TRUE);
}

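/*
 * A minimal usage sketch (not part of this driver): the named kstat
 * published above can be read from dom0 with kstat(1M), e.g.
 *
 *	# kstat -m xdb -n req_statistics
 *
 * which should list rd_reqs/wr_reqs/br_reqs/fl_reqs/oo_reqs as UINT64
 * counters, filled in by xdb_kstat_update() on each read.
 */
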
static int xdb_biodone(buf_t *);

static buf_t *
xdb_get_buf(xdb_t *vdp, blkif_request_t *req, xdb_request_t *xreq)
{
	buf_t *bp;
	uint8_t segs, curseg;
	int sectors;
	int i, err;
	gnttab_map_grant_ref_t mapops[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	ddi_acc_handle_t acchdl;

	acchdl = vdp->xs_ring_hdl;
	bp = XDB_XREQ2BP(xreq);
	curseg = xreq->xr_curseg;
	/* init a new xdb request */
	if (req != NULL) {
		ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
		boolean_t pagemapok = B_TRUE;
		uint8_t op = ddi_get8(acchdl, &req->operation);

		xreq->xr_vdp = vdp;
		xreq->xr_op = op;
		xreq->xr_id = ddi_get64(acchdl, &req->id);
		segs = xreq->xr_buf_pages = ddi_get8(acchdl, &req->nr_segments);
		if (segs == 0) {
			if (op != BLKIF_OP_FLUSH_DISKCACHE)
				cmn_err(CE_WARN, "!non-BLKIF_OP_FLUSH_DISKCACHE"
				    " is seen from domain %d with zero "
				    "length data buffer!", vdp->xs_peer);
			bioinit(bp);
			bp->b_bcount = 0;
			bp->b_lblkno = 0;
			bp->b_un.b_addr = NULL;
			return (bp);
		} else if (op == BLKIF_OP_FLUSH_DISKCACHE) {
			cmn_err(CE_WARN, "!BLKIF_OP_FLUSH_DISKCACHE"
			    " is seen from domain %d with non-zero "
			    "length data buffer!", vdp->xs_peer);
		}

		/*
		 * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST
		 * according to the definition of the blk interface by Xen,
		 * so we sanity check it here
		 */
		if (segs > BLKIF_MAX_SEGMENTS_PER_REQUEST)
			segs = xreq->xr_buf_pages =
			    BLKIF_MAX_SEGMENTS_PER_REQUEST;

		for (i = 0; i < segs; i++) {
			uint8_t fs, ls;

			mapops[i].host_addr =
			    (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
			    vdp->xs_iopage_va, xreq->xr_idx, i);
			mapops[i].dom = vdp->xs_peer;
			mapops[i].ref = ddi_get32(acchdl, &req->seg[i].gref);
			mapops[i].flags = GNTMAP_host_map;
			if (op != BLKIF_OP_READ)
				mapops[i].flags |= GNTMAP_readonly;

			fs = ddi_get8(acchdl, &req->seg[i].first_sect);
			ls = ddi_get8(acchdl, &req->seg[i].last_sect);

			/*
			 * first_sect should be no bigger than last_sect and
			 * both of them should be no bigger than
			 * (PAGESIZE / XB_BSIZE - 1) according to the
			 * definition of the blk interface by Xen, so we
			 * sanity check them again
			 */
			if (fs > (PAGESIZE / XB_BSIZE - 1))
				fs = PAGESIZE / XB_BSIZE - 1;
			if (ls > (PAGESIZE / XB_BSIZE - 1))
				ls = PAGESIZE / XB_BSIZE - 1;
			if (fs > ls)
				fs = ls;

			xreq->xr_segs[i].fs = fs;
			xreq->xr_segs[i].ls = ls;
		}

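		/*
		 * Worked example of the clamping above (assuming the usual
		 * PAGESIZE of 4096 and XB_BSIZE of 512, so valid sector
		 * indexes within a page are 0..7): an untrusted request
		 * carrying fs=9, ls=3 is first clamped to fs=7, ls=3 and
		 * then to fs=ls=3, so a hostile frontend can at worst
		 * degrade its own I/O, never address beyond the page.
		 */
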
		/* map in io pages */
		err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
		    mapops, i);
		if (err != 0)
			return (NULL);
		for (i = 0; i < segs; i++) {
			/*
			 * Although HYPERVISOR_grant_table_op() returned no
			 * error, the mapping of each single page can fail.
			 * So we have to check here and handle the error
			 * if needed
			 */
			if (mapops[i].status != GNTST_okay) {
				int j;
				for (j = 0; j < i; j++) {
#ifdef DEBUG
					unlogva(vdp, mapops[j].host_addr);
#endif
					xen_release_pfn(
					    xreq->xr_plist[j].p_pagenum);
				}
				pagemapok = B_FALSE;
				break;
			}
			/* record page mapping handle for unmapping later */
			xreq->xr_page_hdls[i] = mapops[i].handle;
#ifdef DEBUG
			logva(vdp, mapops[i].host_addr);
#endif
			/*
			 * Pass the MFNs down using the shadow list (xr_pplist)
			 *
			 * This is pretty ugly since we have implicit knowledge
			 * of how the rootnex binds buffers.
			 * The GNTTABOP_map_grant_ref op makes us do some ugly
			 * stuff since we're not allowed to touch these PTEs
			 * from the VM.
			 *
			 * Obviously, these aren't real page_t's. The rootnex
			 * only needs p_pagenum.
			 * Also, don't use btop() here or 32 bit PAE breaks.
			 */
			xreq->xr_pplist[i] = &xreq->xr_plist[i];
			xreq->xr_plist[i].p_pagenum =
			    xen_assign_pfn(mapops[i].dev_bus_addr >> PAGESHIFT);
		}

		/*
		 * Not all pages were mapped in successfully; unmap the
		 * pages that were mapped in and return failure
		 */
		if (!pagemapok) {
			gnttab_unmap_grant_ref_t unmapop;

			for (i = 0; i < segs; i++) {
				if (mapops[i].status != GNTST_okay)
					continue;
				unmapop.host_addr =
				    (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
				    vdp->xs_iopage_va, xreq->xr_idx, i);
				unmapop.dev_bus_addr = 0;
				unmapop.handle = mapops[i].handle;
				(void) HYPERVISOR_grant_table_op(
				    GNTTABOP_unmap_grant_ref, &unmapop, 1);
			}

			return (NULL);
		}
		bioinit(bp);
		bp->b_lblkno = ddi_get64(acchdl, &req->sector_number);
		bp->b_flags = B_BUSY | B_SHADOW | B_PHYS;
		bp->b_flags |= (ddi_get8(acchdl, &req->operation) ==
		    BLKIF_OP_READ) ? B_READ : (B_WRITE | B_ASYNC);
	} else {
		uint64_t blkst;
		int isread;

		/* reuse this buf */
		blkst = bp->b_lblkno + bp->b_bcount / DEV_BSIZE;
		isread = bp->b_flags & B_READ;
		bioreset(bp);
		bp->b_lblkno = blkst;
		bp->b_flags = B_BUSY | B_SHADOW | B_PHYS;
		bp->b_flags |= isread ? B_READ : (B_WRITE | B_ASYNC);
		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "reuse buf, xreq is %d!!",
		    xreq->xr_idx));
	}

	/* form a buf */
	bp->b_un.b_addr = XDB_IOPAGE_VA(vdp->xs_iopage_va, xreq->xr_idx,
	    curseg) + xreq->xr_segs[curseg].fs * DEV_BSIZE;
	bp->b_shadow = &xreq->xr_pplist[curseg];
	bp->b_iodone = xdb_biodone;
	sectors = 0;
	for (i = curseg; i < xreq->xr_buf_pages; i++) {
		/*
		 * Only the first seg's xr_segs[i].fs may be non-zero;
		 * if a later seg doesn't start at sector 0 of its page,
		 * we break the request into multiple bufs
		 */
		if ((i != curseg) && (xreq->xr_segs[i].fs != 0)) {
			break;
		}
		sectors += (xreq->xr_segs[i].ls - xreq->xr_segs[i].fs + 1);
	}
	xreq->xr_curseg = i;
	bp->b_bcount = sectors * DEV_BSIZE;
	bp->b_bufsize = bp->b_bcount;

	return (bp);
}
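
/*
 * Illustrative note (derived from the code above, for the reader): if a
 * request carries, say, two segments and the second one has fs != 0, the
 * loop above stops at the discontiguity, so the first xdb_get_buf() call
 * covers only seg 0. When that buf completes, xdb_biodone() notices that
 * xr_curseg is still less than xr_buf_pages and calls
 * xdb_get_buf(vdp, NULL, xreq) to build the next buf, which resumes at the
 * saved xr_curseg with b_lblkno continuing right after the previous buf.
 * One blkif request may thus fan out into several sequential bufs sent to
 * the underlying device.
 */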

static xdb_request_t *
xdb_get_req(xdb_t *vdp)
{
	xdb_request_t *req;
	int idx;

	ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
	ASSERT(vdp->xs_free_req != -1);
	req = &vdp->xs_req[vdp->xs_free_req];
	vdp->xs_free_req = req->xr_next;
	idx = req->xr_idx;
	bzero(req, sizeof (xdb_request_t));
	req->xr_idx = idx;
	return (req);
}

static void
xdb_free_req(xdb_request_t *req)
{
	xdb_t *vdp = req->xr_vdp;

	ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
	req->xr_next = vdp->xs_free_req;
	vdp->xs_free_req = req->xr_idx;
}

static void
xdb_response(xdb_t *vdp, blkif_request_t *req, boolean_t ok)
{
	xendev_ring_t *ringp = vdp->xs_ring;
	ddi_acc_handle_t acchdl = vdp->xs_ring_hdl;
	blkif_response_t *resp;

	resp = xvdi_ring_get_response(ringp);
	ASSERT(resp);

	ddi_put64(acchdl, &resp->id, ddi_get64(acchdl, &req->id));
	ddi_put8(acchdl, &resp->operation, ddi_get8(acchdl, &req->operation));
	ddi_put16(acchdl, (uint16_t *)&resp->status,
	    ok ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
	if (xvdi_ring_push_response(ringp))
		xvdi_notify_oe(vdp->xs_dip);
}

static void
xdb_init_ioreqs(xdb_t *vdp)
{
	int i;

	for (i = 0; i < BLKIF_RING_SIZE; i++) {
		vdp->xs_req[i].xr_idx = i;
		vdp->xs_req[i].xr_next = i + 1;
	}
	vdp->xs_req[BLKIF_RING_SIZE - 1].xr_next = -1;
	vdp->xs_free_req = 0;

	/* alloc va in host dom for io page mapping */
	vdp->xs_iopage_va = vmem_xalloc(heap_arena,
	    XDB_MAX_IO_PAGES * PAGESIZE, PAGESIZE, 0, 0, 0, 0,
	    VM_SLEEP);
	for (i = 0; i < XDB_MAX_IO_PAGES; i++)
		hat_prepare_mapping(kas.a_hat,
		    vdp->xs_iopage_va + i * PAGESIZE);
}
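
/*
 * Free-list sketch: xs_req[] entries are threaded together by index rather
 * than by pointer. Right after xdb_init_ioreqs() the list reads
 *
 *	xs_free_req = 0, xr_next chain: 0 -> 1 -> ... -> BLKIF_RING_SIZE-1 -> -1
 *
 * xdb_get_req() pops from the head and xdb_free_req() pushes back onto it,
 * both under xs_iomutex, so allocation is O(1) with no extra locking state.
 */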

static void
xdb_uninit_ioreqs(xdb_t *vdp)
{
	int i;

	for (i = 0; i < XDB_MAX_IO_PAGES; i++)
		hat_release_mapping(kas.a_hat,
		    vdp->xs_iopage_va + i * PAGESIZE);
	vmem_xfree(heap_arena, vdp->xs_iopage_va,
	    XDB_MAX_IO_PAGES * PAGESIZE);
}

static uint_t
xdb_intr(caddr_t arg)
{
	xendev_ring_t *ringp;
	blkif_request_t *req;
	xdb_request_t *xreq;
	buf_t *bp;
	uint8_t op;
	xdb_t *vdp = (xdb_t *)arg;
	int ret = DDI_INTR_UNCLAIMED;
	dev_info_t *dip = vdp->xs_dip;

	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
	    "xdb@%s: I/O request received from dom %d",
	    ddi_get_name_addr(dip), vdp->xs_peer));

	mutex_enter(&vdp->xs_iomutex);

	/* shouldn't touch ring buffer if not in connected state */
	if (vdp->xs_if_status != XDB_CONNECTED) {
		mutex_exit(&vdp->xs_iomutex);
		return (DDI_INTR_UNCLAIMED);
	}

	ringp = vdp->xs_ring;

	/*
	 * We'll loop until there are no more requests in the ring.
	 * We won't be stuck in this loop forever, since the size of the
	 * ring buffer is limited and the frontend will stop pushing
	 * requests into it when the ring buffer is full
	 */

	/* req_event will be increased in xvdi_ring_get_request() */
	while ((req = xvdi_ring_get_request(ringp)) != NULL) {
		ret = DDI_INTR_CLAIMED;

		op = ddi_get8(vdp->xs_ring_hdl, &req->operation);
		if (op == BLKIF_OP_READ			||
		    op == BLKIF_OP_WRITE		||
		    op == BLKIF_OP_WRITE_BARRIER	||
		    op == BLKIF_OP_FLUSH_DISKCACHE) {
#ifdef DEBUG
			xdb_dump_request_oe(req);
#endif
			xreq = xdb_get_req(vdp);
			ASSERT(xreq);
			switch (op) {
			case BLKIF_OP_READ:
				vdp->xs_stat_req_reads++;
				break;
			case BLKIF_OP_WRITE_BARRIER:
				vdp->xs_stat_req_barriers++;
				/* FALLTHRU */
			case BLKIF_OP_WRITE:
				vdp->xs_stat_req_writes++;
				break;
			case BLKIF_OP_FLUSH_DISKCACHE:
				vdp->xs_stat_req_flushes++;
				break;
			}

			xreq->xr_curseg = 0; /* start from first segment */
			bp = xdb_get_buf(vdp, req, xreq);
			if (bp == NULL) {
				/* failed to form a buf */
				xdb_free_req(xreq);
				xdb_response(vdp, req, B_FALSE);
				continue;
			}
			bp->av_forw = NULL;

			XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
			    " buf %p, blkno %lld, size %lu, addr %p",
			    (void *)bp, (longlong_t)bp->b_blkno,
			    (ulong_t)bp->b_bcount, (void *)bp->b_un.b_addr));

			/* send bp to underlying blk driver */
			if (vdp->xs_f_iobuf == NULL) {
				vdp->xs_f_iobuf = vdp->xs_l_iobuf = bp;
			} else {
				vdp->xs_l_iobuf->av_forw = bp;
				vdp->xs_l_iobuf = bp;
			}
			vdp->xs_ionum++;
		} else {
			xdb_response(vdp, req, B_FALSE);
			XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: "
			    "Unsupported cmd received from dom %d",
			    ddi_get_name_addr(dip), vdp->xs_peer));
		}
	}
	/* notify our taskq to push buf to underlying blk driver */
	if (ret == DDI_INTR_CLAIMED)
		cv_broadcast(&vdp->xs_iocv);

	mutex_exit(&vdp->xs_iomutex);

	return (ret);
}

static int
xdb_biodone(buf_t *bp)
{
	blkif_response_t *resp;
	int i, err, bioerr;
	uint8_t segs;
	gnttab_unmap_grant_ref_t unmapops[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	xdb_request_t *xreq = XDB_BP2XREQ(bp);
	xdb_t *vdp = xreq->xr_vdp;
	xendev_ring_t *ringp = vdp->xs_ring;
	ddi_acc_handle_t acchdl = vdp->xs_ring_hdl;
	buf_t *nbp;

	bioerr = geterror(bp);
	if (bioerr)
		XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: I/O error %d",
		    ddi_get_name_addr(vdp->xs_dip), bioerr));

	/* check if we are done w/ this I/O request */
	if ((bioerr == 0) && (xreq->xr_curseg < xreq->xr_buf_pages)) {
		nbp = xdb_get_buf(vdp, NULL, xreq);
		if (nbp) {
			err = ldi_strategy(vdp->xs_ldi_hdl, nbp);
			if (err == 0) {
				XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
				    "sent buf to backend ok"));
				return (DDI_SUCCESS);
			}
			bioerr = EIO;
			XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: "
			    "sent buf to backend dev failed, err=%d",
			    ddi_get_name_addr(vdp->xs_dip), err));
		} else {
			bioerr = EIO;
		}
	}

	/* unmap io pages */
	segs = xreq->xr_buf_pages;
	/*
	 * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST
	 * according to the definition of the blk interface by Xen
	 */
	ASSERT(segs <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
	for (i = 0; i < segs; i++) {
		unmapops[i].host_addr = (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
		    vdp->xs_iopage_va, xreq->xr_idx, i);
#ifdef DEBUG
		mutex_enter(&vdp->xs_iomutex);
		unlogva(vdp, unmapops[i].host_addr);
		mutex_exit(&vdp->xs_iomutex);
#endif
		unmapops[i].dev_bus_addr = 0;
		unmapops[i].handle = xreq->xr_page_hdls[i];
	}
	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
	    unmapops, segs);
	ASSERT(!err);

	/*
	 * If we have reached a barrier write or a cache flush, then we must
	 * flush all our I/Os.
	 */
	if (xreq->xr_op == BLKIF_OP_WRITE_BARRIER ||
	    xreq->xr_op == BLKIF_OP_FLUSH_DISKCACHE) {
		/*
		 * XXX At this point the write did succeed, so I don't
		 * believe we should report an error because the flush
		 * failed. However, this is a debatable point, so
		 * maybe we need to think more carefully about this.
		 * For now, just cast to void.
		 */
		(void) ldi_ioctl(vdp->xs_ldi_hdl,
		    DKIOCFLUSHWRITECACHE, NULL, FKIOCTL, kcred, NULL);
	}

	mutex_enter(&vdp->xs_iomutex);

	/* send response back to frontend */
	if (vdp->xs_if_status == XDB_CONNECTED) {
		resp = xvdi_ring_get_response(ringp);
		ASSERT(resp);
		ddi_put64(acchdl, &resp->id, xreq->xr_id);
		ddi_put8(acchdl, &resp->operation, xreq->xr_op);
		ddi_put16(acchdl, (uint16_t *)&resp->status,
		    bioerr ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY);
		if (xvdi_ring_push_response(ringp))
			xvdi_notify_oe(vdp->xs_dip);
		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
		    "sent resp back to frontend, id=%llu",
		    (unsigned long long)xreq->xr_id));
	}
	/* free io resources */
	biofini(bp);
	xdb_free_req(xreq);

	vdp->xs_ionum--;
	if ((vdp->xs_if_status != XDB_CONNECTED) && (vdp->xs_ionum == 0))
		/* we're closing, someone is waiting for I/O clean-up */
		cv_signal(&vdp->xs_ionumcv);

	mutex_exit(&vdp->xs_iomutex);

	return (DDI_SUCCESS);
}

static int
xdb_bindto_frontend(xdb_t *vdp)
{
	int err;
	char *oename;
	grant_ref_t gref;
	evtchn_port_t evtchn;
	dev_info_t *dip = vdp->xs_dip;

	/*
	 * Gather info from frontend
	 */
	oename = xvdi_get_oename(dip);
	if (oename == NULL)
		return (DDI_FAILURE);

	err = xenbus_gather(XBT_NULL, oename,
	    "ring-ref", "%lu", &gref, "event-channel", "%u", &evtchn, NULL);
	if (err != 0) {
		xvdi_fatal_error(dip, err,
		    "Getting ring-ref and evtchn from frontend");
		return (DDI_FAILURE);
	}

	/*
	 * map and init ring
	 */
	err = xvdi_map_ring(dip, BLKIF_RING_SIZE,
	    sizeof (union blkif_sring_entry), gref, &vdp->xs_ring);
	if (err != DDI_SUCCESS)
		return (DDI_FAILURE);
	/*
	 * This will be removed once we use a shadow I/O ring request,
	 * since then we won't need to access the ring itself directly
	 * and thus the access handle will not be needed
	 */
	vdp->xs_ring_hdl = vdp->xs_ring->xr_acc_hdl;

	/*
	 * bind event channel
	 */
	err = xvdi_bind_evtchn(dip, evtchn);
	if (err != DDI_SUCCESS) {
		xvdi_unmap_ring(vdp->xs_ring);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

static void
xdb_unbindfrom_frontend(xdb_t *vdp)
{
	xvdi_free_evtchn(vdp->xs_dip);
	xvdi_unmap_ring(vdp->xs_ring);
}

#define	LOFI_CTRL_NODE	"/dev/lofictl"
#define	LOFI_DEV_NODE	"/devices/pseudo/lofi@0:"
#define	LOFI_MODE	(FREAD | FWRITE | FEXCL)

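/*
 * Aside (an analogy, not something this driver calls): mapping a file
 * through /dev/lofictl with the LOFI_MAP_FILE ioctl below is the in-kernel
 * counterpart of what "lofiadm -a <file>" does from userland, and
 * LOFI_UNMAP_FILE in xdb_teardown_node() corresponds to "lofiadm -d".
 */
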
static int
xdb_setup_node(xdb_t *vdp, char *path)
{
	dev_info_t *dip;
	char *xsnode, *node;
	ldi_handle_t ldi_hdl;
	struct lofi_ioctl *li;
	int minor;
	int err;
	unsigned int len;

	dip = vdp->xs_dip;
	xsnode = xvdi_get_xsname(dip);
	if (xsnode == NULL)
		return (DDI_FAILURE);

	err = xenbus_read(XBT_NULL, xsnode, "params", (void **)&node, &len);
	if (err != 0) {
		xvdi_fatal_error(vdp->xs_dip, err, "reading 'params'");
		return (DDI_FAILURE);
	}

	if (!XDB_IS_LOFI(vdp)) {
		(void) strlcpy(path, node, MAXPATHLEN + 1);
		kmem_free(node, len);
		return (DDI_SUCCESS);
	}

	do {
		err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred,
		    &ldi_hdl, vdp->xs_ldi_li);
	} while (err == EBUSY);
	if (err != 0) {
		kmem_free(node, len);
		return (DDI_FAILURE);
	}

	li = kmem_zalloc(sizeof (*li), KM_SLEEP);
	(void) strlcpy(li->li_filename, node, MAXPATHLEN + 1);
	kmem_free(node, len);
	if (ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li,
	    LOFI_MODE | FKIOCTL, kcred, &minor) != 0) {
		cmn_err(CE_WARN, "xdb@%s: Failed to create lofi dev for %s",
		    ddi_get_name_addr(dip), li->li_filename);
		(void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
		kmem_free(li, sizeof (*li));
		return (DDI_FAILURE);
	}
	/*
	 * return '/devices/...' instead of '/dev/lofi/...' since the
	 * former is available immediately after calling ldi_ioctl
	 */
	(void) snprintf(path, MAXPATHLEN + 1, LOFI_DEV_NODE "%d", minor);
	(void) xenbus_printf(XBT_NULL, xsnode, "node", "%s", path);
	(void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
	kmem_free(li, sizeof (*li));
	return (DDI_SUCCESS);
}

static void
xdb_teardown_node(xdb_t *vdp)
{
	dev_info_t *dip;
	char *xsnode, *node;
	ldi_handle_t ldi_hdl;
	struct lofi_ioctl *li;
	int err;
	unsigned int len;

	if (!XDB_IS_LOFI(vdp))
		return;

	dip = vdp->xs_dip;
	xsnode = xvdi_get_xsname(dip);
	if (xsnode == NULL)
		return;

	err = xenbus_read(XBT_NULL, xsnode, "params", (void **)&node, &len);
	if (err != 0) {
		xvdi_fatal_error(vdp->xs_dip, err, "reading 'params'");
		return;
	}

	li = kmem_zalloc(sizeof (*li), KM_SLEEP);
	(void) strlcpy(li->li_filename, node, MAXPATHLEN + 1);
	kmem_free(node, len);

	do {
		err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred,
		    &ldi_hdl, vdp->xs_ldi_li);
	} while (err == EBUSY);

	if (err != 0) {
		kmem_free(li, sizeof (*li));
		return;
	}

	if (ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE, (intptr_t)li,
	    LOFI_MODE | FKIOCTL, kcred, NULL) != 0) {
		cmn_err(CE_WARN, "xdb@%s: Failed to delete lofi dev for %s",
		    ddi_get_name_addr(dip), li->li_filename);
	}

	(void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
	kmem_free(li, sizeof (*li));
}

static int
xdb_open_device(xdb_t *vdp)
{
	uint64_t devsize;
	dev_info_t *dip;
	char *xsnode;
	char *nodepath;
	char *mode = NULL;
	char *type = NULL;
	int err;

	dip = vdp->xs_dip;
	xsnode = xvdi_get_xsname(dip);
	if (xsnode == NULL)
		return (DDI_FAILURE);

	err = xenbus_gather(XBT_NULL, xsnode,
	    "mode", NULL, &mode, "type", NULL, &type, NULL);
	if (err != 0) {
		if (mode)
			kmem_free(mode, strlen(mode) + 1);
		if (type)
			kmem_free(type, strlen(type) + 1);
		xvdi_fatal_error(dip, err,
		    "Getting mode and type from backend device");
		return (DDI_FAILURE);
	}
	if (strcmp(type, "file") == 0) {
		vdp->xs_type |= XDB_DEV_LOFI;
	}
	kmem_free(type, strlen(type) + 1);
	if ((strcmp(mode, "r") == 0) || (strcmp(mode, "ro") == 0)) {
		vdp->xs_type |= XDB_DEV_RO;
	}
	kmem_free(mode, strlen(mode) + 1);

	/*
	 * try to open backend device
	 */
	if (ldi_ident_from_dip(dip, &vdp->xs_ldi_li) != 0)
		return (DDI_FAILURE);

	nodepath = kmem_zalloc(MAXPATHLEN + 1, KM_SLEEP);
	err = xdb_setup_node(vdp, nodepath);
	if (err != DDI_SUCCESS) {
		xvdi_fatal_error(dip, err,
		    "Getting device path of backend device");
		ldi_ident_release(vdp->xs_ldi_li);
		kmem_free(nodepath, MAXPATHLEN + 1);
		return (DDI_FAILURE);
	}

	if (ldi_open_by_name(nodepath,
	    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE),
	    kcred, &vdp->xs_ldi_hdl, vdp->xs_ldi_li) != 0) {
		xdb_teardown_node(vdp);
		ldi_ident_release(vdp->xs_ldi_li);
		cmn_err(CE_WARN, "xdb@%s: Failed to open: %s",
		    ddi_get_name_addr(dip), nodepath);
		kmem_free(nodepath, MAXPATHLEN + 1);
		return (DDI_FAILURE);
	}

	/* check if it's a CD/DVD disc */
	if (ldi_prop_get_int(vdp->xs_ldi_hdl, LDI_DEV_T_ANY | DDI_PROP_DONTPASS,
	    "inquiry-device-type", DTYPE_DIRECT) == DTYPE_RODIRECT)
		vdp->xs_type |= XDB_DEV_CD;
	/* check if it's a removable disk */
	if (ldi_prop_exists(vdp->xs_ldi_hdl,
	    LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "removable-media"))
		vdp->xs_type |= XDB_DEV_RMB;

	if (ldi_get_size(vdp->xs_ldi_hdl, &devsize) != DDI_SUCCESS) {
		(void) ldi_close(vdp->xs_ldi_hdl,
		    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
		xdb_teardown_node(vdp);
		ldi_ident_release(vdp->xs_ldi_li);
		kmem_free(nodepath, MAXPATHLEN + 1);
		return (DDI_FAILURE);
	}
	vdp->xs_sectors = devsize / XB_BSIZE;

	kmem_free(nodepath, MAXPATHLEN + 1);
	return (DDI_SUCCESS);
}

static void
xdb_close_device(xdb_t *vdp)
{
	(void) ldi_close(vdp->xs_ldi_hdl,
	    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
	xdb_teardown_node(vdp);
	ldi_ident_release(vdp->xs_ldi_li);
	vdp->xs_ldi_li = NULL;
	vdp->xs_ldi_hdl = NULL;
}

/*
 * Kick-off connect process
 * If xs_fe_status == XDB_FE_READY and xs_dev_status == XDB_DEV_READY,
 * xs_if_status will be changed to XDB_CONNECTED on success;
 * otherwise, xs_if_status will not be changed
 */
static int
xdb_start_connect(xdb_t *vdp)
{
	uint32_t dinfo;
	xenbus_transaction_t xbt;
	int err, svdst;
	char *xsnode;
	dev_info_t *dip = vdp->xs_dip;
	char *barrier;
	uint_t len;

	/*
	 * Start connecting to the frontend only when the backend device is
	 * ready and the frontend has moved to XenbusStateInitialised, which
	 * means it is ready to connect
	 */
	ASSERT((vdp->xs_fe_status == XDB_FE_READY) &&
	    (vdp->xs_dev_status == XDB_DEV_READY));

	if (((xsnode = xvdi_get_xsname(dip)) == NULL)		 ||
	    ((vdp->xs_peer = xvdi_get_oeid(dip)) == (domid_t)-1) ||
	    (xdb_open_device(vdp) != DDI_SUCCESS))
		return (DDI_FAILURE);

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialised);

	if (xdb_bindto_frontend(vdp) != DDI_SUCCESS)
		goto errout1;

	/* init i/o requests */
	xdb_init_ioreqs(vdp);

	if (ddi_add_intr(dip, 0, NULL, NULL, xdb_intr, (caddr_t)vdp)
	    != DDI_SUCCESS)
		goto errout2;

	/*
	 * we can receive intrs any time from now on,
	 * so mark that we're ready to take intrs
	 */
	mutex_enter(&vdp->xs_iomutex);
	/*
	 * save the status in case we need to restore it when we
	 * fail to write to xenstore later
	 */
	svdst = vdp->xs_if_status;
	vdp->xs_if_status = XDB_CONNECTED;
	mutex_exit(&vdp->xs_iomutex);

	/* write into xenstore the info needed by frontend */
trans_retry:
	if (xenbus_transaction_start(&xbt)) {
		xvdi_fatal_error(dip, EIO, "transaction start");
		goto errout3;
	}

	/*
	 * If feature-barrier isn't present in xenstore, add it.
	 */
	if (xenbus_read(xbt, xsnode, "feature-barrier",
	    (void **)&barrier, &len) != 0) {
		if ((err = xenbus_printf(xbt, xsnode, "feature-barrier",
		    "%d", 1)) != 0) {
			cmn_err(CE_WARN, "xdb@%s: failed to write "
			    "'feature-barrier'", ddi_get_name_addr(dip));
			xvdi_fatal_error(dip, err, "writing 'feature-barrier'");
			goto abort_trans;
		}
	} else
		kmem_free(barrier, len);

	dinfo = 0;
	if (XDB_IS_RO(vdp))
		dinfo |= VDISK_READONLY;
	if (XDB_IS_CD(vdp))
		dinfo |= VDISK_CDROM;
	if (XDB_IS_RMB(vdp))
		dinfo |= VDISK_REMOVABLE;
	if (err = xenbus_printf(xbt, xsnode, "info", "%u", dinfo)) {
		xvdi_fatal_error(dip, err, "writing 'info'");
		goto abort_trans;
	}

	/* hard-coded 512-byte sector size */
	if (err = xenbus_printf(xbt, xsnode, "sector-size", "%u", DEV_BSIZE)) {
		xvdi_fatal_error(dip, err, "writing 'sector-size'");
		goto abort_trans;
	}

	if (err = xenbus_printf(xbt, xsnode, "sectors", "%"PRIu64,
	    vdp->xs_sectors)) {
		xvdi_fatal_error(dip, err, "writing 'sectors'");
		goto abort_trans;
	}

	if (err = xenbus_printf(xbt, xsnode, "instance", "%d",
	    ddi_get_instance(dip))) {
		xvdi_fatal_error(dip, err, "writing 'instance'");
		goto abort_trans;
	}

	if ((err = xvdi_switch_state(dip, xbt, XenbusStateConnected)) > 0) {
		xvdi_fatal_error(dip, err, "writing 'state'");
		goto abort_trans;
	}

	if (err = xenbus_transaction_end(xbt, 0)) {
		if (err == EAGAIN)
			/* transaction is ended, don't need to abort it */
			goto trans_retry;
		xvdi_fatal_error(dip, err, "completing transaction");
		goto errout3;
	}

	return (DDI_SUCCESS);

abort_trans:
	(void) xenbus_transaction_end(xbt, 1);
errout3:
	mutex_enter(&vdp->xs_iomutex);
	vdp->xs_if_status = svdst;
	mutex_exit(&vdp->xs_iomutex);
	ddi_remove_intr(dip, 0, NULL);
errout2:
	xdb_uninit_ioreqs(vdp);
	xdb_unbindfrom_frontend(vdp);
errout1:
	xdb_close_device(vdp);
	return (DDI_FAILURE);
}

/*
 * Kick-off disconnect process
 * xs_if_status will not be changed
 */
static int
xdb_start_disconnect(xdb_t *vdp)
{
	if (xvdi_switch_state(vdp->xs_dip, XBT_NULL, XenbusStateClosing) > 0)
		return (DDI_FAILURE);

	return (DDI_SUCCESS);
}

/*
 * Disconnect from frontend and close backend device
 * xs_if_status will be changed to XDB_DISCONNECTED
 * Xenbus state will be changed to XenbusStateClosed
 */
static void
xdb_close(dev_info_t *dip)
{
	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);

	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

	mutex_enter(&vdp->xs_iomutex);

	if (vdp->xs_if_status != XDB_CONNECTED) {
		vdp->xs_if_status = XDB_DISCONNECTED;
		cv_broadcast(&vdp->xs_iocv);
		mutex_exit(&vdp->xs_iomutex);
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
		return;
	}
	vdp->xs_if_status = XDB_DISCONNECTED;
	cv_broadcast(&vdp->xs_iocv);

	mutex_exit(&vdp->xs_iomutex);

	/* stop accepting I/O request from frontend */
	ddi_remove_intr(dip, 0, NULL);
	/* clear all on-going I/Os, if any */
	mutex_enter(&vdp->xs_iomutex);
	while (vdp->xs_ionum > 0)
		cv_wait(&vdp->xs_ionumcv, &vdp->xs_iomutex);
	mutex_exit(&vdp->xs_iomutex);

	/* clean up resources and close this interface */
	xdb_uninit_ioreqs(vdp);
	xdb_unbindfrom_frontend(vdp);
	xdb_close_device(vdp);
	vdp->xs_peer = (domid_t)-1;
	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
}

/*
 * xdb_check_state_transition() checks a XenbusState change to see
 * whether the change is a valid transition or not.
 * The new state is written by the frontend domain, or by running
 * xenstore-write to change it manually in dom0
 */
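/*
 * A compact summary of the checks below, derived from the code (which
 * remains authoritative):
 *
 *	XDB_UNKNOWN + Initialised	STOK while xs_fe_status is still
 *					XDB_FE_UNKNOWN, STNOP once it is
 *					XDB_FE_READY
 *	XDB_UNKNOWN + Closing/Closed	STOK
 *	XDB_CONNECTED + Closing/Closed	STOK
 *	XDB_CONNECTED + Connected	STNOP
 *	XDB_DISCONNECTED + anything	STBUG
 *
 * Everything else is either a harmless no-op (STNOP) or an unexpected
 * transition (STBUG) that gets logged.
 */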
static int
xdb_check_state_transition(xdb_t *vdp, XenbusState oestate)
{
	enum xdb_state status;
	int stcheck;
#define	STOK	0 /* needs further processing */
#define	STNOP	1 /* no action needs to be taken */
#define	STBUG	2 /* unexpected state change, could be a bug */

	status = vdp->xs_if_status;
	stcheck = STOK;

	switch (status) {
	case XDB_UNKNOWN:
		if (vdp->xs_fe_status == XDB_FE_UNKNOWN) {
			if ((oestate == XenbusStateUnknown)		||
			    (oestate == XenbusStateConnected))
				stcheck = STBUG;
			else if ((oestate == XenbusStateInitialising)	||
			    (oestate == XenbusStateInitWait))
				stcheck = STNOP;
		} else {
			if ((oestate == XenbusStateUnknown)		||
			    (oestate == XenbusStateInitialising)	||
			    (oestate == XenbusStateInitWait)		||
			    (oestate == XenbusStateConnected))
				stcheck = STBUG;
			else if (oestate == XenbusStateInitialised)
				stcheck = STNOP;
		}
		break;
	case XDB_CONNECTED:
		if ((oestate == XenbusStateUnknown)		||
		    (oestate == XenbusStateInitialising)	||
		    (oestate == XenbusStateInitWait)		||
		    (oestate == XenbusStateInitialised))
			stcheck = STBUG;
		else if (oestate == XenbusStateConnected)
			stcheck = STNOP;
		break;
	case XDB_DISCONNECTED:
	default:
		stcheck = STBUG;
	}

	if (stcheck == STOK)
		return (DDI_SUCCESS);

	if (stcheck == STBUG)
		cmn_err(CE_NOTE, "xdb@%s: unexpected otherend "
		    "state change to %d when status is %d!",
		    ddi_get_name_addr(vdp->xs_dip), oestate, status);

	return (DDI_FAILURE);
}

static void
xdb_send_buf(void *arg)
{
	buf_t *bp;
	xdb_t *vdp = (xdb_t *)arg;

	mutex_enter(&vdp->xs_iomutex);

	while (vdp->xs_if_status != XDB_DISCONNECTED) {
		while ((bp = vdp->xs_f_iobuf) != NULL) {
			vdp->xs_f_iobuf = bp->av_forw;
			bp->av_forw = NULL;
			mutex_exit(&vdp->xs_iomutex);
			if (bp->b_bcount != 0) {
				int err = ldi_strategy(vdp->xs_ldi_hdl, bp);
				if (err != 0) {
					bp->b_flags |= B_ERROR;
					(void) xdb_biodone(bp);
					XDB_DBPRINT(XDB_DBG_IO, (CE_WARN,
					    "xdb@%s: sent buf to backend dev "
					    "failed, err=%d",
					    ddi_get_name_addr(vdp->xs_dip),
					    err));
				} else {
					XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
					    "sent buf to backend ok"));
				}
			} else /* no I/O needs to be done */
				(void) xdb_biodone(bp);

			mutex_enter(&vdp->xs_iomutex);
		}

		if (vdp->xs_if_status != XDB_DISCONNECTED)
			cv_wait(&vdp->xs_iocv, &vdp->xs_iomutex);
	}

	mutex_exit(&vdp->xs_iomutex);
}

/*ARGSUSED*/
static void
xdb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
    void *impl_data)
{
	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: "
	    "hotplug status change to %d!", ddi_get_name_addr(dip), state));

	mutex_enter(&vdp->xs_cbmutex);
	if (state == Connected) {
		/* Hotplug script has completed successfully */
		if (vdp->xs_dev_status == XDB_DEV_UNKNOWN) {
			vdp->xs_dev_status = XDB_DEV_READY;
			if (vdp->xs_fe_status == XDB_FE_READY)
				/* try to connect to frontend */
				if (xdb_start_connect(vdp) != DDI_SUCCESS)
					(void) xdb_start_disconnect(vdp);
		}
	}
	mutex_exit(&vdp->xs_cbmutex);
}

/*ARGSUSED*/
static void
xdb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
    void *impl_data)
{
	XenbusState new_state = *(XenbusState *)impl_data;
	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: "
	    "otherend state change to %d!", ddi_get_name_addr(dip), new_state));

	mutex_enter(&vdp->xs_cbmutex);

	if (xdb_check_state_transition(vdp, new_state) == DDI_FAILURE) {
		mutex_exit(&vdp->xs_cbmutex);
		return;
	}

	switch (new_state) {
	case XenbusStateInitialised:
		ASSERT(vdp->xs_if_status == XDB_UNKNOWN);

		/* frontend is ready for connecting */
		vdp->xs_fe_status = XDB_FE_READY;

		if (vdp->xs_dev_status == XDB_DEV_READY)
			if (xdb_start_connect(vdp) != DDI_SUCCESS)
				(void) xdb_start_disconnect(vdp);
		break;
	case XenbusStateClosing:
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);
		break;
	case XenbusStateClosed:
		/* clean up */
		xdb_close(dip);
		break;
	}

	mutex_exit(&vdp->xs_cbmutex);
}

static int
xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	xdb_t *vdp;
	ddi_iblock_cookie_t ibc;
	int instance;

	switch (cmd) {
	case DDI_RESUME:
		return (DDI_FAILURE);
	case DDI_ATTACH:
		break;
	default:
		return (DDI_FAILURE);
	}

	/* DDI_ATTACH */
	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(xdb_statep, instance) != DDI_SUCCESS)
		return (DDI_FAILURE);

	vdp = ddi_get_soft_state(xdb_statep, instance);
	vdp->xs_dip = dip;
	if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
		goto errout1;

	if (!xdb_kstat_init(vdp))
		goto errout1;

	mutex_init(&vdp->xs_iomutex, NULL, MUTEX_DRIVER, (void *)ibc);
	mutex_init(&vdp->xs_cbmutex, NULL, MUTEX_DRIVER, (void *)ibc);
	cv_init(&vdp->xs_iocv, NULL, CV_DRIVER, NULL);
	cv_init(&vdp->xs_ionumcv, NULL, CV_DRIVER, NULL);

	ddi_set_driver_private(dip, vdp);

	vdp->xs_iotaskq = ddi_taskq_create(dip, "xdb_iotask", 1,
	    TASKQ_DEFAULTPRI, 0);
	if (vdp->xs_iotaskq == NULL)
		goto errout2;
	(void) ddi_taskq_dispatch(vdp->xs_iotaskq, xdb_send_buf, vdp,
	    DDI_SLEEP);

	/* Watch frontend and hotplug state change */
	if (xvdi_add_event_handler(dip, XS_OE_STATE, xdb_oe_state_change) !=
	    DDI_SUCCESS)
		goto errout3;
	if (xvdi_add_event_handler(dip, XS_HP_STATE, xdb_hp_state_change) !=
	    DDI_SUCCESS) {
		goto errout4;
	}

	/*
	 * Kick-off hotplug script
	 */
	if (xvdi_post_event(dip, XEN_HP_ADD) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdb@%s: failed to start hotplug script",
		    ddi_get_name_addr(dip));
		goto errout4;
	}

	/*
	 * Start waiting for hotplug events and otherend state events.
	 * This is mainly for debugging; the frontend will not take any
	 * action upon seeing this state
	 */
	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: attached!",
	    ddi_get_name_addr(dip)));
	return (DDI_SUCCESS);

errout4:
	xvdi_remove_event_handler(dip, NULL);
errout3:
	mutex_enter(&vdp->xs_cbmutex);
	mutex_enter(&vdp->xs_iomutex);
	vdp->xs_if_status = XDB_DISCONNECTED;
	cv_broadcast(&vdp->xs_iocv);
	mutex_exit(&vdp->xs_iomutex);
	mutex_exit(&vdp->xs_cbmutex);
	ddi_taskq_destroy(vdp->xs_iotaskq);
errout2:
	ddi_set_driver_private(dip, NULL);
	cv_destroy(&vdp->xs_iocv);
	cv_destroy(&vdp->xs_ionumcv);
	mutex_destroy(&vdp->xs_cbmutex);
	mutex_destroy(&vdp->xs_iomutex);
	kstat_delete(vdp->xs_kstats);
errout1:
	ddi_soft_state_free(xdb_statep, instance);
	return (DDI_FAILURE);
}

/*ARGSUSED*/
static int
xdb_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance = ddi_get_instance(dip);
	xdb_t *vdp = XDB_INST2SOFTS(instance);

	switch (cmd) {
	case DDI_SUSPEND:
		return (DDI_FAILURE);
	case DDI_DETACH:
		break;
	default:
		return (DDI_FAILURE);
	}

	/* DDI_DETACH handling */

	/* shouldn't detach if still used by frontend */
	mutex_enter(&vdp->xs_iomutex);
	if (vdp->xs_if_status != XDB_DISCONNECTED) {
		mutex_exit(&vdp->xs_iomutex);
		return (DDI_FAILURE);
	}
	mutex_exit(&vdp->xs_iomutex);

	xvdi_remove_event_handler(dip, NULL);
	/* nothing we can do about it if this fails */
	(void) xvdi_post_event(dip, XEN_HP_REMOVE);

	ddi_taskq_destroy(vdp->xs_iotaskq);
	cv_destroy(&vdp->xs_iocv);
	cv_destroy(&vdp->xs_ionumcv);
	mutex_destroy(&vdp->xs_cbmutex);
	mutex_destroy(&vdp->xs_iomutex);
	kstat_delete(vdp->xs_kstats);
	ddi_set_driver_private(dip, NULL);
	ddi_soft_state_free(xdb_statep, instance);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: detached!",
	    ddi_get_name_addr(dip)));
	return (DDI_SUCCESS);
}

static struct dev_ops xdb_dev_ops = {
	DEVO_REV,	/* devo_rev */
	0,		/* devo_refcnt */
	ddi_getinfo_1to1, /* devo_getinfo */
	nulldev,	/* devo_identify */
	nulldev,	/* devo_probe */
	xdb_attach,	/* devo_attach */
	xdb_detach,	/* devo_detach */
	nodev,		/* devo_reset */
	NULL,		/* devo_cb_ops */
	NULL,		/* devo_bus_ops */
	NULL		/* devo_power */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,			/* Type of module. */
	"vbd backend driver %I%",	/* Name of the module */
	&xdb_dev_ops			/* driver ops */
};

static struct modlinkage xdb_modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int rv;

	if ((rv = ddi_soft_state_init((void **)&xdb_statep,
	    sizeof (xdb_t), 0)) == 0)
		if ((rv = mod_install(&xdb_modlinkage)) != 0)
			ddi_soft_state_fini((void **)&xdb_statep);
	return (rv);
}

int
_fini(void)
{
	int rv;

	if ((rv = mod_remove(&xdb_modlinkage)) != 0)
		return (rv);
	ddi_soft_state_fini((void **)&xdb_statep);
	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xdb_modlinkage, modinfop));
}