17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
572888e72Speng liu - Sun Microsystems - Beijing China  * Common Development and Distribution License (the "License").
672888e72Speng liu - Sun Microsystems - Beijing China  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
2272888e72Speng liu - Sun Microsystems - Beijing China  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #include <sys/scsi/scsi.h>
277c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
287c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
297c478bd9Sstevel@tonic-gate #include <sys/thread.h>
307c478bd9Sstevel@tonic-gate #include <sys/var.h>
317c478bd9Sstevel@tonic-gate 
327c478bd9Sstevel@tonic-gate #include "sd_xbuf.h"
337c478bd9Sstevel@tonic-gate 
/*
 * xbuf.c: buf(9S) extension facility.
 *
 * The buf(9S) extension facility is intended to allow block drivers to
 * allocate additional memory that is associated with a particular buf(9S)
 * struct.  It is further intended to help in addressing the usual set of
 * problems associated with such allocations, in particular those involving
 * recovery from allocation failures, especially in code paths that the
 * system relies on to free memory.
 *
 * CAVEAT: Currently this code is completely private to the sd driver and in
 * NO WAY constitutes a public or supported interface of any kind. It is
 * envisioned that this may one day migrate into the Solaris DDI, but until
 * that time this ought to be considered completely unstable and is subject
 * to change without notice. This code may NOT in any way be utilized by
 * ANY code outside the sd driver.
 */
517c478bd9Sstevel@tonic-gate 
527c478bd9Sstevel@tonic-gate 
537c478bd9Sstevel@tonic-gate static int xbuf_iostart(ddi_xbuf_attr_t xap);
547c478bd9Sstevel@tonic-gate static void xbuf_dispatch(ddi_xbuf_attr_t xap);
557c478bd9Sstevel@tonic-gate static void xbuf_restart_callback(void *arg);
5672888e72Speng liu - Sun Microsystems - Beijing China static int xbuf_brk_done(struct buf *bp);
577c478bd9Sstevel@tonic-gate 
587c478bd9Sstevel@tonic-gate 
597c478bd9Sstevel@tonic-gate /*
607c478bd9Sstevel@tonic-gate  * Note: Should this be exposed to the caller.... do we want to give the
617c478bd9Sstevel@tonic-gate  * caller the fexibility of specifying the parameters for the thread pool?
627c478bd9Sstevel@tonic-gate  * Note: these values are just estimates at this time, based upon what
637c478bd9Sstevel@tonic-gate  * seems reasonable for the sd driver. It may be preferable to make these
647c478bd9Sstevel@tonic-gate  * parameters self-scaling in a real (future) implementation.
657c478bd9Sstevel@tonic-gate  */
667c478bd9Sstevel@tonic-gate #define	XBUF_TQ_MINALLOC	64
677c478bd9Sstevel@tonic-gate #define	XBUF_TQ_MAXALLOC	512
687c478bd9Sstevel@tonic-gate #define	XBUF_DISPATCH_DELAY	(drv_usectohz(50000))	/* 50 msec */
697c478bd9Sstevel@tonic-gate 
707c478bd9Sstevel@tonic-gate static taskq_t *xbuf_tq = NULL;
717c478bd9Sstevel@tonic-gate static int xbuf_attr_tq_minalloc = XBUF_TQ_MINALLOC;
727c478bd9Sstevel@tonic-gate static int xbuf_attr_tq_maxalloc = XBUF_TQ_MAXALLOC;
737c478bd9Sstevel@tonic-gate 
747c478bd9Sstevel@tonic-gate static kmutex_t	xbuf_mutex = { 0 };
757c478bd9Sstevel@tonic-gate static uint32_t	xbuf_refcount = 0;
767c478bd9Sstevel@tonic-gate 
7772888e72Speng liu - Sun Microsystems - Beijing China /*
7872888e72Speng liu - Sun Microsystems - Beijing China  * Private wrapper for buf cloned via ddi_xbuf_qstrategy()
7972888e72Speng liu - Sun Microsystems - Beijing China  */
8072888e72Speng liu - Sun Microsystems - Beijing China struct xbuf_brk {
8172888e72Speng liu - Sun Microsystems - Beijing China 	kmutex_t mutex;
8272888e72Speng liu - Sun Microsystems - Beijing China 	struct buf *bp0;
8372888e72Speng liu - Sun Microsystems - Beijing China 	uint8_t nbufs;	/* number of buf allocated */
8472888e72Speng liu - Sun Microsystems - Beijing China 	uint8_t active; /* number of active xfer */
8572888e72Speng liu - Sun Microsystems - Beijing China 
8672888e72Speng liu - Sun Microsystems - Beijing China 	size_t brksize;	/* break size used for this buf */
8772888e72Speng liu - Sun Microsystems - Beijing China 	int brkblk;
8872888e72Speng liu - Sun Microsystems - Beijing China 
8972888e72Speng liu - Sun Microsystems - Beijing China 	/* xfer position */
9072888e72Speng liu - Sun Microsystems - Beijing China 	off_t off;
9172888e72Speng liu - Sun Microsystems - Beijing China 	off_t noff;
9272888e72Speng liu - Sun Microsystems - Beijing China 	daddr_t blkno;
9372888e72Speng liu - Sun Microsystems - Beijing China };
9472888e72Speng liu - Sun Microsystems - Beijing China 
_NOTE(DATA_READABLE_WITHOUT_LOCK (xbuf_brk::off))9572888e72Speng liu - Sun Microsystems - Beijing China _NOTE(DATA_READABLE_WITHOUT_LOCK(xbuf_brk::off))
9672888e72Speng liu - Sun Microsystems - Beijing China 
/*
 * Prototype-only hack that makes buf breakup work: a cloned buf's b_forw
 * field is accessed under the name b_clone_private (ddi_xbuf_done() and
 * xbuf_brk_done() read the clone's struct xbuf_brk pointer through it).
 * This relies on the sd code never changing the value in b_forw.
 */
#define	b_clone_private b_forw
10372888e72Speng liu - Sun Microsystems - Beijing China 
1047c478bd9Sstevel@tonic-gate 
1057c478bd9Sstevel@tonic-gate /* ARGSUSED */
1067c478bd9Sstevel@tonic-gate DDII ddi_xbuf_attr_t
1077c478bd9Sstevel@tonic-gate ddi_xbuf_attr_create(size_t xsize,
1087c478bd9Sstevel@tonic-gate     void (*xa_strategy)(struct buf *bp, ddi_xbuf_t xp, void *attr_arg),
1097c478bd9Sstevel@tonic-gate     void *attr_arg, uint32_t active_limit, uint32_t reserve_limit,
1107c478bd9Sstevel@tonic-gate     major_t major, int flags)
1117c478bd9Sstevel@tonic-gate {
1127c478bd9Sstevel@tonic-gate 	ddi_xbuf_attr_t	xap;
1137c478bd9Sstevel@tonic-gate 
1147c478bd9Sstevel@tonic-gate 	xap = kmem_zalloc(sizeof (struct __ddi_xbuf_attr), KM_SLEEP);
1157c478bd9Sstevel@tonic-gate 
1167c478bd9Sstevel@tonic-gate 	mutex_init(&xap->xa_mutex, NULL, MUTEX_DRIVER, NULL);
1177c478bd9Sstevel@tonic-gate 	mutex_init(&xap->xa_reserve_mutex, NULL, MUTEX_DRIVER, NULL);
1187c478bd9Sstevel@tonic-gate 
1197c478bd9Sstevel@tonic-gate 	/* Future: Allow the caller to specify alignment requirements? */
1207c478bd9Sstevel@tonic-gate 	xap->xa_allocsize	= max(xsize, sizeof (void *));
1217c478bd9Sstevel@tonic-gate 	xap->xa_active_limit	= active_limit;
1227c478bd9Sstevel@tonic-gate 	xap->xa_active_lowater	= xap->xa_active_limit / 2;
1237c478bd9Sstevel@tonic-gate 	xap->xa_reserve_limit	= reserve_limit;
1247c478bd9Sstevel@tonic-gate 	xap->xa_strategy	= xa_strategy;
1257c478bd9Sstevel@tonic-gate 	xap->xa_attr_arg	= attr_arg;
1267c478bd9Sstevel@tonic-gate 
1277c478bd9Sstevel@tonic-gate 	mutex_enter(&xbuf_mutex);
1287c478bd9Sstevel@tonic-gate 	if (xbuf_refcount == 0) {
1297c478bd9Sstevel@tonic-gate 		ASSERT(xbuf_tq == NULL);
1307c478bd9Sstevel@tonic-gate 		/*
1317c478bd9Sstevel@tonic-gate 		 * Note: Would be nice if: (1) #threads in the taskq pool (set
1327c478bd9Sstevel@tonic-gate 		 * to the value of 'ncpus' at the time the taskq is created)
1337c478bd9Sstevel@tonic-gate 		 * could adjust automatically with DR; (2) the taskq
1347c478bd9Sstevel@tonic-gate 		 * minalloc/maxalloc counts could be grown/shrunk on the fly.
1357c478bd9Sstevel@tonic-gate 		 */
1367c478bd9Sstevel@tonic-gate 		xbuf_tq = taskq_create("xbuf_taskq", ncpus,
1377c478bd9Sstevel@tonic-gate 		    (v.v_maxsyspri - 2), xbuf_attr_tq_minalloc,
1387c478bd9Sstevel@tonic-gate 		    xbuf_attr_tq_maxalloc, TASKQ_PREPOPULATE);
1397c478bd9Sstevel@tonic-gate 	}
1407c478bd9Sstevel@tonic-gate 	xbuf_refcount++;
1417c478bd9Sstevel@tonic-gate 	mutex_exit(&xbuf_mutex);
1427c478bd9Sstevel@tonic-gate 
1437c478bd9Sstevel@tonic-gate 	/* In this prototype we just always use the global system pool. */
1447c478bd9Sstevel@tonic-gate 	xap->xa_tq = xbuf_tq;
1457c478bd9Sstevel@tonic-gate 
1467c478bd9Sstevel@tonic-gate 	return (xap);
1477c478bd9Sstevel@tonic-gate }
1487c478bd9Sstevel@tonic-gate 
1497c478bd9Sstevel@tonic-gate 
1507c478bd9Sstevel@tonic-gate DDII void
ddi_xbuf_attr_destroy(ddi_xbuf_attr_t xap)1517c478bd9Sstevel@tonic-gate ddi_xbuf_attr_destroy(ddi_xbuf_attr_t xap)
1527c478bd9Sstevel@tonic-gate {
1537c478bd9Sstevel@tonic-gate 	ddi_xbuf_t	xp;
1547c478bd9Sstevel@tonic-gate 
1557c478bd9Sstevel@tonic-gate 	mutex_destroy(&xap->xa_mutex);
1567c478bd9Sstevel@tonic-gate 	mutex_destroy(&xap->xa_reserve_mutex);
1577c478bd9Sstevel@tonic-gate 
1587c478bd9Sstevel@tonic-gate 	/* Free any xbufs on the reserve list */
1597c478bd9Sstevel@tonic-gate 	while (xap->xa_reserve_count != 0) {
1607c478bd9Sstevel@tonic-gate 		xp = xap->xa_reserve_headp;
1617c478bd9Sstevel@tonic-gate 		xap->xa_reserve_headp = *((void **)xp);
1627c478bd9Sstevel@tonic-gate 		xap->xa_reserve_count--;
1637c478bd9Sstevel@tonic-gate 		kmem_free(xp, xap->xa_allocsize);
1647c478bd9Sstevel@tonic-gate 	}
1657c478bd9Sstevel@tonic-gate 	ASSERT(xap->xa_reserve_headp == NULL);
1667c478bd9Sstevel@tonic-gate 
1677c478bd9Sstevel@tonic-gate 	mutex_enter(&xbuf_mutex);
1687c478bd9Sstevel@tonic-gate 	ASSERT((xbuf_refcount != 0) && (xbuf_tq != NULL));
1697c478bd9Sstevel@tonic-gate 	xbuf_refcount--;
1707c478bd9Sstevel@tonic-gate 	if (xbuf_refcount == 0) {
1717c478bd9Sstevel@tonic-gate 		taskq_destroy(xbuf_tq);
1727c478bd9Sstevel@tonic-gate 		xbuf_tq = NULL;
1737c478bd9Sstevel@tonic-gate 	}
1747c478bd9Sstevel@tonic-gate 	mutex_exit(&xbuf_mutex);
1757c478bd9Sstevel@tonic-gate 
1767c478bd9Sstevel@tonic-gate 	kmem_free(xap, sizeof (struct __ddi_xbuf_attr));
1777c478bd9Sstevel@tonic-gate }
1787c478bd9Sstevel@tonic-gate 
1797c478bd9Sstevel@tonic-gate 
1807c478bd9Sstevel@tonic-gate /* ARGSUSED */
1817c478bd9Sstevel@tonic-gate DDII void
ddi_xbuf_attr_register_devinfo(ddi_xbuf_attr_t xbuf_attr,dev_info_t * dip)1827c478bd9Sstevel@tonic-gate ddi_xbuf_attr_register_devinfo(ddi_xbuf_attr_t xbuf_attr, dev_info_t *dip)
1837c478bd9Sstevel@tonic-gate {
1847c478bd9Sstevel@tonic-gate 	/* Currently a no-op in this prototype */
1857c478bd9Sstevel@tonic-gate }
1867c478bd9Sstevel@tonic-gate 
1877c478bd9Sstevel@tonic-gate 
1887c478bd9Sstevel@tonic-gate /* ARGSUSED */
1897c478bd9Sstevel@tonic-gate DDII void
ddi_xbuf_attr_unregister_devinfo(ddi_xbuf_attr_t xbuf_attr,dev_info_t * dip)1907c478bd9Sstevel@tonic-gate ddi_xbuf_attr_unregister_devinfo(ddi_xbuf_attr_t xbuf_attr, dev_info_t *dip)
1917c478bd9Sstevel@tonic-gate {
1927c478bd9Sstevel@tonic-gate 	/* Currently a no-op in this prototype */
1937c478bd9Sstevel@tonic-gate }
1947c478bd9Sstevel@tonic-gate 
19572888e72Speng liu - Sun Microsystems - Beijing China DDII int
ddi_xbuf_attr_setup_brk(ddi_xbuf_attr_t xap,size_t size)19672888e72Speng liu - Sun Microsystems - Beijing China ddi_xbuf_attr_setup_brk(ddi_xbuf_attr_t xap, size_t size)
19772888e72Speng liu - Sun Microsystems - Beijing China {
19872888e72Speng liu - Sun Microsystems - Beijing China 	if (size < DEV_BSIZE)
19972888e72Speng liu - Sun Microsystems - Beijing China 		return (0);
20072888e72Speng liu - Sun Microsystems - Beijing China 
20172888e72Speng liu - Sun Microsystems - Beijing China 	mutex_enter(&xap->xa_mutex);
20272888e72Speng liu - Sun Microsystems - Beijing China 	xap->xa_brksize = size & ~(DEV_BSIZE - 1);
20372888e72Speng liu - Sun Microsystems - Beijing China 	mutex_exit(&xap->xa_mutex);
20472888e72Speng liu - Sun Microsystems - Beijing China 	return (1);
20572888e72Speng liu - Sun Microsystems - Beijing China }
20672888e72Speng liu - Sun Microsystems - Beijing China 
2077c478bd9Sstevel@tonic-gate 
2087c478bd9Sstevel@tonic-gate 
2097c478bd9Sstevel@tonic-gate /*
2107c478bd9Sstevel@tonic-gate  * Enqueue the given buf and attempt to initiate IO.
2117c478bd9Sstevel@tonic-gate  * Called from the driver strategy(9E) routine.
2127c478bd9Sstevel@tonic-gate  */
2137c478bd9Sstevel@tonic-gate 
2147c478bd9Sstevel@tonic-gate DDII int
ddi_xbuf_qstrategy(struct buf * bp,ddi_xbuf_attr_t xap)2157c478bd9Sstevel@tonic-gate ddi_xbuf_qstrategy(struct buf *bp, ddi_xbuf_attr_t xap)
2167c478bd9Sstevel@tonic-gate {
2177c478bd9Sstevel@tonic-gate 	ASSERT(xap != NULL);
2187c478bd9Sstevel@tonic-gate 	ASSERT(!mutex_owned(&xap->xa_mutex));
2197c478bd9Sstevel@tonic-gate 	ASSERT(!mutex_owned(&xap->xa_reserve_mutex));
2207c478bd9Sstevel@tonic-gate 
2217c478bd9Sstevel@tonic-gate 	mutex_enter(&xap->xa_mutex);
2227c478bd9Sstevel@tonic-gate 
22372888e72Speng liu - Sun Microsystems - Beijing China 	ASSERT((bp->b_bcount & (DEV_BSIZE - 1)) == 0);
22472888e72Speng liu - Sun Microsystems - Beijing China 
22572888e72Speng liu - Sun Microsystems - Beijing China 	/*
22672888e72Speng liu - Sun Microsystems - Beijing China 	 * Breakup buf if necessary. bp->b_private is temporarily
22772888e72Speng liu - Sun Microsystems - Beijing China 	 * used to save xbuf_brk
22872888e72Speng liu - Sun Microsystems - Beijing China 	 */
22972888e72Speng liu - Sun Microsystems - Beijing China 	if (xap->xa_brksize && bp->b_bcount > xap->xa_brksize) {
23072888e72Speng liu - Sun Microsystems - Beijing China 		struct xbuf_brk *brkp;
23172888e72Speng liu - Sun Microsystems - Beijing China 
23272888e72Speng liu - Sun Microsystems - Beijing China 		brkp = kmem_zalloc(sizeof (struct xbuf_brk), KM_SLEEP);
23372888e72Speng liu - Sun Microsystems - Beijing China 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*brkp))
23472888e72Speng liu - Sun Microsystems - Beijing China 		mutex_init(&brkp->mutex, NULL, MUTEX_DRIVER, NULL);
23572888e72Speng liu - Sun Microsystems - Beijing China 		brkp->bp0 = bp;
23672888e72Speng liu - Sun Microsystems - Beijing China 		brkp->brksize = xap->xa_brksize;
23772888e72Speng liu - Sun Microsystems - Beijing China 		brkp->brkblk = btodt(xap->xa_brksize);
23872888e72Speng liu - Sun Microsystems - Beijing China 		brkp->noff = xap->xa_brksize;
23972888e72Speng liu - Sun Microsystems - Beijing China 		brkp->blkno = bp->b_blkno;
24072888e72Speng liu - Sun Microsystems - Beijing China 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*brkp))
24172888e72Speng liu - Sun Microsystems - Beijing China 		bp->b_private = brkp;
24272888e72Speng liu - Sun Microsystems - Beijing China 	} else {
24372888e72Speng liu - Sun Microsystems - Beijing China 		bp->b_private = NULL;
24472888e72Speng liu - Sun Microsystems - Beijing China 	}
24572888e72Speng liu - Sun Microsystems - Beijing China 
24672888e72Speng liu - Sun Microsystems - Beijing China 	/* Enqueue buf */
2477c478bd9Sstevel@tonic-gate 	if (xap->xa_headp == NULL) {
2487c478bd9Sstevel@tonic-gate 		xap->xa_headp = xap->xa_tailp = bp;
2497c478bd9Sstevel@tonic-gate 	} else {
2507c478bd9Sstevel@tonic-gate 		xap->xa_tailp->av_forw = bp;
2517c478bd9Sstevel@tonic-gate 		xap->xa_tailp = bp;
2527c478bd9Sstevel@tonic-gate 	}
2537c478bd9Sstevel@tonic-gate 	bp->av_forw = NULL;
2547c478bd9Sstevel@tonic-gate 
2557c478bd9Sstevel@tonic-gate 	xap->xa_pending++;
2567c478bd9Sstevel@tonic-gate 	mutex_exit(&xap->xa_mutex);
2577c478bd9Sstevel@tonic-gate 	return (xbuf_iostart(xap));
2587c478bd9Sstevel@tonic-gate }
2597c478bd9Sstevel@tonic-gate 
2607c478bd9Sstevel@tonic-gate 
2617c478bd9Sstevel@tonic-gate /*
2627c478bd9Sstevel@tonic-gate  * Drivers call this immediately before calling biodone(9F), to notify the
2637c478bd9Sstevel@tonic-gate  * framework that the indicated xbuf is no longer being used by the driver.
2647c478bd9Sstevel@tonic-gate  * May be called under interrupt context.
2657c478bd9Sstevel@tonic-gate  */
2667c478bd9Sstevel@tonic-gate 
26772888e72Speng liu - Sun Microsystems - Beijing China DDII int
ddi_xbuf_done(struct buf * bp,ddi_xbuf_attr_t xap)2687c478bd9Sstevel@tonic-gate ddi_xbuf_done(struct buf *bp, ddi_xbuf_attr_t xap)
2697c478bd9Sstevel@tonic-gate {
2707c478bd9Sstevel@tonic-gate 	ddi_xbuf_t xp;
27172888e72Speng liu - Sun Microsystems - Beijing China 	int done;
2727c478bd9Sstevel@tonic-gate 
2737c478bd9Sstevel@tonic-gate 	ASSERT(bp != NULL);
2747c478bd9Sstevel@tonic-gate 	ASSERT(xap != NULL);
2757c478bd9Sstevel@tonic-gate 	ASSERT(!mutex_owned(&xap->xa_mutex));
2767c478bd9Sstevel@tonic-gate 	ASSERT(!mutex_owned(&xap->xa_reserve_mutex));
2777c478bd9Sstevel@tonic-gate 
2787c478bd9Sstevel@tonic-gate 	xp = ddi_xbuf_get(bp, xap);
2797c478bd9Sstevel@tonic-gate 
2807c478bd9Sstevel@tonic-gate 	mutex_enter(&xap->xa_mutex);
2817c478bd9Sstevel@tonic-gate 
2827c478bd9Sstevel@tonic-gate #ifdef	SDDEBUG
2837c478bd9Sstevel@tonic-gate 	if (xap->xa_active_limit != 0) {
2847c478bd9Sstevel@tonic-gate 		ASSERT(xap->xa_active_count > 0);
2857c478bd9Sstevel@tonic-gate 	}
2867c478bd9Sstevel@tonic-gate #endif
2877c478bd9Sstevel@tonic-gate 	xap->xa_active_count--;
2887c478bd9Sstevel@tonic-gate 
2897c478bd9Sstevel@tonic-gate 	if (xap->xa_reserve_limit != 0) {
2907c478bd9Sstevel@tonic-gate 		mutex_enter(&xap->xa_reserve_mutex);
2917c478bd9Sstevel@tonic-gate 		if (xap->xa_reserve_count < xap->xa_reserve_limit) {
2927c478bd9Sstevel@tonic-gate 			/* Put this xbuf onto the reserve list & exit */
2937c478bd9Sstevel@tonic-gate 			*((void **)xp) = xap->xa_reserve_headp;
2947c478bd9Sstevel@tonic-gate 			xap->xa_reserve_headp = xp;
2957c478bd9Sstevel@tonic-gate 			xap->xa_reserve_count++;
2967c478bd9Sstevel@tonic-gate 			mutex_exit(&xap->xa_reserve_mutex);
2977c478bd9Sstevel@tonic-gate 			goto done;
2987c478bd9Sstevel@tonic-gate 		}
2997c478bd9Sstevel@tonic-gate 		mutex_exit(&xap->xa_reserve_mutex);
3007c478bd9Sstevel@tonic-gate 	}
3017c478bd9Sstevel@tonic-gate 
3027c478bd9Sstevel@tonic-gate 	kmem_free(xp, xap->xa_allocsize);	/* return it to the system */
3037c478bd9Sstevel@tonic-gate 
3047c478bd9Sstevel@tonic-gate done:
30572888e72Speng liu - Sun Microsystems - Beijing China 	if (bp->b_iodone == xbuf_brk_done) {
30672888e72Speng liu - Sun Microsystems - Beijing China 		struct xbuf_brk *brkp = (struct xbuf_brk *)bp->b_clone_private;
30772888e72Speng liu - Sun Microsystems - Beijing China 
30872888e72Speng liu - Sun Microsystems - Beijing China 		brkp->active--;
30972888e72Speng liu - Sun Microsystems - Beijing China 		if (brkp->active || xap->xa_headp == brkp->bp0) {
31072888e72Speng liu - Sun Microsystems - Beijing China 			done = 0;
31172888e72Speng liu - Sun Microsystems - Beijing China 		} else {
31272888e72Speng liu - Sun Microsystems - Beijing China 			brkp->off = -1;	/* mark bp0 as completed */
31372888e72Speng liu - Sun Microsystems - Beijing China 			done = 1;
31472888e72Speng liu - Sun Microsystems - Beijing China 		}
31572888e72Speng liu - Sun Microsystems - Beijing China 	} else {
31672888e72Speng liu - Sun Microsystems - Beijing China 		done = 1;
31772888e72Speng liu - Sun Microsystems - Beijing China 	}
31872888e72Speng liu - Sun Microsystems - Beijing China 
3197c478bd9Sstevel@tonic-gate 	if ((xap->xa_active_limit == 0) ||
3207c478bd9Sstevel@tonic-gate 	    (xap->xa_active_count <= xap->xa_active_lowater)) {
3217c478bd9Sstevel@tonic-gate 		xbuf_dispatch(xap);
3227c478bd9Sstevel@tonic-gate 	}
3237c478bd9Sstevel@tonic-gate 
3247c478bd9Sstevel@tonic-gate 	mutex_exit(&xap->xa_mutex);
32572888e72Speng liu - Sun Microsystems - Beijing China 	return (done);
32672888e72Speng liu - Sun Microsystems - Beijing China }
32772888e72Speng liu - Sun Microsystems - Beijing China 
32872888e72Speng liu - Sun Microsystems - Beijing China static int
xbuf_brk_done(struct buf * bp)32972888e72Speng liu - Sun Microsystems - Beijing China xbuf_brk_done(struct buf *bp)
33072888e72Speng liu - Sun Microsystems - Beijing China {
33172888e72Speng liu - Sun Microsystems - Beijing China 	struct xbuf_brk *brkp = (struct xbuf_brk *)bp->b_clone_private;
33272888e72Speng liu - Sun Microsystems - Beijing China 	struct buf *bp0 = brkp->bp0;
33372888e72Speng liu - Sun Microsystems - Beijing China 	int done;
33472888e72Speng liu - Sun Microsystems - Beijing China 
33572888e72Speng liu - Sun Microsystems - Beijing China 	mutex_enter(&brkp->mutex);
33672888e72Speng liu - Sun Microsystems - Beijing China 	if (bp->b_flags & B_ERROR && !(bp0->b_flags & B_ERROR)) {
33772888e72Speng liu - Sun Microsystems - Beijing China 		bp0->b_flags |= B_ERROR;
33872888e72Speng liu - Sun Microsystems - Beijing China 		bp0->b_error = bp->b_error;
33972888e72Speng liu - Sun Microsystems - Beijing China 	}
34072888e72Speng liu - Sun Microsystems - Beijing China 	if (bp->b_resid)
34172888e72Speng liu - Sun Microsystems - Beijing China 		bp0->b_resid = bp0->b_bcount;
34272888e72Speng liu - Sun Microsystems - Beijing China 
34372888e72Speng liu - Sun Microsystems - Beijing China 	freerbuf(bp);
34472888e72Speng liu - Sun Microsystems - Beijing China 	brkp->nbufs--;
34572888e72Speng liu - Sun Microsystems - Beijing China 
34672888e72Speng liu - Sun Microsystems - Beijing China 	done = (brkp->off == -1 && brkp->nbufs == 0);
34772888e72Speng liu - Sun Microsystems - Beijing China 	mutex_exit(&brkp->mutex);
34872888e72Speng liu - Sun Microsystems - Beijing China 
34972888e72Speng liu - Sun Microsystems - Beijing China 	/* All buf segments done */
35072888e72Speng liu - Sun Microsystems - Beijing China 	if (done) {
35172888e72Speng liu - Sun Microsystems - Beijing China 		mutex_destroy(&brkp->mutex);
35272888e72Speng liu - Sun Microsystems - Beijing China 		kmem_free(brkp, sizeof (struct xbuf_brk));
35372888e72Speng liu - Sun Microsystems - Beijing China 		biodone(bp0);
35472888e72Speng liu - Sun Microsystems - Beijing China 	}
35572888e72Speng liu - Sun Microsystems - Beijing China 	return (0);
3567c478bd9Sstevel@tonic-gate }
3577c478bd9Sstevel@tonic-gate 
3587c478bd9Sstevel@tonic-gate DDII void
ddi_xbuf_dispatch(ddi_xbuf_attr_t xap)3597c478bd9Sstevel@tonic-gate ddi_xbuf_dispatch(ddi_xbuf_attr_t xap)
3607c478bd9Sstevel@tonic-gate {
3617c478bd9Sstevel@tonic-gate 	mutex_enter(&xap->xa_mutex);
3627c478bd9Sstevel@tonic-gate 	if ((xap->xa_active_limit == 0) ||
3637c478bd9Sstevel@tonic-gate 	    (xap->xa_active_count <= xap->xa_active_lowater)) {
3647c478bd9Sstevel@tonic-gate 		xbuf_dispatch(xap);
3657c478bd9Sstevel@tonic-gate 	}
3667c478bd9Sstevel@tonic-gate 	mutex_exit(&xap->xa_mutex);
3677c478bd9Sstevel@tonic-gate }
3687c478bd9Sstevel@tonic-gate 
3697c478bd9Sstevel@tonic-gate 
3707c478bd9Sstevel@tonic-gate /*
3717c478bd9Sstevel@tonic-gate  * ISSUE: in this prototype we cannot really implement ddi_xbuf_get()
3727c478bd9Sstevel@tonic-gate  * unless we explicitly hide the xbuf pointer somewhere in the buf
3737c478bd9Sstevel@tonic-gate  * during allocation, and then rely on the driver never changing it.
3747c478bd9Sstevel@tonic-gate  * We can probably get away with using b_private for this for now,
3757c478bd9Sstevel@tonic-gate  * tho it really is kinda gnarly.....
3767c478bd9Sstevel@tonic-gate  */
3777c478bd9Sstevel@tonic-gate 
3787c478bd9Sstevel@tonic-gate /* ARGSUSED */
3797c478bd9Sstevel@tonic-gate DDII ddi_xbuf_t
ddi_xbuf_get(struct buf * bp,ddi_xbuf_attr_t xap)3807c478bd9Sstevel@tonic-gate ddi_xbuf_get(struct buf *bp, ddi_xbuf_attr_t xap)
3817c478bd9Sstevel@tonic-gate {
3827c478bd9Sstevel@tonic-gate 	return (bp->b_private);
3837c478bd9Sstevel@tonic-gate }
3847c478bd9Sstevel@tonic-gate 
3857c478bd9Sstevel@tonic-gate 
3867c478bd9Sstevel@tonic-gate /*
3877c478bd9Sstevel@tonic-gate  * Initiate IOs for bufs on the queue.  Called from kernel thread or taskq
3887c478bd9Sstevel@tonic-gate  * thread context. May execute concurrently for the same ddi_xbuf_attr_t.
3897c478bd9Sstevel@tonic-gate  */
3907c478bd9Sstevel@tonic-gate 
3917c478bd9Sstevel@tonic-gate static int
xbuf_iostart(ddi_xbuf_attr_t xap)3927c478bd9Sstevel@tonic-gate xbuf_iostart(ddi_xbuf_attr_t xap)
3937c478bd9Sstevel@tonic-gate {
3947c478bd9Sstevel@tonic-gate 	struct buf *bp;
3957c478bd9Sstevel@tonic-gate 	ddi_xbuf_t xp;
3967c478bd9Sstevel@tonic-gate 
3977c478bd9Sstevel@tonic-gate 	ASSERT(xap != NULL);
3987c478bd9Sstevel@tonic-gate 	ASSERT(!mutex_owned(&xap->xa_mutex));
3997c478bd9Sstevel@tonic-gate 	ASSERT(!mutex_owned(&xap->xa_reserve_mutex));
4007c478bd9Sstevel@tonic-gate 
4017c478bd9Sstevel@tonic-gate 	/*
4027c478bd9Sstevel@tonic-gate 	 * For each request on the queue, attempt to allocate the specified
4037c478bd9Sstevel@tonic-gate 	 * xbuf extension area, and call the driver's iostart() routine.
4047c478bd9Sstevel@tonic-gate 	 * We process as many requests on the queue as we can, until either
4057c478bd9Sstevel@tonic-gate 	 * (1) we run out of requests; or
4067c478bd9Sstevel@tonic-gate 	 * (2) we run out of resources; or
4077c478bd9Sstevel@tonic-gate 	 * (3) we reach the maximum limit for the given ddi_xbuf_attr_t.
4087c478bd9Sstevel@tonic-gate 	 */
4097c478bd9Sstevel@tonic-gate 	for (;;) {
4107c478bd9Sstevel@tonic-gate 		mutex_enter(&xap->xa_mutex);
4117c478bd9Sstevel@tonic-gate 
4127c478bd9Sstevel@tonic-gate 		if ((bp = xap->xa_headp) == NULL) {
4137c478bd9Sstevel@tonic-gate 			break;	/* queue empty */
4147c478bd9Sstevel@tonic-gate 		}
4157c478bd9Sstevel@tonic-gate 
4167c478bd9Sstevel@tonic-gate 		if ((xap->xa_active_limit != 0) &&
4177c478bd9Sstevel@tonic-gate 		    (xap->xa_active_count >= xap->xa_active_limit)) {
4187c478bd9Sstevel@tonic-gate 			break;	/* allocation limit reached */
4197c478bd9Sstevel@tonic-gate 		}
4207c478bd9Sstevel@tonic-gate 
4217c478bd9Sstevel@tonic-gate 		/*
4227c478bd9Sstevel@tonic-gate 		 * If the reserve_limit is non-zero then work with the
4237c478bd9Sstevel@tonic-gate 		 * reserve else always allocate a new struct.
4247c478bd9Sstevel@tonic-gate 		 */
4257c478bd9Sstevel@tonic-gate 		if (xap->xa_reserve_limit != 0) {
4267c478bd9Sstevel@tonic-gate 			/*
4277c478bd9Sstevel@tonic-gate 			 * Don't penalize EVERY I/O by always allocating a new
4287c478bd9Sstevel@tonic-gate 			 * struct. for the sake of maintaining and not touching
4297c478bd9Sstevel@tonic-gate 			 * a reserve for a pathalogical condition that may never
4307c478bd9Sstevel@tonic-gate 			 * happen. Use the reserve entries first, this uses it
4317c478bd9Sstevel@tonic-gate 			 * like a local pool rather than a reserve that goes
4327c478bd9Sstevel@tonic-gate 			 * untouched. Make sure it's re-populated whenever it
4337c478bd9Sstevel@tonic-gate 			 * gets fully depleted just in case it really is needed.
4347c478bd9Sstevel@tonic-gate 			 * This is safe because under the pathalogical
4357c478bd9Sstevel@tonic-gate 			 * condition, when the system runs out of memory such
4367c478bd9Sstevel@tonic-gate 			 * that the below allocs fail, the reserve will still
4377c478bd9Sstevel@tonic-gate 			 * be available whether the entries are saved away on
4387c478bd9Sstevel@tonic-gate 			 * the queue unused or in-transport somewhere. Thus
4397c478bd9Sstevel@tonic-gate 			 * progress can still continue, however slowly.
4407c478bd9Sstevel@tonic-gate 			 */
4417c478bd9Sstevel@tonic-gate 			mutex_enter(&xap->xa_reserve_mutex);
4427c478bd9Sstevel@tonic-gate 			if (xap->xa_reserve_count != 0) {
4437c478bd9Sstevel@tonic-gate 				ASSERT(xap->xa_reserve_headp != NULL);
4447c478bd9Sstevel@tonic-gate 				/* Grab an xbuf from the reserve */
4457c478bd9Sstevel@tonic-gate 				xp = xap->xa_reserve_headp;
4467c478bd9Sstevel@tonic-gate 				xap->xa_reserve_headp = *((void **)xp);
4477c478bd9Sstevel@tonic-gate 				ASSERT(xap->xa_reserve_count > 0);
4487c478bd9Sstevel@tonic-gate 				xap->xa_reserve_count--;
4497c478bd9Sstevel@tonic-gate 			} else {
4507c478bd9Sstevel@tonic-gate 				/*
4517c478bd9Sstevel@tonic-gate 				 * Either this is the first time through,
4527c478bd9Sstevel@tonic-gate 				 * or the reserve has been totally depleted.
4537c478bd9Sstevel@tonic-gate 				 * Re-populate the reserve (pool). Excess
4547c478bd9Sstevel@tonic-gate 				 * structs. get released in the done path.
4557c478bd9Sstevel@tonic-gate 				 */
4567c478bd9Sstevel@tonic-gate 				while (xap->xa_reserve_count <
4577c478bd9Sstevel@tonic-gate 				    xap->xa_reserve_limit) {
4587c478bd9Sstevel@tonic-gate 					xp = kmem_alloc(xap->xa_allocsize,
4597c478bd9Sstevel@tonic-gate 					    KM_NOSLEEP);
4607c478bd9Sstevel@tonic-gate 					if (xp == NULL) {
4617c478bd9Sstevel@tonic-gate 						break;
4627c478bd9Sstevel@tonic-gate 					}
4637c478bd9Sstevel@tonic-gate 					*((void **)xp) = xap->xa_reserve_headp;
4647c478bd9Sstevel@tonic-gate 					xap->xa_reserve_headp = xp;
4657c478bd9Sstevel@tonic-gate 					xap->xa_reserve_count++;
4667c478bd9Sstevel@tonic-gate 				}
4677c478bd9Sstevel@tonic-gate 				/* And one more to use right now. */
4687c478bd9Sstevel@tonic-gate 				xp = kmem_alloc(xap->xa_allocsize, KM_NOSLEEP);
4697c478bd9Sstevel@tonic-gate 			}
4707c478bd9Sstevel@tonic-gate 			mutex_exit(&xap->xa_reserve_mutex);
4717c478bd9Sstevel@tonic-gate 		} else {
4727c478bd9Sstevel@tonic-gate 			/*
4737c478bd9Sstevel@tonic-gate 			 * Try to alloc a new xbuf struct. If this fails just
4747c478bd9Sstevel@tonic-gate 			 * exit for now. We'll get back here again either upon
4757c478bd9Sstevel@tonic-gate 			 * cmd completion or via the timer handler.
4767c478bd9Sstevel@tonic-gate 			 * Question: what if the allocation attempt for the very
4777c478bd9Sstevel@tonic-gate 			 * first cmd. fails? There are no outstanding cmds so
4787c478bd9Sstevel@tonic-gate 			 * how do we get back here?
4797c478bd9Sstevel@tonic-gate 			 * Should look at un_ncmds_in_transport, if it's zero
4807c478bd9Sstevel@tonic-gate 			 * then schedule xbuf_restart_callback via the timer.
4817c478bd9Sstevel@tonic-gate 			 * Athough that breaks the architecture by bringing
4827c478bd9Sstevel@tonic-gate 			 * softstate data into this code.
4837c478bd9Sstevel@tonic-gate 			 */
4847c478bd9Sstevel@tonic-gate 			xp = kmem_alloc(xap->xa_allocsize, KM_NOSLEEP);
4857c478bd9Sstevel@tonic-gate 		}
4867c478bd9Sstevel@tonic-gate 		if (xp == NULL) {
4877c478bd9Sstevel@tonic-gate 			break; /* Can't process a cmd. right now. */
4887c478bd9Sstevel@tonic-gate 		}
4897c478bd9Sstevel@tonic-gate 
4907c478bd9Sstevel@tonic-gate 		/*
4917c478bd9Sstevel@tonic-gate 		 * Always run the counter. It's used/needed when xa_active_limit
4927c478bd9Sstevel@tonic-gate 		 * is non-zero which is the typical (and right now only) case.
4937c478bd9Sstevel@tonic-gate 		 */
4947c478bd9Sstevel@tonic-gate 		xap->xa_active_count++;
4957c478bd9Sstevel@tonic-gate 
49672888e72Speng liu - Sun Microsystems - Beijing China 		if (bp->b_private) {
49772888e72Speng liu - Sun Microsystems - Beijing China 			struct xbuf_brk *brkp = bp->b_private;
49872888e72Speng liu - Sun Microsystems - Beijing China 			struct buf *bp0 = bp;
49972888e72Speng liu - Sun Microsystems - Beijing China 
50072888e72Speng liu - Sun Microsystems - Beijing China 			brkp->active++;
50172888e72Speng liu - Sun Microsystems - Beijing China 
50272888e72Speng liu - Sun Microsystems - Beijing China 			mutex_enter(&brkp->mutex);
50372888e72Speng liu - Sun Microsystems - Beijing China 			brkp->nbufs++;
50472888e72Speng liu - Sun Microsystems - Beijing China 			mutex_exit(&brkp->mutex);
50572888e72Speng liu - Sun Microsystems - Beijing China 
50672888e72Speng liu - Sun Microsystems - Beijing China 			if (brkp->noff < bp0->b_bcount) {
50772888e72Speng liu - Sun Microsystems - Beijing China 				bp = bioclone(bp0, brkp->off, brkp->brksize,
50872888e72Speng liu - Sun Microsystems - Beijing China 				    bp0->b_edev, brkp->blkno, xbuf_brk_done,
50972888e72Speng liu - Sun Microsystems - Beijing China 				    NULL, KM_SLEEP);
51072888e72Speng liu - Sun Microsystems - Beijing China 
51172888e72Speng liu - Sun Microsystems - Beijing China 				/* update xfer position */
51272888e72Speng liu - Sun Microsystems - Beijing China 				brkp->off = brkp->noff;
51372888e72Speng liu - Sun Microsystems - Beijing China 				brkp->noff += brkp->brksize;
51472888e72Speng liu - Sun Microsystems - Beijing China 				brkp->blkno += brkp->brkblk;
51572888e72Speng liu - Sun Microsystems - Beijing China 			} else {
51672888e72Speng liu - Sun Microsystems - Beijing China 				bp = bioclone(bp0, brkp->off,
51772888e72Speng liu - Sun Microsystems - Beijing China 				    bp0->b_bcount - brkp->off, bp0->b_edev,
51872888e72Speng liu - Sun Microsystems - Beijing China 				    brkp->blkno, xbuf_brk_done, NULL, KM_SLEEP);
51972888e72Speng liu - Sun Microsystems - Beijing China 
52072888e72Speng liu - Sun Microsystems - Beijing China 				/* unlink the buf from the list */
52172888e72Speng liu - Sun Microsystems - Beijing China 				xap->xa_headp = bp0->av_forw;
52272888e72Speng liu - Sun Microsystems - Beijing China 				bp0->av_forw = NULL;
52372888e72Speng liu - Sun Microsystems - Beijing China 			}
52472888e72Speng liu - Sun Microsystems - Beijing China 			bp->b_clone_private = (struct buf *)brkp;
52572888e72Speng liu - Sun Microsystems - Beijing China 		} else {
5267c478bd9Sstevel@tonic-gate 			/* unlink the buf from the list */
5277c478bd9Sstevel@tonic-gate 			xap->xa_headp = bp->av_forw;
5287c478bd9Sstevel@tonic-gate 			bp->av_forw = NULL;
52972888e72Speng liu - Sun Microsystems - Beijing China 		}
5307c478bd9Sstevel@tonic-gate 
5317c478bd9Sstevel@tonic-gate 		/*
5327c478bd9Sstevel@tonic-gate 		 * Hack needed in the prototype so ddi_xbuf_get() will work.
5337c478bd9Sstevel@tonic-gate 		 * Here we can rely on the sd code not changing the value in
5347c478bd9Sstevel@tonic-gate 		 * b_private (in fact it wants it there). See ddi_get_xbuf()
5357c478bd9Sstevel@tonic-gate 		 */
5367c478bd9Sstevel@tonic-gate 		bp->b_private = xp;
5377c478bd9Sstevel@tonic-gate 
5387c478bd9Sstevel@tonic-gate 		/* call the driver's iostart routine */
5397c478bd9Sstevel@tonic-gate 		mutex_exit(&xap->xa_mutex);
5407c478bd9Sstevel@tonic-gate 		(*(xap->xa_strategy))(bp, xp, xap->xa_attr_arg);
5417c478bd9Sstevel@tonic-gate 	}
5427c478bd9Sstevel@tonic-gate 
5437c478bd9Sstevel@tonic-gate 	ASSERT(xap->xa_pending > 0);
5447c478bd9Sstevel@tonic-gate 	xap->xa_pending--;
5457c478bd9Sstevel@tonic-gate 	mutex_exit(&xap->xa_mutex);
5467c478bd9Sstevel@tonic-gate 	return (0);
5477c478bd9Sstevel@tonic-gate }
5487c478bd9Sstevel@tonic-gate 
/*
 * Task queue entry point handed to taskq_dispatch() by xbuf_dispatch().
 * It adapts xbuf_iostart(), which returns a value, to a callback that
 * returns void; the return value is deliberately discarded.
 */
static void
xbuf_taskq_cb(void *arg)
{
	(void) xbuf_iostart(arg);
}
5547c478bd9Sstevel@tonic-gate 
5557c478bd9Sstevel@tonic-gate /*
5567c478bd9Sstevel@tonic-gate  * Re-start IO processing if there is anything on the queue, AND if the
5577c478bd9Sstevel@tonic-gate  * restart function is not already running/pending for this ddi_xbuf_attr_t
5587c478bd9Sstevel@tonic-gate  */
5597c478bd9Sstevel@tonic-gate static void
xbuf_dispatch(ddi_xbuf_attr_t xap)5607c478bd9Sstevel@tonic-gate xbuf_dispatch(ddi_xbuf_attr_t xap)
5617c478bd9Sstevel@tonic-gate {
5627c478bd9Sstevel@tonic-gate 	ASSERT(xap != NULL);
5637c478bd9Sstevel@tonic-gate 	ASSERT(xap->xa_tq != NULL);
5647c478bd9Sstevel@tonic-gate 	ASSERT(mutex_owned(&xap->xa_mutex));
5657c478bd9Sstevel@tonic-gate 
5667c478bd9Sstevel@tonic-gate 	if ((xap->xa_headp != NULL) && (xap->xa_timeid == NULL) &&
5677c478bd9Sstevel@tonic-gate 	    (xap->xa_pending == 0)) {
5687c478bd9Sstevel@tonic-gate 		/*
5697c478bd9Sstevel@tonic-gate 		 * First try to see if we can dispatch the restart function
5707c478bd9Sstevel@tonic-gate 		 * immediately, in a taskq thread.  If this fails, then
5717c478bd9Sstevel@tonic-gate 		 * schedule a timeout(9F) callback to try again later.
5727c478bd9Sstevel@tonic-gate 		 */
5737c478bd9Sstevel@tonic-gate 		if (taskq_dispatch(xap->xa_tq,
574*e3f3c0e6SToomas Soome 		    xbuf_taskq_cb, xap, KM_NOSLEEP) == TASKQID_INVALID) {
5757c478bd9Sstevel@tonic-gate 			/*
5767c478bd9Sstevel@tonic-gate 			 * Unable to enqueue the request for the taskq thread,
5777c478bd9Sstevel@tonic-gate 			 * try again later.  Note that this will keep re-trying
5787c478bd9Sstevel@tonic-gate 			 * until taskq_dispatch() succeeds.
5797c478bd9Sstevel@tonic-gate 			 */
5807c478bd9Sstevel@tonic-gate 			xap->xa_timeid = timeout(xbuf_restart_callback, xap,
5817c478bd9Sstevel@tonic-gate 			    XBUF_DISPATCH_DELAY);
5827c478bd9Sstevel@tonic-gate 		} else {
5837c478bd9Sstevel@tonic-gate 			/*
5847c478bd9Sstevel@tonic-gate 			 * This indicates that xbuf_iostart() will soon be
5857c478bd9Sstevel@tonic-gate 			 * run for this ddi_xbuf_attr_t, and we do not need to
5867c478bd9Sstevel@tonic-gate 			 * schedule another invocation via timeout/taskq
5877c478bd9Sstevel@tonic-gate 			 */
5887c478bd9Sstevel@tonic-gate 			xap->xa_pending++;
5897c478bd9Sstevel@tonic-gate 		}
5907c478bd9Sstevel@tonic-gate 	}
5917c478bd9Sstevel@tonic-gate }
5927c478bd9Sstevel@tonic-gate 
5937c478bd9Sstevel@tonic-gate /* timeout(9F) callback routine for xbuf restart mechanism. */
5947c478bd9Sstevel@tonic-gate static void
xbuf_restart_callback(void * arg)5957c478bd9Sstevel@tonic-gate xbuf_restart_callback(void *arg)
5967c478bd9Sstevel@tonic-gate {
5977c478bd9Sstevel@tonic-gate 	ddi_xbuf_attr_t	xap = arg;
5987c478bd9Sstevel@tonic-gate 
5997c478bd9Sstevel@tonic-gate 	ASSERT(xap != NULL);
6007c478bd9Sstevel@tonic-gate 	ASSERT(xap->xa_tq != NULL);
6017c478bd9Sstevel@tonic-gate 	ASSERT(!mutex_owned(&xap->xa_mutex));
6027c478bd9Sstevel@tonic-gate 
6037c478bd9Sstevel@tonic-gate 	mutex_enter(&xap->xa_mutex);
6047c478bd9Sstevel@tonic-gate 	xap->xa_timeid = NULL;
6057c478bd9Sstevel@tonic-gate 	xbuf_dispatch(xap);
6067c478bd9Sstevel@tonic-gate 	mutex_exit(&xap->xa_mutex);
6077c478bd9Sstevel@tonic-gate }
6087c478bd9Sstevel@tonic-gate 
6097c478bd9Sstevel@tonic-gate 
6107c478bd9Sstevel@tonic-gate DDII void
ddi_xbuf_flushq(ddi_xbuf_attr_t xap,int (* funcp)(struct buf *))6117c478bd9Sstevel@tonic-gate ddi_xbuf_flushq(ddi_xbuf_attr_t xap, int (*funcp)(struct buf *))
6127c478bd9Sstevel@tonic-gate {
6137c478bd9Sstevel@tonic-gate 	struct buf *bp;
6147c478bd9Sstevel@tonic-gate 	struct buf *next_bp;
6157c478bd9Sstevel@tonic-gate 	struct buf *prev_bp = NULL;
6167c478bd9Sstevel@tonic-gate 
6177c478bd9Sstevel@tonic-gate 	ASSERT(xap != NULL);
6187c478bd9Sstevel@tonic-gate 	ASSERT(xap->xa_tq != NULL);
6197c478bd9Sstevel@tonic-gate 	ASSERT(!mutex_owned(&xap->xa_mutex));
6207c478bd9Sstevel@tonic-gate 
6217c478bd9Sstevel@tonic-gate 	mutex_enter(&xap->xa_mutex);
6227c478bd9Sstevel@tonic-gate 
6237c478bd9Sstevel@tonic-gate 	for (bp = xap->xa_headp; bp != NULL; bp = next_bp) {
6247c478bd9Sstevel@tonic-gate 
6257c478bd9Sstevel@tonic-gate 		next_bp = bp->av_forw;	/* Save for next iteration */
6267c478bd9Sstevel@tonic-gate 
6277c478bd9Sstevel@tonic-gate 		/*
6287c478bd9Sstevel@tonic-gate 		 * If the user-supplied function is non-NULL and returns
6297c478bd9Sstevel@tonic-gate 		 * FALSE, then just leave the current bp on the queue.
6307c478bd9Sstevel@tonic-gate 		 */
6317c478bd9Sstevel@tonic-gate 		if ((funcp != NULL) && (!(*funcp)(bp))) {
6327c478bd9Sstevel@tonic-gate 			prev_bp = bp;
6337c478bd9Sstevel@tonic-gate 			continue;
6347c478bd9Sstevel@tonic-gate 		}
6357c478bd9Sstevel@tonic-gate 
6367c478bd9Sstevel@tonic-gate 		/* de-queue the bp */
6377c478bd9Sstevel@tonic-gate 		if (bp == xap->xa_headp) {
6387c478bd9Sstevel@tonic-gate 			xap->xa_headp = next_bp;
6397c478bd9Sstevel@tonic-gate 			if (xap->xa_headp == NULL) {
6407c478bd9Sstevel@tonic-gate 				xap->xa_tailp = NULL;
6417c478bd9Sstevel@tonic-gate 			}
6427c478bd9Sstevel@tonic-gate 		} else {
6437c478bd9Sstevel@tonic-gate 			ASSERT(xap->xa_headp != NULL);
6447c478bd9Sstevel@tonic-gate 			ASSERT(prev_bp != NULL);
6457c478bd9Sstevel@tonic-gate 			if (bp == xap->xa_tailp) {
6467c478bd9Sstevel@tonic-gate 				ASSERT(next_bp == NULL);
6477c478bd9Sstevel@tonic-gate 				xap->xa_tailp = prev_bp;
6487c478bd9Sstevel@tonic-gate 			}
6497c478bd9Sstevel@tonic-gate 			prev_bp->av_forw = next_bp;
6507c478bd9Sstevel@tonic-gate 		}
6517c478bd9Sstevel@tonic-gate 		bp->av_forw = NULL;
6527c478bd9Sstevel@tonic-gate 
6537c478bd9Sstevel@tonic-gate 		/* Add the bp to the flush queue */
6547c478bd9Sstevel@tonic-gate 		if (xap->xa_flush_headp == NULL) {
6557c478bd9Sstevel@tonic-gate 			ASSERT(xap->xa_flush_tailp == NULL);
6567c478bd9Sstevel@tonic-gate 			xap->xa_flush_headp = xap->xa_flush_tailp = bp;
6577c478bd9Sstevel@tonic-gate 		} else {
6587c478bd9Sstevel@tonic-gate 			ASSERT(xap->xa_flush_tailp != NULL);
6597c478bd9Sstevel@tonic-gate 			xap->xa_flush_tailp->av_forw = bp;
6607c478bd9Sstevel@tonic-gate 			xap->xa_flush_tailp = bp;
6617c478bd9Sstevel@tonic-gate 		}
6627c478bd9Sstevel@tonic-gate 	}
6637c478bd9Sstevel@tonic-gate 
6647c478bd9Sstevel@tonic-gate 	while ((bp = xap->xa_flush_headp) != NULL) {
6657c478bd9Sstevel@tonic-gate 		xap->xa_flush_headp = bp->av_forw;
6667c478bd9Sstevel@tonic-gate 		if (xap->xa_flush_headp == NULL) {
6677c478bd9Sstevel@tonic-gate 			xap->xa_flush_tailp = NULL;
6687c478bd9Sstevel@tonic-gate 		}
6697c478bd9Sstevel@tonic-gate 		mutex_exit(&xap->xa_mutex);
6707c478bd9Sstevel@tonic-gate 		bioerror(bp, EIO);
6717c478bd9Sstevel@tonic-gate 		bp->b_resid = bp->b_bcount;
6727c478bd9Sstevel@tonic-gate 		biodone(bp);
6737c478bd9Sstevel@tonic-gate 		mutex_enter(&xap->xa_mutex);
6747c478bd9Sstevel@tonic-gate 	}
6757c478bd9Sstevel@tonic-gate 
6767c478bd9Sstevel@tonic-gate 	mutex_exit(&xap->xa_mutex);
6777c478bd9Sstevel@tonic-gate }
678