1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/note.h>
27 #include <sys/sysmacros.h>
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kmem.h>
32 #include <sys/cmn_err.h>
33 #include <sys/debug.h>
34 #include <sys/ddi.h>
35 #include <sys/sunndi.h>
36 #include <sys/ndi_impldefs.h>	/* include prototypes */
37 
38 /*
39  * Interrupt Resource Management (IRM).
40  */
41 
#define	DDI_IRM_BALANCE_DELAY	(60)	/* In seconds */

/*
 * DDI_IRM_HAS_CB(c)
 *
 *	True when the callback registration 'c' is non-NULL and has
 *	DDI_CB_FLAG_INTR set in its cb_flags.  The argument is fully
 *	parenthesized so that any pointer expression can be passed, but
 *	it is evaluated more than once: do not pass an expression with
 *	side effects.
 */
#define	DDI_IRM_HAS_CB(c)	((c) && ((c)->cb_flags & DDI_CB_FLAG_INTR))

/*
 * DDI_IRM_IS_REDUCIBLE(r)
 *
 *	True when the request 'r' is an MSI-X request flagged with
 *	DDI_IRM_FLAG_CALLBACK, or is still flagged DDI_IRM_FLAG_NEW.
 *	The argument is evaluated more than once.
 */
#define	DDI_IRM_IS_REDUCIBLE(r)	\
	((((r)->ireq_flags & DDI_IRM_FLAG_CALLBACK) && \
	((r)->ireq_type == DDI_INTR_TYPE_MSIX)) || \
	((r)->ireq_flags & DDI_IRM_FLAG_NEW))
49 
/* Passed as the priority argument when balancing threads are created */
extern pri_t	minclsyspri;

/* Global policies */
/*
 * irm_enable:		when zero, irm_init() and i_ddi_irm_poststartup()
 *			do nothing and ndi_irm_create() fails.
 * irm_active:		set to B_TRUE by i_ddi_irm_poststartup(); pools
 *			created afterwards get a balancing thread at once.
 * irm_default_policy:	balancing policy assigned to each newly created
 *			pool; reset to DDI_IRM_POLICY_LARGE by irm_init()
 *			if it is not a valid policy.
 * irm_balance_delay:	delay, in seconds, that a balancing thread waits
 *			after being signaled before it balances the pool.
 */
int		irm_enable = 1;
boolean_t	irm_active = B_FALSE;
int		irm_default_policy = DDI_IRM_POLICY_LARGE;
uint_t		irm_balance_delay = DDI_IRM_BALANCE_DELAY;

/* Global list of interrupt pools */
/* Both are initialized by irm_init(); the lock guards the list */
kmutex_t	irm_pools_lock;
list_t		irm_pools_list;

/* Global debug tunables */
/*
 * irm_debug_policy:	if it is a valid policy, it temporarily replaces a
 *			pool's policy for the duration of a balance.
 * irm_debug_size:	if larger than a pool's ipool_minno, it temporarily
 *			replaces the pool's total size for the duration of
 *			a balance; when nonzero, i_ddi_irm_enqueue() never
 *			treats a pool as already balanced.
 */
#ifdef	DEBUG
int		irm_debug_policy = 0;
uint_t		irm_debug_size = 0;
#endif	/* DEBUG */

/* Prototypes for the file-local balancing, reduction and helper routines */
static void	irm_balance_thread(ddi_irm_pool_t *);
static void	i_ddi_irm_balance(ddi_irm_pool_t *);
static void	i_ddi_irm_enqueue(ddi_irm_pool_t *, boolean_t);
static void	i_ddi_irm_reduce(ddi_irm_pool_t *pool);
static int	i_ddi_irm_reduce_large(ddi_irm_pool_t *, int);
static void	i_ddi_irm_reduce_large_resort(ddi_irm_pool_t *);
static int	i_ddi_irm_reduce_even(ddi_irm_pool_t *, int);
static void	i_ddi_irm_reduce_new(ddi_irm_pool_t *, int);
static void	i_ddi_irm_insertion_sort(list_t *, ddi_irm_req_t *);
static int	i_ddi_irm_notify(ddi_irm_pool_t *, ddi_irm_req_t *);
78 
79 /*
80  * OS Initialization Routines
81  */
82 
83 /*
84  * irm_init()
85  *
86  *	Initialize IRM subsystem before any drivers are attached.
87  */
88 void
89 irm_init(void)
90 {
91 	/* Do nothing if IRM is disabled */
92 	if (!irm_enable)
93 		return;
94 
95 	/* Verify that the default balancing policy is valid */
96 	if (!DDI_IRM_POLICY_VALID(irm_default_policy))
97 		irm_default_policy = DDI_IRM_POLICY_LARGE;
98 
99 	/* Initialize the global list of interrupt pools */
100 	mutex_init(&irm_pools_lock, NULL, MUTEX_DRIVER, NULL);
101 	list_create(&irm_pools_list, sizeof (ddi_irm_pool_t),
102 	    offsetof(ddi_irm_pool_t, ipool_link));
103 }
104 
105 /*
106  * i_ddi_irm_poststartup()
107  *
108  *	IRM is not activated until after the IO subsystem is initialized.
109  *	When activated, per-pool balancing threads are spawned and a flag
110  *	is set so that all future pools will be activated when created.
111  *
112  *	NOTE: the global variable 'irm_enable' disables IRM if zero.
113  */
114 void
115 i_ddi_irm_poststartup(void)
116 {
117 	ddi_irm_pool_t	*pool_p;
118 
119 	/* Do nothing if IRM is disabled */
120 	if (!irm_enable)
121 		return;
122 
123 	/* Lock the global list */
124 	mutex_enter(&irm_pools_lock);
125 
126 	/* Activate all defined pools */
127 	for (pool_p = list_head(&irm_pools_list); pool_p;
128 	    pool_p = list_next(&irm_pools_list, pool_p))
129 		pool_p->ipool_thread = thread_create(NULL, 0,
130 		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);
131 
132 	/* Set future pools to be active */
133 	irm_active = B_TRUE;
134 
135 	/* Unlock the global list */
136 	mutex_exit(&irm_pools_lock);
137 }
138 
139 /*
140  * NDI interfaces for creating/destroying IRM pools.
141  */
142 
143 /*
144  * ndi_irm_create()
145  *
146  *	Nexus interface to create an IRM pool.  Create the new
147  *	pool and add it to the global list of interrupt pools.
148  */
149 int
150 ndi_irm_create(dev_info_t *dip, ddi_irm_params_t *paramsp,
151     ddi_irm_pool_t **pool_retp)
152 {
153 	ddi_irm_pool_t	*pool_p;
154 
155 	ASSERT(dip != NULL);
156 	ASSERT(paramsp != NULL);
157 	ASSERT(pool_retp != NULL);
158 	ASSERT(paramsp->iparams_total >= 1);
159 	ASSERT(paramsp->iparams_types != 0);
160 
161 	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_create: dip %p\n", (void *)dip));
162 
163 	/* Check if IRM is enabled */
164 	if (!irm_enable)
165 		return (NDI_FAILURE);
166 
167 	/* Validate parameters */
168 	if ((dip == NULL) || (paramsp == NULL) || (pool_retp == NULL) ||
169 	    (paramsp->iparams_total < 1) || (paramsp->iparams_types == 0))
170 		return (NDI_FAILURE);
171 
172 	/* Allocate and initialize the pool */
173 	pool_p = kmem_zalloc(sizeof (ddi_irm_pool_t), KM_SLEEP);
174 	pool_p->ipool_owner = dip;
175 	pool_p->ipool_policy = irm_default_policy;
176 	pool_p->ipool_types = paramsp->iparams_types;
177 	pool_p->ipool_totsz = paramsp->iparams_total;
178 	pool_p->ipool_defsz = MIN(DDI_MAX_MSIX_ALLOC, MAX(DDI_MIN_MSIX_ALLOC,
179 	    paramsp->iparams_total / DDI_MSIX_ALLOC_DIVIDER));
180 	list_create(&pool_p->ipool_req_list, sizeof (ddi_irm_req_t),
181 	    offsetof(ddi_irm_req_t, ireq_link));
182 	list_create(&pool_p->ipool_scratch_list, sizeof (ddi_irm_req_t),
183 	    offsetof(ddi_irm_req_t, ireq_scratch_link));
184 	cv_init(&pool_p->ipool_cv, NULL, CV_DRIVER, NULL);
185 	mutex_init(&pool_p->ipool_lock, NULL, MUTEX_DRIVER, NULL);
186 	mutex_init(&pool_p->ipool_navail_lock, NULL, MUTEX_DRIVER, NULL);
187 
188 	/* Add to global list of pools */
189 	mutex_enter(&irm_pools_lock);
190 	list_insert_tail(&irm_pools_list, pool_p);
191 	mutex_exit(&irm_pools_lock);
192 
193 	/* If IRM is active, then activate the pool */
194 	if (irm_active)
195 		pool_p->ipool_thread = thread_create(NULL, 0,
196 		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);
197 
198 	*pool_retp = pool_p;
199 	return (NDI_SUCCESS);
200 }
201 
202 /*
203  * ndi_irm_destroy()
204  *
205  *	Nexus interface to destroy an IRM pool.  Destroy the pool
206  *	and remove it from the global list of interrupt pools.
207  */
208 int
209 ndi_irm_destroy(ddi_irm_pool_t *pool_p)
210 {
211 	ASSERT(pool_p != NULL);
212 	ASSERT(pool_p->ipool_resno == 0);
213 
214 	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_destroy: pool_p %p\n",
215 	    (void *)pool_p));
216 
217 	/* Validate parameters */
218 	if (pool_p == NULL)
219 		return (NDI_FAILURE);
220 
221 	/* Validate that pool is empty */
222 	if (pool_p->ipool_resno != 0)
223 		return (NDI_BUSY);
224 
225 	/* Remove the pool from the global list */
226 	mutex_enter(&irm_pools_lock);
227 	list_remove(&irm_pools_list, pool_p);
228 	mutex_exit(&irm_pools_lock);
229 
230 	/* Terminate the balancing thread */
231 	mutex_enter(&pool_p->ipool_lock);
232 	if (pool_p->ipool_thread &&
233 	    (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) {
234 		pool_p->ipool_flags |= DDI_IRM_FLAG_EXIT;
235 		cv_signal(&pool_p->ipool_cv);
236 		mutex_exit(&pool_p->ipool_lock);
237 		thread_join(pool_p->ipool_thread->t_did);
238 	} else
239 		mutex_exit(&pool_p->ipool_lock);
240 
241 	/* Destroy the pool */
242 	cv_destroy(&pool_p->ipool_cv);
243 	mutex_destroy(&pool_p->ipool_lock);
244 	mutex_destroy(&pool_p->ipool_navail_lock);
245 	list_destroy(&pool_p->ipool_req_list);
246 	list_destroy(&pool_p->ipool_scratch_list);
247 	kmem_free(pool_p, sizeof (ddi_irm_pool_t));
248 
249 	return (NDI_SUCCESS);
250 }
251 
252 /*
253  * Insert/Modify/Remove Interrupt Requests
254  */
255 
256 /*
257  * i_ddi_irm_insert()
258  *
259  *	Insert a new request into an interrupt pool, and balance the pool.
260  */
261 int
262 i_ddi_irm_insert(dev_info_t *dip, int type, int count)
263 {
264 	ddi_cb_t	*cb_p;
265 	ddi_irm_req_t	*req_p;
266 	devinfo_intr_t	*intr_p;
267 	ddi_irm_pool_t	*pool_p;
268 	uint_t		nreq, nmin, npartial;
269 	boolean_t	irm_flag = B_FALSE;
270 
271 	ASSERT(dip != NULL);
272 	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));
273 	ASSERT(count > 0);
274 
275 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: dip %p type %d count %d\n",
276 	    (void *)dip, type, count));
277 
278 	/* Validate parameters */
279 	if ((dip == NULL) || (count < 1) || !DDI_INTR_TYPE_FLAG_VALID(type)) {
280 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: invalid args\n"));
281 		return (DDI_EINVAL);
282 	}
283 
284 	/* Check for an existing request */
285 	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
286 	    (intr_p->devi_irm_req_p != NULL))
287 		return (DDI_SUCCESS);
288 
289 	/* Check for IRM support from the system */
290 	if ((pool_p = i_ddi_intr_get_pool(dip, type)) == NULL) {
291 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: not supported\n"));
292 		return (DDI_ENOTSUP);
293 	}
294 
295 	/* Check for IRM support from the driver */
296 	if (((cb_p = DEVI(dip)->devi_cb_p) != NULL) && DDI_IRM_HAS_CB(cb_p) &&
297 	    (type == DDI_INTR_TYPE_MSIX))
298 		irm_flag = B_TRUE;
299 
300 	/* Determine request size */
301 	nreq = (irm_flag) ? count : i_ddi_intr_get_current_navail(dip, type);
302 	nmin = (irm_flag) ? 1 : nreq;
303 	npartial = MIN(nreq, pool_p->ipool_defsz);
304 
305 	/* Allocate and initialize the request */
306 	req_p = kmem_zalloc(sizeof (ddi_irm_req_t), KM_SLEEP);
307 	req_p->ireq_type = type;
308 	req_p->ireq_dip = dip;
309 	req_p->ireq_pool_p = pool_p;
310 	req_p->ireq_nreq = nreq;
311 	req_p->ireq_flags = DDI_IRM_FLAG_NEW;
312 	if (DDI_IRM_HAS_CB(cb_p))
313 		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;
314 
315 	/* Lock the pool */
316 	mutex_enter(&pool_p->ipool_lock);
317 
318 	/* Check for minimal fit before inserting */
319 	if ((pool_p->ipool_minno + nmin) > pool_p->ipool_totsz) {
320 		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
321 		    ddi_driver_name(dip), ddi_get_instance(dip));
322 		mutex_exit(&pool_p->ipool_lock);
323 		kmem_free(req_p, sizeof (ddi_irm_req_t));
324 		return (DDI_EAGAIN);
325 	}
326 
327 	/* Insert the request into the pool */
328 	pool_p->ipool_reqno += nreq;
329 	pool_p->ipool_minno += nmin;
330 	i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
331 
332 	/*
333 	 * Try to fulfill the request.
334 	 *
335 	 * If all the interrupts are available, and either the request
336 	 * is static or the pool is active, then just take them directly.
337 	 *
338 	 * If only some of the interrupts are available, and the request
339 	 * can receive future callbacks, then take some now but queue the
340 	 * pool to be rebalanced later.
341 	 *
342 	 * Otherwise, immediately rebalance the pool and wait.
343 	 */
344 	if ((!irm_flag || (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) &&
345 	    ((pool_p->ipool_resno + nreq) <= pool_p->ipool_totsz)) {
346 
347 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
348 		    "request completely fulfilled.\n"));
349 		pool_p->ipool_resno += nreq;
350 		req_p->ireq_navail = nreq;
351 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
352 
353 	} else if (irm_flag &&
354 	    ((pool_p->ipool_resno + npartial) <= pool_p->ipool_totsz)) {
355 
356 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
357 		    "request partially fulfilled.\n"));
358 		pool_p->ipool_resno += npartial;
359 		req_p->ireq_navail = npartial;
360 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
361 		i_ddi_irm_enqueue(pool_p, B_FALSE);
362 
363 	} else {
364 
365 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
366 		    "request needs immediate rebalance.\n"));
367 		i_ddi_irm_enqueue(pool_p, B_TRUE);
368 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
369 	}
370 
371 	/* Fail if the request cannot be fulfilled at all */
372 	if (req_p->ireq_navail == 0) {
373 		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
374 		    ddi_driver_name(dip), ddi_get_instance(dip));
375 		mutex_exit(&pool_p->ipool_lock);
376 		pool_p->ipool_reqno -= nreq;
377 		pool_p->ipool_minno -= nmin;
378 		list_remove(&pool_p->ipool_req_list, req_p);
379 		kmem_free(req_p, sizeof (ddi_irm_req_t));
380 		return (DDI_EAGAIN);
381 	}
382 
383 	/* Unlock the pool */
384 	mutex_exit(&pool_p->ipool_lock);
385 
386 	intr_p->devi_irm_req_p = req_p;
387 	return (DDI_SUCCESS);
388 }
389 
390 /*
391  * i_ddi_irm_modify()
392  *
393  *	Modify an existing request in an interrupt pool, and balance the pool.
394  */
395 int
396 i_ddi_irm_modify(dev_info_t *dip, int nreq)
397 {
398 	devinfo_intr_t	*intr_p;
399 	ddi_irm_req_t	*req_p;
400 	ddi_irm_pool_t	*pool_p;
401 
402 	ASSERT(dip != NULL);
403 
404 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: dip %p nreq %d\n",
405 	    (void *)dip, nreq));
406 
407 	/* Validate parameters */
408 	if ((dip == NULL) || (nreq < 1)) {
409 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
410 		return (DDI_EINVAL);
411 	}
412 
413 	/* Check that the operation is supported */
414 	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
415 	    !(req_p = intr_p->devi_irm_req_p) ||
416 	    !DDI_IRM_IS_REDUCIBLE(req_p)) {
417 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: not supported\n"));
418 		return (DDI_ENOTSUP);
419 	}
420 
421 	/* Validate request size is not too large */
422 	if (nreq > intr_p->devi_intr_sup_nintrs) {
423 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
424 		return (DDI_EINVAL);
425 	}
426 
427 	/*
428 	 * Modify request, but only if new size is different.
429 	 */
430 	if (nreq != req_p->ireq_nreq) {
431 
432 		/* Lock the pool */
433 		pool_p = req_p->ireq_pool_p;
434 		mutex_enter(&pool_p->ipool_lock);
435 
436 		/* Update pool and request */
437 		pool_p->ipool_reqno -= req_p->ireq_nreq;
438 		pool_p->ipool_reqno += nreq;
439 		req_p->ireq_nreq = nreq;
440 
441 		/* Re-sort request in the pool */
442 		list_remove(&pool_p->ipool_req_list, req_p);
443 		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
444 
445 		/* Queue pool to be rebalanced */
446 		i_ddi_irm_enqueue(pool_p, B_FALSE);
447 
448 		/* Unlock the pool */
449 		mutex_exit(&pool_p->ipool_lock);
450 	}
451 
452 	return (DDI_SUCCESS);
453 }
454 
455 /*
456  * i_ddi_irm_remove()
457  *
458  *	Remove a request from an interrupt pool, and balance the pool.
459  */
460 int
461 i_ddi_irm_remove(dev_info_t *dip)
462 {
463 	devinfo_intr_t	*intr_p;
464 	ddi_irm_pool_t	*pool_p;
465 	ddi_irm_req_t	*req_p;
466 	uint_t		nmin;
467 
468 	ASSERT(dip != NULL);
469 
470 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: dip %p\n", (void *)dip));
471 
472 	/* Validate parameters */
473 	if (dip == NULL) {
474 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: invalid args\n"));
475 		return (DDI_EINVAL);
476 	}
477 
478 	/* Check if the device has a request */
479 	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
480 	    !(req_p = intr_p->devi_irm_req_p)) {
481 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: not found\n"));
482 		return (DDI_EINVAL);
483 	}
484 
485 	/* Lock the pool */
486 	pool_p = req_p->ireq_pool_p;
487 	mutex_enter(&pool_p->ipool_lock);
488 
489 	/* Remove request */
490 	nmin = DDI_IRM_IS_REDUCIBLE(req_p) ? 1 : req_p->ireq_nreq;
491 	pool_p->ipool_minno -= nmin;
492 	pool_p->ipool_reqno -= req_p->ireq_nreq;
493 	pool_p->ipool_resno -= req_p->ireq_navail;
494 	list_remove(&pool_p->ipool_req_list, req_p);
495 
496 	/* Queue pool to be rebalanced */
497 	i_ddi_irm_enqueue(pool_p, B_FALSE);
498 
499 	/* Unlock the pool */
500 	mutex_exit(&pool_p->ipool_lock);
501 
502 	/* Destroy the request */
503 	intr_p->devi_irm_req_p = NULL;
504 	kmem_free(req_p, sizeof (ddi_irm_req_t));
505 
506 	return (DDI_SUCCESS);
507 }
508 
509 /*
510  * i_ddi_irm_set_cb()
511  *
512  *	Change the callback flag for a request, in response to
513  *	a change in its callback registration.  Then rebalance
514  *	the interrupt pool.
515  *
516  *	NOTE: the request is not locked because the navail value
517  *	      is not directly affected.  The balancing thread may
518  *	      modify the navail value in the background after it
519  *	      locks the request itself.
520  */
521 void
522 i_ddi_irm_set_cb(dev_info_t *dip, boolean_t has_cb_flag)
523 {
524 	devinfo_intr_t	*intr_p;
525 	ddi_irm_pool_t	*pool_p;
526 	ddi_irm_req_t	*req_p;
527 	uint_t		nreq;
528 
529 	ASSERT(dip != NULL);
530 
531 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: dip %p has_cb_flag %d\n",
532 	    (void *)dip, (int)has_cb_flag));
533 
534 	/* Validate parameters */
535 	if (dip == NULL)
536 		return;
537 
538 	/* Check for association with interrupt pool */
539 	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
540 	    !(req_p = intr_p->devi_irm_req_p)) {
541 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: not in pool\n"));
542 		return;
543 	}
544 
545 	/* Lock the pool */
546 	pool_p = req_p->ireq_pool_p;
547 	mutex_enter(&pool_p->ipool_lock);
548 
549 	/*
550 	 * Update the request and the pool
551 	 */
552 	if (has_cb_flag) {
553 
554 		/* Update pool statistics */
555 		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX)
556 			pool_p->ipool_minno -= (req_p->ireq_nreq - 1);
557 
558 		/* Update request */
559 		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;
560 
561 		/* Rebalance in background */
562 		i_ddi_irm_enqueue(pool_p, B_FALSE);
563 
564 	} else {
565 
566 		/* Determine new request size */
567 		nreq = MIN(req_p->ireq_nreq, pool_p->ipool_defsz);
568 
569 		/* Update pool statistics */
570 		pool_p->ipool_reqno -= req_p->ireq_nreq;
571 		pool_p->ipool_reqno += nreq;
572 		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX) {
573 			pool_p->ipool_minno -= 1;
574 			pool_p->ipool_minno += nreq;
575 		} else {
576 			pool_p->ipool_minno -= req_p->ireq_nreq;
577 			pool_p->ipool_minno += nreq;
578 		}
579 
580 		/* Update request size, and re-sort in pool */
581 		req_p->ireq_nreq = nreq;
582 		list_remove(&pool_p->ipool_req_list, req_p);
583 		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
584 
585 		/* Rebalance synchronously, before losing callback */
586 		i_ddi_irm_enqueue(pool_p, B_TRUE);
587 
588 		/* Remove callback flag */
589 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_CALLBACK);
590 	}
591 
592 	/* Unlock the pool */
593 	mutex_exit(&pool_p->ipool_lock);
594 }
595 
596 /*
597  * Interrupt Pool Balancing
598  */
599 
600 /*
601  * irm_balance_thread()
602  *
603  *	One instance of this thread operates per each defined IRM pool.
604  *	It does the initial activation of the pool, as well as balancing
605  *	any requests that were queued up before the pool was active.
606  *	Once active, it waits forever to service balance operations.
607  */
608 static void
609 irm_balance_thread(ddi_irm_pool_t *pool_p)
610 {
611 	clock_t		interval, wakeup;
612 
613 	DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: pool_p %p\n",
614 	    (void *)pool_p));
615 
616 	/* Lock the pool */
617 	mutex_enter(&pool_p->ipool_lock);
618 
619 	/* Perform initial balance if required */
620 	if (pool_p->ipool_reqno > pool_p->ipool_resno)
621 		i_ddi_irm_balance(pool_p);
622 
623 	/* Activate the pool */
624 	pool_p->ipool_flags |= DDI_IRM_FLAG_ACTIVE;
625 
626 	/* Main loop */
627 	for (;;) {
628 
629 		/* Compute the delay interval */
630 		interval = drv_usectohz(irm_balance_delay * 1000000);
631 
632 		/* Sleep until queued */
633 		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);
634 
635 		DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: signaled.\n"));
636 
637 		/* Wait one interval, or until there are waiters */
638 		if ((interval > 0) &&
639 		    !(pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) &&
640 		    !(pool_p->ipool_flags & DDI_IRM_FLAG_EXIT)) {
641 			wakeup = ddi_get_lbolt() + interval;
642 			(void) cv_timedwait(&pool_p->ipool_cv,
643 			    &pool_p->ipool_lock, wakeup);
644 		}
645 
646 		/* Check if awakened to exit */
647 		if (pool_p->ipool_flags & DDI_IRM_FLAG_EXIT) {
648 			DDI_INTR_IRMDBG((CE_CONT,
649 			    "irm_balance_thread: exiting...\n"));
650 			mutex_exit(&pool_p->ipool_lock);
651 			thread_exit();
652 		}
653 
654 		/* Balance the pool */
655 		i_ddi_irm_balance(pool_p);
656 
657 		/* Notify waiters */
658 		if (pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) {
659 			cv_broadcast(&pool_p->ipool_cv);
660 			pool_p->ipool_flags &= ~(DDI_IRM_FLAG_WAITERS);
661 		}
662 
663 		/* Clear QUEUED condition */
664 		pool_p->ipool_flags &= ~(DDI_IRM_FLAG_QUEUED);
665 	}
666 }
667 
668 /*
669  * i_ddi_irm_balance()
670  *
671  *	Balance a pool.  The general algorithm is to first reset all
672  *	requests to their maximum size, use reduction algorithms to
673  *	solve any imbalance, and then notify affected drivers.
674  */
675 static void
676 i_ddi_irm_balance(ddi_irm_pool_t *pool_p)
677 {
678 	ddi_irm_req_t	*req_p;
679 
680 #ifdef	DEBUG
681 	uint_t		debug_totsz = 0;
682 	int		debug_policy = 0;
683 #endif	/* DEBUG */
684 
685 	ASSERT(pool_p != NULL);
686 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
687 
688 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: pool_p %p\n",
689 	    (void *)pool_p));
690 
691 #ifdef	DEBUG	/* Adjust size and policy settings */
692 	if (irm_debug_size > pool_p->ipool_minno) {
693 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: debug size %d\n",
694 		    irm_debug_size));
695 		debug_totsz = pool_p->ipool_totsz;
696 		pool_p->ipool_totsz = irm_debug_size;
697 	}
698 	if (DDI_IRM_POLICY_VALID(irm_debug_policy)) {
699 		DDI_INTR_IRMDBG((CE_CONT,
700 		    "i_ddi_irm_balance: debug policy %d\n", irm_debug_policy));
701 		debug_policy = pool_p->ipool_policy;
702 		pool_p->ipool_policy = irm_debug_policy;
703 	}
704 #endif	/* DEBUG */
705 
706 	/* Lock the availability lock */
707 	mutex_enter(&pool_p->ipool_navail_lock);
708 
709 	/*
710 	 * Put all of the reducible requests into a scratch list.
711 	 * Reset each one of them to their maximum availability.
712 	 */
713 	for (req_p = list_head(&pool_p->ipool_req_list); req_p;
714 	    req_p = list_next(&pool_p->ipool_req_list, req_p)) {
715 		if (DDI_IRM_IS_REDUCIBLE(req_p)) {
716 			pool_p->ipool_resno -= req_p->ireq_navail;
717 			req_p->ireq_scratch = req_p->ireq_navail;
718 			req_p->ireq_navail = req_p->ireq_nreq;
719 			pool_p->ipool_resno += req_p->ireq_navail;
720 			list_insert_tail(&pool_p->ipool_scratch_list, req_p);
721 		}
722 	}
723 
724 	/* Balance the requests */
725 	i_ddi_irm_reduce(pool_p);
726 
727 	/* Unlock the availability lock */
728 	mutex_exit(&pool_p->ipool_navail_lock);
729 
730 	/*
731 	 * Process REMOVE notifications.
732 	 *
733 	 * If a driver fails to release interrupts: exclude it from
734 	 * further processing, correct the resulting imbalance, and
735 	 * start over again at the head of the scratch list.
736 	 */
737 	req_p = list_head(&pool_p->ipool_scratch_list);
738 	while (req_p) {
739 		if ((req_p->ireq_navail < req_p->ireq_scratch) &&
740 		    (i_ddi_irm_notify(pool_p, req_p) != DDI_SUCCESS)) {
741 			list_remove(&pool_p->ipool_scratch_list, req_p);
742 			mutex_enter(&pool_p->ipool_navail_lock);
743 			i_ddi_irm_reduce(pool_p);
744 			mutex_exit(&pool_p->ipool_navail_lock);
745 			req_p = list_head(&pool_p->ipool_scratch_list);
746 		} else {
747 			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
748 		}
749 	}
750 
751 	/*
752 	 * Process ADD notifications.
753 	 *
754 	 * This is the last use of the scratch list, so empty it.
755 	 */
756 	while (req_p = list_remove_head(&pool_p->ipool_scratch_list)) {
757 		if (req_p->ireq_navail > req_p->ireq_scratch) {
758 			(void) i_ddi_irm_notify(pool_p, req_p);
759 		}
760 	}
761 
762 #ifdef	DEBUG	/* Restore size and policy settings */
763 	if (debug_totsz != 0)
764 		pool_p->ipool_totsz = debug_totsz;
765 	if (debug_policy != 0)
766 		pool_p->ipool_policy = debug_policy;
767 #endif	/* DEBUG */
768 }
769 
770 /*
771  * i_ddi_irm_reduce()
772  *
773  *	Use reduction algorithms to correct an imbalance in a pool.
774  */
775 static void
776 i_ddi_irm_reduce(ddi_irm_pool_t *pool_p)
777 {
778 	int	ret, imbalance;
779 
780 	ASSERT(pool_p != NULL);
781 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
782 	ASSERT(DDI_IRM_POLICY_VALID(pool_p->ipool_policy));
783 
784 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_reduce: pool_p %p\n",
785 	    (void *)pool_p));
786 
787 	/* Compute the imbalance.  Do nothing if already balanced. */
788 	if ((imbalance = pool_p->ipool_resno - pool_p->ipool_totsz) <= 0)
789 		return;
790 
791 	/* Reduce by policy */
792 	switch (pool_p->ipool_policy) {
793 	case DDI_IRM_POLICY_LARGE:
794 		ret = i_ddi_irm_reduce_large(pool_p, imbalance);
795 		break;
796 	case DDI_IRM_POLICY_EVEN:
797 		ret = i_ddi_irm_reduce_even(pool_p, imbalance);
798 		break;
799 	}
800 
801 	/*
802 	 * If the policy based reductions failed, then
803 	 * possibly reduce new requests as a last resort.
804 	 */
805 	if (ret != DDI_SUCCESS) {
806 
807 		DDI_INTR_IRMDBG((CE_CONT,
808 		    "i_ddi_irm_reduce: policy reductions failed.\n"));
809 
810 		/* Compute remaining imbalance */
811 		imbalance = pool_p->ipool_resno - pool_p->ipool_totsz;
812 
813 		ASSERT(imbalance > 0);
814 
815 		i_ddi_irm_reduce_new(pool_p, imbalance);
816 	}
817 }
818 
819 /*
820  * i_ddi_irm_enqueue()
821  *
822  *	Queue a pool to be balanced.  Signals the balancing thread to wake
823  *	up and process the pool.  If 'wait_flag' is true, then the current
824  *	thread becomes a waiter and blocks until the balance is completed.
825  */
826 static void
827 i_ddi_irm_enqueue(ddi_irm_pool_t *pool_p, boolean_t wait_flag)
828 {
829 	ASSERT(pool_p != NULL);
830 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
831 
832 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool_p %p wait_flag %d\n",
833 	    (void *)pool_p, (int)wait_flag));
834 
835 	/* Do nothing if pool is already balanced */
836 #ifndef	DEBUG
837 	if ((pool_p->ipool_reqno == pool_p->ipool_resno)) {
838 #else
839 	if ((pool_p->ipool_reqno == pool_p->ipool_resno) && !irm_debug_size) {
840 #endif	/* DEBUG */
841 		DDI_INTR_IRMDBG((CE_CONT,
842 		    "i_ddi_irm_enqueue: pool already balanced\n"));
843 		return;
844 	}
845 
846 	/* Avoid deadlocks when IRM is not active */
847 	if (!irm_active && wait_flag) {
848 		DDI_INTR_IRMDBG((CE_CONT,
849 		    "i_ddi_irm_enqueue: pool not active.\n"));
850 		return;
851 	}
852 
853 	if (wait_flag)
854 		pool_p->ipool_flags |= DDI_IRM_FLAG_WAITERS;
855 
856 	if (wait_flag || !(pool_p->ipool_flags & DDI_IRM_FLAG_QUEUED)) {
857 		pool_p->ipool_flags |= DDI_IRM_FLAG_QUEUED;
858 		cv_signal(&pool_p->ipool_cv);
859 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool queued.\n"));
860 	}
861 
862 	if (wait_flag) {
863 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: waiting...\n"));
864 		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);
865 	}
866 }
867 
868 /*
869  * Reduction Algorithms, Used For Balancing
870  */
871 
872 /*
873  * i_ddi_irm_reduce_large()
874  *
875  *	Algorithm for the DDI_IRM_POLICY_LARGE reduction policy.
876  *
877  *	This algorithm generally reduces larger requests first, before
878  *	advancing to smaller requests.  The scratch list is initially
879  *	sorted in descending order by current navail values, which are
880  *	maximized prior to reduction.  This sorted order is preserved,
881  *	but within a range of equally sized requests they are secondarily
882  *	sorted in ascending order by initial nreq value.  The head of the
883  *	list is always selected for reduction, since it is the current
884  *	largest request.  After being reduced, it is sorted further into
885  *	the list before the next iteration.
886  *
887  *	Optimizations in this algorithm include trying to reduce multiple
888  *	requests together if they are equally sized.  And the algorithm
889  *	attempts to reduce in larger increments when possible to minimize
890  *	the total number of iterations.
891  */
892 static int
893 i_ddi_irm_reduce_large(ddi_irm_pool_t *pool_p, int imbalance)
894 {
895 	ddi_irm_req_t	*head_p, *next_p;
896 	int		next_navail, nreqs, reduction;
897 
898 	ASSERT(pool_p != NULL);
899 	ASSERT(imbalance > 0);
900 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
901 
902 	DDI_INTR_IRMDBG((CE_CONT,
903 	    "i_ddi_irm_reduce_large: pool_p %p imbalance %d\n", (void *)pool_p,
904 	    imbalance));
905 
906 	while (imbalance > 0) {
907 
908 		head_p = list_head(&pool_p->ipool_scratch_list);
909 
910 		/* Fail if nothing is reducible */
911 		if (head_p->ireq_navail <= pool_p->ipool_defsz) {
912 			DDI_INTR_IRMDBG((CE_CONT,
913 			    "i_ddi_irm_reduce_large: Failure. "
914 			    "All requests have downsized to low limit.\n"));
915 			return (DDI_FAILURE);
916 		}
917 
918 		/* Count the number of equally sized requests */
919 		for (nreqs = 1, next_p = head_p;
920 		    (next_p = list_next(&pool_p->ipool_scratch_list, next_p)) !=
921 		    NULL && (head_p->ireq_navail == next_p->ireq_navail);
922 		    nreqs++)
923 			;
924 
925 		next_navail = next_p ? next_p->ireq_navail : 0;
926 		reduction = head_p->ireq_navail -
927 		    MAX(next_navail, pool_p->ipool_defsz);
928 
929 		if ((reduction * nreqs) > imbalance) {
930 			reduction = imbalance / nreqs;
931 
932 			if (reduction == 0) {
933 				reduction = 1;
934 				nreqs = imbalance;
935 			}
936 		}
937 
938 		next_p = head_p;
939 		while (nreqs--) {
940 			imbalance -= reduction;
941 			next_p->ireq_navail -= reduction;
942 			pool_p->ipool_resno -= reduction;
943 			next_p = list_next(&pool_p->ipool_scratch_list, next_p);
944 		}
945 
946 		if (next_p && next_p->ireq_navail > head_p->ireq_navail) {
947 			ASSERT(imbalance == 0);
948 			i_ddi_irm_reduce_large_resort(pool_p);
949 		}
950 	}
951 
952 	return (DDI_SUCCESS);
953 }
954 
955 /*
956  * i_ddi_irm_reduce_large_resort()
957  *
958  *	Helper function for i_ddi_irm_reduce_large().  Once a request
959  *	is reduced, this resorts it further down into the list as necessary.
960  */
961 static void
962 i_ddi_irm_reduce_large_resort(ddi_irm_pool_t *pool_p)
963 {
964 	ddi_irm_req_t	*start_p, *end_p, *next_p;
965 
966 	ASSERT(pool_p != NULL);
967 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
968 
969 	start_p = list_head(&pool_p->ipool_scratch_list);
970 	end_p = list_next(&pool_p->ipool_scratch_list, start_p);
971 	while (end_p && start_p->ireq_navail == end_p->ireq_navail)
972 		end_p = list_next(&pool_p->ipool_scratch_list, end_p);
973 
974 	next_p = end_p;
975 	while (next_p && (next_p->ireq_navail > start_p->ireq_navail))
976 		next_p = list_next(&pool_p->ipool_scratch_list, next_p);
977 
978 	while (start_p != end_p) {
979 		list_remove(&pool_p->ipool_scratch_list, start_p);
980 		list_insert_before(&pool_p->ipool_scratch_list, next_p,
981 		    start_p);
982 		start_p = list_head(&pool_p->ipool_scratch_list);
983 	}
984 }
985 
986 /*
987  * i_ddi_irm_reduce_even()
988  *
989  *	Algorithm for the DDI_IRM_POLICY_EVEN reduction policy.
990  *
991  *	This algorithm reduces requests evenly, without giving a
992  *	specific preference to smaller or larger requests.  Each
993  *	iteration reduces all reducible requests by the same amount
994  *	until the imbalance is corrected.  Although when possible,
995  *	it tries to avoid reducing requests below the threshold of
996  *	the interrupt pool's default allocation size.
997  *
998  *	An optimization in this algorithm is to reduce the requests
999  *	in larger increments during each iteration, to minimize the
1000  *	total number of iterations required.
1001  */
1002 static int
1003 i_ddi_irm_reduce_even(ddi_irm_pool_t *pool_p, int imbalance)
1004 {
1005 	ddi_irm_req_t	*req_p, *last_p;
1006 	uint_t		nmin = pool_p->ipool_defsz;
1007 	uint_t		nreduce, reduction;
1008 
1009 	ASSERT(pool_p != NULL);
1010 	ASSERT(imbalance > 0);
1011 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
1012 
1013 	DDI_INTR_IRMDBG((CE_CONT,
1014 	    "i_ddi_irm_reduce_even: pool_p %p imbalance %d\n",
1015 	    (void *)pool_p, imbalance));
1016 
1017 	while (imbalance > 0) {
1018 
1019 		/* Count reducible requests */
1020 		nreduce = 0;
1021 		for (req_p = list_head(&pool_p->ipool_scratch_list); req_p;
1022 		    req_p = list_next(&pool_p->ipool_scratch_list, req_p)) {
1023 			if (req_p->ireq_navail <= nmin)
1024 				break;
1025 			last_p = req_p;
1026 			nreduce++;
1027 		}
1028 
1029 		/* Fail if none are reducible */
1030 		if (nreduce == 0) {
1031 			DDI_INTR_IRMDBG((CE_CONT,
1032 			    "i_ddi_irm_reduce_even: Failure. "
1033 			    "All requests have downsized to low limit.\n"));
1034 			return (DDI_FAILURE);
1035 		}
1036 
1037 		/* Compute reduction */
1038 		if (nreduce < imbalance) {
1039 			reduction = last_p->ireq_navail - nmin;
1040 			if ((reduction * nreduce) > imbalance) {
1041 				reduction = imbalance / nreduce;
1042 			}
1043 		} else {
1044 			reduction = 1;
1045 		}
1046 
1047 		/* Start at head of list, but skip excess */
1048 		req_p = list_head(&pool_p->ipool_scratch_list);
1049 		while (nreduce > imbalance) {
1050 			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
1051 			nreduce--;
1052 		}
1053 
1054 		/* Do reductions */
1055 		while (req_p && (nreduce > 0)) {
1056 			imbalance -= reduction;
1057 			req_p->ireq_navail -= reduction;
1058 			pool_p->ipool_resno -= reduction;
1059 			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
1060 			nreduce--;
1061 		}
1062 	}
1063 
1064 	return (DDI_SUCCESS);
1065 }
1066 
1067 /*
1068  * i_ddi_irm_reduce_new()
1069  *
1070  *	Reduces new requests.  This is only used as a last resort
1071  *	after another reduction algorithm failed.
1072  */
1073 static void
1074 i_ddi_irm_reduce_new(ddi_irm_pool_t *pool_p, int imbalance)
1075 {
1076 	ddi_irm_req_t	*req_p;
1077 	uint_t		nreduce;
1078 
1079 	ASSERT(pool_p != NULL);
1080 	ASSERT(imbalance > 0);
1081 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
1082 
1083 	while (imbalance > 0) {
1084 		nreduce = 0;
1085 		for (req_p = list_head(&pool_p->ipool_scratch_list);
1086 		    req_p && (imbalance > 0);
1087 		    req_p = list_next(&pool_p->ipool_scratch_list, req_p)) {
1088 			if (req_p->ireq_flags & DDI_IRM_FLAG_NEW &&
1089 			    req_p->ireq_navail > 1) {
1090 				req_p->ireq_navail--;
1091 				pool_p->ipool_resno--;
1092 				imbalance--;
1093 				nreduce++;
1094 			}
1095 		}
1096 
1097 		if (nreduce == 0)
1098 			break;
1099 	}
1100 
1101 	for (req_p = list_head(&pool_p->ipool_scratch_list);
1102 	    req_p && (imbalance > 0);
1103 	    req_p = list_next(&pool_p->ipool_scratch_list, req_p)) {
1104 		if (req_p->ireq_flags & DDI_IRM_FLAG_NEW) {
1105 			ASSERT(req_p->ireq_navail == 1);
1106 			req_p->ireq_navail--;
1107 			pool_p->ipool_resno--;
1108 			imbalance--;
1109 		}
1110 	}
1111 }
1112 
1113 /*
1114  * Miscellaneous Helper Functions
1115  */
1116 
1117 /*
1118  * i_ddi_intr_get_pool()
1119  *
1120  *	Get an IRM pool that supplies interrupts of a specified type.
1121  *	Invokes a DDI_INTROP_GETPOOL to the bus nexus driver.  Fails
1122  *	if no pool exists.
1123  */
1124 ddi_irm_pool_t *
1125 i_ddi_intr_get_pool(dev_info_t *dip, int type)
1126 {
1127 	devinfo_intr_t		*intr_p;
1128 	ddi_irm_pool_t		*pool_p;
1129 	ddi_irm_req_t		*req_p;
1130 	ddi_intr_handle_impl_t	hdl;
1131 
1132 	ASSERT(dip != NULL);
1133 	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));
1134 
1135 	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
1136 	    ((req_p = intr_p->devi_irm_req_p) != NULL) &&
1137 	    ((pool_p = req_p->ireq_pool_p) != NULL) &&
1138 	    (pool_p->ipool_types & type)) {
1139 		return (pool_p);
1140 	}
1141 
1142 	bzero(&hdl, sizeof (ddi_intr_handle_impl_t));
1143 	hdl.ih_dip = dip;
1144 	hdl.ih_type = type;
1145 
1146 	if (i_ddi_intr_ops(dip, dip, DDI_INTROP_GETPOOL,
1147 	    &hdl, (void *)&pool_p) == DDI_SUCCESS)
1148 		return (pool_p);
1149 
1150 	return (NULL);
1151 }
1152 
1153 /*
1154  * i_ddi_irm_insertion_sort()
1155  *
1156  *	Use the insertion sort method to insert a request into a list.
1157  *	The list is sorted in descending order by request size.
1158  */
1159 static void
1160 i_ddi_irm_insertion_sort(list_t *req_list, ddi_irm_req_t *req_p)
1161 {
1162 	ddi_irm_req_t	*next_p;
1163 
1164 	next_p = list_head(req_list);
1165 
1166 	while (next_p && (next_p->ireq_nreq > req_p->ireq_nreq))
1167 		next_p = list_next(req_list, next_p);
1168 
1169 	list_insert_before(req_list, next_p, req_p);
1170 }
1171 
1172 /*
1173  * i_ddi_irm_notify()
1174  *
1175  *	Notify a driver of changes to its interrupt request using the
1176  *	generic callback mechanism.  Checks for errors in processing.
1177  */
1178 static int
1179 i_ddi_irm_notify(ddi_irm_pool_t *pool_p, ddi_irm_req_t *req_p)
1180 {
1181 	ddi_cb_action_t	action;
1182 	ddi_cb_t	*cb_p;
1183 	uint_t		nintrs;
1184 	int		ret, count;
1185 
1186 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: pool_p %p req_p %p\n",
1187 	    (void *)pool_p, (void *)req_p));
1188 
1189 	/* Do not notify new or unchanged requests */
1190 	if ((req_p->ireq_navail == req_p->ireq_scratch) ||
1191 	    (req_p->ireq_flags & DDI_IRM_FLAG_NEW))
1192 		return (DDI_SUCCESS);
1193 
1194 	/* Determine action and count */
1195 	if (req_p->ireq_navail > req_p->ireq_scratch) {
1196 		action = DDI_CB_INTR_ADD;
1197 		count = req_p->ireq_navail - req_p->ireq_scratch;
1198 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: adding %d\n",
1199 		    count));
1200 	} else {
1201 		action = DDI_CB_INTR_REMOVE;
1202 		count = req_p->ireq_scratch - req_p->ireq_navail;
1203 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: removing %d\n",
1204 		    count));
1205 	}
1206 
1207 	/* Lookup driver callback */
1208 	if ((cb_p = DEVI(req_p->ireq_dip)->devi_cb_p) == NULL) {
1209 		DDI_INTR_IRMDBG((CE_WARN, "i_ddi_irm_notify: no callback!\n"));
1210 		return (DDI_FAILURE);
1211 	}
1212 
1213 	/* Do callback */
1214 	ret = cb_p->cb_func(req_p->ireq_dip, action, (void *)(uintptr_t)count,
1215 	    cb_p->cb_arg1, cb_p->cb_arg2);
1216 
1217 	/* Log callback errors */
1218 	if (ret != DDI_SUCCESS) {
1219 		cmn_err(CE_WARN, "%s%d: failed callback (action=%d, ret=%d)\n",
1220 		    ddi_driver_name(req_p->ireq_dip),
1221 		    ddi_get_instance(req_p->ireq_dip), (int)action, ret);
1222 	}
1223 
1224 	/* Check if the driver exceeds its availability */
1225 	nintrs = i_ddi_intr_get_current_nintrs(req_p->ireq_dip);
1226 	if (nintrs > req_p->ireq_navail) {
1227 		cmn_err(CE_WARN, "%s%d: failed to release interrupts "
1228 		    "(nintrs=%d, navail=%d).\n",
1229 		    ddi_driver_name(req_p->ireq_dip),
1230 		    ddi_get_instance(req_p->ireq_dip), nintrs,
1231 		    req_p->ireq_navail);
1232 		pool_p->ipool_resno += (nintrs - req_p->ireq_navail);
1233 		req_p->ireq_navail = nintrs;
1234 		return (DDI_FAILURE);
1235 	}
1236 
1237 	/* Update request */
1238 	req_p->ireq_scratch = req_p->ireq_navail;
1239 
1240 	return (DDI_SUCCESS);
1241 }
1242 
/*
 * i_ddi_irm_debug_balance()
 *
 *	Debug/test only routine (compiled under DEBUG) that forces a
 *	rebalance of the interrupt pool serving a device.  The pool is
 *	looked up from the device's current interrupt type and handed
 *	to i_ddi_irm_enqueue() with the pool lock held; wait_flag is
 *	passed through unchanged.  Nothing happens if the device has
 *	no current interrupt type or no pool.
 */
#ifdef	DEBUG
void
i_ddi_irm_debug_balance(dev_info_t *dip, boolean_t wait_flag)
{
	ddi_irm_pool_t	*pool_p;
	int		type;

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_debug_balance: dip %p wait %d\n",
	    (void *)dip, (int)wait_flag));

	/* Without a current interrupt type there is no pool to find */
	type = i_ddi_intr_get_current_type(dip);
	if (type == 0)
		return;

	/* Nothing to balance if no pool supplies this type */
	pool_p = i_ddi_intr_get_pool(dip, type);
	if (pool_p == NULL)
		return;

	mutex_enter(&pool_p->ipool_lock);
	i_ddi_irm_enqueue(pool_p, wait_flag);
	mutex_exit(&pool_p->ipool_lock);
}
#endif
1267