xref: /illumos-gate/usr/src/uts/common/os/callout.c (revision 1671524d)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5f635d46aSqiao  * Common Development and Distribution License (the "License").
6f635d46aSqiao  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22113d3ed7SMadhavan Venkataraman  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #include <sys/callo.h>
277c478bd9Sstevel@tonic-gate #include <sys/param.h>
287c478bd9Sstevel@tonic-gate #include <sys/types.h>
297c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
307c478bd9Sstevel@tonic-gate #include <sys/thread.h>
317c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
3287a18d3fSMadhavan Venkataraman #include <sys/kmem_impl.h>
337c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
347c478bd9Sstevel@tonic-gate #include <sys/callb.h>
357c478bd9Sstevel@tonic-gate #include <sys/debug.h>
367c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
377c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
387c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
397c478bd9Sstevel@tonic-gate 
407c478bd9Sstevel@tonic-gate /*
417c478bd9Sstevel@tonic-gate  * Callout tables.  See timeout(9F) for details.
427c478bd9Sstevel@tonic-gate  */
4351b32bddSMadhavan Venkataraman static int callout_threads;			/* callout normal threads */
4487a18d3fSMadhavan Venkataraman static hrtime_t callout_debug_hrtime;		/* debugger entry time */
45060cedfbSMadhavan Venkataraman static int callout_chunk;			/* callout heap chunk size */
4651b32bddSMadhavan Venkataraman static int callout_min_reap;			/* callout minimum reap count */
4751b32bddSMadhavan Venkataraman static int callout_tolerance;			/* callout hires tolerance */
4887a18d3fSMadhavan Venkataraman static callout_table_t *callout_boot_ct;	/* Boot CPU's callout tables */
49454ab202SMadhavan Venkataraman static clock_t callout_max_ticks;		/* max interval */
5087a18d3fSMadhavan Venkataraman static hrtime_t callout_longterm;		/* longterm nanoseconds */
5187a18d3fSMadhavan Venkataraman static ulong_t callout_counter_low;		/* callout ID increment */
5287a18d3fSMadhavan Venkataraman static ulong_t callout_table_bits;		/* number of table bits in ID */
5387a18d3fSMadhavan Venkataraman static ulong_t callout_table_mask;		/* mask for the table bits */
5487a18d3fSMadhavan Venkataraman static callout_cache_t *callout_caches;		/* linked list of caches */
5587a18d3fSMadhavan Venkataraman #pragma align 64(callout_table)
5687a18d3fSMadhavan Venkataraman static callout_table_t *callout_table;		/* global callout table array */
577c478bd9Sstevel@tonic-gate 
5807247649SMadhavan Venkataraman /*
594c06356bSdh142964  * We run 'realtime' callouts at PIL 1 (CY_LOW_LEVEL). For 'normal'
604c06356bSdh142964  * callouts, from PIL 10 (CY_LOCK_LEVEL) we dispatch the callout,
614c06356bSdh142964  * via taskq, to a thread that executes at PIL 0 - so we end up running
624c06356bSdh142964  * 'normal' callouts at PIL 0.
6307247649SMadhavan Venkataraman  */
6451b32bddSMadhavan Venkataraman static volatile int callout_realtime_level = CY_LOW_LEVEL;
6551b32bddSMadhavan Venkataraman static volatile int callout_normal_level = CY_LOCK_LEVEL;
6607247649SMadhavan Venkataraman 
6787a18d3fSMadhavan Venkataraman static char *callout_kstat_names[] = {
6887a18d3fSMadhavan Venkataraman 	"callout_timeouts",
6987a18d3fSMadhavan Venkataraman 	"callout_timeouts_pending",
7087a18d3fSMadhavan Venkataraman 	"callout_untimeouts_unexpired",
7187a18d3fSMadhavan Venkataraman 	"callout_untimeouts_executing",
7287a18d3fSMadhavan Venkataraman 	"callout_untimeouts_expired",
7387a18d3fSMadhavan Venkataraman 	"callout_expirations",
7487a18d3fSMadhavan Venkataraman 	"callout_allocations",
7551b32bddSMadhavan Venkataraman 	"callout_cleanups",
7687a18d3fSMadhavan Venkataraman };
7787a18d3fSMadhavan Venkataraman 
7851b32bddSMadhavan Venkataraman static hrtime_t	callout_heap_process(callout_table_t *, hrtime_t, int);
7951b32bddSMadhavan Venkataraman 
/*
 * Insert element cp at the head of the doubly linked list anchored by hash.
 *
 *	hash	- lvalue of type callout_hash_t (ch_head/ch_tail anchor)
 *	cp	- pointer to the element being linked in
 *	cnext	- name of the element's "next" link member
 *	cprev	- name of the element's "prev" link member
 *
 * cp is fully parenthesized so that any pointer expression (e.g. &node) may
 * be passed. Note that cp is evaluated more than once, so it must not have
 * side effects.
 */
#define	CALLOUT_HASH_INSERT(hash, cp, cnext, cprev)	\
{							\
	callout_hash_t *hashp = &(hash);		\
							\
	(cp)->cprev = NULL;				\
	(cp)->cnext = hashp->ch_head;			\
	if (hashp->ch_head == NULL)			\
		hashp->ch_tail = (cp);			\
	else						\
		(cp)->cnext->cprev = (cp);		\
	hashp->ch_head = (cp);				\
}
927c478bd9Sstevel@tonic-gate 
/*
 * Append element cp at the tail of the doubly linked list anchored by hash.
 *
 *	hash	- lvalue of type callout_hash_t (ch_head/ch_tail anchor)
 *	cp	- pointer to the element being linked in
 *	cnext	- name of the element's "next" link member
 *	cprev	- name of the element's "prev" link member
 *
 * cp is fully parenthesized so that any pointer expression (e.g. &node) may
 * be passed. Note that cp is evaluated more than once, so it must not have
 * side effects.
 */
#define	CALLOUT_HASH_APPEND(hash, cp, cnext, cprev)	\
{							\
	callout_hash_t *hashp = &(hash);		\
							\
	(cp)->cnext = NULL;				\
	(cp)->cprev = hashp->ch_tail;			\
	if (hashp->ch_tail == NULL)			\
		hashp->ch_head = (cp);			\
	else						\
		(cp)->cprev->cnext = (cp);		\
	hashp->ch_tail = (cp);				\
}
10587a18d3fSMadhavan Venkataraman 
/*
 * Unlink element cp from the doubly linked list anchored by hash. The
 * element's own link members are left untouched.
 *
 *	hash	- lvalue of type callout_hash_t (ch_head/ch_tail anchor)
 *	cp	- pointer to the element being removed
 *	cnext	- name of the element's "next" link member
 *	cprev	- name of the element's "prev" link member
 *
 * cp is fully parenthesized so that any pointer expression (e.g. &node) may
 * be passed. Note that cp is evaluated more than once, so it must not have
 * side effects.
 */
#define	CALLOUT_HASH_DELETE(hash, cp, cnext, cprev)	\
{							\
	callout_hash_t *hashp = &(hash);		\
							\
	if ((cp)->cnext == NULL)			\
		hashp->ch_tail = (cp)->cprev;		\
	else						\
		(cp)->cnext->cprev = (cp)->cprev;	\
	if ((cp)->cprev == NULL)			\
		hashp->ch_head = (cp)->cnext;		\
	else						\
		(cp)->cprev->cnext = (cp)->cnext;	\
}
11987a18d3fSMadhavan Venkataraman 
12087a18d3fSMadhavan Venkataraman /*
12187a18d3fSMadhavan Venkataraman  * These definitions help us queue callouts and callout lists. Here is
12287a18d3fSMadhavan Venkataraman  * the queueing rationale:
12387a18d3fSMadhavan Venkataraman  *
12487a18d3fSMadhavan Venkataraman  *	- callouts are queued in a FIFO manner in the ID hash table.
12587a18d3fSMadhavan Venkataraman  *	  TCP timers are typically cancelled in the same order that they
12687a18d3fSMadhavan Venkataraman  *	  were issued. The FIFO queueing shortens the search for a callout
12787a18d3fSMadhavan Venkataraman  *	  during untimeout().
12887a18d3fSMadhavan Venkataraman  *
12987a18d3fSMadhavan Venkataraman  *	- callouts are queued in a FIFO manner in their callout lists.
13087a18d3fSMadhavan Venkataraman  *	  This ensures that the callouts are executed in the same order that
13187a18d3fSMadhavan Venkataraman  *	  they were queued. This is fair. Plus, it helps to make each
13287a18d3fSMadhavan Venkataraman  *	  callout expiration timely. It also favors cancellations.
13387a18d3fSMadhavan Venkataraman  *
13451b32bddSMadhavan Venkataraman  *	- callout lists are queued in the following manner in the callout
13551b32bddSMadhavan Venkataraman  *	  hash table buckets:
13651b32bddSMadhavan Venkataraman  *
13751b32bddSMadhavan Venkataraman  *		- appended, if the callout list is a 1-nanosecond resolution
13851b32bddSMadhavan Venkataraman  *		  callout list. When a callout is created, we first look for
13951b32bddSMadhavan Venkataraman  *		  a callout list that has the same expiration so we can avoid
14051b32bddSMadhavan Venkataraman  *		  allocating a callout list and inserting the expiration into
14151b32bddSMadhavan Venkataraman  *		  the heap. However, we do not want to look at 1-nanosecond
14251b32bddSMadhavan Venkataraman  *		  resolution callout lists as we will seldom find a match in
14351b32bddSMadhavan Venkataraman  *		  them. Keeping these callout lists in the rear of the hash
14451b32bddSMadhavan Venkataraman  *		  buckets allows us to skip these during the lookup.
14551b32bddSMadhavan Venkataraman  *
14651b32bddSMadhavan Venkataraman  *		- inserted at the beginning, if the callout list is not a
14751b32bddSMadhavan Venkataraman  *		  1-nanosecond resolution callout list. This also has the
14851b32bddSMadhavan Venkataraman  *		  side-effect of keeping the long term timers away from the
14951b32bddSMadhavan Venkataraman  *		  front of the buckets.
15087a18d3fSMadhavan Venkataraman  *
15187a18d3fSMadhavan Venkataraman  *	- callout lists are queued in a FIFO manner in the expired callouts
15287a18d3fSMadhavan Venkataraman  *	  list. This ensures that callout lists are executed in the order
15387a18d3fSMadhavan Venkataraman  *	  of expiration.
15487a18d3fSMadhavan Venkataraman  */
/*
 * Queue callout cp, FIFO, on both lists it belongs to: the ID hash bucket of
 * table ct selected by cp->c_xid, and the callout list cp->c_list.
 *
 * Both appends are wrapped in a single compound statement so that the macro
 * behaves as one statement when used as the body of an unbraced if/else or
 * loop. ct and cp are evaluated more than once and must not have side
 * effects.
 */
#define	CALLOUT_APPEND(ct, cp)						\
{									\
	CALLOUT_HASH_APPEND(						\
	    (ct)->ct_idhash[CALLOUT_IDHASH((cp)->c_xid)],		\
	    (cp), c_idnext, c_idprev);					\
	CALLOUT_HASH_APPEND((cp)->c_list->cl_callouts,			\
	    (cp), c_clnext, c_clprev);					\
}
15987a18d3fSMadhavan Venkataraman 
/*
 * Remove callout cp from both lists it is queued on: the ID hash bucket of
 * table ct selected by cp->c_xid, and the callout list cp->c_list.
 *
 * Both removals are wrapped in a single compound statement so that the macro
 * behaves as one statement when used as the body of an unbraced if/else or
 * loop. ct and cp are evaluated more than once and must not have side
 * effects.
 */
#define	CALLOUT_DELETE(ct, cp)						\
{									\
	CALLOUT_HASH_DELETE(						\
	    (ct)->ct_idhash[CALLOUT_IDHASH((cp)->c_xid)],		\
	    (cp), c_idnext, c_idprev);					\
	CALLOUT_HASH_DELETE((cp)->c_list->cl_callouts,			\
	    (cp), c_clnext, c_clprev);					\
}
16487a18d3fSMadhavan Venkataraman 
/*
 * Callout-list flavours of the generic hash-chain operations. Each one links
 * or unlinks callout list cl on the chain anchored by hash, using the
 * cl_next/cl_prev members of the callout list.
 */

/* Put cl at the front of the chain. */
#define	CALLOUT_LIST_INSERT(hash, cl)	\
	CALLOUT_HASH_INSERT(hash, cl, cl_next, cl_prev)

/* Put cl at the rear of the chain. */
#define	CALLOUT_LIST_APPEND(hash, cl)	\
	CALLOUT_HASH_APPEND(hash, cl, cl_next, cl_prev)

/* Take cl off the chain. */
#define	CALLOUT_LIST_DELETE(hash, cl)	\
	CALLOUT_HASH_DELETE(hash, cl, cl_next, cl_prev)
1737c478bd9Sstevel@tonic-gate 
/*
 * Link callout list cl into a chain immediately in front of nextcl.
 *
 * Only the neighbouring elements are updated; the chain's ch_head/ch_tail
 * anchor is not touched, so this must not be used to place cl at the very
 * front of a chain (nextcl must not be the head).
 *
 * Both arguments are parenthesized everywhere so that any pointer expression
 * may be passed. They are evaluated more than once and must not have side
 * effects.
 */
#define	CALLOUT_LIST_BEFORE(cl, nextcl)			\
{							\
	(cl)->cl_prev = (nextcl)->cl_prev;		\
	(cl)->cl_next = (nextcl);			\
	(nextcl)->cl_prev = (cl);			\
	if ((cl)->cl_prev != NULL)			\
		(cl)->cl_prev->cl_next = (cl);		\
}
182060cedfbSMadhavan Venkataraman 
/*
 * Two normal callouts that depend on each other can deadlock if they land on
 * the same callout list; breaking that deadlock takes two taskq threads
 * running in parallel. CALLOUT_EXEC_COMPUTE() decides how many taskq threads
 * to use, tuned for the common case. It is an ugly hack, but a necessary
 * one (sigh).
 *
 *	ct	- callout table whose expired list is examined
 *	nextexp	- next expiration of the cyclic
 *	exec	- lvalue that receives the thread count:
 *		  0  the expired list is empty, nothing to process
 *		  1  exactly one callout list holding at most one callout, or
 *		     several callouts but the cyclic fires again within
 *		     CALLOUT_THRESHOLD nanoseconds of now
 *		  2  several callouts and the next cyclic expiration is more
 *		     than CALLOUT_THRESHOLD nanoseconds away
 */
#define	CALLOUT_THRESHOLD	100000000
#define	CALLOUT_EXEC_COMPUTE(ct, nextexp, exec)				\
{									\
	callout_list_t *ce_head = ct->ct_expired.ch_head;		\
									\
	if (ce_head == NULL) {						\
		exec = 0;						\
	} else if ((ce_head->cl_next == NULL) &&			\
	    (ce_head->cl_callouts.ch_head ==				\
	    ce_head->cl_callouts.ch_tail)) {				\
		exec = 1;						\
	} else {							\
		/* gethrtime() is only consulted when it matters. */	\
		exec = ((nextexp) > (gethrtime() + CALLOUT_THRESHOLD)) ? \
		    2 : 1;						\
	}								\
}
22507247649SMadhavan Venkataraman 
/*
 * Exchange the contents of the two heap items that h1 and h2 point to.
 *
 * Both arguments are parenthesized before being dereferenced, so pointer
 * expressions such as "heap + i" may be passed. Each argument is evaluated
 * twice and must not have side effects.
 */
#define	CALLOUT_SWAP(h1, h2)		\
{					\
	callout_heap_t swap_tmp;	\
					\
	swap_tmp = *(h1);		\
	*(h1) = *(h2);			\
	*(h2) = swap_tmp;		\
}
23751b32bddSMadhavan Venkataraman 
/*
 * Return callout list cl to table ct's free list: mark it with
 * CALLOUT_LIST_FLAG_FREE and push it on the front of ct_lfree, chained
 * through cl_next.
 */
#define	CALLOUT_LIST_FREE(ct, cl)			\
{							\
	(cl)->cl_flags |= CALLOUT_LIST_FLAG_FREE;	\
	(cl)->cl_next = (ct)->ct_lfree;			\
	(ct)->ct_lfree = (cl);				\
}
24751b32bddSMadhavan Venkataraman 
/*
 * Return callout cp to table ct's free list: push it on the front of
 * ct_free, chained through c_idnext, and mark its ID with CALLOUT_ID_FREE.
 *
 * The second parameter is the callout itself. It was previously declared as
 * "cl" while the body referred to "cp", so the argument was ignored and the
 * macro only worked when the caller happened to have a variable named cp in
 * scope. Both arguments are evaluated more than once and must not have side
 * effects.
 */
#define	CALLOUT_FREE(ct, cp)			\
{						\
	(cp)->c_idnext = (ct)->ct_free;		\
	(ct)->ct_free = (cp);			\
	(cp)->c_xid |= CALLOUT_ID_FREE;		\
}
257060cedfbSMadhavan Venkataraman 
258060cedfbSMadhavan Venkataraman /*
2597c478bd9Sstevel@tonic-gate  * Allocate a callout structure.  We try quite hard because we
2607c478bd9Sstevel@tonic-gate  * can't sleep, and if we can't do the allocation, we're toast.
26187a18d3fSMadhavan Venkataraman  * Failing all, we try a KM_PANIC allocation. Note that we never
26287a18d3fSMadhavan Venkataraman  * deallocate a callout. See untimeout() for the reasoning.
2637c478bd9Sstevel@tonic-gate  */
2647c478bd9Sstevel@tonic-gate static callout_t *
2657c478bd9Sstevel@tonic-gate callout_alloc(callout_table_t *ct)
2667c478bd9Sstevel@tonic-gate {
26787a18d3fSMadhavan Venkataraman 	size_t size;
26887a18d3fSMadhavan Venkataraman 	callout_t *cp;
2697c478bd9Sstevel@tonic-gate 
27087a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
27187a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
27287a18d3fSMadhavan Venkataraman 
27387a18d3fSMadhavan Venkataraman 	cp = kmem_cache_alloc(ct->ct_cache, KM_NOSLEEP);
27487a18d3fSMadhavan Venkataraman 	if (cp == NULL) {
27587a18d3fSMadhavan Venkataraman 		size = sizeof (callout_t);
27687a18d3fSMadhavan Venkataraman 		cp = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
27787a18d3fSMadhavan Venkataraman 	}
27887a18d3fSMadhavan Venkataraman 	cp->c_xid = 0;
27907247649SMadhavan Venkataraman 	cp->c_executor = NULL;
28007247649SMadhavan Venkataraman 	cv_init(&cp->c_done, NULL, CV_DEFAULT, NULL);
28107247649SMadhavan Venkataraman 	cp->c_waiting = 0;
28287a18d3fSMadhavan Venkataraman 
28387a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
28487a18d3fSMadhavan Venkataraman 	ct->ct_allocations++;
2857c478bd9Sstevel@tonic-gate 	return (cp);
2867c478bd9Sstevel@tonic-gate }
2877c478bd9Sstevel@tonic-gate 
2887c478bd9Sstevel@tonic-gate /*
28987a18d3fSMadhavan Venkataraman  * Allocate a callout list structure.  We try quite hard because we
29087a18d3fSMadhavan Venkataraman  * can't sleep, and if we can't do the allocation, we're toast.
29187a18d3fSMadhavan Venkataraman  * Failing all, we try a KM_PANIC allocation. Note that we never
29287a18d3fSMadhavan Venkataraman  * deallocate a callout list.
2937c478bd9Sstevel@tonic-gate  */
29487a18d3fSMadhavan Venkataraman static void
29587a18d3fSMadhavan Venkataraman callout_list_alloc(callout_table_t *ct)
2967c478bd9Sstevel@tonic-gate {
29787a18d3fSMadhavan Venkataraman 	size_t size;
29887a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
29987a18d3fSMadhavan Venkataraman 
30087a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
30187a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
30287a18d3fSMadhavan Venkataraman 
30387a18d3fSMadhavan Venkataraman 	cl = kmem_cache_alloc(ct->ct_lcache, KM_NOSLEEP);
30487a18d3fSMadhavan Venkataraman 	if (cl == NULL) {
30587a18d3fSMadhavan Venkataraman 		size = sizeof (callout_list_t);
30687a18d3fSMadhavan Venkataraman 		cl = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
30787a18d3fSMadhavan Venkataraman 	}
30887a18d3fSMadhavan Venkataraman 	bzero(cl, sizeof (callout_list_t));
30987a18d3fSMadhavan Venkataraman 
31087a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
31151b32bddSMadhavan Venkataraman 	CALLOUT_LIST_FREE(ct, cl);
31287a18d3fSMadhavan Venkataraman }
31387a18d3fSMadhavan Venkataraman 
31487a18d3fSMadhavan Venkataraman /*
31551b32bddSMadhavan Venkataraman  * Find a callout list that corresponds to an expiration and matching flags.
31687a18d3fSMadhavan Venkataraman  */
31787a18d3fSMadhavan Venkataraman static callout_list_t *
31807247649SMadhavan Venkataraman callout_list_get(callout_table_t *ct, hrtime_t expiration, int flags, int hash)
31987a18d3fSMadhavan Venkataraman {
32087a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
32151b32bddSMadhavan Venkataraman 	int clflags;
32287a18d3fSMadhavan Venkataraman 
32387a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
32487a18d3fSMadhavan Venkataraman 
32551b32bddSMadhavan Venkataraman 	if (flags & CALLOUT_LIST_FLAG_NANO) {
32651b32bddSMadhavan Venkataraman 		/*
32751b32bddSMadhavan Venkataraman 		 * This is a 1-nanosecond resolution callout. We will rarely
32851b32bddSMadhavan Venkataraman 		 * find a match for this. So, bail out.
32951b32bddSMadhavan Venkataraman 		 */
33051b32bddSMadhavan Venkataraman 		return (NULL);
33151b32bddSMadhavan Venkataraman 	}
33251b32bddSMadhavan Venkataraman 
33351b32bddSMadhavan Venkataraman 	clflags = (CALLOUT_LIST_FLAG_ABSOLUTE | CALLOUT_LIST_FLAG_HRESTIME);
33487a18d3fSMadhavan Venkataraman 	for (cl = ct->ct_clhash[hash].ch_head; (cl != NULL); cl = cl->cl_next) {
33551b32bddSMadhavan Venkataraman 		/*
33651b32bddSMadhavan Venkataraman 		 * If we have reached a 1-nanosecond resolution callout list,
33751b32bddSMadhavan Venkataraman 		 * we don't have much hope of finding a match in this hash
33851b32bddSMadhavan Venkataraman 		 * bucket. So, just bail out.
33951b32bddSMadhavan Venkataraman 		 */
34051b32bddSMadhavan Venkataraman 		if (cl->cl_flags & CALLOUT_LIST_FLAG_NANO)
34151b32bddSMadhavan Venkataraman 			return (NULL);
34251b32bddSMadhavan Venkataraman 
34307247649SMadhavan Venkataraman 		if ((cl->cl_expiration == expiration) &&
34451b32bddSMadhavan Venkataraman 		    ((cl->cl_flags & clflags) == (flags & clflags)))
34587a18d3fSMadhavan Venkataraman 			return (cl);
34687a18d3fSMadhavan Venkataraman 	}
34787a18d3fSMadhavan Venkataraman 
34887a18d3fSMadhavan Venkataraman 	return (NULL);
34987a18d3fSMadhavan Venkataraman }
35087a18d3fSMadhavan Venkataraman 
35187a18d3fSMadhavan Venkataraman /*
352060cedfbSMadhavan Venkataraman  * Add a new callout list into a callout table's queue in sorted order by
353060cedfbSMadhavan Venkataraman  * expiration.
354060cedfbSMadhavan Venkataraman  */
355060cedfbSMadhavan Venkataraman static int
356060cedfbSMadhavan Venkataraman callout_queue_add(callout_table_t *ct, callout_list_t *cl)
357060cedfbSMadhavan Venkataraman {
358060cedfbSMadhavan Venkataraman 	callout_list_t *nextcl;
359060cedfbSMadhavan Venkataraman 	hrtime_t expiration;
360060cedfbSMadhavan Venkataraman 
361060cedfbSMadhavan Venkataraman 	expiration = cl->cl_expiration;
362060cedfbSMadhavan Venkataraman 	nextcl = ct->ct_queue.ch_head;
363060cedfbSMadhavan Venkataraman 	if ((nextcl == NULL) || (expiration < nextcl->cl_expiration)) {
364060cedfbSMadhavan Venkataraman 		CALLOUT_LIST_INSERT(ct->ct_queue, cl);
365060cedfbSMadhavan Venkataraman 		return (1);
366060cedfbSMadhavan Venkataraman 	}
367060cedfbSMadhavan Venkataraman 
368060cedfbSMadhavan Venkataraman 	while (nextcl != NULL) {
369060cedfbSMadhavan Venkataraman 		if (expiration < nextcl->cl_expiration) {
370060cedfbSMadhavan Venkataraman 			CALLOUT_LIST_BEFORE(cl, nextcl);
371060cedfbSMadhavan Venkataraman 			return (0);
372060cedfbSMadhavan Venkataraman 		}
373060cedfbSMadhavan Venkataraman 		nextcl = nextcl->cl_next;
374060cedfbSMadhavan Venkataraman 	}
375060cedfbSMadhavan Venkataraman 	CALLOUT_LIST_APPEND(ct->ct_queue, cl);
376060cedfbSMadhavan Venkataraman 
377060cedfbSMadhavan Venkataraman 	return (0);
378060cedfbSMadhavan Venkataraman }
379060cedfbSMadhavan Venkataraman 
380060cedfbSMadhavan Venkataraman /*
381060cedfbSMadhavan Venkataraman  * Insert a callout list into a callout table's queue and reprogram the queue
382060cedfbSMadhavan Venkataraman  * cyclic if needed.
383060cedfbSMadhavan Venkataraman  */
384060cedfbSMadhavan Venkataraman static void
385060cedfbSMadhavan Venkataraman callout_queue_insert(callout_table_t *ct, callout_list_t *cl)
386060cedfbSMadhavan Venkataraman {
387060cedfbSMadhavan Venkataraman 	cl->cl_flags |= CALLOUT_LIST_FLAG_QUEUED;
388060cedfbSMadhavan Venkataraman 
389060cedfbSMadhavan Venkataraman 	/*
390060cedfbSMadhavan Venkataraman 	 * Add the callout to the callout queue. If it ends up at the head,
391060cedfbSMadhavan Venkataraman 	 * the cyclic needs to be reprogrammed as we have an earlier
392060cedfbSMadhavan Venkataraman 	 * expiration.
393060cedfbSMadhavan Venkataraman 	 *
394060cedfbSMadhavan Venkataraman 	 * Also, during the CPR suspend phase, do not reprogram the cyclic.
395060cedfbSMadhavan Venkataraman 	 * We don't want any callout activity. When the CPR resume phase is
396060cedfbSMadhavan Venkataraman 	 * entered, the cyclic will be programmed for the earliest expiration
397060cedfbSMadhavan Venkataraman 	 * in the queue.
398060cedfbSMadhavan Venkataraman 	 */
399060cedfbSMadhavan Venkataraman 	if (callout_queue_add(ct, cl) && (ct->ct_suspend == 0))
400060cedfbSMadhavan Venkataraman 		(void) cyclic_reprogram(ct->ct_qcyclic, cl->cl_expiration);
401060cedfbSMadhavan Venkataraman }
402060cedfbSMadhavan Venkataraman 
403060cedfbSMadhavan Venkataraman /*
404060cedfbSMadhavan Venkataraman  * Delete and handle all past expirations in a callout table's queue.
405060cedfbSMadhavan Venkataraman  */
406060cedfbSMadhavan Venkataraman static hrtime_t
407060cedfbSMadhavan Venkataraman callout_queue_delete(callout_table_t *ct)
408060cedfbSMadhavan Venkataraman {
409060cedfbSMadhavan Venkataraman 	callout_list_t *cl;
410060cedfbSMadhavan Venkataraman 	hrtime_t now;
411060cedfbSMadhavan Venkataraman 
412060cedfbSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
413060cedfbSMadhavan Venkataraman 
414060cedfbSMadhavan Venkataraman 	now = gethrtime();
415060cedfbSMadhavan Venkataraman 	while ((cl = ct->ct_queue.ch_head) != NULL) {
416060cedfbSMadhavan Venkataraman 		if (cl->cl_expiration > now)
417060cedfbSMadhavan Venkataraman 			break;
418060cedfbSMadhavan Venkataraman 		cl->cl_flags &= ~CALLOUT_LIST_FLAG_QUEUED;
419060cedfbSMadhavan Venkataraman 		CALLOUT_LIST_DELETE(ct->ct_queue, cl);
420060cedfbSMadhavan Venkataraman 		CALLOUT_LIST_APPEND(ct->ct_expired, cl);
421060cedfbSMadhavan Venkataraman 	}
422060cedfbSMadhavan Venkataraman 
423060cedfbSMadhavan Venkataraman 	/*
424060cedfbSMadhavan Venkataraman 	 * If this callout queue is empty or callouts have been suspended,
425060cedfbSMadhavan Venkataraman 	 * just return.
426060cedfbSMadhavan Venkataraman 	 */
427060cedfbSMadhavan Venkataraman 	if ((cl == NULL) || (ct->ct_suspend > 0))
428060cedfbSMadhavan Venkataraman 		return (CY_INFINITY);
429060cedfbSMadhavan Venkataraman 
430060cedfbSMadhavan Venkataraman 	(void) cyclic_reprogram(ct->ct_qcyclic, cl->cl_expiration);
431060cedfbSMadhavan Venkataraman 
432060cedfbSMadhavan Venkataraman 	return (cl->cl_expiration);
433060cedfbSMadhavan Venkataraman }
434060cedfbSMadhavan Venkataraman 
435060cedfbSMadhavan Venkataraman static hrtime_t
436060cedfbSMadhavan Venkataraman callout_queue_process(callout_table_t *ct, hrtime_t delta, int timechange)
437060cedfbSMadhavan Venkataraman {
438060cedfbSMadhavan Venkataraman 	callout_list_t *firstcl, *cl;
439060cedfbSMadhavan Venkataraman 	hrtime_t expiration, now;
440060cedfbSMadhavan Venkataraman 	int clflags;
441060cedfbSMadhavan Venkataraman 	callout_hash_t temp;
442060cedfbSMadhavan Venkataraman 
443060cedfbSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
444060cedfbSMadhavan Venkataraman 
445060cedfbSMadhavan Venkataraman 	firstcl = ct->ct_queue.ch_head;
446060cedfbSMadhavan Venkataraman 	if (firstcl == NULL)
447060cedfbSMadhavan Venkataraman 		return (CY_INFINITY);
448060cedfbSMadhavan Venkataraman 
449060cedfbSMadhavan Venkataraman 	/*
450060cedfbSMadhavan Venkataraman 	 * We walk the callout queue. If we encounter a hrestime entry that
451060cedfbSMadhavan Venkataraman 	 * must be removed, we clean it out. Otherwise, we apply any
452060cedfbSMadhavan Venkataraman 	 * adjustments needed to it. Because of the latter, we need to
453060cedfbSMadhavan Venkataraman 	 * recreate the list as we go along.
454060cedfbSMadhavan Venkataraman 	 */
455060cedfbSMadhavan Venkataraman 	temp = ct->ct_queue;
456060cedfbSMadhavan Venkataraman 	ct->ct_queue.ch_head = NULL;
457060cedfbSMadhavan Venkataraman 	ct->ct_queue.ch_tail = NULL;
458060cedfbSMadhavan Venkataraman 
459060cedfbSMadhavan Venkataraman 	clflags = (CALLOUT_LIST_FLAG_HRESTIME | CALLOUT_LIST_FLAG_ABSOLUTE);
460060cedfbSMadhavan Venkataraman 	now = gethrtime();
461060cedfbSMadhavan Venkataraman 	while ((cl = temp.ch_head) != NULL) {
462060cedfbSMadhavan Venkataraman 		CALLOUT_LIST_DELETE(temp, cl);
463060cedfbSMadhavan Venkataraman 
464060cedfbSMadhavan Venkataraman 		/*
465060cedfbSMadhavan Venkataraman 		 * Delete the callout and expire it, if one of the following
466060cedfbSMadhavan Venkataraman 		 * is true:
467060cedfbSMadhavan Venkataraman 		 *	- the callout has expired
468060cedfbSMadhavan Venkataraman 		 *	- the callout is an absolute hrestime one and
469060cedfbSMadhavan Venkataraman 		 *	  there has been a system time change
470060cedfbSMadhavan Venkataraman 		 */
471060cedfbSMadhavan Venkataraman 		if ((cl->cl_expiration <= now) ||
472060cedfbSMadhavan Venkataraman 		    (timechange && ((cl->cl_flags & clflags) == clflags))) {
473060cedfbSMadhavan Venkataraman 			cl->cl_flags &= ~CALLOUT_LIST_FLAG_QUEUED;
474060cedfbSMadhavan Venkataraman 			CALLOUT_LIST_APPEND(ct->ct_expired, cl);
475060cedfbSMadhavan Venkataraman 			continue;
476060cedfbSMadhavan Venkataraman 		}
477060cedfbSMadhavan Venkataraman 
478060cedfbSMadhavan Venkataraman 		/*
479060cedfbSMadhavan Venkataraman 		 * Apply adjustments, if any. Adjustments are applied after
480060cedfbSMadhavan Venkataraman 		 * the system returns from KMDB or OBP. They are only applied
481060cedfbSMadhavan Venkataraman 		 * to relative callout lists.
482060cedfbSMadhavan Venkataraman 		 */
483060cedfbSMadhavan Venkataraman 		if (delta && !(cl->cl_flags & CALLOUT_LIST_FLAG_ABSOLUTE)) {
484060cedfbSMadhavan Venkataraman 			expiration = cl->cl_expiration + delta;
485060cedfbSMadhavan Venkataraman 			if (expiration <= 0)
486060cedfbSMadhavan Venkataraman 				expiration = CY_INFINITY;
487060cedfbSMadhavan Venkataraman 			cl->cl_expiration = expiration;
488060cedfbSMadhavan Venkataraman 		}
489060cedfbSMadhavan Venkataraman 
490060cedfbSMadhavan Venkataraman 		(void) callout_queue_add(ct, cl);
491060cedfbSMadhavan Venkataraman 	}
492060cedfbSMadhavan Venkataraman 
493060cedfbSMadhavan Venkataraman 	/*
494060cedfbSMadhavan Venkataraman 	 * We need to return the expiration to help program the cyclic.
495060cedfbSMadhavan Venkataraman 	 * If there are expired callouts, the cyclic needs to go off
496060cedfbSMadhavan Venkataraman 	 * immediately. If the queue has become empty, then we return infinity.
497060cedfbSMadhavan Venkataraman 	 * Else, we return the expiration of the earliest callout in the queue.
498060cedfbSMadhavan Venkataraman 	 */
499060cedfbSMadhavan Venkataraman 	if (ct->ct_expired.ch_head != NULL)
500060cedfbSMadhavan Venkataraman 		return (gethrtime());
501060cedfbSMadhavan Venkataraman 
502060cedfbSMadhavan Venkataraman 	cl = ct->ct_queue.ch_head;
503060cedfbSMadhavan Venkataraman 	if (cl == NULL)
504060cedfbSMadhavan Venkataraman 		return (CY_INFINITY);
505060cedfbSMadhavan Venkataraman 
506060cedfbSMadhavan Venkataraman 	return (cl->cl_expiration);
507060cedfbSMadhavan Venkataraman }
508060cedfbSMadhavan Venkataraman 
509060cedfbSMadhavan Venkataraman /*
51087a18d3fSMadhavan Venkataraman  * Initialize a callout table's heap, if necessary. Preallocate some free
51187a18d3fSMadhavan Venkataraman  * entries so we don't have to check for NULL elsewhere.
51287a18d3fSMadhavan Venkataraman  */
51387a18d3fSMadhavan Venkataraman static void
51487a18d3fSMadhavan Venkataraman callout_heap_init(callout_table_t *ct)
51587a18d3fSMadhavan Venkataraman {
51687a18d3fSMadhavan Venkataraman 	size_t size;
51787a18d3fSMadhavan Venkataraman 
51887a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
51987a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap == NULL);
52087a18d3fSMadhavan Venkataraman 
52187a18d3fSMadhavan Venkataraman 	ct->ct_heap_num = 0;
522060cedfbSMadhavan Venkataraman 	ct->ct_heap_max = callout_chunk;
523060cedfbSMadhavan Venkataraman 	size = sizeof (callout_heap_t) * callout_chunk;
52487a18d3fSMadhavan Venkataraman 	ct->ct_heap = kmem_alloc(size, KM_SLEEP);
52587a18d3fSMadhavan Venkataraman }
52687a18d3fSMadhavan Venkataraman 
52787a18d3fSMadhavan Venkataraman /*
528060cedfbSMadhavan Venkataraman  * Reallocate the heap. Return 0 if the heap is still full at the end of it.
529060cedfbSMadhavan Venkataraman  * Return 1 otherwise. Note that the heap only expands, it never contracts.
53087a18d3fSMadhavan Venkataraman  */
531060cedfbSMadhavan Venkataraman static int
53287a18d3fSMadhavan Venkataraman callout_heap_expand(callout_table_t *ct)
53387a18d3fSMadhavan Venkataraman {
53487a18d3fSMadhavan Venkataraman 	size_t max, size, osize;
53551b32bddSMadhavan Venkataraman 	callout_heap_t *heap;
53687a18d3fSMadhavan Venkataraman 
53787a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
53887a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap_num <= ct->ct_heap_max);
53987a18d3fSMadhavan Venkataraman 
54087a18d3fSMadhavan Venkataraman 	while (ct->ct_heap_num == ct->ct_heap_max) {
54187a18d3fSMadhavan Venkataraman 		max = ct->ct_heap_max;
54287a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
54387a18d3fSMadhavan Venkataraman 
54451b32bddSMadhavan Venkataraman 		osize = sizeof (callout_heap_t) * max;
545060cedfbSMadhavan Venkataraman 		size = sizeof (callout_heap_t) * (max + callout_chunk);
546060cedfbSMadhavan Venkataraman 		heap = kmem_alloc(size, KM_NOSLEEP);
54787a18d3fSMadhavan Venkataraman 
54887a18d3fSMadhavan Venkataraman 		mutex_enter(&ct->ct_mutex);
549060cedfbSMadhavan Venkataraman 		if (heap == NULL) {
550060cedfbSMadhavan Venkataraman 			/*
551060cedfbSMadhavan Venkataraman 			 * We could not allocate memory. If we can free up
552060cedfbSMadhavan Venkataraman 			 * some entries, that would be great.
553060cedfbSMadhavan Venkataraman 			 */
554060cedfbSMadhavan Venkataraman 			if (ct->ct_nreap > 0)
555060cedfbSMadhavan Venkataraman 				(void) callout_heap_process(ct, 0, 0);
556060cedfbSMadhavan Venkataraman 			/*
557060cedfbSMadhavan Venkataraman 			 * If we still have no space in the heap, inform the
558060cedfbSMadhavan Venkataraman 			 * caller.
559060cedfbSMadhavan Venkataraman 			 */
560060cedfbSMadhavan Venkataraman 			if (ct->ct_heap_num == ct->ct_heap_max)
561060cedfbSMadhavan Venkataraman 				return (0);
562060cedfbSMadhavan Venkataraman 			return (1);
563060cedfbSMadhavan Venkataraman 		}
56487a18d3fSMadhavan Venkataraman 		if (max < ct->ct_heap_max) {
56587a18d3fSMadhavan Venkataraman 			/*
56687a18d3fSMadhavan Venkataraman 			 * Someone beat us to the allocation. Free what we
56787a18d3fSMadhavan Venkataraman 			 * just allocated and proceed.
56887a18d3fSMadhavan Venkataraman 			 */
56987a18d3fSMadhavan Venkataraman 			kmem_free(heap, size);
57087a18d3fSMadhavan Venkataraman 			continue;
57187a18d3fSMadhavan Venkataraman 		}
57287a18d3fSMadhavan Venkataraman 
57387a18d3fSMadhavan Venkataraman 		bcopy(ct->ct_heap, heap, osize);
57487a18d3fSMadhavan Venkataraman 		kmem_free(ct->ct_heap, osize);
57587a18d3fSMadhavan Venkataraman 		ct->ct_heap = heap;
57651b32bddSMadhavan Venkataraman 		ct->ct_heap_max = size / sizeof (callout_heap_t);
57787a18d3fSMadhavan Venkataraman 	}
578060cedfbSMadhavan Venkataraman 
579060cedfbSMadhavan Venkataraman 	return (1);
58087a18d3fSMadhavan Venkataraman }
58187a18d3fSMadhavan Venkataraman 
58287a18d3fSMadhavan Venkataraman /*
58387a18d3fSMadhavan Venkataraman  * Move an expiration from the bottom of the heap to its correct place
58487a18d3fSMadhavan Venkataraman  * in the heap. If we reached the root doing this, return 1. Else,
58587a18d3fSMadhavan Venkataraman  * return 0.
58687a18d3fSMadhavan Venkataraman  */
58787a18d3fSMadhavan Venkataraman static int
58887a18d3fSMadhavan Venkataraman callout_upheap(callout_table_t *ct)
58987a18d3fSMadhavan Venkataraman {
59087a18d3fSMadhavan Venkataraman 	int current, parent;
59151b32bddSMadhavan Venkataraman 	callout_heap_t *heap, *hcurrent, *hparent;
59287a18d3fSMadhavan Venkataraman 
59387a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
59487a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap_num >= 1);
59587a18d3fSMadhavan Venkataraman 
59687a18d3fSMadhavan Venkataraman 	if (ct->ct_heap_num == 1) {
59787a18d3fSMadhavan Venkataraman 		return (1);
59887a18d3fSMadhavan Venkataraman 	}
59987a18d3fSMadhavan Venkataraman 
60087a18d3fSMadhavan Venkataraman 	heap = ct->ct_heap;
60187a18d3fSMadhavan Venkataraman 	current = ct->ct_heap_num - 1;
60287a18d3fSMadhavan Venkataraman 
60387a18d3fSMadhavan Venkataraman 	for (;;) {
60487a18d3fSMadhavan Venkataraman 		parent = CALLOUT_HEAP_PARENT(current);
60551b32bddSMadhavan Venkataraman 		hparent = &heap[parent];
60651b32bddSMadhavan Venkataraman 		hcurrent = &heap[current];
60787a18d3fSMadhavan Venkataraman 
60887a18d3fSMadhavan Venkataraman 		/*
60987a18d3fSMadhavan Venkataraman 		 * We have an expiration later than our parent; we're done.
61087a18d3fSMadhavan Venkataraman 		 */
61151b32bddSMadhavan Venkataraman 		if (hcurrent->ch_expiration >= hparent->ch_expiration) {
61287a18d3fSMadhavan Venkataraman 			return (0);
61387a18d3fSMadhavan Venkataraman 		}
61487a18d3fSMadhavan Venkataraman 
61587a18d3fSMadhavan Venkataraman 		/*
61687a18d3fSMadhavan Venkataraman 		 * We need to swap with our parent, and continue up the heap.
61787a18d3fSMadhavan Venkataraman 		 */
61851b32bddSMadhavan Venkataraman 		CALLOUT_SWAP(hparent, hcurrent);
61987a18d3fSMadhavan Venkataraman 
62087a18d3fSMadhavan Venkataraman 		/*
62187a18d3fSMadhavan Venkataraman 		 * If we just reached the root, we're done.
62287a18d3fSMadhavan Venkataraman 		 */
62387a18d3fSMadhavan Venkataraman 		if (parent == 0) {
62487a18d3fSMadhavan Venkataraman 			return (1);
62587a18d3fSMadhavan Venkataraman 		}
62687a18d3fSMadhavan Venkataraman 
62787a18d3fSMadhavan Venkataraman 		current = parent;
62887a18d3fSMadhavan Venkataraman 	}
62987a18d3fSMadhavan Venkataraman 	/*NOTREACHED*/
63087a18d3fSMadhavan Venkataraman }
63187a18d3fSMadhavan Venkataraman 
63287a18d3fSMadhavan Venkataraman /*
63351b32bddSMadhavan Venkataraman  * Insert a new heap item into a callout table's heap.
63487a18d3fSMadhavan Venkataraman  */
63587a18d3fSMadhavan Venkataraman static void
63651b32bddSMadhavan Venkataraman callout_heap_insert(callout_table_t *ct, callout_list_t *cl)
63787a18d3fSMadhavan Venkataraman {
63887a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
63987a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap_num < ct->ct_heap_max);
64087a18d3fSMadhavan Venkataraman 
641060cedfbSMadhavan Venkataraman 	cl->cl_flags |= CALLOUT_LIST_FLAG_HEAPED;
64287a18d3fSMadhavan Venkataraman 	/*
64351b32bddSMadhavan Venkataraman 	 * First, copy the expiration and callout list pointer to the bottom
64451b32bddSMadhavan Venkataraman 	 * of the heap.
64587a18d3fSMadhavan Venkataraman 	 */
64651b32bddSMadhavan Venkataraman 	ct->ct_heap[ct->ct_heap_num].ch_expiration = cl->cl_expiration;
64751b32bddSMadhavan Venkataraman 	ct->ct_heap[ct->ct_heap_num].ch_list = cl;
64887a18d3fSMadhavan Venkataraman 	ct->ct_heap_num++;
64987a18d3fSMadhavan Venkataraman 
65087a18d3fSMadhavan Venkataraman 	/*
65187a18d3fSMadhavan Venkataraman 	 * Now, perform an upheap operation. If we reached the root, then
65287a18d3fSMadhavan Venkataraman 	 * the cyclic needs to be reprogrammed as we have an earlier
65387a18d3fSMadhavan Venkataraman 	 * expiration.
65487a18d3fSMadhavan Venkataraman 	 *
65587a18d3fSMadhavan Venkataraman 	 * Also, during the CPR suspend phase, do not reprogram the cyclic.
65687a18d3fSMadhavan Venkataraman 	 * We don't want any callout activity. When the CPR resume phase is
65787a18d3fSMadhavan Venkataraman 	 * entered, the cyclic will be programmed for the earliest expiration
65887a18d3fSMadhavan Venkataraman 	 * in the heap.
65987a18d3fSMadhavan Venkataraman 	 */
660454ab202SMadhavan Venkataraman 	if (callout_upheap(ct) && (ct->ct_suspend == 0))
66151b32bddSMadhavan Venkataraman 		(void) cyclic_reprogram(ct->ct_cyclic, cl->cl_expiration);
66287a18d3fSMadhavan Venkataraman }
66387a18d3fSMadhavan Venkataraman 
66487a18d3fSMadhavan Venkataraman /*
66587a18d3fSMadhavan Venkataraman  * Move an expiration from the top of the heap to its correct place
66687a18d3fSMadhavan Venkataraman  * in the heap.
66787a18d3fSMadhavan Venkataraman  */
66887a18d3fSMadhavan Venkataraman static void
66987a18d3fSMadhavan Venkataraman callout_downheap(callout_table_t *ct)
67087a18d3fSMadhavan Venkataraman {
67151b32bddSMadhavan Venkataraman 	int current, left, right, nelems;
67251b32bddSMadhavan Venkataraman 	callout_heap_t *heap, *hleft, *hright, *hcurrent;
67387a18d3fSMadhavan Venkataraman 
67487a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
67587a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap_num >= 1);
67687a18d3fSMadhavan Venkataraman 
67787a18d3fSMadhavan Venkataraman 	heap = ct->ct_heap;
67887a18d3fSMadhavan Venkataraman 	current = 0;
67987a18d3fSMadhavan Venkataraman 	nelems = ct->ct_heap_num;
68087a18d3fSMadhavan Venkataraman 
68187a18d3fSMadhavan Venkataraman 	for (;;) {
68287a18d3fSMadhavan Venkataraman 		/*
68387a18d3fSMadhavan Venkataraman 		 * If we don't have a left child (i.e., we're a leaf), we're
68487a18d3fSMadhavan Venkataraman 		 * done.
68587a18d3fSMadhavan Venkataraman 		 */
68687a18d3fSMadhavan Venkataraman 		if ((left = CALLOUT_HEAP_LEFT(current)) >= nelems)
68787a18d3fSMadhavan Venkataraman 			return;
68887a18d3fSMadhavan Venkataraman 
68951b32bddSMadhavan Venkataraman 		hleft = &heap[left];
69051b32bddSMadhavan Venkataraman 		hcurrent = &heap[current];
69187a18d3fSMadhavan Venkataraman 
69287a18d3fSMadhavan Venkataraman 		right = CALLOUT_HEAP_RIGHT(current);
69387a18d3fSMadhavan Venkataraman 
69487a18d3fSMadhavan Venkataraman 		/*
69587a18d3fSMadhavan Venkataraman 		 * Even if we don't have a right child, we still need to compare
69687a18d3fSMadhavan Venkataraman 		 * our expiration against that of our left child.
69787a18d3fSMadhavan Venkataraman 		 */
69887a18d3fSMadhavan Venkataraman 		if (right >= nelems)
69987a18d3fSMadhavan Venkataraman 			goto comp_left;
70087a18d3fSMadhavan Venkataraman 
70151b32bddSMadhavan Venkataraman 		hright = &heap[right];
70287a18d3fSMadhavan Venkataraman 
70387a18d3fSMadhavan Venkataraman 		/*
70487a18d3fSMadhavan Venkataraman 		 * We have both a left and a right child.  We need to compare
70587a18d3fSMadhavan Venkataraman 		 * the expiration of the children to determine which
70687a18d3fSMadhavan Venkataraman 		 * expires earlier.
70787a18d3fSMadhavan Venkataraman 		 */
70851b32bddSMadhavan Venkataraman 		if (hright->ch_expiration < hleft->ch_expiration) {
70987a18d3fSMadhavan Venkataraman 			/*
71087a18d3fSMadhavan Venkataraman 			 * Our right child is the earlier of our children.
71187a18d3fSMadhavan Venkataraman 			 * We'll now compare our expiration to its expiration.
71287a18d3fSMadhavan Venkataraman 			 * If ours is the earlier one, we're done.
71387a18d3fSMadhavan Venkataraman 			 */
71451b32bddSMadhavan Venkataraman 			if (hcurrent->ch_expiration <= hright->ch_expiration)
71587a18d3fSMadhavan Venkataraman 				return;
71687a18d3fSMadhavan Venkataraman 
71787a18d3fSMadhavan Venkataraman 			/*
71887a18d3fSMadhavan Venkataraman 			 * Our right child expires earlier than we do; swap
71987a18d3fSMadhavan Venkataraman 			 * with our right child, and descend right.
72087a18d3fSMadhavan Venkataraman 			 */
72151b32bddSMadhavan Venkataraman 			CALLOUT_SWAP(hright, hcurrent);
72287a18d3fSMadhavan Venkataraman 			current = right;
72387a18d3fSMadhavan Venkataraman 			continue;
72487a18d3fSMadhavan Venkataraman 		}
72587a18d3fSMadhavan Venkataraman 
72687a18d3fSMadhavan Venkataraman comp_left:
72787a18d3fSMadhavan Venkataraman 		/*
72887a18d3fSMadhavan Venkataraman 		 * Our left child is the earlier of our children (or we have
72987a18d3fSMadhavan Venkataraman 		 * no right child).  We'll now compare our expiration
73087a18d3fSMadhavan Venkataraman 		 * to its expiration. If ours is the earlier one, we're done.
73187a18d3fSMadhavan Venkataraman 		 */
73251b32bddSMadhavan Venkataraman 		if (hcurrent->ch_expiration <= hleft->ch_expiration)
73387a18d3fSMadhavan Venkataraman 			return;
73487a18d3fSMadhavan Venkataraman 
73587a18d3fSMadhavan Venkataraman 		/*
73687a18d3fSMadhavan Venkataraman 		 * Our left child expires earlier than we do; swap with our
73787a18d3fSMadhavan Venkataraman 		 * left child, and descend left.
73887a18d3fSMadhavan Venkataraman 		 */
73951b32bddSMadhavan Venkataraman 		CALLOUT_SWAP(hleft, hcurrent);
74087a18d3fSMadhavan Venkataraman 		current = left;
74187a18d3fSMadhavan Venkataraman 	}
74287a18d3fSMadhavan Venkataraman }
74387a18d3fSMadhavan Venkataraman 
74487a18d3fSMadhavan Venkataraman /*
74587a18d3fSMadhavan Venkataraman  * Delete and handle all past expirations in a callout table's heap.
74687a18d3fSMadhavan Venkataraman  */
747060cedfbSMadhavan Venkataraman static hrtime_t
74887a18d3fSMadhavan Venkataraman callout_heap_delete(callout_table_t *ct)
74987a18d3fSMadhavan Venkataraman {
75051b32bddSMadhavan Venkataraman 	hrtime_t now, expiration, next;
75187a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
75251b32bddSMadhavan Venkataraman 	callout_heap_t *heap;
75387a18d3fSMadhavan Venkataraman 	int hash;
75487a18d3fSMadhavan Venkataraman 
75587a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
75687a18d3fSMadhavan Venkataraman 
75751b32bddSMadhavan Venkataraman 	if (CALLOUT_CLEANUP(ct)) {
75851b32bddSMadhavan Venkataraman 		/*
75951b32bddSMadhavan Venkataraman 		 * There are too many heap elements pointing to empty callout
76051b32bddSMadhavan Venkataraman 		 * lists. Clean them out.
76151b32bddSMadhavan Venkataraman 		 */
76251b32bddSMadhavan Venkataraman 		(void) callout_heap_process(ct, 0, 0);
76351b32bddSMadhavan Venkataraman 	}
76451b32bddSMadhavan Venkataraman 
76587a18d3fSMadhavan Venkataraman 	now = gethrtime();
76651b32bddSMadhavan Venkataraman 	heap = ct->ct_heap;
76787a18d3fSMadhavan Venkataraman 
76887a18d3fSMadhavan Venkataraman 	while (ct->ct_heap_num > 0) {
76951b32bddSMadhavan Venkataraman 		expiration = heap->ch_expiration;
77087a18d3fSMadhavan Venkataraman 		hash = CALLOUT_CLHASH(expiration);
77151b32bddSMadhavan Venkataraman 		cl = heap->ch_list;
77251b32bddSMadhavan Venkataraman 		ASSERT(expiration == cl->cl_expiration);
77351b32bddSMadhavan Venkataraman 
77451b32bddSMadhavan Venkataraman 		if (cl->cl_callouts.ch_head == NULL) {
77587a18d3fSMadhavan Venkataraman 			/*
77651b32bddSMadhavan Venkataraman 			 * If the callout list is empty, reap it.
77751b32bddSMadhavan Venkataraman 			 * Decrement the reap count.
77851b32bddSMadhavan Venkataraman 			 */
77951b32bddSMadhavan Venkataraman 			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
78051b32bddSMadhavan Venkataraman 			CALLOUT_LIST_FREE(ct, cl);
78151b32bddSMadhavan Venkataraman 			ct->ct_nreap--;
78251b32bddSMadhavan Venkataraman 		} else {
78351b32bddSMadhavan Venkataraman 			/*
78451b32bddSMadhavan Venkataraman 			 * If the root of the heap expires in the future,
78551b32bddSMadhavan Venkataraman 			 * bail out.
78687a18d3fSMadhavan Venkataraman 			 */
78787a18d3fSMadhavan Venkataraman 			if (expiration > now)
78887a18d3fSMadhavan Venkataraman 				break;
78987a18d3fSMadhavan Venkataraman 
79087a18d3fSMadhavan Venkataraman 			/*
79187a18d3fSMadhavan Venkataraman 			 * Move the callout list for this expiration to the
79287a18d3fSMadhavan Venkataraman 			 * list of expired callout lists. It will be processed
79387a18d3fSMadhavan Venkataraman 			 * by the callout executor.
79487a18d3fSMadhavan Venkataraman 			 */
795060cedfbSMadhavan Venkataraman 			cl->cl_flags &= ~CALLOUT_LIST_FLAG_HEAPED;
79687a18d3fSMadhavan Venkataraman 			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
79787a18d3fSMadhavan Venkataraman 			CALLOUT_LIST_APPEND(ct->ct_expired, cl);
79887a18d3fSMadhavan Venkataraman 		}
79987a18d3fSMadhavan Venkataraman 
80087a18d3fSMadhavan Venkataraman 		/*
80187a18d3fSMadhavan Venkataraman 		 * Now delete the root. This is done by swapping the root with
80287a18d3fSMadhavan Venkataraman 		 * the last item in the heap and downheaping the item.
80387a18d3fSMadhavan Venkataraman 		 */
80487a18d3fSMadhavan Venkataraman 		ct->ct_heap_num--;
80587a18d3fSMadhavan Venkataraman 		if (ct->ct_heap_num > 0) {
80651b32bddSMadhavan Venkataraman 			heap[0] = heap[ct->ct_heap_num];
80787a18d3fSMadhavan Venkataraman 			callout_downheap(ct);
80887a18d3fSMadhavan Venkataraman 		}
80987a18d3fSMadhavan Venkataraman 	}
81087a18d3fSMadhavan Venkataraman 
81187a18d3fSMadhavan Venkataraman 	/*
81251b32bddSMadhavan Venkataraman 	 * If this callout table is empty or callouts have been suspended,
81351b32bddSMadhavan Venkataraman 	 * just return. The cyclic has already been programmed to
81487a18d3fSMadhavan Venkataraman 	 * infinity by the cyclic subsystem.
81587a18d3fSMadhavan Venkataraman 	 */
816454ab202SMadhavan Venkataraman 	if ((ct->ct_heap_num == 0) || (ct->ct_suspend > 0))
817060cedfbSMadhavan Venkataraman 		return (CY_INFINITY);
81887a18d3fSMadhavan Venkataraman 
81951b32bddSMadhavan Venkataraman 	/*
82051b32bddSMadhavan Venkataraman 	 * If the top expirations are within callout_tolerance of each other,
82151b32bddSMadhavan Venkataraman 	 * delay the cyclic expire so that they can be processed together.
82251b32bddSMadhavan Venkataraman 	 * This is to prevent high resolution timers from swamping the system
82351b32bddSMadhavan Venkataraman 	 * with cyclic activity.
82451b32bddSMadhavan Venkataraman 	 */
82551b32bddSMadhavan Venkataraman 	if (ct->ct_heap_num > 2) {
82651b32bddSMadhavan Venkataraman 		next = expiration + callout_tolerance;
82751b32bddSMadhavan Venkataraman 		if ((heap[1].ch_expiration < next) ||
82851b32bddSMadhavan Venkataraman 		    (heap[2].ch_expiration < next))
82951b32bddSMadhavan Venkataraman 			expiration = next;
83051b32bddSMadhavan Venkataraman 	}
83151b32bddSMadhavan Venkataraman 
83287a18d3fSMadhavan Venkataraman 	(void) cyclic_reprogram(ct->ct_cyclic, expiration);
833060cedfbSMadhavan Venkataraman 
834060cedfbSMadhavan Venkataraman 	return (expiration);
83587a18d3fSMadhavan Venkataraman }
83687a18d3fSMadhavan Venkataraman 
837454ab202SMadhavan Venkataraman /*
83851b32bddSMadhavan Venkataraman  * There are some situations when the entire heap is walked and processed.
83951b32bddSMadhavan Venkataraman  * This function is called to do the processing. These are the situations:
84051b32bddSMadhavan Venkataraman  *
84151b32bddSMadhavan Venkataraman  * 1. When the reap count reaches its threshold, the heap has to be cleared
84251b32bddSMadhavan Venkataraman  *    of all empty callout lists.
84351b32bddSMadhavan Venkataraman  *
84451b32bddSMadhavan Venkataraman  * 2. When the system enters and exits KMDB/OBP, all entries in the heap
84551b32bddSMadhavan Venkataraman  *    need to be adjusted by the interval spent in KMDB/OBP.
84651b32bddSMadhavan Venkataraman  *
84751b32bddSMadhavan Venkataraman  * 3. When system time is changed, the heap has to be scanned for
84851b32bddSMadhavan Venkataraman  *    absolute hrestime timers. These need to be removed from the heap
84951b32bddSMadhavan Venkataraman  *    and expired immediately.
85051b32bddSMadhavan Venkataraman  *
85151b32bddSMadhavan Venkataraman  * In cases 2 and 3, it is a good idea to do 1 as well since we are
85251b32bddSMadhavan Venkataraman  * scanning the heap anyway.
85351b32bddSMadhavan Venkataraman  *
85451b32bddSMadhavan Venkataraman  * If the root gets changed and/or callout lists are expired, return the
85551b32bddSMadhavan Venkataraman  * new expiration to the caller so he can reprogram the cyclic accordingly.
85651b32bddSMadhavan Venkataraman  */
85751b32bddSMadhavan Venkataraman static hrtime_t
85851b32bddSMadhavan Venkataraman callout_heap_process(callout_table_t *ct, hrtime_t delta, int timechange)
85951b32bddSMadhavan Venkataraman {
86051b32bddSMadhavan Venkataraman 	callout_heap_t *heap;
861060cedfbSMadhavan Venkataraman 	callout_list_t *cl;
86251b32bddSMadhavan Venkataraman 	hrtime_t expiration, now;
863060cedfbSMadhavan Venkataraman 	int i, hash, clflags;
86451b32bddSMadhavan Venkataraman 	ulong_t num;
86551b32bddSMadhavan Venkataraman 
86651b32bddSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
86751b32bddSMadhavan Venkataraman 
86851b32bddSMadhavan Venkataraman 	if (ct->ct_heap_num == 0)
869060cedfbSMadhavan Venkataraman 		return (CY_INFINITY);
87051b32bddSMadhavan Venkataraman 
87151b32bddSMadhavan Venkataraman 	if (ct->ct_nreap > 0)
87251b32bddSMadhavan Venkataraman 		ct->ct_cleanups++;
87351b32bddSMadhavan Venkataraman 
87451b32bddSMadhavan Venkataraman 	heap = ct->ct_heap;
87551b32bddSMadhavan Venkataraman 
87651b32bddSMadhavan Venkataraman 	/*
87751b32bddSMadhavan Venkataraman 	 * We walk the heap from the top to the bottom. If we encounter
87851b32bddSMadhavan Venkataraman 	 * a heap item that points to an empty callout list, we clean
87951b32bddSMadhavan Venkataraman 	 * it out. If we encounter a hrestime entry that must be removed,
88051b32bddSMadhavan Venkataraman 	 * again we clean it out. Otherwise, we apply any adjustments needed
88151b32bddSMadhavan Venkataraman 	 * to an element.
88251b32bddSMadhavan Venkataraman 	 *
88351b32bddSMadhavan Venkataraman 	 * During the walk, we also compact the heap from the bottom and
88451b32bddSMadhavan Venkataraman 	 * reconstruct the heap using upheap operations. This is very
88551b32bddSMadhavan Venkataraman 	 * efficient if the number of elements to be cleaned is greater than
88651b32bddSMadhavan Venkataraman 	 * or equal to half the heap. This is the common case.
88751b32bddSMadhavan Venkataraman 	 *
88851b32bddSMadhavan Venkataraman 	 * Even in the non-common case, the upheap operations should be short
88951b32bddSMadhavan Venkataraman 	 * as the entries below generally tend to be bigger than the entries
89051b32bddSMadhavan Venkataraman 	 * above.
89151b32bddSMadhavan Venkataraman 	 */
89251b32bddSMadhavan Venkataraman 	num = ct->ct_heap_num;
89351b32bddSMadhavan Venkataraman 	ct->ct_heap_num = 0;
89451b32bddSMadhavan Venkataraman 	clflags = (CALLOUT_LIST_FLAG_HRESTIME | CALLOUT_LIST_FLAG_ABSOLUTE);
89551b32bddSMadhavan Venkataraman 	now = gethrtime();
89651b32bddSMadhavan Venkataraman 	for (i = 0; i < num; i++) {
89751b32bddSMadhavan Venkataraman 		cl = heap[i].ch_list;
89851b32bddSMadhavan Venkataraman 		/*
89951b32bddSMadhavan Venkataraman 		 * If the callout list is empty, delete the heap element and
90051b32bddSMadhavan Venkataraman 		 * free the callout list.
90151b32bddSMadhavan Venkataraman 		 */
90251b32bddSMadhavan Venkataraman 		if (cl->cl_callouts.ch_head == NULL) {
90351b32bddSMadhavan Venkataraman 			hash = CALLOUT_CLHASH(cl->cl_expiration);
90451b32bddSMadhavan Venkataraman 			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
90551b32bddSMadhavan Venkataraman 			CALLOUT_LIST_FREE(ct, cl);
90651b32bddSMadhavan Venkataraman 			continue;
90751b32bddSMadhavan Venkataraman 		}
90851b32bddSMadhavan Venkataraman 
90951b32bddSMadhavan Venkataraman 		/*
91051b32bddSMadhavan Venkataraman 		 * Delete the heap element and expire the callout list, if
91151b32bddSMadhavan Venkataraman 		 * one of the following is true:
91251b32bddSMadhavan Venkataraman 		 *	- the callout list has expired
91351b32bddSMadhavan Venkataraman 		 *	- the callout list is an absolute hrestime one and
91451b32bddSMadhavan Venkataraman 		 *	  there has been a system time change
91551b32bddSMadhavan Venkataraman 		 */
91651b32bddSMadhavan Venkataraman 		if ((cl->cl_expiration <= now) ||
91751b32bddSMadhavan Venkataraman 		    (timechange && ((cl->cl_flags & clflags) == clflags))) {
91851b32bddSMadhavan Venkataraman 			hash = CALLOUT_CLHASH(cl->cl_expiration);
919060cedfbSMadhavan Venkataraman 			cl->cl_flags &= ~CALLOUT_LIST_FLAG_HEAPED;
92051b32bddSMadhavan Venkataraman 			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
92151b32bddSMadhavan Venkataraman 			CALLOUT_LIST_APPEND(ct->ct_expired, cl);
92251b32bddSMadhavan Venkataraman 			continue;
92351b32bddSMadhavan Venkataraman 		}
92451b32bddSMadhavan Venkataraman 
92551b32bddSMadhavan Venkataraman 		/*
92651b32bddSMadhavan Venkataraman 		 * Apply adjustments, if any. Adjustments are applied after
92751b32bddSMadhavan Venkataraman 		 * the system returns from KMDB or OBP. They are only applied
92851b32bddSMadhavan Venkataraman 		 * to relative callout lists.
92951b32bddSMadhavan Venkataraman 		 */
93051b32bddSMadhavan Venkataraman 		if (delta && !(cl->cl_flags & CALLOUT_LIST_FLAG_ABSOLUTE)) {
93151b32bddSMadhavan Venkataraman 			hash = CALLOUT_CLHASH(cl->cl_expiration);
93251b32bddSMadhavan Venkataraman 			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
93351b32bddSMadhavan Venkataraman 			expiration = cl->cl_expiration + delta;
93451b32bddSMadhavan Venkataraman 			if (expiration <= 0)
93551b32bddSMadhavan Venkataraman 				expiration = CY_INFINITY;
93651b32bddSMadhavan Venkataraman 			heap[i].ch_expiration = expiration;
93751b32bddSMadhavan Venkataraman 			cl->cl_expiration = expiration;
93851b32bddSMadhavan Venkataraman 			hash = CALLOUT_CLHASH(cl->cl_expiration);
93951b32bddSMadhavan Venkataraman 			if (cl->cl_flags & CALLOUT_LIST_FLAG_NANO) {
94051b32bddSMadhavan Venkataraman 				CALLOUT_LIST_APPEND(ct->ct_clhash[hash], cl);
94151b32bddSMadhavan Venkataraman 			} else {
94251b32bddSMadhavan Venkataraman 				CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl);
94351b32bddSMadhavan Venkataraman 			}
94451b32bddSMadhavan Venkataraman 		}
94551b32bddSMadhavan Venkataraman 
94651b32bddSMadhavan Venkataraman 		heap[ct->ct_heap_num] = heap[i];
94751b32bddSMadhavan Venkataraman 		ct->ct_heap_num++;
94851b32bddSMadhavan Venkataraman 		(void) callout_upheap(ct);
94951b32bddSMadhavan Venkataraman 	}
95051b32bddSMadhavan Venkataraman 
95151b32bddSMadhavan Venkataraman 	ct->ct_nreap = 0;
95251b32bddSMadhavan Venkataraman 
953060cedfbSMadhavan Venkataraman 	/*
954060cedfbSMadhavan Venkataraman 	 * We need to return the expiration to help program the cyclic.
955060cedfbSMadhavan Venkataraman 	 * If there are expired callouts, the cyclic needs to go off
956060cedfbSMadhavan Venkataraman 	 * immediately. If the heap has become empty, then we return infinity.
957060cedfbSMadhavan Venkataraman 	 * Else, return the expiration of the earliest callout in the heap.
958060cedfbSMadhavan Venkataraman 	 */
959060cedfbSMadhavan Venkataraman 	if (ct->ct_expired.ch_head != NULL)
960060cedfbSMadhavan Venkataraman 		return (gethrtime());
96151b32bddSMadhavan Venkataraman 
962060cedfbSMadhavan Venkataraman 	if (ct->ct_heap_num == 0)
963060cedfbSMadhavan Venkataraman 		return (CY_INFINITY);
964060cedfbSMadhavan Venkataraman 
965060cedfbSMadhavan Venkataraman 	return (heap->ch_expiration);
96651b32bddSMadhavan Venkataraman }
96751b32bddSMadhavan Venkataraman 
96851b32bddSMadhavan Venkataraman /*
969454ab202SMadhavan Venkataraman  * Common function used to create normal and realtime callouts.
970454ab202SMadhavan Venkataraman  *
971454ab202SMadhavan Venkataraman  * Realtime callouts are handled at CY_LOW_PIL by a cyclic handler. So,
972454ab202SMadhavan Venkataraman  * there is one restriction on a realtime callout handler - it should not
973454ab202SMadhavan Venkataraman  * directly or indirectly acquire cpu_lock. CPU offline waits for pending
974454ab202SMadhavan Venkataraman  * cyclic handlers to complete while holding cpu_lock. So, if a realtime
975454ab202SMadhavan Venkataraman  * callout handler were to try to get cpu_lock, there would be a deadlock
976454ab202SMadhavan Venkataraman  * during CPU offline.
977454ab202SMadhavan Venkataraman  */
97887a18d3fSMadhavan Venkataraman callout_id_t
97987a18d3fSMadhavan Venkataraman timeout_generic(int type, void (*func)(void *), void *arg,
98087a18d3fSMadhavan Venkataraman 	hrtime_t expiration, hrtime_t resolution, int flags)
98187a18d3fSMadhavan Venkataraman {
98287a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
9837c478bd9Sstevel@tonic-gate 	callout_t *cp;
9847c478bd9Sstevel@tonic-gate 	callout_id_t id;
98587a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
986060cedfbSMadhavan Venkataraman 	hrtime_t now, interval;
98751b32bddSMadhavan Venkataraman 	int hash, clflags;
988f635d46aSqiao 
98987a18d3fSMadhavan Venkataraman 	ASSERT(resolution > 0);
99087a18d3fSMadhavan Venkataraman 	ASSERT(func != NULL);
9917c478bd9Sstevel@tonic-gate 
99287a18d3fSMadhavan Venkataraman 	/*
99351b32bddSMadhavan Venkataraman 	 * We get the current hrtime right upfront so that latencies in
99451b32bddSMadhavan Venkataraman 	 * this function do not affect the accuracy of the callout.
99587a18d3fSMadhavan Venkataraman 	 */
99651b32bddSMadhavan Venkataraman 	now = gethrtime();
9977c478bd9Sstevel@tonic-gate 
99887a18d3fSMadhavan Venkataraman 	/*
99987a18d3fSMadhavan Venkataraman 	 * We disable kernel preemption so that we remain on the same CPU
100087a18d3fSMadhavan Venkataraman 	 * throughout. If we needed to reprogram the callout table's cyclic,
100187a18d3fSMadhavan Venkataraman 	 * we can avoid X-calls if we are on the same CPU.
100287a18d3fSMadhavan Venkataraman 	 *
100387a18d3fSMadhavan Venkataraman 	 * Note that callout_alloc() releases and reacquires the callout
100487a18d3fSMadhavan Venkataraman 	 * table mutex. While reacquiring the mutex, it is possible for us
100587a18d3fSMadhavan Venkataraman 	 * to go to sleep and later migrate to another CPU. This should be
100687a18d3fSMadhavan Venkataraman 	 * pretty rare, though.
100787a18d3fSMadhavan Venkataraman 	 */
100887a18d3fSMadhavan Venkataraman 	kpreempt_disable();
100987a18d3fSMadhavan Venkataraman 
101087a18d3fSMadhavan Venkataraman 	ct = &callout_table[CALLOUT_TABLE(type, CPU->cpu_seqid)];
101187a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
101287a18d3fSMadhavan Venkataraman 
101387a18d3fSMadhavan Venkataraman 	if (ct->ct_cyclic == CYCLIC_NONE) {
101487a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
101587a18d3fSMadhavan Venkataraman 		/*
101687a18d3fSMadhavan Venkataraman 		 * The callout table has not yet been initialized fully.
101787a18d3fSMadhavan Venkataraman 		 * So, put this one on the boot callout table which is
101887a18d3fSMadhavan Venkataraman 		 * always initialized.
101987a18d3fSMadhavan Venkataraman 		 */
102087a18d3fSMadhavan Venkataraman 		ct = &callout_boot_ct[type];
102187a18d3fSMadhavan Venkataraman 		mutex_enter(&ct->ct_mutex);
102287a18d3fSMadhavan Venkataraman 	}
102387a18d3fSMadhavan Venkataraman 
102451b32bddSMadhavan Venkataraman 	if (CALLOUT_CLEANUP(ct)) {
102551b32bddSMadhavan Venkataraman 		/*
102651b32bddSMadhavan Venkataraman 		 * There are too many heap elements pointing to empty callout
1027060cedfbSMadhavan Venkataraman 		 * lists. Clean them out. Since cleanup is only done once
1028060cedfbSMadhavan Venkataraman 		 * in a while, no need to reprogram the cyclic if the root
1029060cedfbSMadhavan Venkataraman 		 * of the heap gets cleaned out.
103051b32bddSMadhavan Venkataraman 		 */
1031060cedfbSMadhavan Venkataraman 		(void) callout_heap_process(ct, 0, 0);
103251b32bddSMadhavan Venkataraman 	}
103351b32bddSMadhavan Venkataraman 
103487a18d3fSMadhavan Venkataraman 	if ((cp = ct->ct_free) == NULL)
10357c478bd9Sstevel@tonic-gate 		cp = callout_alloc(ct);
10367c478bd9Sstevel@tonic-gate 	else
103787a18d3fSMadhavan Venkataraman 		ct->ct_free = cp->c_idnext;
10387c478bd9Sstevel@tonic-gate 
10397c478bd9Sstevel@tonic-gate 	cp->c_func = func;
10407c478bd9Sstevel@tonic-gate 	cp->c_arg = arg;
10417c478bd9Sstevel@tonic-gate 
10427c478bd9Sstevel@tonic-gate 	/*
104387a18d3fSMadhavan Venkataraman 	 * Compute the expiration hrtime.
104487a18d3fSMadhavan Venkataraman 	 */
104587a18d3fSMadhavan Venkataraman 	if (flags & CALLOUT_FLAG_ABSOLUTE) {
104687a18d3fSMadhavan Venkataraman 		interval = expiration - now;
104787a18d3fSMadhavan Venkataraman 	} else {
104887a18d3fSMadhavan Venkataraman 		interval = expiration;
104987a18d3fSMadhavan Venkataraman 		expiration += now;
105087a18d3fSMadhavan Venkataraman 	}
105151b32bddSMadhavan Venkataraman 
105251b32bddSMadhavan Venkataraman 	if (resolution > 1) {
105351b32bddSMadhavan Venkataraman 		/*
105451b32bddSMadhavan Venkataraman 		 * Align expiration to the specified resolution.
105551b32bddSMadhavan Venkataraman 		 */
105687a18d3fSMadhavan Venkataraman 		if (flags & CALLOUT_FLAG_ROUNDUP)
105787a18d3fSMadhavan Venkataraman 			expiration += resolution - 1;
105887a18d3fSMadhavan Venkataraman 		expiration = (expiration / resolution) * resolution;
105951b32bddSMadhavan Venkataraman 	}
106051b32bddSMadhavan Venkataraman 
1061454ab202SMadhavan Venkataraman 	if (expiration <= 0) {
1062454ab202SMadhavan Venkataraman 		/*
1063454ab202SMadhavan Venkataraman 		 * expiration hrtime overflow has occurred. Just set the
1064454ab202SMadhavan Venkataraman 		 * expiration to infinity.
1065454ab202SMadhavan Venkataraman 		 */
1066454ab202SMadhavan Venkataraman 		expiration = CY_INFINITY;
1067454ab202SMadhavan Venkataraman 	}
106887a18d3fSMadhavan Venkataraman 
106987a18d3fSMadhavan Venkataraman 	/*
107087a18d3fSMadhavan Venkataraman 	 * Assign an ID to this callout
107187a18d3fSMadhavan Venkataraman 	 */
107287a18d3fSMadhavan Venkataraman 	if (flags & CALLOUT_FLAG_32BIT) {
107387a18d3fSMadhavan Venkataraman 		if (interval > callout_longterm) {
107487a18d3fSMadhavan Venkataraman 			id = (ct->ct_long_id - callout_counter_low);
107587a18d3fSMadhavan Venkataraman 			id |= CALLOUT_COUNTER_HIGH;
107687a18d3fSMadhavan Venkataraman 			ct->ct_long_id = id;
107787a18d3fSMadhavan Venkataraman 		} else {
107887a18d3fSMadhavan Venkataraman 			id = (ct->ct_short_id - callout_counter_low);
107987a18d3fSMadhavan Venkataraman 			id |= CALLOUT_COUNTER_HIGH;
108087a18d3fSMadhavan Venkataraman 			ct->ct_short_id = id;
108187a18d3fSMadhavan Venkataraman 		}
108287a18d3fSMadhavan Venkataraman 	} else {
108387a18d3fSMadhavan Venkataraman 		id = (ct->ct_gen_id - callout_counter_low);
108487a18d3fSMadhavan Venkataraman 		if ((id & CALLOUT_COUNTER_HIGH) == 0) {
108587a18d3fSMadhavan Venkataraman 			id |= CALLOUT_COUNTER_HIGH;
108687a18d3fSMadhavan Venkataraman 			id += CALLOUT_GENERATION_LOW;
108787a18d3fSMadhavan Venkataraman 		}
108887a18d3fSMadhavan Venkataraman 		ct->ct_gen_id = id;
108987a18d3fSMadhavan Venkataraman 	}
109087a18d3fSMadhavan Venkataraman 
109187a18d3fSMadhavan Venkataraman 	cp->c_xid = id;
109287a18d3fSMadhavan Venkataraman 
109351b32bddSMadhavan Venkataraman 	clflags = 0;
109451b32bddSMadhavan Venkataraman 	if (flags & CALLOUT_FLAG_ABSOLUTE)
109551b32bddSMadhavan Venkataraman 		clflags |= CALLOUT_LIST_FLAG_ABSOLUTE;
109651b32bddSMadhavan Venkataraman 	if (flags & CALLOUT_FLAG_HRESTIME)
109751b32bddSMadhavan Venkataraman 		clflags |= CALLOUT_LIST_FLAG_HRESTIME;
109851b32bddSMadhavan Venkataraman 	if (resolution == 1)
109951b32bddSMadhavan Venkataraman 		clflags |= CALLOUT_LIST_FLAG_NANO;
110087a18d3fSMadhavan Venkataraman 	hash = CALLOUT_CLHASH(expiration);
110187a18d3fSMadhavan Venkataraman 
110287a18d3fSMadhavan Venkataraman again:
110387a18d3fSMadhavan Venkataraman 	/*
110487a18d3fSMadhavan Venkataraman 	 * Try to see if a callout list already exists for this expiration.
110587a18d3fSMadhavan Venkataraman 	 */
110651b32bddSMadhavan Venkataraman 	cl = callout_list_get(ct, expiration, clflags, hash);
110787a18d3fSMadhavan Venkataraman 	if (cl == NULL) {
110887a18d3fSMadhavan Venkataraman 		/*
110987a18d3fSMadhavan Venkataraman 		 * Check the free list. If we don't find one, we have to
111087a18d3fSMadhavan Venkataraman 		 * take the slow path and allocate from kmem.
111187a18d3fSMadhavan Venkataraman 		 */
111287a18d3fSMadhavan Venkataraman 		if ((cl = ct->ct_lfree) == NULL) {
111387a18d3fSMadhavan Venkataraman 			callout_list_alloc(ct);
111487a18d3fSMadhavan Venkataraman 			/*
111587a18d3fSMadhavan Venkataraman 			 * In the above call, we drop the lock, allocate and
111687a18d3fSMadhavan Venkataraman 			 * reacquire the lock. So, we could have been away
111787a18d3fSMadhavan Venkataraman 			 * for a while. In the meantime, someone could have
111887a18d3fSMadhavan Venkataraman 			 * inserted a callout list with the same expiration.
111987a18d3fSMadhavan Venkataraman 			 * Plus, the heap could have become full. So, the best
112087a18d3fSMadhavan Venkataraman 			 * course is to repeat the steps. This should be an
112187a18d3fSMadhavan Venkataraman 			 * infrequent event.
112287a18d3fSMadhavan Venkataraman 			 */
112387a18d3fSMadhavan Venkataraman 			goto again;
112487a18d3fSMadhavan Venkataraman 		}
112587a18d3fSMadhavan Venkataraman 		ct->ct_lfree = cl->cl_next;
112687a18d3fSMadhavan Venkataraman 		cl->cl_expiration = expiration;
112751b32bddSMadhavan Venkataraman 		cl->cl_flags = clflags;
112887a18d3fSMadhavan Venkataraman 
1129060cedfbSMadhavan Venkataraman 		/*
1130060cedfbSMadhavan Venkataraman 		 * Check if we have enough space in the heap to insert one
1131060cedfbSMadhavan Venkataraman 		 * expiration. If not, expand the heap.
1132060cedfbSMadhavan Venkataraman 		 */
1133060cedfbSMadhavan Venkataraman 		if (ct->ct_heap_num == ct->ct_heap_max) {
1134060cedfbSMadhavan Venkataraman 			if (callout_heap_expand(ct) == 0) {
1135060cedfbSMadhavan Venkataraman 				/*
1136060cedfbSMadhavan Venkataraman 				 * Could not expand the heap. Just queue it.
1137060cedfbSMadhavan Venkataraman 				 */
1138060cedfbSMadhavan Venkataraman 				callout_queue_insert(ct, cl);
1139060cedfbSMadhavan Venkataraman 				goto out;
1140060cedfbSMadhavan Venkataraman 			}
1141060cedfbSMadhavan Venkataraman 
1142060cedfbSMadhavan Venkataraman 			/*
1143060cedfbSMadhavan Venkataraman 			 * In the above call, we drop the lock, allocate and
1144060cedfbSMadhavan Venkataraman 			 * reacquire the lock. So, we could have been away
1145060cedfbSMadhavan Venkataraman 			 * for a while. In the meantime, someone could have
1146060cedfbSMadhavan Venkataraman 			 * inserted a callout list with the same expiration.
1147060cedfbSMadhavan Venkataraman 			 * But we will not go back and check for it as this
1148060cedfbSMadhavan Venkataraman 			 * should be a really infrequent event. There is no
1149060cedfbSMadhavan Venkataraman 			 * point.
1150060cedfbSMadhavan Venkataraman 			 */
1151060cedfbSMadhavan Venkataraman 		}
1152060cedfbSMadhavan Venkataraman 
115351b32bddSMadhavan Venkataraman 		if (clflags & CALLOUT_LIST_FLAG_NANO) {
115451b32bddSMadhavan Venkataraman 			CALLOUT_LIST_APPEND(ct->ct_clhash[hash], cl);
115551b32bddSMadhavan Venkataraman 		} else {
115687a18d3fSMadhavan Venkataraman 			CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl);
115751b32bddSMadhavan Venkataraman 		}
115887a18d3fSMadhavan Venkataraman 
115987a18d3fSMadhavan Venkataraman 		/*
116087a18d3fSMadhavan Venkataraman 		 * This is a new expiration. So, insert it into the heap.
116187a18d3fSMadhavan Venkataraman 		 * This will also reprogram the cyclic, if the expiration
116287a18d3fSMadhavan Venkataraman 		 * propagated to the root of the heap.
116387a18d3fSMadhavan Venkataraman 		 */
116451b32bddSMadhavan Venkataraman 		callout_heap_insert(ct, cl);
116551b32bddSMadhavan Venkataraman 	} else {
116651b32bddSMadhavan Venkataraman 		/*
116751b32bddSMadhavan Venkataraman 		 * If the callout list was empty, untimeout_generic() would
116851b32bddSMadhavan Venkataraman 		 * have incremented a reap count. Decrement the reap count
116951b32bddSMadhavan Venkataraman 		 * as we are going to insert a callout into this list.
117051b32bddSMadhavan Venkataraman 		 */
117151b32bddSMadhavan Venkataraman 		if (cl->cl_callouts.ch_head == NULL)
117251b32bddSMadhavan Venkataraman 			ct->ct_nreap--;
117387a18d3fSMadhavan Venkataraman 	}
1174060cedfbSMadhavan Venkataraman out:
117587a18d3fSMadhavan Venkataraman 	cp->c_list = cl;
117687a18d3fSMadhavan Venkataraman 	CALLOUT_APPEND(ct, cp);
117787a18d3fSMadhavan Venkataraman 
117887a18d3fSMadhavan Venkataraman 	ct->ct_timeouts++;
117987a18d3fSMadhavan Venkataraman 	ct->ct_timeouts_pending++;
118087a18d3fSMadhavan Venkataraman 
118187a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
118287a18d3fSMadhavan Venkataraman 
118387a18d3fSMadhavan Venkataraman 	kpreempt_enable();
118487a18d3fSMadhavan Venkataraman 
118587a18d3fSMadhavan Venkataraman 	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
118687a18d3fSMadhavan Venkataraman 	    "timeout:%K(%p) in %llx expiration, cp %p", func, arg, expiration,
118787a18d3fSMadhavan Venkataraman 	    cp);
118887a18d3fSMadhavan Venkataraman 
118987a18d3fSMadhavan Venkataraman 	return (id);
119087a18d3fSMadhavan Venkataraman }
119187a18d3fSMadhavan Venkataraman 
119287a18d3fSMadhavan Venkataraman timeout_id_t
119387a18d3fSMadhavan Venkataraman timeout(void (*func)(void *), void *arg, clock_t delta)
119487a18d3fSMadhavan Venkataraman {
119587a18d3fSMadhavan Venkataraman 	ulong_t id;
119687a18d3fSMadhavan Venkataraman 
119787a18d3fSMadhavan Venkataraman 	/*
11987c478bd9Sstevel@tonic-gate 	 * Make sure the callout runs at least 1 tick in the future.
11997c478bd9Sstevel@tonic-gate 	 */
12007c478bd9Sstevel@tonic-gate 	if (delta <= 0)
12017c478bd9Sstevel@tonic-gate 		delta = 1;
1202454ab202SMadhavan Venkataraman 	else if (delta > callout_max_ticks)
1203454ab202SMadhavan Venkataraman 		delta = callout_max_ticks;
12047c478bd9Sstevel@tonic-gate 
120587a18d3fSMadhavan Venkataraman 	id =  (ulong_t)timeout_generic(CALLOUT_NORMAL, func, arg,
120687a18d3fSMadhavan Venkataraman 	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);
12077c478bd9Sstevel@tonic-gate 
12087c478bd9Sstevel@tonic-gate 	return ((timeout_id_t)id);
12097c478bd9Sstevel@tonic-gate }
12107c478bd9Sstevel@tonic-gate 
121187a18d3fSMadhavan Venkataraman /*
121287a18d3fSMadhavan Venkataraman  * Convenience function that creates a normal callout with default parameters
121387a18d3fSMadhavan Venkataraman  * and returns a full ID.
121487a18d3fSMadhavan Venkataraman  */
121587a18d3fSMadhavan Venkataraman callout_id_t
121687a18d3fSMadhavan Venkataraman timeout_default(void (*func)(void *), void *arg, clock_t delta)
12177c478bd9Sstevel@tonic-gate {
121887a18d3fSMadhavan Venkataraman 	callout_id_t id;
12197c478bd9Sstevel@tonic-gate 
122087a18d3fSMadhavan Venkataraman 	/*
122187a18d3fSMadhavan Venkataraman 	 * Make sure the callout runs at least 1 tick in the future.
122287a18d3fSMadhavan Venkataraman 	 */
122387a18d3fSMadhavan Venkataraman 	if (delta <= 0)
122487a18d3fSMadhavan Venkataraman 		delta = 1;
1225454ab202SMadhavan Venkataraman 	else if (delta > callout_max_ticks)
1226454ab202SMadhavan Venkataraman 		delta = callout_max_ticks;
122787a18d3fSMadhavan Venkataraman 
122887a18d3fSMadhavan Venkataraman 	id = timeout_generic(CALLOUT_NORMAL, func, arg, TICK_TO_NSEC(delta),
122987a18d3fSMadhavan Venkataraman 	    nsec_per_tick, 0);
123087a18d3fSMadhavan Venkataraman 
123187a18d3fSMadhavan Venkataraman 	return (id);
12327c478bd9Sstevel@tonic-gate }
12337c478bd9Sstevel@tonic-gate 
12347c478bd9Sstevel@tonic-gate timeout_id_t
12357c478bd9Sstevel@tonic-gate realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
12367c478bd9Sstevel@tonic-gate {
123787a18d3fSMadhavan Venkataraman 	ulong_t id;
123887a18d3fSMadhavan Venkataraman 
123987a18d3fSMadhavan Venkataraman 	/*
124087a18d3fSMadhavan Venkataraman 	 * Make sure the callout runs at least 1 tick in the future.
124187a18d3fSMadhavan Venkataraman 	 */
124287a18d3fSMadhavan Venkataraman 	if (delta <= 0)
124387a18d3fSMadhavan Venkataraman 		delta = 1;
1244454ab202SMadhavan Venkataraman 	else if (delta > callout_max_ticks)
1245454ab202SMadhavan Venkataraman 		delta = callout_max_ticks;
124687a18d3fSMadhavan Venkataraman 
124787a18d3fSMadhavan Venkataraman 	id =  (ulong_t)timeout_generic(CALLOUT_REALTIME, func, arg,
124887a18d3fSMadhavan Venkataraman 	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);
124987a18d3fSMadhavan Venkataraman 
125087a18d3fSMadhavan Venkataraman 	return ((timeout_id_t)id);
12517c478bd9Sstevel@tonic-gate }
12527c478bd9Sstevel@tonic-gate 
125387a18d3fSMadhavan Venkataraman /*
125487a18d3fSMadhavan Venkataraman  * Convenience function that creates a realtime callout with default parameters
125587a18d3fSMadhavan Venkataraman  * and returns a full ID.
125687a18d3fSMadhavan Venkataraman  */
125787a18d3fSMadhavan Venkataraman callout_id_t
125887a18d3fSMadhavan Venkataraman realtime_timeout_default(void (*func)(void *), void *arg, clock_t delta)
12597c478bd9Sstevel@tonic-gate {
126087a18d3fSMadhavan Venkataraman 	callout_id_t id;
126187a18d3fSMadhavan Venkataraman 
126287a18d3fSMadhavan Venkataraman 	/*
126387a18d3fSMadhavan Venkataraman 	 * Make sure the callout runs at least 1 tick in the future.
126487a18d3fSMadhavan Venkataraman 	 */
126587a18d3fSMadhavan Venkataraman 	if (delta <= 0)
126687a18d3fSMadhavan Venkataraman 		delta = 1;
1267454ab202SMadhavan Venkataraman 	else if (delta > callout_max_ticks)
1268454ab202SMadhavan Venkataraman 		delta = callout_max_ticks;
126987a18d3fSMadhavan Venkataraman 
127087a18d3fSMadhavan Venkataraman 	id = timeout_generic(CALLOUT_REALTIME, func, arg, TICK_TO_NSEC(delta),
127187a18d3fSMadhavan Venkataraman 	    nsec_per_tick, 0);
127287a18d3fSMadhavan Venkataraman 
127387a18d3fSMadhavan Venkataraman 	return (id);
127487a18d3fSMadhavan Venkataraman }
127587a18d3fSMadhavan Venkataraman 
127687a18d3fSMadhavan Venkataraman hrtime_t
127787a18d3fSMadhavan Venkataraman untimeout_generic(callout_id_t id, int nowait)
127887a18d3fSMadhavan Venkataraman {
12797c478bd9Sstevel@tonic-gate 	callout_table_t *ct;
12807c478bd9Sstevel@tonic-gate 	callout_t *cp;
12817c478bd9Sstevel@tonic-gate 	callout_id_t xid;
128251b32bddSMadhavan Venkataraman 	callout_list_t *cl;
1283060cedfbSMadhavan Venkataraman 	int hash, flags;
128487a18d3fSMadhavan Venkataraman 	callout_id_t bogus;
12857c478bd9Sstevel@tonic-gate 
128687a18d3fSMadhavan Venkataraman 	ct = &callout_table[CALLOUT_ID_TO_TABLE(id)];
128787a18d3fSMadhavan Venkataraman 	hash = CALLOUT_IDHASH(id);
12887c478bd9Sstevel@tonic-gate 
128987a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
12907c478bd9Sstevel@tonic-gate 
129187a18d3fSMadhavan Venkataraman 	/*
129287a18d3fSMadhavan Venkataraman 	 * Search the ID hash table for the callout.
129387a18d3fSMadhavan Venkataraman 	 */
129487a18d3fSMadhavan Venkataraman 	for (cp = ct->ct_idhash[hash].ch_head; cp; cp = cp->c_idnext) {
12957c478bd9Sstevel@tonic-gate 
129687a18d3fSMadhavan Venkataraman 		xid = cp->c_xid;
12977c478bd9Sstevel@tonic-gate 
129887a18d3fSMadhavan Venkataraman 		/*
129987a18d3fSMadhavan Venkataraman 		 * Match the ID and generation number.
130087a18d3fSMadhavan Venkataraman 		 */
130187a18d3fSMadhavan Venkataraman 		if ((xid & CALLOUT_ID_MASK) != id)
13027c478bd9Sstevel@tonic-gate 			continue;
13037c478bd9Sstevel@tonic-gate 
130487a18d3fSMadhavan Venkataraman 		if ((xid & CALLOUT_EXECUTING) == 0) {
130587a18d3fSMadhavan Venkataraman 			hrtime_t expiration;
130687a18d3fSMadhavan Venkataraman 
130787a18d3fSMadhavan Venkataraman 			/*
130887a18d3fSMadhavan Venkataraman 			 * Delete the callout. If the callout list becomes
130987a18d3fSMadhavan Venkataraman 			 * NULL, we don't remove it from the table. This is
131087a18d3fSMadhavan Venkataraman 			 * so it can be reused. If the empty callout list
131187a18d3fSMadhavan Venkataraman 			 * corresponds to the top of the the callout heap, we
131287a18d3fSMadhavan Venkataraman 			 * don't reprogram the table cyclic here. This is in
131387a18d3fSMadhavan Venkataraman 			 * order to avoid lots of X-calls to the CPU associated
131487a18d3fSMadhavan Venkataraman 			 * with the callout table.
131587a18d3fSMadhavan Venkataraman 			 */
131651b32bddSMadhavan Venkataraman 			cl = cp->c_list;
131751b32bddSMadhavan Venkataraman 			expiration = cl->cl_expiration;
131887a18d3fSMadhavan Venkataraman 			CALLOUT_DELETE(ct, cp);
1319060cedfbSMadhavan Venkataraman 			CALLOUT_FREE(ct, cp);
132087a18d3fSMadhavan Venkataraman 			ct->ct_untimeouts_unexpired++;
132187a18d3fSMadhavan Venkataraman 			ct->ct_timeouts_pending--;
132251b32bddSMadhavan Venkataraman 
132351b32bddSMadhavan Venkataraman 			/*
1324060cedfbSMadhavan Venkataraman 			 * If the callout list has become empty, there are 3
1325060cedfbSMadhavan Venkataraman 			 * possibilities. If it is present:
1326060cedfbSMadhavan Venkataraman 			 *	- in the heap, it needs to be cleaned along
1327060cedfbSMadhavan Venkataraman 			 *	  with its heap entry. Increment a reap count.
1328060cedfbSMadhavan Venkataraman 			 *	- in the callout queue, free it.
1329060cedfbSMadhavan Venkataraman 			 *	- in the expired list, free it.
133051b32bddSMadhavan Venkataraman 			 */
1331060cedfbSMadhavan Venkataraman 			if (cl->cl_callouts.ch_head == NULL) {
1332060cedfbSMadhavan Venkataraman 				flags = cl->cl_flags;
1333060cedfbSMadhavan Venkataraman 				if (flags & CALLOUT_LIST_FLAG_HEAPED) {
133451b32bddSMadhavan Venkataraman 					ct->ct_nreap++;
1335060cedfbSMadhavan Venkataraman 				} else if (flags & CALLOUT_LIST_FLAG_QUEUED) {
1336060cedfbSMadhavan Venkataraman 					CALLOUT_LIST_DELETE(ct->ct_queue, cl);
1337060cedfbSMadhavan Venkataraman 					CALLOUT_LIST_FREE(ct, cl);
1338060cedfbSMadhavan Venkataraman 				} else {
1339060cedfbSMadhavan Venkataraman 					CALLOUT_LIST_DELETE(ct->ct_expired, cl);
1340060cedfbSMadhavan Venkataraman 					CALLOUT_LIST_FREE(ct, cl);
1341060cedfbSMadhavan Venkataraman 				}
1342060cedfbSMadhavan Venkataraman 			}
134387a18d3fSMadhavan Venkataraman 			mutex_exit(&ct->ct_mutex);
134487a18d3fSMadhavan Venkataraman 
134587a18d3fSMadhavan Venkataraman 			expiration -= gethrtime();
134687a18d3fSMadhavan Venkataraman 			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
134787a18d3fSMadhavan Venkataraman 			    "untimeout:ID %lx hrtime left %llx", id,
134887a18d3fSMadhavan Venkataraman 			    expiration);
134987a18d3fSMadhavan Venkataraman 			return (expiration < 0 ? 0 : expiration);
135087a18d3fSMadhavan Venkataraman 		}
135187a18d3fSMadhavan Venkataraman 
135287a18d3fSMadhavan Venkataraman 		ct->ct_untimeouts_executing++;
13537c478bd9Sstevel@tonic-gate 		/*
13547c478bd9Sstevel@tonic-gate 		 * The callout we want to delete is currently executing.
13557c478bd9Sstevel@tonic-gate 		 * The DDI states that we must wait until the callout
135607247649SMadhavan Venkataraman 		 * completes before returning, so we block on c_done until the
135787a18d3fSMadhavan Venkataraman 		 * callout ID changes (to the old ID if it's on the freelist,
13587c478bd9Sstevel@tonic-gate 		 * or to a new callout ID if it's in use).  This implicitly
13597c478bd9Sstevel@tonic-gate 		 * assumes that callout structures are persistent (they are).
13607c478bd9Sstevel@tonic-gate 		 */
136107247649SMadhavan Venkataraman 		if (cp->c_executor == curthread) {
13627c478bd9Sstevel@tonic-gate 			/*
13637c478bd9Sstevel@tonic-gate 			 * The timeout handler called untimeout() on itself.
13647c478bd9Sstevel@tonic-gate 			 * Stupid, but legal.  We can't wait for the timeout
13657c478bd9Sstevel@tonic-gate 			 * to complete without deadlocking, so we just return.
13667c478bd9Sstevel@tonic-gate 			 */
136787a18d3fSMadhavan Venkataraman 			mutex_exit(&ct->ct_mutex);
13687c478bd9Sstevel@tonic-gate 			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
13697c478bd9Sstevel@tonic-gate 			    "untimeout_self:ID %x", id);
13707c478bd9Sstevel@tonic-gate 			return (-1);
13717c478bd9Sstevel@tonic-gate 		}
137287a18d3fSMadhavan Venkataraman 		if (nowait == 0) {
137387a18d3fSMadhavan Venkataraman 			/*
137487a18d3fSMadhavan Venkataraman 			 * We need to wait. Indicate that we are waiting by
137507247649SMadhavan Venkataraman 			 * incrementing c_waiting. This prevents the executor
137607247649SMadhavan Venkataraman 			 * from doing a wakeup on c_done if there are no
137787a18d3fSMadhavan Venkataraman 			 * waiters.
137887a18d3fSMadhavan Venkataraman 			 */
137987a18d3fSMadhavan Venkataraman 			while (cp->c_xid == xid) {
138007247649SMadhavan Venkataraman 				cp->c_waiting = 1;
138107247649SMadhavan Venkataraman 				cv_wait(&cp->c_done, &ct->ct_mutex);
138287a18d3fSMadhavan Venkataraman 			}
138387a18d3fSMadhavan Venkataraman 		}
138487a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
13857c478bd9Sstevel@tonic-gate 		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
13867c478bd9Sstevel@tonic-gate 		    "untimeout_executing:ID %lx", id);
13877c478bd9Sstevel@tonic-gate 		return (-1);
13887c478bd9Sstevel@tonic-gate 	}
138987a18d3fSMadhavan Venkataraman 	ct->ct_untimeouts_expired++;
13907c478bd9Sstevel@tonic-gate 
139187a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
13927c478bd9Sstevel@tonic-gate 	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
13937c478bd9Sstevel@tonic-gate 	    "untimeout_bogus_id:ID %lx", id);
13947c478bd9Sstevel@tonic-gate 
13957c478bd9Sstevel@tonic-gate 	/*
13967c478bd9Sstevel@tonic-gate 	 * We didn't find the specified callout ID.  This means either
13977c478bd9Sstevel@tonic-gate 	 * (1) the callout already fired, or (2) the caller passed us
13987c478bd9Sstevel@tonic-gate 	 * a bogus value.  Perform a sanity check to detect case (2).
13997c478bd9Sstevel@tonic-gate 	 */
140051b32bddSMadhavan Venkataraman 	bogus = (CALLOUT_ID_FLAGS | CALLOUT_COUNTER_HIGH);
140187a18d3fSMadhavan Venkataraman 	if (((id & bogus) != CALLOUT_COUNTER_HIGH) && (id != 0))
140287a18d3fSMadhavan Venkataraman 		panic("untimeout: impossible timeout id %llx",
140387a18d3fSMadhavan Venkataraman 		    (unsigned long long)id);
14047c478bd9Sstevel@tonic-gate 
14057c478bd9Sstevel@tonic-gate 	return (-1);
14067c478bd9Sstevel@tonic-gate }
14077c478bd9Sstevel@tonic-gate 
140887a18d3fSMadhavan Venkataraman clock_t
140987a18d3fSMadhavan Venkataraman untimeout(timeout_id_t id_arg)
141087a18d3fSMadhavan Venkataraman {
141187a18d3fSMadhavan Venkataraman 	hrtime_t hleft;
141287a18d3fSMadhavan Venkataraman 	clock_t tleft;
141387a18d3fSMadhavan Venkataraman 	callout_id_t id;
141487a18d3fSMadhavan Venkataraman 
141587a18d3fSMadhavan Venkataraman 	id = (ulong_t)id_arg;
141687a18d3fSMadhavan Venkataraman 	hleft = untimeout_generic(id, 0);
141787a18d3fSMadhavan Venkataraman 	if (hleft < 0)
141887a18d3fSMadhavan Venkataraman 		tleft = -1;
141987a18d3fSMadhavan Venkataraman 	else if (hleft == 0)
142087a18d3fSMadhavan Venkataraman 		tleft = 0;
142187a18d3fSMadhavan Venkataraman 	else
142287a18d3fSMadhavan Venkataraman 		tleft = NSEC_TO_TICK(hleft);
142387a18d3fSMadhavan Venkataraman 
142487a18d3fSMadhavan Venkataraman 	return (tleft);
142587a18d3fSMadhavan Venkataraman }
142687a18d3fSMadhavan Venkataraman 
14277c478bd9Sstevel@tonic-gate /*
142887a18d3fSMadhavan Venkataraman  * Convenience function to untimeout a timeout with a full ID with default
142987a18d3fSMadhavan Venkataraman  * parameters.
143087a18d3fSMadhavan Venkataraman  */
143187a18d3fSMadhavan Venkataraman clock_t
143287a18d3fSMadhavan Venkataraman untimeout_default(callout_id_t id, int nowait)
143387a18d3fSMadhavan Venkataraman {
143487a18d3fSMadhavan Venkataraman 	hrtime_t hleft;
143587a18d3fSMadhavan Venkataraman 	clock_t tleft;
143687a18d3fSMadhavan Venkataraman 
143787a18d3fSMadhavan Venkataraman 	hleft = untimeout_generic(id, nowait);
143887a18d3fSMadhavan Venkataraman 	if (hleft < 0)
143987a18d3fSMadhavan Venkataraman 		tleft = -1;
144087a18d3fSMadhavan Venkataraman 	else if (hleft == 0)
144187a18d3fSMadhavan Venkataraman 		tleft = 0;
144287a18d3fSMadhavan Venkataraman 	else
144387a18d3fSMadhavan Venkataraman 		tleft = NSEC_TO_TICK(hleft);
144487a18d3fSMadhavan Venkataraman 
144587a18d3fSMadhavan Venkataraman 	return (tleft);
144687a18d3fSMadhavan Venkataraman }
144787a18d3fSMadhavan Venkataraman 
144887a18d3fSMadhavan Venkataraman /*
144987a18d3fSMadhavan Venkataraman  * Expire all the callouts queued in the specified callout list.
14507c478bd9Sstevel@tonic-gate  */
14517c478bd9Sstevel@tonic-gate static void
145287a18d3fSMadhavan Venkataraman callout_list_expire(callout_table_t *ct, callout_list_t *cl)
14537c478bd9Sstevel@tonic-gate {
145407247649SMadhavan Venkataraman 	callout_t *cp, *cnext;
14557c478bd9Sstevel@tonic-gate 
145687a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
145787a18d3fSMadhavan Venkataraman 	ASSERT(cl != NULL);
14587c478bd9Sstevel@tonic-gate 
145907247649SMadhavan Venkataraman 	for (cp = cl->cl_callouts.ch_head; cp != NULL; cp = cnext) {
146007247649SMadhavan Venkataraman 		/*
146107247649SMadhavan Venkataraman 		 * Multiple executor threads could be running at the same
146207247649SMadhavan Venkataraman 		 * time. If this callout is already being executed,
146307247649SMadhavan Venkataraman 		 * go on to the next one.
146407247649SMadhavan Venkataraman 		 */
146507247649SMadhavan Venkataraman 		if (cp->c_xid & CALLOUT_EXECUTING) {
146607247649SMadhavan Venkataraman 			cnext = cp->c_clnext;
146707247649SMadhavan Venkataraman 			continue;
146807247649SMadhavan Venkataraman 		}
146987a18d3fSMadhavan Venkataraman 
1470f635d46aSqiao 		/*
147187a18d3fSMadhavan Venkataraman 		 * Indicate to untimeout() that a callout is
147287a18d3fSMadhavan Venkataraman 		 * being expired by the executor.
1473f635d46aSqiao 		 */
147487a18d3fSMadhavan Venkataraman 		cp->c_xid |= CALLOUT_EXECUTING;
147507247649SMadhavan Venkataraman 		cp->c_executor = curthread;
147687a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
147787a18d3fSMadhavan Venkataraman 
14787c478bd9Sstevel@tonic-gate 		DTRACE_PROBE1(callout__start, callout_t *, cp);
14797c478bd9Sstevel@tonic-gate 		(*cp->c_func)(cp->c_arg);
14807c478bd9Sstevel@tonic-gate 		DTRACE_PROBE1(callout__end, callout_t *, cp);
14817c478bd9Sstevel@tonic-gate 
148287a18d3fSMadhavan Venkataraman 		mutex_enter(&ct->ct_mutex);
148387a18d3fSMadhavan Venkataraman 
148487a18d3fSMadhavan Venkataraman 		ct->ct_expirations++;
148587a18d3fSMadhavan Venkataraman 		ct->ct_timeouts_pending--;
14867c478bd9Sstevel@tonic-gate 		/*
148707247649SMadhavan Venkataraman 		 * Indicate completion for c_done.
14887c478bd9Sstevel@tonic-gate 		 */
148987a18d3fSMadhavan Venkataraman 		cp->c_xid &= ~CALLOUT_EXECUTING;
149007247649SMadhavan Venkataraman 		cp->c_executor = NULL;
149107247649SMadhavan Venkataraman 		cnext = cp->c_clnext;
1492f635d46aSqiao 
14937c478bd9Sstevel@tonic-gate 		/*
149487a18d3fSMadhavan Venkataraman 		 * Delete callout from ID hash table and the callout
149587a18d3fSMadhavan Venkataraman 		 * list, return to freelist, and tell any untimeout() that
149687a18d3fSMadhavan Venkataraman 		 * cares that we're done.
14977c478bd9Sstevel@tonic-gate 		 */
149887a18d3fSMadhavan Venkataraman 		CALLOUT_DELETE(ct, cp);
1499060cedfbSMadhavan Venkataraman 		CALLOUT_FREE(ct, cp);
150087a18d3fSMadhavan Venkataraman 
150107247649SMadhavan Venkataraman 		if (cp->c_waiting) {
150207247649SMadhavan Venkataraman 			cp->c_waiting = 0;
150307247649SMadhavan Venkataraman 			cv_broadcast(&cp->c_done);
15047c478bd9Sstevel@tonic-gate 		}
150587a18d3fSMadhavan Venkataraman 	}
15067c478bd9Sstevel@tonic-gate }
15077c478bd9Sstevel@tonic-gate 
15087c478bd9Sstevel@tonic-gate /*
150987a18d3fSMadhavan Venkataraman  * Execute all expired callout lists for a callout table.
15107c478bd9Sstevel@tonic-gate  */
15117c478bd9Sstevel@tonic-gate static void
151287a18d3fSMadhavan Venkataraman callout_expire(callout_table_t *ct)
15137c478bd9Sstevel@tonic-gate {
151487a18d3fSMadhavan Venkataraman 	callout_list_t *cl, *clnext;
1515f635d46aSqiao 
151687a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
15177c478bd9Sstevel@tonic-gate 
151887a18d3fSMadhavan Venkataraman 	for (cl = ct->ct_expired.ch_head; (cl != NULL); cl = clnext) {
1519f635d46aSqiao 		/*
152087a18d3fSMadhavan Venkataraman 		 * Expire all the callouts in this callout list.
152187a18d3fSMadhavan Venkataraman 		 */
152287a18d3fSMadhavan Venkataraman 		callout_list_expire(ct, cl);
152387a18d3fSMadhavan Venkataraman 
152407247649SMadhavan Venkataraman 		clnext = cl->cl_next;
152507247649SMadhavan Venkataraman 		if (cl->cl_callouts.ch_head == NULL) {
152687a18d3fSMadhavan Venkataraman 			/*
152787a18d3fSMadhavan Venkataraman 			 * Free the callout list.
152887a18d3fSMadhavan Venkataraman 			 */
152987a18d3fSMadhavan Venkataraman 			CALLOUT_LIST_DELETE(ct->ct_expired, cl);
153051b32bddSMadhavan Venkataraman 			CALLOUT_LIST_FREE(ct, cl);
153187a18d3fSMadhavan Venkataraman 		}
153287a18d3fSMadhavan Venkataraman 	}
153307247649SMadhavan Venkataraman }
153487a18d3fSMadhavan Venkataraman 
153587a18d3fSMadhavan Venkataraman /*
153687a18d3fSMadhavan Venkataraman  * The cyclic handlers below process callouts in two steps:
153787a18d3fSMadhavan Venkataraman  *
153887a18d3fSMadhavan Venkataraman  *	1. Find all expired callout lists and queue them in a separate
153987a18d3fSMadhavan Venkataraman  *	   list of expired callouts.
154087a18d3fSMadhavan Venkataraman  *	2. Execute the expired callout lists.
154187a18d3fSMadhavan Venkataraman  *
154287a18d3fSMadhavan Venkataraman  * This is done for two reasons:
154387a18d3fSMadhavan Venkataraman  *
154487a18d3fSMadhavan Venkataraman  *	1. We want to quickly find the next earliest expiration to program
154587a18d3fSMadhavan Venkataraman  *	   the cyclic to and reprogram it. We can do this right at the end
154687a18d3fSMadhavan Venkataraman  *	   of step 1.
154787a18d3fSMadhavan Venkataraman  *	2. The realtime cyclic handler expires callouts in place. However,
154887a18d3fSMadhavan Venkataraman  *	   for normal callouts, callouts are expired by a taskq thread.
154987a18d3fSMadhavan Venkataraman  *	   So, it is simpler and more robust to have the taskq thread just
155087a18d3fSMadhavan Venkataraman  *	   do step 2.
155187a18d3fSMadhavan Venkataraman  */
155287a18d3fSMadhavan Venkataraman 
155387a18d3fSMadhavan Venkataraman /*
1554060cedfbSMadhavan Venkataraman  * Realtime callout cyclic handlers.
15557c478bd9Sstevel@tonic-gate  */
15567c478bd9Sstevel@tonic-gate void
155787a18d3fSMadhavan Venkataraman callout_realtime(callout_table_t *ct)
15587c478bd9Sstevel@tonic-gate {
155987a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
1560060cedfbSMadhavan Venkataraman 	(void) callout_heap_delete(ct);
1561060cedfbSMadhavan Venkataraman 	callout_expire(ct);
1562060cedfbSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
1563060cedfbSMadhavan Venkataraman }
1564060cedfbSMadhavan Venkataraman 
1565060cedfbSMadhavan Venkataraman void
1566060cedfbSMadhavan Venkataraman callout_queue_realtime(callout_table_t *ct)
1567060cedfbSMadhavan Venkataraman {
1568060cedfbSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
1569060cedfbSMadhavan Venkataraman 	(void) callout_queue_delete(ct);
157087a18d3fSMadhavan Venkataraman 	callout_expire(ct);
157187a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
157287a18d3fSMadhavan Venkataraman }
15737c478bd9Sstevel@tonic-gate 
157487a18d3fSMadhavan Venkataraman void
157587a18d3fSMadhavan Venkataraman callout_execute(callout_table_t *ct)
157687a18d3fSMadhavan Venkataraman {
157787a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
157887a18d3fSMadhavan Venkataraman 	callout_expire(ct);
157987a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
158087a18d3fSMadhavan Venkataraman }
15817c478bd9Sstevel@tonic-gate 
158287a18d3fSMadhavan Venkataraman /*
1583060cedfbSMadhavan Venkataraman  * Normal callout cyclic handlers.
158487a18d3fSMadhavan Venkataraman  */
158587a18d3fSMadhavan Venkataraman void
158687a18d3fSMadhavan Venkataraman callout_normal(callout_table_t *ct)
158787a18d3fSMadhavan Venkataraman {
158807247649SMadhavan Venkataraman 	int i, exec;
1589060cedfbSMadhavan Venkataraman 	hrtime_t exp;
159087a18d3fSMadhavan Venkataraman 
159187a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
1592060cedfbSMadhavan Venkataraman 	exp = callout_heap_delete(ct);
1593060cedfbSMadhavan Venkataraman 	CALLOUT_EXEC_COMPUTE(ct, exp, exec);
1594060cedfbSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
1595060cedfbSMadhavan Venkataraman 
1596060cedfbSMadhavan Venkataraman 	for (i = 0; i < exec; i++) {
1597060cedfbSMadhavan Venkataraman 		ASSERT(ct->ct_taskq != NULL);
1598060cedfbSMadhavan Venkataraman 		(void) taskq_dispatch(ct->ct_taskq,
1599060cedfbSMadhavan Venkataraman 		    (task_func_t *)callout_execute, ct, TQ_NOSLEEP);
1600060cedfbSMadhavan Venkataraman 	}
1601060cedfbSMadhavan Venkataraman }
1602060cedfbSMadhavan Venkataraman 
1603060cedfbSMadhavan Venkataraman void
1604060cedfbSMadhavan Venkataraman callout_queue_normal(callout_table_t *ct)
1605060cedfbSMadhavan Venkataraman {
1606060cedfbSMadhavan Venkataraman 	int i, exec;
1607060cedfbSMadhavan Venkataraman 	hrtime_t exp;
1608060cedfbSMadhavan Venkataraman 
1609060cedfbSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
1610060cedfbSMadhavan Venkataraman 	exp = callout_queue_delete(ct);
1611060cedfbSMadhavan Venkataraman 	CALLOUT_EXEC_COMPUTE(ct, exp, exec);
161287a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
161387a18d3fSMadhavan Venkataraman 
161407247649SMadhavan Venkataraman 	for (i = 0; i < exec; i++) {
161587a18d3fSMadhavan Venkataraman 		ASSERT(ct->ct_taskq != NULL);
161687a18d3fSMadhavan Venkataraman 		(void) taskq_dispatch(ct->ct_taskq,
161787a18d3fSMadhavan Venkataraman 		    (task_func_t *)callout_execute, ct, TQ_NOSLEEP);
161887a18d3fSMadhavan Venkataraman 	}
161987a18d3fSMadhavan Venkataraman }
162087a18d3fSMadhavan Venkataraman 
162187a18d3fSMadhavan Venkataraman /*
162287a18d3fSMadhavan Venkataraman  * Suspend callout processing.
162387a18d3fSMadhavan Venkataraman  */
162487a18d3fSMadhavan Venkataraman static void
162587a18d3fSMadhavan Venkataraman callout_suspend(void)
162687a18d3fSMadhavan Venkataraman {
162787a18d3fSMadhavan Venkataraman 	int t, f;
162887a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
162987a18d3fSMadhavan Venkataraman 
163087a18d3fSMadhavan Venkataraman 	/*
163187a18d3fSMadhavan Venkataraman 	 * Traverse every callout table in the system and suspend callout
163287a18d3fSMadhavan Venkataraman 	 * processing.
163387a18d3fSMadhavan Venkataraman 	 *
163487a18d3fSMadhavan Venkataraman 	 * We need to suspend all the tables (including the inactive ones)
163587a18d3fSMadhavan Venkataraman 	 * so that if a table is made active while the suspend is still on,
163687a18d3fSMadhavan Venkataraman 	 * the table remains suspended.
163787a18d3fSMadhavan Venkataraman 	 */
163887a18d3fSMadhavan Venkataraman 	for (f = 0; f < max_ncpus; f++) {
163987a18d3fSMadhavan Venkataraman 		for (t = 0; t < CALLOUT_NTYPES; t++) {
164087a18d3fSMadhavan Venkataraman 			ct = &callout_table[CALLOUT_TABLE(t, f)];
164187a18d3fSMadhavan Venkataraman 
164287a18d3fSMadhavan Venkataraman 			mutex_enter(&ct->ct_mutex);
1643454ab202SMadhavan Venkataraman 			ct->ct_suspend++;
164487a18d3fSMadhavan Venkataraman 			if (ct->ct_cyclic == CYCLIC_NONE) {
164587a18d3fSMadhavan Venkataraman 				mutex_exit(&ct->ct_mutex);
164687a18d3fSMadhavan Venkataraman 				continue;
164787a18d3fSMadhavan Venkataraman 			}
1648060cedfbSMadhavan Venkataraman 			if (ct->ct_suspend == 1) {
1649454ab202SMadhavan Venkataraman 				(void) cyclic_reprogram(ct->ct_cyclic,
1650454ab202SMadhavan Venkataraman 				    CY_INFINITY);
1651060cedfbSMadhavan Venkataraman 				(void) cyclic_reprogram(ct->ct_qcyclic,
1652060cedfbSMadhavan Venkataraman 				    CY_INFINITY);
1653060cedfbSMadhavan Venkataraman 			}
165487a18d3fSMadhavan Venkataraman 			mutex_exit(&ct->ct_mutex);
165587a18d3fSMadhavan Venkataraman 		}
165687a18d3fSMadhavan Venkataraman 	}
165787a18d3fSMadhavan Venkataraman }
165887a18d3fSMadhavan Venkataraman 
165987a18d3fSMadhavan Venkataraman /*
166087a18d3fSMadhavan Venkataraman  * Resume callout processing.
166187a18d3fSMadhavan Venkataraman  */
166287a18d3fSMadhavan Venkataraman static void
166351b32bddSMadhavan Venkataraman callout_resume(hrtime_t delta, int timechange)
166487a18d3fSMadhavan Venkataraman {
1665060cedfbSMadhavan Venkataraman 	hrtime_t hexp, qexp;
166687a18d3fSMadhavan Venkataraman 	int t, f;
166787a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
166887a18d3fSMadhavan Venkataraman 
166987a18d3fSMadhavan Venkataraman 	/*
167087a18d3fSMadhavan Venkataraman 	 * Traverse every callout table in the system and resume callout
167187a18d3fSMadhavan Venkataraman 	 * processing. For active tables, perform any hrtime adjustments
167287a18d3fSMadhavan Venkataraman 	 * necessary.
167387a18d3fSMadhavan Venkataraman 	 */
167487a18d3fSMadhavan Venkataraman 	for (f = 0; f < max_ncpus; f++) {
167587a18d3fSMadhavan Venkataraman 		for (t = 0; t < CALLOUT_NTYPES; t++) {
167687a18d3fSMadhavan Venkataraman 			ct = &callout_table[CALLOUT_TABLE(t, f)];
167787a18d3fSMadhavan Venkataraman 
167887a18d3fSMadhavan Venkataraman 			mutex_enter(&ct->ct_mutex);
167987a18d3fSMadhavan Venkataraman 			if (ct->ct_cyclic == CYCLIC_NONE) {
1680454ab202SMadhavan Venkataraman 				ct->ct_suspend--;
168187a18d3fSMadhavan Venkataraman 				mutex_exit(&ct->ct_mutex);
168287a18d3fSMadhavan Venkataraman 				continue;
168387a18d3fSMadhavan Venkataraman 			}
168487a18d3fSMadhavan Venkataraman 
168551b32bddSMadhavan Venkataraman 			/*
168651b32bddSMadhavan Venkataraman 			 * If a delta is specified, adjust the expirations in
168751b32bddSMadhavan Venkataraman 			 * the heap by delta. Also, if the caller indicates
168851b32bddSMadhavan Venkataraman 			 * a timechange, process that. This step also cleans
168951b32bddSMadhavan Venkataraman 			 * out any empty callout lists that might happen to
169051b32bddSMadhavan Venkataraman 			 * be there.
169151b32bddSMadhavan Venkataraman 			 */
1692060cedfbSMadhavan Venkataraman 			hexp = callout_heap_process(ct, delta, timechange);
1693060cedfbSMadhavan Venkataraman 			qexp = callout_queue_process(ct, delta, timechange);
169487a18d3fSMadhavan Venkataraman 
1695454ab202SMadhavan Venkataraman 			ct->ct_suspend--;
1696454ab202SMadhavan Venkataraman 			if (ct->ct_suspend == 0) {
1697060cedfbSMadhavan Venkataraman 				(void) cyclic_reprogram(ct->ct_cyclic, hexp);
1698060cedfbSMadhavan Venkataraman 				(void) cyclic_reprogram(ct->ct_qcyclic, qexp);
1699454ab202SMadhavan Venkataraman 			}
170051b32bddSMadhavan Venkataraman 
170187a18d3fSMadhavan Venkataraman 			mutex_exit(&ct->ct_mutex);
170287a18d3fSMadhavan Venkataraman 		}
170387a18d3fSMadhavan Venkataraman 	}
17047c478bd9Sstevel@tonic-gate }
17057c478bd9Sstevel@tonic-gate 
17067c478bd9Sstevel@tonic-gate /*
17077c478bd9Sstevel@tonic-gate  * Callback handler used by CPR to stop and resume callouts.
170851b32bddSMadhavan Venkataraman  * The cyclic subsystem saves and restores hrtime during CPR.
170951b32bddSMadhavan Venkataraman  * That is why callout_resume() is called with a 0 delta.
171051b32bddSMadhavan Venkataraman  * Although hrtime is the same, hrestime (system time) has
171151b32bddSMadhavan Venkataraman  * progressed during CPR. So, we have to indicate a time change
171251b32bddSMadhavan Venkataraman  * to expire the absolute hrestime timers.
17137c478bd9Sstevel@tonic-gate  */
17147c478bd9Sstevel@tonic-gate /*ARGSUSED*/
17157c478bd9Sstevel@tonic-gate static boolean_t
17167c478bd9Sstevel@tonic-gate callout_cpr_callb(void *arg, int code)
17177c478bd9Sstevel@tonic-gate {
171887a18d3fSMadhavan Venkataraman 	if (code == CB_CODE_CPR_CHKPT)
171987a18d3fSMadhavan Venkataraman 		callout_suspend();
172087a18d3fSMadhavan Venkataraman 	else
172151b32bddSMadhavan Venkataraman 		callout_resume(0, 1);
172287a18d3fSMadhavan Venkataraman 
17237c478bd9Sstevel@tonic-gate 	return (B_TRUE);
17247c478bd9Sstevel@tonic-gate }
17257c478bd9Sstevel@tonic-gate 
17267c478bd9Sstevel@tonic-gate /*
172787a18d3fSMadhavan Venkataraman  * Callback handler invoked when the debugger is entered or exited.
17287c478bd9Sstevel@tonic-gate  */
172987a18d3fSMadhavan Venkataraman /*ARGSUSED*/
173087a18d3fSMadhavan Venkataraman static boolean_t
173187a18d3fSMadhavan Venkataraman callout_debug_callb(void *arg, int code)
17327c478bd9Sstevel@tonic-gate {
173387a18d3fSMadhavan Venkataraman 	hrtime_t delta;
1734f635d46aSqiao 
1735f635d46aSqiao 	/*
173687a18d3fSMadhavan Venkataraman 	 * When the system enters the debugger. make a note of the hrtime.
173787a18d3fSMadhavan Venkataraman 	 * When it is resumed, compute how long the system was in the
173887a18d3fSMadhavan Venkataraman 	 * debugger. This interval should not be counted for callouts.
1739f635d46aSqiao 	 */
174087a18d3fSMadhavan Venkataraman 	if (code == 0) {
174187a18d3fSMadhavan Venkataraman 		callout_suspend();
174287a18d3fSMadhavan Venkataraman 		callout_debug_hrtime = gethrtime();
174387a18d3fSMadhavan Venkataraman 	} else {
174487a18d3fSMadhavan Venkataraman 		delta = gethrtime() - callout_debug_hrtime;
174551b32bddSMadhavan Venkataraman 		callout_resume(delta, 0);
174687a18d3fSMadhavan Venkataraman 	}
1747f635d46aSqiao 
174887a18d3fSMadhavan Venkataraman 	return (B_TRUE);
174987a18d3fSMadhavan Venkataraman }
175087a18d3fSMadhavan Venkataraman 
175187a18d3fSMadhavan Venkataraman /*
175207247649SMadhavan Venkataraman  * Move the absolute hrestime callouts to the expired list. Then program the
175307247649SMadhavan Venkataraman  * table's cyclic to expire immediately so that the callouts can be executed
175487a18d3fSMadhavan Venkataraman  * immediately.
175587a18d3fSMadhavan Venkataraman  */
175687a18d3fSMadhavan Venkataraman static void
175787a18d3fSMadhavan Venkataraman callout_hrestime_one(callout_table_t *ct)
175887a18d3fSMadhavan Venkataraman {
1759060cedfbSMadhavan Venkataraman 	hrtime_t hexp, qexp;
176087a18d3fSMadhavan Venkataraman 
176187a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
1762060cedfbSMadhavan Venkataraman 	if (ct->ct_cyclic == CYCLIC_NONE) {
176387a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
176487a18d3fSMadhavan Venkataraman 		return;
176587a18d3fSMadhavan Venkataraman 	}
176687a18d3fSMadhavan Venkataraman 
176751b32bddSMadhavan Venkataraman 	/*
176851b32bddSMadhavan Venkataraman 	 * Walk the heap and process all the absolute hrestime entries.
176951b32bddSMadhavan Venkataraman 	 */
1770060cedfbSMadhavan Venkataraman 	hexp = callout_heap_process(ct, 0, 1);
1771060cedfbSMadhavan Venkataraman 	qexp = callout_queue_process(ct, 0, 1);
177287a18d3fSMadhavan Venkataraman 
1773060cedfbSMadhavan Venkataraman 	if (ct->ct_suspend == 0) {
1774060cedfbSMadhavan Venkataraman 		(void) cyclic_reprogram(ct->ct_cyclic, hexp);
1775060cedfbSMadhavan Venkataraman 		(void) cyclic_reprogram(ct->ct_qcyclic, qexp);
1776060cedfbSMadhavan Venkataraman 	}
177707247649SMadhavan Venkataraman 
177887a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
177987a18d3fSMadhavan Venkataraman }
178087a18d3fSMadhavan Venkataraman 
178187a18d3fSMadhavan Venkataraman /*
178287a18d3fSMadhavan Venkataraman  * This function is called whenever system time (hrestime) is changed
178387a18d3fSMadhavan Venkataraman  * explicitly. All the HRESTIME callouts must be expired at once.
178487a18d3fSMadhavan Venkataraman  */
178587a18d3fSMadhavan Venkataraman /*ARGSUSED*/
178687a18d3fSMadhavan Venkataraman void
178787a18d3fSMadhavan Venkataraman callout_hrestime(void)
178887a18d3fSMadhavan Venkataraman {
178987a18d3fSMadhavan Venkataraman 	int t, f;
179087a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
179187a18d3fSMadhavan Venkataraman 
179287a18d3fSMadhavan Venkataraman 	/*
179387a18d3fSMadhavan Venkataraman 	 * Traverse every callout table in the system and process the hrestime
179487a18d3fSMadhavan Venkataraman 	 * callouts therein.
179587a18d3fSMadhavan Venkataraman 	 *
179687a18d3fSMadhavan Venkataraman 	 * We look at all the tables because we don't know which ones were
179787a18d3fSMadhavan Venkataraman 	 * onlined and offlined in the past. The offlined tables may still
179887a18d3fSMadhavan Venkataraman 	 * have active cyclics processing timers somewhere.
179987a18d3fSMadhavan Venkataraman 	 */
180087a18d3fSMadhavan Venkataraman 	for (f = 0; f < max_ncpus; f++) {
180187a18d3fSMadhavan Venkataraman 		for (t = 0; t < CALLOUT_NTYPES; t++) {
180287a18d3fSMadhavan Venkataraman 			ct = &callout_table[CALLOUT_TABLE(t, f)];
180387a18d3fSMadhavan Venkataraman 			callout_hrestime_one(ct);
180487a18d3fSMadhavan Venkataraman 		}
180587a18d3fSMadhavan Venkataraman 	}
180687a18d3fSMadhavan Venkataraman }
180787a18d3fSMadhavan Venkataraman 
180887a18d3fSMadhavan Venkataraman /*
180987a18d3fSMadhavan Venkataraman  * Create the hash tables for this callout table.
181087a18d3fSMadhavan Venkataraman  */
181187a18d3fSMadhavan Venkataraman static void
181287a18d3fSMadhavan Venkataraman callout_hash_init(callout_table_t *ct)
181387a18d3fSMadhavan Venkataraman {
181487a18d3fSMadhavan Venkataraman 	size_t size;
181587a18d3fSMadhavan Venkataraman 
181687a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
181787a18d3fSMadhavan Venkataraman 	ASSERT((ct->ct_idhash == NULL) && (ct->ct_clhash == NULL));
181887a18d3fSMadhavan Venkataraman 
181987a18d3fSMadhavan Venkataraman 	size = sizeof (callout_hash_t) * CALLOUT_BUCKETS;
182087a18d3fSMadhavan Venkataraman 	ct->ct_idhash = kmem_zalloc(size, KM_SLEEP);
182187a18d3fSMadhavan Venkataraman 	ct->ct_clhash = kmem_zalloc(size, KM_SLEEP);
182287a18d3fSMadhavan Venkataraman }
182387a18d3fSMadhavan Venkataraman 
182487a18d3fSMadhavan Venkataraman /*
182587a18d3fSMadhavan Venkataraman  * Create per-callout table kstats.
182687a18d3fSMadhavan Venkataraman  */
182787a18d3fSMadhavan Venkataraman static void
182887a18d3fSMadhavan Venkataraman callout_kstat_init(callout_table_t *ct)
182987a18d3fSMadhavan Venkataraman {
183087a18d3fSMadhavan Venkataraman 	callout_stat_type_t stat;
183187a18d3fSMadhavan Venkataraman 	kstat_t *ct_kstats;
183287a18d3fSMadhavan Venkataraman 	int ndx;
183387a18d3fSMadhavan Venkataraman 
183487a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
183587a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_kstats == NULL);
183687a18d3fSMadhavan Venkataraman 
183787a18d3fSMadhavan Venkataraman 	ndx = ct - callout_table;
183887a18d3fSMadhavan Venkataraman 	ct_kstats = kstat_create("unix", ndx, "callout",
183987a18d3fSMadhavan Venkataraman 	    "misc", KSTAT_TYPE_NAMED, CALLOUT_NUM_STATS, KSTAT_FLAG_VIRTUAL);
184087a18d3fSMadhavan Venkataraman 
184187a18d3fSMadhavan Venkataraman 	if (ct_kstats == NULL) {
184287a18d3fSMadhavan Venkataraman 		cmn_err(CE_WARN, "kstat_create for callout table %p failed",
184387a18d3fSMadhavan Venkataraman 		    (void *)ct);
184487a18d3fSMadhavan Venkataraman 	} else {
184587a18d3fSMadhavan Venkataraman 		ct_kstats->ks_data = ct->ct_kstat_data;
184687a18d3fSMadhavan Venkataraman 		for (stat = 0; stat < CALLOUT_NUM_STATS; stat++)
184787a18d3fSMadhavan Venkataraman 			kstat_named_init(&ct->ct_kstat_data[stat],
184887a18d3fSMadhavan Venkataraman 			    callout_kstat_names[stat], KSTAT_DATA_INT64);
184987a18d3fSMadhavan Venkataraman 		ct->ct_kstats = ct_kstats;
185087a18d3fSMadhavan Venkataraman 		kstat_install(ct_kstats);
185187a18d3fSMadhavan Venkataraman 	}
185287a18d3fSMadhavan Venkataraman }
185387a18d3fSMadhavan Venkataraman 
185487a18d3fSMadhavan Venkataraman static void
185587a18d3fSMadhavan Venkataraman callout_cyclic_init(callout_table_t *ct)
185687a18d3fSMadhavan Venkataraman {
185787a18d3fSMadhavan Venkataraman 	cyc_handler_t hdlr;
185887a18d3fSMadhavan Venkataraman 	cyc_time_t when;
185987a18d3fSMadhavan Venkataraman 	processorid_t seqid;
186087a18d3fSMadhavan Venkataraman 	int t;
1861060cedfbSMadhavan Venkataraman 	cyclic_id_t cyclic, qcyclic;
186287a18d3fSMadhavan Venkataraman 
186387a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
186487a18d3fSMadhavan Venkataraman 
1865060cedfbSMadhavan Venkataraman 	t = ct->ct_type;
186687a18d3fSMadhavan Venkataraman 	seqid = CALLOUT_TABLE_SEQID(ct);
186787a18d3fSMadhavan Venkataraman 
186887a18d3fSMadhavan Venkataraman 	/*
186987a18d3fSMadhavan Venkataraman 	 * Create the taskq thread if the table type is normal.
187087a18d3fSMadhavan Venkataraman 	 * Realtime tables are handled at PIL1 by a softint
187187a18d3fSMadhavan Venkataraman 	 * handler.
187287a18d3fSMadhavan Venkataraman 	 */
18737c478bd9Sstevel@tonic-gate 	if (t == CALLOUT_NORMAL) {
187487a18d3fSMadhavan Venkataraman 		ASSERT(ct->ct_taskq == NULL);
18757c478bd9Sstevel@tonic-gate 		/*
18767c478bd9Sstevel@tonic-gate 		 * Each callout thread consumes exactly one
18777c478bd9Sstevel@tonic-gate 		 * task structure while active.  Therefore,
187851b32bddSMadhavan Venkataraman 		 * prepopulating with 2 * callout_threads tasks
18797c478bd9Sstevel@tonic-gate 		 * ensures that there's at least one task per
18807c478bd9Sstevel@tonic-gate 		 * thread that's either scheduled or on the
18817c478bd9Sstevel@tonic-gate 		 * freelist.  In turn, this guarantees that
18827c478bd9Sstevel@tonic-gate 		 * taskq_dispatch() will always either succeed
18837c478bd9Sstevel@tonic-gate 		 * (because there's a free task structure) or
18847c478bd9Sstevel@tonic-gate 		 * be unnecessary (because "callout_excute(ct)"
18857c478bd9Sstevel@tonic-gate 		 * has already scheduled).
18867c478bd9Sstevel@tonic-gate 		 */
18877c478bd9Sstevel@tonic-gate 		ct->ct_taskq =
188887a18d3fSMadhavan Venkataraman 		    taskq_create_instance("callout_taskq", seqid,
188951b32bddSMadhavan Venkataraman 		    callout_threads, maxclsyspri,
189051b32bddSMadhavan Venkataraman 		    2 * callout_threads, 2 * callout_threads,
18917c478bd9Sstevel@tonic-gate 		    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
18927c478bd9Sstevel@tonic-gate 	}
189387a18d3fSMadhavan Venkataraman 
189487a18d3fSMadhavan Venkataraman 	/*
189587a18d3fSMadhavan Venkataraman 	 * callouts can only be created in a table whose
189687a18d3fSMadhavan Venkataraman 	 * cyclic has been initialized.
189787a18d3fSMadhavan Venkataraman 	 */
189887a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap_num == 0);
189987a18d3fSMadhavan Venkataraman 
190087a18d3fSMadhavan Venkataraman 	/*
1901113d3ed7SMadhavan Venkataraman 	 * Drop the mutex before creating the callout cyclics. cyclic_add()
1902113d3ed7SMadhavan Venkataraman 	 * could potentially expand the cyclic heap. We don't want to be
1903113d3ed7SMadhavan Venkataraman 	 * holding the callout table mutex in that case. Note that this
1904113d3ed7SMadhavan Venkataraman 	 * function is called during CPU online. cpu_lock is held at this
1905113d3ed7SMadhavan Venkataraman 	 * point. So, only one thread can be executing the cyclic add logic
1906113d3ed7SMadhavan Venkataraman 	 * below at any time.
1907113d3ed7SMadhavan Venkataraman 	 */
1908113d3ed7SMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
1909113d3ed7SMadhavan Venkataraman 
1910113d3ed7SMadhavan Venkataraman 	/*
191187a18d3fSMadhavan Venkataraman 	 * Create the callout table cyclics.
191207247649SMadhavan Venkataraman 	 *
191307247649SMadhavan Venkataraman 	 * The realtime cyclic handler executes at low PIL. The normal cyclic
191407247649SMadhavan Venkataraman 	 * handler executes at lock PIL. This is because there are cases
191507247649SMadhavan Venkataraman 	 * where code can block at PIL > 1 waiting for a normal callout handler
191607247649SMadhavan Venkataraman 	 * to unblock it directly or indirectly. If the normal cyclic were to
191707247649SMadhavan Venkataraman 	 * be executed at low PIL, it could get blocked out by the waiter
191807247649SMadhavan Venkataraman 	 * and cause a deadlock.
191987a18d3fSMadhavan Venkataraman 	 */
192087a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_cyclic == CYCLIC_NONE);
192187a18d3fSMadhavan Venkataraman 
1922060cedfbSMadhavan Venkataraman 	if (t == CALLOUT_REALTIME) {
192307247649SMadhavan Venkataraman 		hdlr.cyh_level = callout_realtime_level;
1924060cedfbSMadhavan Venkataraman 		hdlr.cyh_func = (cyc_func_t)callout_realtime;
1925060cedfbSMadhavan Venkataraman 	} else {
192607247649SMadhavan Venkataraman 		hdlr.cyh_level = callout_normal_level;
1927060cedfbSMadhavan Venkataraman 		hdlr.cyh_func = (cyc_func_t)callout_normal;
1928060cedfbSMadhavan Venkataraman 	}
192987a18d3fSMadhavan Venkataraman 	hdlr.cyh_arg = ct;
193087a18d3fSMadhavan Venkataraman 	when.cyt_when = CY_INFINITY;
193187a18d3fSMadhavan Venkataraman 	when.cyt_interval = CY_INFINITY;
193287a18d3fSMadhavan Venkataraman 
1933113d3ed7SMadhavan Venkataraman 	cyclic = cyclic_add(&hdlr, &when);
1934113d3ed7SMadhavan Venkataraman 
1935060cedfbSMadhavan Venkataraman 	if (t == CALLOUT_REALTIME)
1936060cedfbSMadhavan Venkataraman 		hdlr.cyh_func = (cyc_func_t)callout_queue_realtime;
1937060cedfbSMadhavan Venkataraman 	else
1938060cedfbSMadhavan Venkataraman 		hdlr.cyh_func = (cyc_func_t)callout_queue_normal;
1939060cedfbSMadhavan Venkataraman 
1940060cedfbSMadhavan Venkataraman 	qcyclic = cyclic_add(&hdlr, &when);
1941060cedfbSMadhavan Venkataraman 
1942113d3ed7SMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
1943113d3ed7SMadhavan Venkataraman 	ct->ct_cyclic = cyclic;
1944060cedfbSMadhavan Venkataraman 	ct->ct_qcyclic = qcyclic;
194587a18d3fSMadhavan Venkataraman }
194687a18d3fSMadhavan Venkataraman 
194787a18d3fSMadhavan Venkataraman void
194887a18d3fSMadhavan Venkataraman callout_cpu_online(cpu_t *cp)
194987a18d3fSMadhavan Venkataraman {
195087a18d3fSMadhavan Venkataraman 	lgrp_handle_t hand;
195187a18d3fSMadhavan Venkataraman 	callout_cache_t *cache;
195287a18d3fSMadhavan Venkataraman 	char s[KMEM_CACHE_NAMELEN];
195387a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
195487a18d3fSMadhavan Venkataraman 	processorid_t seqid;
195587a18d3fSMadhavan Venkataraman 	int t;
195687a18d3fSMadhavan Venkataraman 
195787a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&cpu_lock));
195887a18d3fSMadhavan Venkataraman 
195987a18d3fSMadhavan Venkataraman 	/*
196087a18d3fSMadhavan Venkataraman 	 * Locate the cache corresponding to the onlined CPU's lgroup.
196187a18d3fSMadhavan Venkataraman 	 * Note that access to callout_caches is protected by cpu_lock.
196287a18d3fSMadhavan Venkataraman 	 */
196387a18d3fSMadhavan Venkataraman 	hand = lgrp_plat_cpu_to_hand(cp->cpu_id);
196487a18d3fSMadhavan Venkataraman 	for (cache = callout_caches; cache != NULL; cache = cache->cc_next) {
196587a18d3fSMadhavan Venkataraman 		if (cache->cc_hand == hand)
196687a18d3fSMadhavan Venkataraman 			break;
196787a18d3fSMadhavan Venkataraman 	}
196887a18d3fSMadhavan Venkataraman 
196987a18d3fSMadhavan Venkataraman 	/*
197087a18d3fSMadhavan Venkataraman 	 * If not found, create one. The caches are never destroyed.
197187a18d3fSMadhavan Venkataraman 	 */
197287a18d3fSMadhavan Venkataraman 	if (cache == NULL) {
197387a18d3fSMadhavan Venkataraman 		cache = kmem_alloc(sizeof (callout_cache_t), KM_SLEEP);
197487a18d3fSMadhavan Venkataraman 		cache->cc_hand = hand;
197587a18d3fSMadhavan Venkataraman 		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_cache%lx",
197687a18d3fSMadhavan Venkataraman 		    (long)hand);
197787a18d3fSMadhavan Venkataraman 		cache->cc_cache = kmem_cache_create(s, sizeof (callout_t),
197887a18d3fSMadhavan Venkataraman 		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
197987a18d3fSMadhavan Venkataraman 		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_lcache%lx",
198087a18d3fSMadhavan Venkataraman 		    (long)hand);
198187a18d3fSMadhavan Venkataraman 		cache->cc_lcache = kmem_cache_create(s, sizeof (callout_list_t),
198287a18d3fSMadhavan Venkataraman 		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
198387a18d3fSMadhavan Venkataraman 		cache->cc_next = callout_caches;
198487a18d3fSMadhavan Venkataraman 		callout_caches = cache;
198587a18d3fSMadhavan Venkataraman 	}
198687a18d3fSMadhavan Venkataraman 
198787a18d3fSMadhavan Venkataraman 	seqid = cp->cpu_seqid;
198887a18d3fSMadhavan Venkataraman 
198987a18d3fSMadhavan Venkataraman 	for (t = 0; t < CALLOUT_NTYPES; t++) {
199087a18d3fSMadhavan Venkataraman 		ct = &callout_table[CALLOUT_TABLE(t, seqid)];
199187a18d3fSMadhavan Venkataraman 
199287a18d3fSMadhavan Venkataraman 		mutex_enter(&ct->ct_mutex);
199387a18d3fSMadhavan Venkataraman 		/*
199487a18d3fSMadhavan Venkataraman 		 * Store convinience pointers to the kmem caches
199587a18d3fSMadhavan Venkataraman 		 * in the callout table. These assignments should always be
199687a18d3fSMadhavan Venkataraman 		 * done as callout tables can map to different physical
199787a18d3fSMadhavan Venkataraman 		 * CPUs each time.
199887a18d3fSMadhavan Venkataraman 		 */
199987a18d3fSMadhavan Venkataraman 		ct->ct_cache = cache->cc_cache;
200087a18d3fSMadhavan Venkataraman 		ct->ct_lcache = cache->cc_lcache;
200187a18d3fSMadhavan Venkataraman 
200287a18d3fSMadhavan Venkataraman 		/*
200387a18d3fSMadhavan Venkataraman 		 * We use the heap pointer to check if stuff has been
200487a18d3fSMadhavan Venkataraman 		 * initialized for this callout table.
200587a18d3fSMadhavan Venkataraman 		 */
200687a18d3fSMadhavan Venkataraman 		if (ct->ct_heap == NULL) {
200787a18d3fSMadhavan Venkataraman 			callout_heap_init(ct);
200887a18d3fSMadhavan Venkataraman 			callout_hash_init(ct);
200987a18d3fSMadhavan Venkataraman 			callout_kstat_init(ct);
201087a18d3fSMadhavan Venkataraman 			callout_cyclic_init(ct);
201187a18d3fSMadhavan Venkataraman 		}
201287a18d3fSMadhavan Venkataraman 
201387a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
201487a18d3fSMadhavan Venkataraman 
201587a18d3fSMadhavan Venkataraman 		/*
2016060cedfbSMadhavan Venkataraman 		 * Move the cyclics to this CPU by doing a bind.
201787a18d3fSMadhavan Venkataraman 		 */
201887a18d3fSMadhavan Venkataraman 		cyclic_bind(ct->ct_cyclic, cp, NULL);
2019060cedfbSMadhavan Venkataraman 		cyclic_bind(ct->ct_qcyclic, cp, NULL);
2020454ab202SMadhavan Venkataraman 	}
2021454ab202SMadhavan Venkataraman }
2022454ab202SMadhavan Venkataraman 
2023454ab202SMadhavan Venkataraman void
2024454ab202SMadhavan Venkataraman callout_cpu_offline(cpu_t *cp)
2025454ab202SMadhavan Venkataraman {
2026454ab202SMadhavan Venkataraman 	callout_table_t *ct;
2027454ab202SMadhavan Venkataraman 	processorid_t seqid;
2028454ab202SMadhavan Venkataraman 	int t;
2029454ab202SMadhavan Venkataraman 
2030454ab202SMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&cpu_lock));
2031454ab202SMadhavan Venkataraman 
2032454ab202SMadhavan Venkataraman 	seqid = cp->cpu_seqid;
2033454ab202SMadhavan Venkataraman 
2034454ab202SMadhavan Venkataraman 	for (t = 0; t < CALLOUT_NTYPES; t++) {
2035454ab202SMadhavan Venkataraman 		ct = &callout_table[CALLOUT_TABLE(t, seqid)];
2036454ab202SMadhavan Venkataraman 
2037454ab202SMadhavan Venkataraman 		/*
2038060cedfbSMadhavan Venkataraman 		 * Unbind the cyclics. This will allow the cyclic subsystem
2039060cedfbSMadhavan Venkataraman 		 * to juggle the cyclics during CPU offline.
2040454ab202SMadhavan Venkataraman 		 */
204187a18d3fSMadhavan Venkataraman 		cyclic_bind(ct->ct_cyclic, NULL, NULL);
2042060cedfbSMadhavan Venkataraman 		cyclic_bind(ct->ct_qcyclic, NULL, NULL);
20437c478bd9Sstevel@tonic-gate 	}
20447c478bd9Sstevel@tonic-gate }
204587a18d3fSMadhavan Venkataraman 
204687a18d3fSMadhavan Venkataraman /*
204787a18d3fSMadhavan Venkataraman  * This is called to perform per-CPU initialization for slave CPUs at
204887a18d3fSMadhavan Venkataraman  * boot time.
204987a18d3fSMadhavan Venkataraman  */
205087a18d3fSMadhavan Venkataraman void
205187a18d3fSMadhavan Venkataraman callout_mp_init(void)
205287a18d3fSMadhavan Venkataraman {
205387a18d3fSMadhavan Venkataraman 	cpu_t *cp;
2054060cedfbSMadhavan Venkataraman 	size_t min, max;
2055060cedfbSMadhavan Venkataraman 
2056060cedfbSMadhavan Venkataraman 	if (callout_chunk == CALLOUT_CHUNK) {
2057060cedfbSMadhavan Venkataraman 		/*
2058060cedfbSMadhavan Venkataraman 		 * No one has specified a chunk in /etc/system. We need to
2059060cedfbSMadhavan Venkataraman 		 * compute it here based on the number of online CPUs and
2060060cedfbSMadhavan Venkataraman 		 * available physical memory.
2061060cedfbSMadhavan Venkataraman 		 */
2062060cedfbSMadhavan Venkataraman 		min = CALLOUT_MIN_HEAP_SIZE;
2063*1671524dSMadhavan Venkataraman 		max = ptob(physmem / CALLOUT_MEM_FRACTION);
2064060cedfbSMadhavan Venkataraman 		if (min > max)
2065060cedfbSMadhavan Venkataraman 			min = max;
2066060cedfbSMadhavan Venkataraman 		callout_chunk = min / sizeof (callout_heap_t);
2067060cedfbSMadhavan Venkataraman 		callout_chunk /= ncpus_online;
2068060cedfbSMadhavan Venkataraman 		callout_chunk = P2ROUNDUP(callout_chunk, CALLOUT_CHUNK);
2069060cedfbSMadhavan Venkataraman 	}
207087a18d3fSMadhavan Venkataraman 
207187a18d3fSMadhavan Venkataraman 	mutex_enter(&cpu_lock);
207287a18d3fSMadhavan Venkataraman 
207387a18d3fSMadhavan Venkataraman 	cp = cpu_active;
207487a18d3fSMadhavan Venkataraman 	do {
207587a18d3fSMadhavan Venkataraman 		callout_cpu_online(cp);
207687a18d3fSMadhavan Venkataraman 	} while ((cp = cp->cpu_next_onln) != cpu_active);
207787a18d3fSMadhavan Venkataraman 
207887a18d3fSMadhavan Venkataraman 	mutex_exit(&cpu_lock);
207987a18d3fSMadhavan Venkataraman }
208087a18d3fSMadhavan Venkataraman 
208187a18d3fSMadhavan Venkataraman /*
208287a18d3fSMadhavan Venkataraman  * Initialize all callout tables.  Called at boot time just before clkstart().
208387a18d3fSMadhavan Venkataraman  */
208487a18d3fSMadhavan Venkataraman void
208587a18d3fSMadhavan Venkataraman callout_init(void)
208687a18d3fSMadhavan Venkataraman {
208787a18d3fSMadhavan Venkataraman 	int f, t;
208887a18d3fSMadhavan Venkataraman 	size_t size;
208987a18d3fSMadhavan Venkataraman 	int table_id;
209087a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
209187a18d3fSMadhavan Venkataraman 	long bits, fanout;
209287a18d3fSMadhavan Venkataraman 	uintptr_t buf;
209387a18d3fSMadhavan Venkataraman 
209487a18d3fSMadhavan Venkataraman 	/*
209587a18d3fSMadhavan Venkataraman 	 * Initialize callout globals.
209687a18d3fSMadhavan Venkataraman 	 */
209787a18d3fSMadhavan Venkataraman 	bits = 0;
209887a18d3fSMadhavan Venkataraman 	for (fanout = 1; (fanout < max_ncpus); fanout <<= 1)
209987a18d3fSMadhavan Venkataraman 		bits++;
210087a18d3fSMadhavan Venkataraman 	callout_table_bits = CALLOUT_TYPE_BITS + bits;
210187a18d3fSMadhavan Venkataraman 	callout_table_mask = (1 << callout_table_bits) - 1;
210287a18d3fSMadhavan Venkataraman 	callout_counter_low = 1 << CALLOUT_COUNTER_SHIFT;
210387a18d3fSMadhavan Venkataraman 	callout_longterm = TICK_TO_NSEC(CALLOUT_LONGTERM_TICKS);
2104454ab202SMadhavan Venkataraman 	callout_max_ticks = CALLOUT_MAX_TICKS;
210551b32bddSMadhavan Venkataraman 	if (callout_min_reap == 0)
210651b32bddSMadhavan Venkataraman 		callout_min_reap = CALLOUT_MIN_REAP;
210787a18d3fSMadhavan Venkataraman 
210851b32bddSMadhavan Venkataraman 	if (callout_tolerance <= 0)
210951b32bddSMadhavan Venkataraman 		callout_tolerance = CALLOUT_TOLERANCE;
211051b32bddSMadhavan Venkataraman 	if (callout_threads <= 0)
211151b32bddSMadhavan Venkataraman 		callout_threads = CALLOUT_THREADS;
2112060cedfbSMadhavan Venkataraman 	if (callout_chunk <= 0)
2113060cedfbSMadhavan Venkataraman 		callout_chunk = CALLOUT_CHUNK;
2114060cedfbSMadhavan Venkataraman 	else
2115060cedfbSMadhavan Venkataraman 		callout_chunk = P2ROUNDUP(callout_chunk, CALLOUT_CHUNK);
211687a18d3fSMadhavan Venkataraman 
211787a18d3fSMadhavan Venkataraman 	/*
211887a18d3fSMadhavan Venkataraman 	 * Allocate all the callout tables based on max_ncpus. We have chosen
211987a18d3fSMadhavan Venkataraman 	 * to do boot-time allocation instead of dynamic allocation because:
212087a18d3fSMadhavan Venkataraman 	 *
212187a18d3fSMadhavan Venkataraman 	 *	- the size of the callout tables is not too large.
212287a18d3fSMadhavan Venkataraman 	 *	- there are race conditions involved in making this dynamic.
212387a18d3fSMadhavan Venkataraman 	 *	- the hash tables that go with the callout tables consume
212487a18d3fSMadhavan Venkataraman 	 *	  most of the memory and they are only allocated in
212587a18d3fSMadhavan Venkataraman 	 *	  callout_cpu_online().
212687a18d3fSMadhavan Venkataraman 	 *
212787a18d3fSMadhavan Venkataraman 	 * Each CPU has two tables that are consecutive in the array. The first
212887a18d3fSMadhavan Venkataraman 	 * one is for realtime callouts and the second one is for normal ones.
212987a18d3fSMadhavan Venkataraman 	 *
213087a18d3fSMadhavan Venkataraman 	 * We do this alignment dance to make sure that callout table
213187a18d3fSMadhavan Venkataraman 	 * structures will always be on a cache line boundary.
213287a18d3fSMadhavan Venkataraman 	 */
213387a18d3fSMadhavan Venkataraman 	size = sizeof (callout_table_t) * CALLOUT_NTYPES * max_ncpus;
213487a18d3fSMadhavan Venkataraman 	size += CALLOUT_ALIGN;
213587a18d3fSMadhavan Venkataraman 	buf = (uintptr_t)kmem_zalloc(size, KM_SLEEP);
213687a18d3fSMadhavan Venkataraman 	callout_table = (callout_table_t *)P2ROUNDUP(buf, CALLOUT_ALIGN);
213787a18d3fSMadhavan Venkataraman 
213887a18d3fSMadhavan Venkataraman 	size = sizeof (kstat_named_t) * CALLOUT_NUM_STATS;
213987a18d3fSMadhavan Venkataraman 	/*
214087a18d3fSMadhavan Venkataraman 	 * Now, initialize the tables for all the CPUs.
214187a18d3fSMadhavan Venkataraman 	 */
214287a18d3fSMadhavan Venkataraman 	for (f = 0; f < max_ncpus; f++) {
214387a18d3fSMadhavan Venkataraman 		for (t = 0; t < CALLOUT_NTYPES; t++) {
214487a18d3fSMadhavan Venkataraman 			table_id = CALLOUT_TABLE(t, f);
214587a18d3fSMadhavan Venkataraman 			ct = &callout_table[table_id];
2146454ab202SMadhavan Venkataraman 			ct->ct_type = t;
214787a18d3fSMadhavan Venkataraman 			mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
214887a18d3fSMadhavan Venkataraman 			/*
214987a18d3fSMadhavan Venkataraman 			 * Precompute the base IDs for long and short-term
215087a18d3fSMadhavan Venkataraman 			 * legacy IDs. This makes ID generation during
215187a18d3fSMadhavan Venkataraman 			 * timeout() fast.
215287a18d3fSMadhavan Venkataraman 			 */
215387a18d3fSMadhavan Venkataraman 			ct->ct_short_id = CALLOUT_SHORT_ID(table_id);
215487a18d3fSMadhavan Venkataraman 			ct->ct_long_id = CALLOUT_LONG_ID(table_id);
215587a18d3fSMadhavan Venkataraman 			/*
215687a18d3fSMadhavan Venkataraman 			 * Precompute the base ID for generation-based IDs.
215787a18d3fSMadhavan Venkataraman 			 * Note that when the first ID gets allocated, the
215887a18d3fSMadhavan Venkataraman 			 * ID will wrap. This will cause the generation
215987a18d3fSMadhavan Venkataraman 			 * number to be incremented to 1.
216087a18d3fSMadhavan Venkataraman 			 */
216187a18d3fSMadhavan Venkataraman 			ct->ct_gen_id = CALLOUT_SHORT_ID(table_id);
216287a18d3fSMadhavan Venkataraman 			/*
2163060cedfbSMadhavan Venkataraman 			 * Initialize the cyclics as NONE. This will get set
216487a18d3fSMadhavan Venkataraman 			 * during CPU online. This is so that partially
216587a18d3fSMadhavan Venkataraman 			 * populated systems will only have the required
216687a18d3fSMadhavan Venkataraman 			 * number of cyclics, not more.
216787a18d3fSMadhavan Venkataraman 			 */
216887a18d3fSMadhavan Venkataraman 			ct->ct_cyclic = CYCLIC_NONE;
2169060cedfbSMadhavan Venkataraman 			ct->ct_qcyclic = CYCLIC_NONE;
217087a18d3fSMadhavan Venkataraman 			ct->ct_kstat_data = kmem_zalloc(size, KM_SLEEP);
217187a18d3fSMadhavan Venkataraman 		}
217287a18d3fSMadhavan Venkataraman 	}
217387a18d3fSMadhavan Venkataraman 
217487a18d3fSMadhavan Venkataraman 	/*
217587a18d3fSMadhavan Venkataraman 	 * Add the callback for CPR. This is called during checkpoint
217687a18d3fSMadhavan Venkataraman 	 * resume to suspend and resume callouts.
217787a18d3fSMadhavan Venkataraman 	 */
217887a18d3fSMadhavan Venkataraman 	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT,
217987a18d3fSMadhavan Venkataraman 	    "callout_cpr");
218087a18d3fSMadhavan Venkataraman 	(void) callb_add(callout_debug_callb, 0, CB_CL_ENTER_DEBUGGER,
218187a18d3fSMadhavan Venkataraman 	    "callout_debug");
218287a18d3fSMadhavan Venkataraman 
218387a18d3fSMadhavan Venkataraman 	/*
218487a18d3fSMadhavan Venkataraman 	 * Call the per-CPU initialization function for the boot CPU. This
218587a18d3fSMadhavan Venkataraman 	 * is done here because the function is not called automatically for
218687a18d3fSMadhavan Venkataraman 	 * the boot CPU from the CPU online/offline hooks. Note that the
218787a18d3fSMadhavan Venkataraman 	 * CPU lock is taken here because of convention.
218887a18d3fSMadhavan Venkataraman 	 */
218987a18d3fSMadhavan Venkataraman 	mutex_enter(&cpu_lock);
219087a18d3fSMadhavan Venkataraman 	callout_boot_ct = &callout_table[CALLOUT_TABLE(0, CPU->cpu_seqid)];
219187a18d3fSMadhavan Venkataraman 	callout_cpu_online(CPU);
219287a18d3fSMadhavan Venkataraman 	mutex_exit(&cpu_lock);
21937c478bd9Sstevel@tonic-gate }
2194