xref: /illumos-gate/usr/src/uts/common/os/callout.c (revision 07247649)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #include <sys/callo.h>
277c478bd9Sstevel@tonic-gate #include <sys/param.h>
287c478bd9Sstevel@tonic-gate #include <sys/types.h>
297c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
307c478bd9Sstevel@tonic-gate #include <sys/thread.h>
317c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
3287a18d3fSMadhavan Venkataraman #include <sys/kmem_impl.h>
337c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
347c478bd9Sstevel@tonic-gate #include <sys/callb.h>
357c478bd9Sstevel@tonic-gate #include <sys/debug.h>
367c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
377c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
387c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
397c478bd9Sstevel@tonic-gate 
407c478bd9Sstevel@tonic-gate /*
417c478bd9Sstevel@tonic-gate  * Callout tables.  See timeout(9F) for details.
427c478bd9Sstevel@tonic-gate  */
4387a18d3fSMadhavan Venkataraman static hrtime_t callout_debug_hrtime;		/* debugger entry time */
4487a18d3fSMadhavan Venkataraman static int callout_min_resolution;		/* Minimum resolution */
4587a18d3fSMadhavan Venkataraman static callout_table_t *callout_boot_ct;	/* Boot CPU's callout tables */
46454ab202SMadhavan Venkataraman static clock_t callout_max_ticks;		/* max interval */
4787a18d3fSMadhavan Venkataraman static hrtime_t callout_longterm;		/* longterm nanoseconds */
4887a18d3fSMadhavan Venkataraman static ulong_t callout_counter_low;		/* callout ID increment */
4987a18d3fSMadhavan Venkataraman static ulong_t callout_table_bits;		/* number of table bits in ID */
5087a18d3fSMadhavan Venkataraman static ulong_t callout_table_mask;		/* mask for the table bits */
5187a18d3fSMadhavan Venkataraman static callout_cache_t *callout_caches;		/* linked list of caches */
5287a18d3fSMadhavan Venkataraman #pragma align 64(callout_table)
5387a18d3fSMadhavan Venkataraman static callout_table_t *callout_table;		/* global callout table array */
547c478bd9Sstevel@tonic-gate 
55*07247649SMadhavan Venkataraman /*
56*07247649SMadhavan Venkataraman  * We run normal callouts from PIL 10. This means that no other handler that
57*07247649SMadhavan Venkataraman  * runs at PIL 10 is allowed to wait for normal callouts directly or indirectly
58*07247649SMadhavan Venkataraman  * as it will cause a deadlock. This has always been an unwritten rule.
59*07247649SMadhavan Venkataraman  * We are making it explicit here.
60*07247649SMadhavan Venkataraman  */
61*07247649SMadhavan Venkataraman static int callout_realtime_level = CY_LOW_LEVEL;
62*07247649SMadhavan Venkataraman static int callout_normal_level = CY_LOCK_LEVEL;
63*07247649SMadhavan Venkataraman 
/*
 * Names of the per-table callout statistics.
 *
 * NOTE(review): presumably indexed by the callout kstat identifiers
 * declared in the callout header, in which case the order here must stay
 * in sync with those definitions -- confirm before reordering.
 */
static char *callout_kstat_names[] = {
	"callout_timeouts",
	"callout_timeouts_pending",
	"callout_untimeouts_unexpired",
	"callout_untimeouts_executing",
	"callout_untimeouts_expired",
	"callout_expirations",
	"callout_allocations",
};
7387a18d3fSMadhavan Venkataraman 
/*
 * Link cp in at the head of the doubly linked list anchored at hash.
 *
 *	hash	- callout_hash_t lvalue holding the list's ch_head/ch_tail
 *	cp	- pointer to the element being inserted
 *	cnext	- name of the element's forward link member
 *	cprev	- name of the element's backward link member
 *
 * hash is evaluated exactly once. cp is evaluated several times, so it
 * must be free of side effects. The expansion is a compound statement;
 * use it as a statement followed by a semicolon.
 */
#define	CALLOUT_HASH_INSERT(hash, cp, cnext, cprev)	\
{							\
	callout_hash_t *hashp = &(hash);		\
							\
	(cp)->cprev = NULL;				\
	(cp)->cnext = hashp->ch_head;			\
	if (hashp->ch_head == NULL)			\
		hashp->ch_tail = (cp);			\
	else						\
		(cp)->cnext->cprev = (cp);		\
	hashp->ch_head = (cp);				\
}
867c478bd9Sstevel@tonic-gate 
/*
 * Link cp in at the tail of the doubly linked list anchored at hash.
 *
 *	hash	- callout_hash_t lvalue holding the list's ch_head/ch_tail
 *	cp	- pointer to the element being appended
 *	cnext	- name of the element's forward link member
 *	cprev	- name of the element's backward link member
 *
 * hash is evaluated exactly once. cp is evaluated several times, so it
 * must be free of side effects. The expansion is a compound statement;
 * use it as a statement followed by a semicolon.
 */
#define	CALLOUT_HASH_APPEND(hash, cp, cnext, cprev)	\
{							\
	callout_hash_t *hashp = &(hash);		\
							\
	(cp)->cnext = NULL;				\
	(cp)->cprev = hashp->ch_tail;			\
	if (hashp->ch_tail == NULL)			\
		hashp->ch_head = (cp);			\
	else						\
		(cp)->cprev->cnext = (cp);		\
	hashp->ch_tail = (cp);				\
}
9987a18d3fSMadhavan Venkataraman 
/*
 * Unlink cp from the doubly linked list anchored at hash.
 *
 *	hash	- callout_hash_t lvalue holding the list's ch_head/ch_tail
 *	cp	- pointer to the element being removed; it must currently be
 *		  on this list
 *	cnext	- name of the element's forward link member
 *	cprev	- name of the element's backward link member
 *
 * The list anchors and the neighbours' links are repaired; cp's own link
 * members are left as they were. hash is evaluated exactly once. cp is
 * evaluated several times, so it must be free of side effects.
 */
#define	CALLOUT_HASH_DELETE(hash, cp, cnext, cprev)	\
{							\
	callout_hash_t *hashp = &(hash);		\
							\
	if ((cp)->cnext == NULL)			\
		hashp->ch_tail = (cp)->cprev;		\
	else						\
		(cp)->cnext->cprev = (cp)->cprev;	\
	if ((cp)->cprev == NULL)			\
		hashp->ch_head = (cp)->cnext;		\
	else						\
		(cp)->cprev->cnext = (cp)->cnext;	\
}
11387a18d3fSMadhavan Venkataraman 
/*
 * These definitions help us queue callouts and callout lists. Here is
 * the queueing rationale:
 *
 *	- callouts are queued in a FIFO manner in the ID hash table.
 *	  TCP timers are typically cancelled in the same order that they
 *	  were issued. The FIFO queueing shortens the search for a callout
 *	  during untimeout().
 *
 *	- callouts are queued in a FIFO manner in their callout lists.
 *	  This ensures that the callouts are executed in the same order that
 *	  they were queued. This is fair. Plus, it helps to make each
 *	  callout expiration timely. It also favors cancellations.
 *
 *	- callout lists are queued in a LIFO manner in the callout list hash
 *	  table. This ensures that long term timers stay at the rear of the
 *	  hash lists.
 *
 *	- callout lists are queued in a FIFO manner in the expired callouts
 *	  list. This ensures that callout lists are executed in the order
 *	  of expiration.
 */

/*
 * Append callout cp to both lists it lives on: the ID hash bucket of table
 * ct selected by cp->c_xid, and the callout list cp->c_list. cp->c_xid and
 * cp->c_list must already be set.
 *
 * Both appends are wrapped in a single compound statement so that the
 * macro behaves as one statement, e.g. as the body of an unbraced "if".
 */
#define	CALLOUT_APPEND(ct, cp)						\
{									\
	CALLOUT_HASH_APPEND(						\
	    (ct)->ct_idhash[CALLOUT_IDHASH((cp)->c_xid)],		\
	    (cp), c_idnext, c_idprev);					\
	CALLOUT_HASH_APPEND((cp)->c_list->cl_callouts,			\
	    (cp), c_clnext, c_clprev);					\
}
14087a18d3fSMadhavan Venkataraman 
/*
 * Remove callout cp from both lists it lives on: the ID hash bucket of
 * table ct selected by cp->c_xid, and the callout list cp->c_list.
 *
 * Both removals are wrapped in a single compound statement so that the
 * macro behaves as one statement, e.g. as the body of an unbraced "if".
 */
#define	CALLOUT_DELETE(ct, cp)						\
{									\
	CALLOUT_HASH_DELETE(						\
	    (ct)->ct_idhash[CALLOUT_IDHASH((cp)->c_xid)],		\
	    (cp), c_idnext, c_idprev);					\
	CALLOUT_HASH_DELETE((cp)->c_list->cl_callouts,			\
	    (cp), c_clnext, c_clprev);					\
}
14587a18d3fSMadhavan Venkataraman 
/*
 * Queue operations on callout lists (callout_list_t), which are linked
 * through their cl_next/cl_prev members. hash is the callout_hash_t that
 * anchors the queue and cl is a pointer to the callout list.
 */
#define	CALLOUT_LIST_INSERT(hash, cl)				\
	CALLOUT_HASH_INSERT(hash, (cl), cl_next, cl_prev)

#define	CALLOUT_LIST_APPEND(hash, cl)				\
	CALLOUT_HASH_APPEND(hash, (cl), cl_next, cl_prev)

#define	CALLOUT_LIST_DELETE(hash, cl)				\
	CALLOUT_HASH_DELETE(hash, (cl), cl_next, cl_prev)
1547c478bd9Sstevel@tonic-gate 
/*
 * For normal callouts, there is a deadlock scenario if two callouts that
 * have an inter-dependency end up on the same callout list. To break the
 * deadlock, you need two taskq threads running in parallel. We compute
 * the number of taskq threads here using a bunch of conditions to make
 * it optimal for the common case. This is an ugly hack, but one that is
 * necessary (sigh).
 *
 * CALLOUT_THRESHOLD is how far ahead of gethrtime() the top of the heap
 * may be and still count as "the near future". It is expressed in hrtime
 * units (presumably nanoseconds, i.e. 100 milliseconds).
 *
 * CALLOUT_EXEC_COMPUTE(ct, exec) stores 0, 1 or 2 in the lvalue exec for
 * callout table pointer ct. ct is evaluated more than once and must be
 * free of side effects.
 */
#define	CALLOUT_THRESHOLD	100000000
#define	CALLOUT_EXEC_COMPUTE(ct, exec)					\
{									\
	callout_list_t *cl;						\
									\
	cl = (ct)->ct_expired.ch_head;					\
	if (cl == NULL) {						\
		/*							\
		 * If the expired list is NULL, there is nothing to	\
		 * process.						\
		 */							\
		(exec) = 0;						\
	} else if ((cl->cl_next == NULL) &&				\
	    (cl->cl_callouts.ch_head == cl->cl_callouts.ch_tail)) {	\
		/*							\
		 * If there is only one callout list and it contains	\
		 * only one callout, there is no need for two threads.	\
		 */							\
		(exec) = 1;						\
	} else if (((ct)->ct_heap_num == 0) ||				\
	    ((ct)->ct_heap[0] > gethrtime() + CALLOUT_THRESHOLD)) {	\
		/*							\
		 * If the heap has become empty, we need two threads as	\
		 * there is no one to kick off the second thread in the	\
		 * future. If the heap is not empty and the top of the	\
		 * heap does not expire in the near future, we need two	\
		 * threads.						\
		 */							\
		(exec) = 2;						\
	} else {							\
		/*							\
		 * We have multiple callouts to process. But the cyclic	\
		 * will fire in the near future. So, we only need one	\
		 * thread for now.					\
		 */							\
		(exec) = 1;						\
	}								\
}
201*07247649SMadhavan Venkataraman 
202*07247649SMadhavan Venkataraman /*
2037c478bd9Sstevel@tonic-gate  * Allocate a callout structure.  We try quite hard because we
2047c478bd9Sstevel@tonic-gate  * can't sleep, and if we can't do the allocation, we're toast.
20587a18d3fSMadhavan Venkataraman  * Failing all, we try a KM_PANIC allocation. Note that we never
20687a18d3fSMadhavan Venkataraman  * deallocate a callout. See untimeout() for the reasoning.
2077c478bd9Sstevel@tonic-gate  */
2087c478bd9Sstevel@tonic-gate static callout_t *
2097c478bd9Sstevel@tonic-gate callout_alloc(callout_table_t *ct)
2107c478bd9Sstevel@tonic-gate {
21187a18d3fSMadhavan Venkataraman 	size_t size;
21287a18d3fSMadhavan Venkataraman 	callout_t *cp;
2137c478bd9Sstevel@tonic-gate 
21487a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
21587a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
21687a18d3fSMadhavan Venkataraman 
21787a18d3fSMadhavan Venkataraman 	cp = kmem_cache_alloc(ct->ct_cache, KM_NOSLEEP);
21887a18d3fSMadhavan Venkataraman 	if (cp == NULL) {
21987a18d3fSMadhavan Venkataraman 		size = sizeof (callout_t);
22087a18d3fSMadhavan Venkataraman 		cp = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
22187a18d3fSMadhavan Venkataraman 	}
22287a18d3fSMadhavan Venkataraman 	cp->c_xid = 0;
223*07247649SMadhavan Venkataraman 	cp->c_executor = NULL;
224*07247649SMadhavan Venkataraman 	cv_init(&cp->c_done, NULL, CV_DEFAULT, NULL);
225*07247649SMadhavan Venkataraman 	cp->c_waiting = 0;
22687a18d3fSMadhavan Venkataraman 
22787a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
22887a18d3fSMadhavan Venkataraman 	ct->ct_allocations++;
2297c478bd9Sstevel@tonic-gate 	return (cp);
2307c478bd9Sstevel@tonic-gate }
2317c478bd9Sstevel@tonic-gate 
2327c478bd9Sstevel@tonic-gate /*
23387a18d3fSMadhavan Venkataraman  * Allocate a callout list structure.  We try quite hard because we
23487a18d3fSMadhavan Venkataraman  * can't sleep, and if we can't do the allocation, we're toast.
23587a18d3fSMadhavan Venkataraman  * Failing all, we try a KM_PANIC allocation. Note that we never
23687a18d3fSMadhavan Venkataraman  * deallocate a callout list.
2377c478bd9Sstevel@tonic-gate  */
23887a18d3fSMadhavan Venkataraman static void
23987a18d3fSMadhavan Venkataraman callout_list_alloc(callout_table_t *ct)
2407c478bd9Sstevel@tonic-gate {
24187a18d3fSMadhavan Venkataraman 	size_t size;
24287a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
24387a18d3fSMadhavan Venkataraman 
24487a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
24587a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
24687a18d3fSMadhavan Venkataraman 
24787a18d3fSMadhavan Venkataraman 	cl = kmem_cache_alloc(ct->ct_lcache, KM_NOSLEEP);
24887a18d3fSMadhavan Venkataraman 	if (cl == NULL) {
24987a18d3fSMadhavan Venkataraman 		size = sizeof (callout_list_t);
25087a18d3fSMadhavan Venkataraman 		cl = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
25187a18d3fSMadhavan Venkataraman 	}
25287a18d3fSMadhavan Venkataraman 	bzero(cl, sizeof (callout_list_t));
25387a18d3fSMadhavan Venkataraman 
25487a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
25587a18d3fSMadhavan Venkataraman 	cl->cl_next = ct->ct_lfree;
25687a18d3fSMadhavan Venkataraman 	ct->ct_lfree = cl;
25787a18d3fSMadhavan Venkataraman }
25887a18d3fSMadhavan Venkataraman 
25987a18d3fSMadhavan Venkataraman /*
260*07247649SMadhavan Venkataraman  * Find a callout list that corresponds to an expiration.
26187a18d3fSMadhavan Venkataraman  */
26287a18d3fSMadhavan Venkataraman static callout_list_t *
263*07247649SMadhavan Venkataraman callout_list_get(callout_table_t *ct, hrtime_t expiration, int flags, int hash)
26487a18d3fSMadhavan Venkataraman {
26587a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
26687a18d3fSMadhavan Venkataraman 
26787a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
26887a18d3fSMadhavan Venkataraman 
26987a18d3fSMadhavan Venkataraman 	for (cl = ct->ct_clhash[hash].ch_head; (cl != NULL); cl = cl->cl_next) {
270*07247649SMadhavan Venkataraman 		if ((cl->cl_expiration == expiration) &&
271*07247649SMadhavan Venkataraman 		    (cl->cl_flags == flags))
27287a18d3fSMadhavan Venkataraman 			return (cl);
27387a18d3fSMadhavan Venkataraman 	}
27487a18d3fSMadhavan Venkataraman 
27587a18d3fSMadhavan Venkataraman 	return (NULL);
27687a18d3fSMadhavan Venkataraman }
27787a18d3fSMadhavan Venkataraman 
27887a18d3fSMadhavan Venkataraman /*
279*07247649SMadhavan Venkataraman  * Find the callout list that corresponds to an expiration.
280*07247649SMadhavan Venkataraman  * If the callout list is null, free it. Else, return it.
28187a18d3fSMadhavan Venkataraman  */
28287a18d3fSMadhavan Venkataraman static callout_list_t *
28387a18d3fSMadhavan Venkataraman callout_list_check(callout_table_t *ct, hrtime_t expiration, int hash)
28487a18d3fSMadhavan Venkataraman {
28587a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
28687a18d3fSMadhavan Venkataraman 
28787a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
28887a18d3fSMadhavan Venkataraman 
289*07247649SMadhavan Venkataraman 	for (cl = ct->ct_clhash[hash].ch_head; (cl != NULL); cl = cl->cl_next) {
290*07247649SMadhavan Venkataraman 		if (cl->cl_expiration == expiration) {
29187a18d3fSMadhavan Venkataraman 			if (cl->cl_callouts.ch_head != NULL) {
29287a18d3fSMadhavan Venkataraman 				/*
293*07247649SMadhavan Venkataraman 				 * Found a match.
29487a18d3fSMadhavan Venkataraman 				 */
29587a18d3fSMadhavan Venkataraman 				return (cl);
29687a18d3fSMadhavan Venkataraman 			}
29787a18d3fSMadhavan Venkataraman 
29887a18d3fSMadhavan Venkataraman 			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
29987a18d3fSMadhavan Venkataraman 			cl->cl_next = ct->ct_lfree;
30087a18d3fSMadhavan Venkataraman 			ct->ct_lfree = cl;
301*07247649SMadhavan Venkataraman 
302*07247649SMadhavan Venkataraman 			return (NULL);
303*07247649SMadhavan Venkataraman 		}
30487a18d3fSMadhavan Venkataraman 	}
30587a18d3fSMadhavan Venkataraman 
30687a18d3fSMadhavan Venkataraman 	return (NULL);
30787a18d3fSMadhavan Venkataraman }
30887a18d3fSMadhavan Venkataraman /*
30987a18d3fSMadhavan Venkataraman  * Initialize a callout table's heap, if necessary. Preallocate some free
31087a18d3fSMadhavan Venkataraman  * entries so we don't have to check for NULL elsewhere.
31187a18d3fSMadhavan Venkataraman  */
31287a18d3fSMadhavan Venkataraman static void
31387a18d3fSMadhavan Venkataraman callout_heap_init(callout_table_t *ct)
31487a18d3fSMadhavan Venkataraman {
31587a18d3fSMadhavan Venkataraman 	size_t size;
31687a18d3fSMadhavan Venkataraman 
31787a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
31887a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap == NULL);
31987a18d3fSMadhavan Venkataraman 
32087a18d3fSMadhavan Venkataraman 	ct->ct_heap_num = 0;
32187a18d3fSMadhavan Venkataraman 	ct->ct_heap_max = CALLOUT_CHUNK;
32287a18d3fSMadhavan Venkataraman 	size = sizeof (hrtime_t) * CALLOUT_CHUNK;
32387a18d3fSMadhavan Venkataraman 	ct->ct_heap = kmem_alloc(size, KM_SLEEP);
32487a18d3fSMadhavan Venkataraman }
32587a18d3fSMadhavan Venkataraman 
32687a18d3fSMadhavan Venkataraman /*
32787a18d3fSMadhavan Venkataraman  * Reallocate the heap. We try quite hard because we can't sleep, and if
32887a18d3fSMadhavan Venkataraman  * we can't do the allocation, we're toast. Failing all, we try a KM_PANIC
32987a18d3fSMadhavan Venkataraman  * allocation. Note that the heap only expands, it never contracts.
33087a18d3fSMadhavan Venkataraman  */
33187a18d3fSMadhavan Venkataraman static void
33287a18d3fSMadhavan Venkataraman callout_heap_expand(callout_table_t *ct)
33387a18d3fSMadhavan Venkataraman {
33487a18d3fSMadhavan Venkataraman 	size_t max, size, osize;
33587a18d3fSMadhavan Venkataraman 	hrtime_t *heap;
33687a18d3fSMadhavan Venkataraman 
33787a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
33887a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap_num <= ct->ct_heap_max);
33987a18d3fSMadhavan Venkataraman 
34087a18d3fSMadhavan Venkataraman 	while (ct->ct_heap_num == ct->ct_heap_max) {
34187a18d3fSMadhavan Venkataraman 		max = ct->ct_heap_max;
34287a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
34387a18d3fSMadhavan Venkataraman 
34487a18d3fSMadhavan Venkataraman 		osize = sizeof (hrtime_t) * max;
34587a18d3fSMadhavan Venkataraman 		size = sizeof (hrtime_t) * (max + CALLOUT_CHUNK);
34687a18d3fSMadhavan Venkataraman 		heap = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
34787a18d3fSMadhavan Venkataraman 
34887a18d3fSMadhavan Venkataraman 		mutex_enter(&ct->ct_mutex);
34987a18d3fSMadhavan Venkataraman 		if (max < ct->ct_heap_max) {
35087a18d3fSMadhavan Venkataraman 			/*
35187a18d3fSMadhavan Venkataraman 			 * Someone beat us to the allocation. Free what we
35287a18d3fSMadhavan Venkataraman 			 * just allocated and proceed.
35387a18d3fSMadhavan Venkataraman 			 */
35487a18d3fSMadhavan Venkataraman 			kmem_free(heap, size);
35587a18d3fSMadhavan Venkataraman 			continue;
35687a18d3fSMadhavan Venkataraman 		}
35787a18d3fSMadhavan Venkataraman 
35887a18d3fSMadhavan Venkataraman 		bcopy(ct->ct_heap, heap, osize);
35987a18d3fSMadhavan Venkataraman 		kmem_free(ct->ct_heap, osize);
36087a18d3fSMadhavan Venkataraman 		ct->ct_heap = heap;
36187a18d3fSMadhavan Venkataraman 		ct->ct_heap_max = size / sizeof (hrtime_t);
36287a18d3fSMadhavan Venkataraman 	}
36387a18d3fSMadhavan Venkataraman }
36487a18d3fSMadhavan Venkataraman 
36587a18d3fSMadhavan Venkataraman /*
36687a18d3fSMadhavan Venkataraman  * Move an expiration from the bottom of the heap to its correct place
36787a18d3fSMadhavan Venkataraman  * in the heap. If we reached the root doing this, return 1. Else,
36887a18d3fSMadhavan Venkataraman  * return 0.
36987a18d3fSMadhavan Venkataraman  */
37087a18d3fSMadhavan Venkataraman static int
37187a18d3fSMadhavan Venkataraman callout_upheap(callout_table_t *ct)
37287a18d3fSMadhavan Venkataraman {
37387a18d3fSMadhavan Venkataraman 	int current, parent;
37487a18d3fSMadhavan Venkataraman 	hrtime_t *heap, current_expiration, parent_expiration;
37587a18d3fSMadhavan Venkataraman 
37687a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
37787a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap_num >= 1);
37887a18d3fSMadhavan Venkataraman 
37987a18d3fSMadhavan Venkataraman 	if (ct->ct_heap_num == 1) {
38087a18d3fSMadhavan Venkataraman 		return (1);
38187a18d3fSMadhavan Venkataraman 	}
38287a18d3fSMadhavan Venkataraman 
38387a18d3fSMadhavan Venkataraman 	heap = ct->ct_heap;
38487a18d3fSMadhavan Venkataraman 	current = ct->ct_heap_num - 1;
38587a18d3fSMadhavan Venkataraman 
38687a18d3fSMadhavan Venkataraman 	for (;;) {
38787a18d3fSMadhavan Venkataraman 		parent = CALLOUT_HEAP_PARENT(current);
38887a18d3fSMadhavan Venkataraman 		current_expiration = heap[current];
38987a18d3fSMadhavan Venkataraman 		parent_expiration = heap[parent];
39087a18d3fSMadhavan Venkataraman 
39187a18d3fSMadhavan Venkataraman 		/*
39287a18d3fSMadhavan Venkataraman 		 * We have an expiration later than our parent; we're done.
39387a18d3fSMadhavan Venkataraman 		 */
39487a18d3fSMadhavan Venkataraman 		if (current_expiration >= parent_expiration) {
39587a18d3fSMadhavan Venkataraman 			return (0);
39687a18d3fSMadhavan Venkataraman 		}
39787a18d3fSMadhavan Venkataraman 
39887a18d3fSMadhavan Venkataraman 		/*
39987a18d3fSMadhavan Venkataraman 		 * We need to swap with our parent, and continue up the heap.
40087a18d3fSMadhavan Venkataraman 		 */
40187a18d3fSMadhavan Venkataraman 		heap[parent] = current_expiration;
40287a18d3fSMadhavan Venkataraman 		heap[current] = parent_expiration;
40387a18d3fSMadhavan Venkataraman 
40487a18d3fSMadhavan Venkataraman 		/*
40587a18d3fSMadhavan Venkataraman 		 * If we just reached the root, we're done.
40687a18d3fSMadhavan Venkataraman 		 */
40787a18d3fSMadhavan Venkataraman 		if (parent == 0) {
40887a18d3fSMadhavan Venkataraman 			return (1);
40987a18d3fSMadhavan Venkataraman 		}
41087a18d3fSMadhavan Venkataraman 
41187a18d3fSMadhavan Venkataraman 		current = parent;
41287a18d3fSMadhavan Venkataraman 	}
41387a18d3fSMadhavan Venkataraman 	/*NOTREACHED*/
41487a18d3fSMadhavan Venkataraman }
41587a18d3fSMadhavan Venkataraman 
41687a18d3fSMadhavan Venkataraman /*
417*07247649SMadhavan Venkataraman  * Insert a new expiration into a callout table's heap.
41887a18d3fSMadhavan Venkataraman  */
41987a18d3fSMadhavan Venkataraman static void
42087a18d3fSMadhavan Venkataraman callout_heap_insert(callout_table_t *ct, hrtime_t expiration)
42187a18d3fSMadhavan Venkataraman {
42287a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
42387a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap_num < ct->ct_heap_max);
42487a18d3fSMadhavan Venkataraman 
42587a18d3fSMadhavan Venkataraman 	/*
42687a18d3fSMadhavan Venkataraman 	 * First, copy the expiration to the bottom of the heap.
42787a18d3fSMadhavan Venkataraman 	 */
42887a18d3fSMadhavan Venkataraman 	ct->ct_heap[ct->ct_heap_num] = expiration;
42987a18d3fSMadhavan Venkataraman 	ct->ct_heap_num++;
43087a18d3fSMadhavan Venkataraman 
43187a18d3fSMadhavan Venkataraman 	/*
43287a18d3fSMadhavan Venkataraman 	 * Now, perform an upheap operation. If we reached the root, then
43387a18d3fSMadhavan Venkataraman 	 * the cyclic needs to be reprogrammed as we have an earlier
43487a18d3fSMadhavan Venkataraman 	 * expiration.
43587a18d3fSMadhavan Venkataraman 	 *
43687a18d3fSMadhavan Venkataraman 	 * Also, during the CPR suspend phase, do not reprogram the cyclic.
43787a18d3fSMadhavan Venkataraman 	 * We don't want any callout activity. When the CPR resume phase is
43887a18d3fSMadhavan Venkataraman 	 * entered, the cyclic will be programmed for the earliest expiration
43987a18d3fSMadhavan Venkataraman 	 * in the heap.
44087a18d3fSMadhavan Venkataraman 	 */
441454ab202SMadhavan Venkataraman 	if (callout_upheap(ct) && (ct->ct_suspend == 0))
44287a18d3fSMadhavan Venkataraman 		(void) cyclic_reprogram(ct->ct_cyclic, expiration);
44387a18d3fSMadhavan Venkataraman }
44487a18d3fSMadhavan Venkataraman 
44587a18d3fSMadhavan Venkataraman /*
44687a18d3fSMadhavan Venkataraman  * Move an expiration from the top of the heap to its correct place
44787a18d3fSMadhavan Venkataraman  * in the heap.
44887a18d3fSMadhavan Venkataraman  */
44987a18d3fSMadhavan Venkataraman static void
45087a18d3fSMadhavan Venkataraman callout_downheap(callout_table_t *ct)
45187a18d3fSMadhavan Venkataraman {
45287a18d3fSMadhavan Venkataraman 	int left, right, current, nelems;
45387a18d3fSMadhavan Venkataraman 	hrtime_t *heap, left_expiration, right_expiration, current_expiration;
45487a18d3fSMadhavan Venkataraman 
45587a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
45687a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap_num >= 1);
45787a18d3fSMadhavan Venkataraman 
45887a18d3fSMadhavan Venkataraman 	heap = ct->ct_heap;
45987a18d3fSMadhavan Venkataraman 	current = 0;
46087a18d3fSMadhavan Venkataraman 	nelems = ct->ct_heap_num;
46187a18d3fSMadhavan Venkataraman 
46287a18d3fSMadhavan Venkataraman 	for (;;) {
46387a18d3fSMadhavan Venkataraman 		/*
46487a18d3fSMadhavan Venkataraman 		 * If we don't have a left child (i.e., we're a leaf), we're
46587a18d3fSMadhavan Venkataraman 		 * done.
46687a18d3fSMadhavan Venkataraman 		 */
46787a18d3fSMadhavan Venkataraman 		if ((left = CALLOUT_HEAP_LEFT(current)) >= nelems)
46887a18d3fSMadhavan Venkataraman 			return;
46987a18d3fSMadhavan Venkataraman 
47087a18d3fSMadhavan Venkataraman 		left_expiration = heap[left];
47187a18d3fSMadhavan Venkataraman 		current_expiration = heap[current];
47287a18d3fSMadhavan Venkataraman 
47387a18d3fSMadhavan Venkataraman 		right = CALLOUT_HEAP_RIGHT(current);
47487a18d3fSMadhavan Venkataraman 
47587a18d3fSMadhavan Venkataraman 		/*
47687a18d3fSMadhavan Venkataraman 		 * Even if we don't have a right child, we still need to compare
47787a18d3fSMadhavan Venkataraman 		 * our expiration against that of our left child.
47887a18d3fSMadhavan Venkataraman 		 */
47987a18d3fSMadhavan Venkataraman 		if (right >= nelems)
48087a18d3fSMadhavan Venkataraman 			goto comp_left;
48187a18d3fSMadhavan Venkataraman 
48287a18d3fSMadhavan Venkataraman 		right_expiration = heap[right];
48387a18d3fSMadhavan Venkataraman 
48487a18d3fSMadhavan Venkataraman 		/*
48587a18d3fSMadhavan Venkataraman 		 * We have both a left and a right child.  We need to compare
48687a18d3fSMadhavan Venkataraman 		 * the expiration of the children to determine which
48787a18d3fSMadhavan Venkataraman 		 * expires earlier.
48887a18d3fSMadhavan Venkataraman 		 */
48987a18d3fSMadhavan Venkataraman 		if (right_expiration < left_expiration) {
49087a18d3fSMadhavan Venkataraman 			/*
49187a18d3fSMadhavan Venkataraman 			 * Our right child is the earlier of our children.
49287a18d3fSMadhavan Venkataraman 			 * We'll now compare our expiration to its expiration.
49387a18d3fSMadhavan Venkataraman 			 * If ours is the earlier one, we're done.
49487a18d3fSMadhavan Venkataraman 			 */
49587a18d3fSMadhavan Venkataraman 			if (current_expiration <= right_expiration)
49687a18d3fSMadhavan Venkataraman 				return;
49787a18d3fSMadhavan Venkataraman 
49887a18d3fSMadhavan Venkataraman 			/*
49987a18d3fSMadhavan Venkataraman 			 * Our right child expires earlier than we do; swap
50087a18d3fSMadhavan Venkataraman 			 * with our right child, and descend right.
50187a18d3fSMadhavan Venkataraman 			 */
50287a18d3fSMadhavan Venkataraman 			heap[right] = current_expiration;
50387a18d3fSMadhavan Venkataraman 			heap[current] = right_expiration;
50487a18d3fSMadhavan Venkataraman 			current = right;
50587a18d3fSMadhavan Venkataraman 			continue;
50687a18d3fSMadhavan Venkataraman 		}
50787a18d3fSMadhavan Venkataraman 
50887a18d3fSMadhavan Venkataraman comp_left:
50987a18d3fSMadhavan Venkataraman 		/*
51087a18d3fSMadhavan Venkataraman 		 * Our left child is the earlier of our children (or we have
51187a18d3fSMadhavan Venkataraman 		 * no right child).  We'll now compare our expiration
51287a18d3fSMadhavan Venkataraman 		 * to its expiration. If ours is the earlier one, we're done.
51387a18d3fSMadhavan Venkataraman 		 */
51487a18d3fSMadhavan Venkataraman 		if (current_expiration <= left_expiration)
51587a18d3fSMadhavan Venkataraman 			return;
51687a18d3fSMadhavan Venkataraman 
51787a18d3fSMadhavan Venkataraman 		/*
51887a18d3fSMadhavan Venkataraman 		 * Our left child expires earlier than we do; swap with our
51987a18d3fSMadhavan Venkataraman 		 * left child, and descend left.
52087a18d3fSMadhavan Venkataraman 		 */
52187a18d3fSMadhavan Venkataraman 		heap[left] = current_expiration;
52287a18d3fSMadhavan Venkataraman 		heap[current] = left_expiration;
52387a18d3fSMadhavan Venkataraman 		current = left;
52487a18d3fSMadhavan Venkataraman 	}
52587a18d3fSMadhavan Venkataraman }
52687a18d3fSMadhavan Venkataraman 
52787a18d3fSMadhavan Venkataraman /*
52887a18d3fSMadhavan Venkataraman  * Delete and handle all past expirations in a callout table's heap.
52987a18d3fSMadhavan Venkataraman  */
53087a18d3fSMadhavan Venkataraman static void
53187a18d3fSMadhavan Venkataraman callout_heap_delete(callout_table_t *ct)
53287a18d3fSMadhavan Venkataraman {
53387a18d3fSMadhavan Venkataraman 	hrtime_t now, expiration;
53487a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
53587a18d3fSMadhavan Venkataraman 	int hash;
53687a18d3fSMadhavan Venkataraman 
53787a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
53887a18d3fSMadhavan Venkataraman 
53987a18d3fSMadhavan Venkataraman 	now = gethrtime();
54087a18d3fSMadhavan Venkataraman 
54187a18d3fSMadhavan Venkataraman 	while (ct->ct_heap_num > 0) {
54287a18d3fSMadhavan Venkataraman 		expiration = ct->ct_heap[0];
54387a18d3fSMadhavan Venkataraman 		/*
54487a18d3fSMadhavan Venkataraman 		 * Find the callout list that corresponds to the expiration.
54587a18d3fSMadhavan Venkataraman 		 * If the callout list is empty, callout_list_check()
54687a18d3fSMadhavan Venkataraman 		 * will free the callout list and return NULL.
54787a18d3fSMadhavan Venkataraman 		 */
54887a18d3fSMadhavan Venkataraman 		hash = CALLOUT_CLHASH(expiration);
54987a18d3fSMadhavan Venkataraman 		cl = callout_list_check(ct, expiration, hash);
55087a18d3fSMadhavan Venkataraman 		if (cl != NULL) {
55187a18d3fSMadhavan Venkataraman 			/*
55287a18d3fSMadhavan Venkataraman 			 * If the root of the heap expires in the future, we are
55387a18d3fSMadhavan Venkataraman 			 * done. We are doing this check here instead of at the
55487a18d3fSMadhavan Venkataraman 			 * beginning because we want to first free all the
55587a18d3fSMadhavan Venkataraman 			 * empty callout lists at the top of the heap.
55687a18d3fSMadhavan Venkataraman 			 */
55787a18d3fSMadhavan Venkataraman 			if (expiration > now)
55887a18d3fSMadhavan Venkataraman 				break;
55987a18d3fSMadhavan Venkataraman 
56087a18d3fSMadhavan Venkataraman 			/*
56187a18d3fSMadhavan Venkataraman 			 * Move the callout list for this expiration to the
56287a18d3fSMadhavan Venkataraman 			 * list of expired callout lists. It will be processed
56387a18d3fSMadhavan Venkataraman 			 * by the callout executor.
56487a18d3fSMadhavan Venkataraman 			 */
56587a18d3fSMadhavan Venkataraman 			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
56687a18d3fSMadhavan Venkataraman 			CALLOUT_LIST_APPEND(ct->ct_expired, cl);
56787a18d3fSMadhavan Venkataraman 		}
56887a18d3fSMadhavan Venkataraman 
56987a18d3fSMadhavan Venkataraman 		/*
57087a18d3fSMadhavan Venkataraman 		 * Now delete the root. This is done by swapping the root with
57187a18d3fSMadhavan Venkataraman 		 * the last item in the heap and downheaping the item.
57287a18d3fSMadhavan Venkataraman 		 */
57387a18d3fSMadhavan Venkataraman 		ct->ct_heap_num--;
57487a18d3fSMadhavan Venkataraman 		if (ct->ct_heap_num > 0) {
57587a18d3fSMadhavan Venkataraman 			ct->ct_heap[0] = ct->ct_heap[ct->ct_heap_num];
57687a18d3fSMadhavan Venkataraman 			callout_downheap(ct);
57787a18d3fSMadhavan Venkataraman 		}
57887a18d3fSMadhavan Venkataraman 	}
57987a18d3fSMadhavan Venkataraman 
58087a18d3fSMadhavan Venkataraman 	/*
58187a18d3fSMadhavan Venkataraman 	 * If this callout table is empty or callouts have been suspended
58287a18d3fSMadhavan Venkataraman 	 * by CPR, just return. The cyclic has already been programmed to
58387a18d3fSMadhavan Venkataraman 	 * infinity by the cyclic subsystem.
58487a18d3fSMadhavan Venkataraman 	 */
585454ab202SMadhavan Venkataraman 	if ((ct->ct_heap_num == 0) || (ct->ct_suspend > 0))
58687a18d3fSMadhavan Venkataraman 		return;
58787a18d3fSMadhavan Venkataraman 
58887a18d3fSMadhavan Venkataraman 	(void) cyclic_reprogram(ct->ct_cyclic, expiration);
58987a18d3fSMadhavan Venkataraman }
59087a18d3fSMadhavan Venkataraman 
591454ab202SMadhavan Venkataraman /*
592454ab202SMadhavan Venkataraman  * Common function used to create normal and realtime callouts.
593454ab202SMadhavan Venkataraman  *
594454ab202SMadhavan Venkataraman  * Realtime callouts are handled at CY_LOW_PIL by a cyclic handler. So,
595454ab202SMadhavan Venkataraman  * there is one restriction on a realtime callout handler - it should not
596454ab202SMadhavan Venkataraman  * directly or indirectly acquire cpu_lock. CPU offline waits for pending
597454ab202SMadhavan Venkataraman  * cyclic handlers to complete while holding cpu_lock. So, if a realtime
598454ab202SMadhavan Venkataraman  * callout handler were to try to get cpu_lock, there would be a deadlock
599454ab202SMadhavan Venkataraman  * during CPU offline.
600454ab202SMadhavan Venkataraman  */
60187a18d3fSMadhavan Venkataraman callout_id_t
60287a18d3fSMadhavan Venkataraman timeout_generic(int type, void (*func)(void *), void *arg,
60387a18d3fSMadhavan Venkataraman 	hrtime_t expiration, hrtime_t resolution, int flags)
60487a18d3fSMadhavan Venkataraman {
60587a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
6067c478bd9Sstevel@tonic-gate 	callout_t *cp;
6077c478bd9Sstevel@tonic-gate 	callout_id_t id;
60887a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
60987a18d3fSMadhavan Venkataraman 	hrtime_t now, interval;
61087a18d3fSMadhavan Venkataraman 	int hash;
611f635d46aSqiao 
61287a18d3fSMadhavan Venkataraman 	ASSERT(resolution > 0);
61387a18d3fSMadhavan Venkataraman 	ASSERT(func != NULL);
6147c478bd9Sstevel@tonic-gate 
61587a18d3fSMadhavan Venkataraman 	/*
61687a18d3fSMadhavan Venkataraman 	 * Please see comment about minimum resolution in callout_init().
61787a18d3fSMadhavan Venkataraman 	 */
61887a18d3fSMadhavan Venkataraman 	if (resolution < callout_min_resolution)
61987a18d3fSMadhavan Venkataraman 		resolution = callout_min_resolution;
6207c478bd9Sstevel@tonic-gate 
62187a18d3fSMadhavan Venkataraman 	/*
62287a18d3fSMadhavan Venkataraman 	 * We disable kernel preemption so that we remain on the same CPU
62387a18d3fSMadhavan Venkataraman 	 * throughout. If we needed to reprogram the callout table's cyclic,
62487a18d3fSMadhavan Venkataraman 	 * we can avoid X-calls if we are on the same CPU.
62587a18d3fSMadhavan Venkataraman 	 *
62687a18d3fSMadhavan Venkataraman 	 * Note that callout_alloc() releases and reacquires the callout
62787a18d3fSMadhavan Venkataraman 	 * table mutex. While reacquiring the mutex, it is possible for us
62887a18d3fSMadhavan Venkataraman 	 * to go to sleep and later migrate to another CPU. This should be
62987a18d3fSMadhavan Venkataraman 	 * pretty rare, though.
63087a18d3fSMadhavan Venkataraman 	 */
63187a18d3fSMadhavan Venkataraman 	kpreempt_disable();
63287a18d3fSMadhavan Venkataraman 
63387a18d3fSMadhavan Venkataraman 	ct = &callout_table[CALLOUT_TABLE(type, CPU->cpu_seqid)];
63487a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
63587a18d3fSMadhavan Venkataraman 
63687a18d3fSMadhavan Venkataraman 	if (ct->ct_cyclic == CYCLIC_NONE) {
63787a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
63887a18d3fSMadhavan Venkataraman 		/*
63987a18d3fSMadhavan Venkataraman 		 * The callout table has not yet been initialized fully.
64087a18d3fSMadhavan Venkataraman 		 * So, put this one on the boot callout table which is
64187a18d3fSMadhavan Venkataraman 		 * always initialized.
64287a18d3fSMadhavan Venkataraman 		 */
64387a18d3fSMadhavan Venkataraman 		ct = &callout_boot_ct[type];
64487a18d3fSMadhavan Venkataraman 		mutex_enter(&ct->ct_mutex);
64587a18d3fSMadhavan Venkataraman 	}
64687a18d3fSMadhavan Venkataraman 
64787a18d3fSMadhavan Venkataraman 	if ((cp = ct->ct_free) == NULL)
6487c478bd9Sstevel@tonic-gate 		cp = callout_alloc(ct);
6497c478bd9Sstevel@tonic-gate 	else
65087a18d3fSMadhavan Venkataraman 		ct->ct_free = cp->c_idnext;
6517c478bd9Sstevel@tonic-gate 
6527c478bd9Sstevel@tonic-gate 	cp->c_func = func;
6537c478bd9Sstevel@tonic-gate 	cp->c_arg = arg;
6547c478bd9Sstevel@tonic-gate 
6557c478bd9Sstevel@tonic-gate 	/*
65687a18d3fSMadhavan Venkataraman 	 * Compute the expiration hrtime.
65787a18d3fSMadhavan Venkataraman 	 */
65887a18d3fSMadhavan Venkataraman 	now = gethrtime();
65987a18d3fSMadhavan Venkataraman 	if (flags & CALLOUT_FLAG_ABSOLUTE) {
66087a18d3fSMadhavan Venkataraman 		interval = expiration - now;
66187a18d3fSMadhavan Venkataraman 	} else {
66287a18d3fSMadhavan Venkataraman 		interval = expiration;
66387a18d3fSMadhavan Venkataraman 		expiration += now;
66487a18d3fSMadhavan Venkataraman 	}
66587a18d3fSMadhavan Venkataraman 	if (flags & CALLOUT_FLAG_ROUNDUP)
66687a18d3fSMadhavan Venkataraman 		expiration += resolution - 1;
66787a18d3fSMadhavan Venkataraman 	expiration = (expiration / resolution) * resolution;
668454ab202SMadhavan Venkataraman 	if (expiration <= 0) {
669454ab202SMadhavan Venkataraman 		/*
670454ab202SMadhavan Venkataraman 		 * expiration hrtime overflow has occurred. Just set the
671454ab202SMadhavan Venkataraman 		 * expiration to infinity.
672454ab202SMadhavan Venkataraman 		 */
673454ab202SMadhavan Venkataraman 		expiration = CY_INFINITY;
674454ab202SMadhavan Venkataraman 	}
67587a18d3fSMadhavan Venkataraman 
67687a18d3fSMadhavan Venkataraman 	/*
67787a18d3fSMadhavan Venkataraman 	 * Assign an ID to this callout
67887a18d3fSMadhavan Venkataraman 	 */
67987a18d3fSMadhavan Venkataraman 	if (flags & CALLOUT_FLAG_32BIT) {
68087a18d3fSMadhavan Venkataraman 		if (interval > callout_longterm) {
68187a18d3fSMadhavan Venkataraman 			id = (ct->ct_long_id - callout_counter_low);
68287a18d3fSMadhavan Venkataraman 			id |= CALLOUT_COUNTER_HIGH;
68387a18d3fSMadhavan Venkataraman 			ct->ct_long_id = id;
68487a18d3fSMadhavan Venkataraman 		} else {
68587a18d3fSMadhavan Venkataraman 			id = (ct->ct_short_id - callout_counter_low);
68687a18d3fSMadhavan Venkataraman 			id |= CALLOUT_COUNTER_HIGH;
68787a18d3fSMadhavan Venkataraman 			ct->ct_short_id = id;
68887a18d3fSMadhavan Venkataraman 		}
68987a18d3fSMadhavan Venkataraman 	} else {
69087a18d3fSMadhavan Venkataraman 		id = (ct->ct_gen_id - callout_counter_low);
69187a18d3fSMadhavan Venkataraman 		if ((id & CALLOUT_COUNTER_HIGH) == 0) {
69287a18d3fSMadhavan Venkataraman 			id |= CALLOUT_COUNTER_HIGH;
69387a18d3fSMadhavan Venkataraman 			id += CALLOUT_GENERATION_LOW;
69487a18d3fSMadhavan Venkataraman 		}
69587a18d3fSMadhavan Venkataraman 		ct->ct_gen_id = id;
69687a18d3fSMadhavan Venkataraman 	}
69787a18d3fSMadhavan Venkataraman 
69887a18d3fSMadhavan Venkataraman 	cp->c_xid = id;
69987a18d3fSMadhavan Venkataraman 
700*07247649SMadhavan Venkataraman 	flags &= CALLOUT_LIST_FLAGS;
70187a18d3fSMadhavan Venkataraman 	hash = CALLOUT_CLHASH(expiration);
70287a18d3fSMadhavan Venkataraman 
70387a18d3fSMadhavan Venkataraman again:
70487a18d3fSMadhavan Venkataraman 	/*
70587a18d3fSMadhavan Venkataraman 	 * Try to see if a callout list already exists for this expiration.
70687a18d3fSMadhavan Venkataraman 	 * Most of the time, this will be the case.
70787a18d3fSMadhavan Venkataraman 	 */
708*07247649SMadhavan Venkataraman 	cl = callout_list_get(ct, expiration, flags, hash);
70987a18d3fSMadhavan Venkataraman 	if (cl == NULL) {
71087a18d3fSMadhavan Venkataraman 		/*
71187a18d3fSMadhavan Venkataraman 		 * Check if we have enough space in the heap to insert one
71287a18d3fSMadhavan Venkataraman 		 * expiration. If not, expand the heap.
71387a18d3fSMadhavan Venkataraman 		 */
71487a18d3fSMadhavan Venkataraman 		if (ct->ct_heap_num == ct->ct_heap_max) {
71587a18d3fSMadhavan Venkataraman 			callout_heap_expand(ct);
71687a18d3fSMadhavan Venkataraman 			/*
71787a18d3fSMadhavan Venkataraman 			 * In the above call, we drop the lock, allocate and
71887a18d3fSMadhavan Venkataraman 			 * reacquire the lock. So, we could have been away
71987a18d3fSMadhavan Venkataraman 			 * for a while. In the meantime, someone could have
72087a18d3fSMadhavan Venkataraman 			 * inserted a callout list with the same expiration.
72187a18d3fSMadhavan Venkataraman 			 * So, the best course is to repeat the steps. This
72287a18d3fSMadhavan Venkataraman 			 * should be an infrequent event.
72387a18d3fSMadhavan Venkataraman 			 */
72487a18d3fSMadhavan Venkataraman 			goto again;
72587a18d3fSMadhavan Venkataraman 		}
72687a18d3fSMadhavan Venkataraman 
72787a18d3fSMadhavan Venkataraman 		/*
72887a18d3fSMadhavan Venkataraman 		 * Check the free list. If we don't find one, we have to
72987a18d3fSMadhavan Venkataraman 		 * take the slow path and allocate from kmem.
73087a18d3fSMadhavan Venkataraman 		 */
73187a18d3fSMadhavan Venkataraman 		if ((cl = ct->ct_lfree) == NULL) {
73287a18d3fSMadhavan Venkataraman 			callout_list_alloc(ct);
73387a18d3fSMadhavan Venkataraman 			/*
73487a18d3fSMadhavan Venkataraman 			 * In the above call, we drop the lock, allocate and
73587a18d3fSMadhavan Venkataraman 			 * reacquire the lock. So, we could have been away
73687a18d3fSMadhavan Venkataraman 			 * for a while. In the meantime, someone could have
73787a18d3fSMadhavan Venkataraman 			 * inserted a callout list with the same expiration.
73887a18d3fSMadhavan Venkataraman 			 * Plus, the heap could have become full. So, the best
73987a18d3fSMadhavan Venkataraman 			 * course is to repeat the steps. This should be an
74087a18d3fSMadhavan Venkataraman 			 * infrequent event.
74187a18d3fSMadhavan Venkataraman 			 */
74287a18d3fSMadhavan Venkataraman 			goto again;
74387a18d3fSMadhavan Venkataraman 		}
74487a18d3fSMadhavan Venkataraman 		ct->ct_lfree = cl->cl_next;
74587a18d3fSMadhavan Venkataraman 		cl->cl_expiration = expiration;
746*07247649SMadhavan Venkataraman 		cl->cl_flags = flags;
74787a18d3fSMadhavan Venkataraman 
74887a18d3fSMadhavan Venkataraman 		CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl);
74987a18d3fSMadhavan Venkataraman 
75087a18d3fSMadhavan Venkataraman 		/*
75187a18d3fSMadhavan Venkataraman 		 * This is a new expiration. So, insert it into the heap.
75287a18d3fSMadhavan Venkataraman 		 * This will also reprogram the cyclic, if the expiration
75387a18d3fSMadhavan Venkataraman 		 * propagated to the root of the heap.
75487a18d3fSMadhavan Venkataraman 		 */
75587a18d3fSMadhavan Venkataraman 		callout_heap_insert(ct, expiration);
75687a18d3fSMadhavan Venkataraman 	}
75787a18d3fSMadhavan Venkataraman 	cp->c_list = cl;
75887a18d3fSMadhavan Venkataraman 	CALLOUT_APPEND(ct, cp);
75987a18d3fSMadhavan Venkataraman 
76087a18d3fSMadhavan Venkataraman 	ct->ct_timeouts++;
76187a18d3fSMadhavan Venkataraman 	ct->ct_timeouts_pending++;
76287a18d3fSMadhavan Venkataraman 
76387a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
76487a18d3fSMadhavan Venkataraman 
76587a18d3fSMadhavan Venkataraman 	kpreempt_enable();
76687a18d3fSMadhavan Venkataraman 
76787a18d3fSMadhavan Venkataraman 	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
76887a18d3fSMadhavan Venkataraman 	    "timeout:%K(%p) in %llx expiration, cp %p", func, arg, expiration,
76987a18d3fSMadhavan Venkataraman 	    cp);
77087a18d3fSMadhavan Venkataraman 
77187a18d3fSMadhavan Venkataraman 	return (id);
77287a18d3fSMadhavan Venkataraman }
77387a18d3fSMadhavan Venkataraman 
77487a18d3fSMadhavan Venkataraman timeout_id_t
77587a18d3fSMadhavan Venkataraman timeout(void (*func)(void *), void *arg, clock_t delta)
77687a18d3fSMadhavan Venkataraman {
77787a18d3fSMadhavan Venkataraman 	ulong_t id;
77887a18d3fSMadhavan Venkataraman 
77987a18d3fSMadhavan Venkataraman 	/*
7807c478bd9Sstevel@tonic-gate 	 * Make sure the callout runs at least 1 tick in the future.
7817c478bd9Sstevel@tonic-gate 	 */
7827c478bd9Sstevel@tonic-gate 	if (delta <= 0)
7837c478bd9Sstevel@tonic-gate 		delta = 1;
784454ab202SMadhavan Venkataraman 	else if (delta > callout_max_ticks)
785454ab202SMadhavan Venkataraman 		delta = callout_max_ticks;
7867c478bd9Sstevel@tonic-gate 
78787a18d3fSMadhavan Venkataraman 	id =  (ulong_t)timeout_generic(CALLOUT_NORMAL, func, arg,
78887a18d3fSMadhavan Venkataraman 	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);
7897c478bd9Sstevel@tonic-gate 
7907c478bd9Sstevel@tonic-gate 	return ((timeout_id_t)id);
7917c478bd9Sstevel@tonic-gate }
7927c478bd9Sstevel@tonic-gate 
79387a18d3fSMadhavan Venkataraman /*
79487a18d3fSMadhavan Venkataraman  * Convenience function that creates a normal callout with default parameters
79587a18d3fSMadhavan Venkataraman  * and returns a full ID.
79687a18d3fSMadhavan Venkataraman  */
79787a18d3fSMadhavan Venkataraman callout_id_t
79887a18d3fSMadhavan Venkataraman timeout_default(void (*func)(void *), void *arg, clock_t delta)
7997c478bd9Sstevel@tonic-gate {
80087a18d3fSMadhavan Venkataraman 	callout_id_t id;
8017c478bd9Sstevel@tonic-gate 
80287a18d3fSMadhavan Venkataraman 	/*
80387a18d3fSMadhavan Venkataraman 	 * Make sure the callout runs at least 1 tick in the future.
80487a18d3fSMadhavan Venkataraman 	 */
80587a18d3fSMadhavan Venkataraman 	if (delta <= 0)
80687a18d3fSMadhavan Venkataraman 		delta = 1;
807454ab202SMadhavan Venkataraman 	else if (delta > callout_max_ticks)
808454ab202SMadhavan Venkataraman 		delta = callout_max_ticks;
80987a18d3fSMadhavan Venkataraman 
81087a18d3fSMadhavan Venkataraman 	id = timeout_generic(CALLOUT_NORMAL, func, arg, TICK_TO_NSEC(delta),
81187a18d3fSMadhavan Venkataraman 	    nsec_per_tick, 0);
81287a18d3fSMadhavan Venkataraman 
81387a18d3fSMadhavan Venkataraman 	return (id);
8147c478bd9Sstevel@tonic-gate }
8157c478bd9Sstevel@tonic-gate 
8167c478bd9Sstevel@tonic-gate timeout_id_t
8177c478bd9Sstevel@tonic-gate realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
8187c478bd9Sstevel@tonic-gate {
81987a18d3fSMadhavan Venkataraman 	ulong_t id;
82087a18d3fSMadhavan Venkataraman 
82187a18d3fSMadhavan Venkataraman 	/*
82287a18d3fSMadhavan Venkataraman 	 * Make sure the callout runs at least 1 tick in the future.
82387a18d3fSMadhavan Venkataraman 	 */
82487a18d3fSMadhavan Venkataraman 	if (delta <= 0)
82587a18d3fSMadhavan Venkataraman 		delta = 1;
826454ab202SMadhavan Venkataraman 	else if (delta > callout_max_ticks)
827454ab202SMadhavan Venkataraman 		delta = callout_max_ticks;
82887a18d3fSMadhavan Venkataraman 
82987a18d3fSMadhavan Venkataraman 	id =  (ulong_t)timeout_generic(CALLOUT_REALTIME, func, arg,
83087a18d3fSMadhavan Venkataraman 	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);
83187a18d3fSMadhavan Venkataraman 
83287a18d3fSMadhavan Venkataraman 	return ((timeout_id_t)id);
8337c478bd9Sstevel@tonic-gate }
8347c478bd9Sstevel@tonic-gate 
83587a18d3fSMadhavan Venkataraman /*
83687a18d3fSMadhavan Venkataraman  * Convenience function that creates a realtime callout with default parameters
83787a18d3fSMadhavan Venkataraman  * and returns a full ID.
83887a18d3fSMadhavan Venkataraman  */
83987a18d3fSMadhavan Venkataraman callout_id_t
84087a18d3fSMadhavan Venkataraman realtime_timeout_default(void (*func)(void *), void *arg, clock_t delta)
8417c478bd9Sstevel@tonic-gate {
84287a18d3fSMadhavan Venkataraman 	callout_id_t id;
84387a18d3fSMadhavan Venkataraman 
84487a18d3fSMadhavan Venkataraman 	/*
84587a18d3fSMadhavan Venkataraman 	 * Make sure the callout runs at least 1 tick in the future.
84687a18d3fSMadhavan Venkataraman 	 */
84787a18d3fSMadhavan Venkataraman 	if (delta <= 0)
84887a18d3fSMadhavan Venkataraman 		delta = 1;
849454ab202SMadhavan Venkataraman 	else if (delta > callout_max_ticks)
850454ab202SMadhavan Venkataraman 		delta = callout_max_ticks;
85187a18d3fSMadhavan Venkataraman 
85287a18d3fSMadhavan Venkataraman 	id = timeout_generic(CALLOUT_REALTIME, func, arg, TICK_TO_NSEC(delta),
85387a18d3fSMadhavan Venkataraman 	    nsec_per_tick, 0);
85487a18d3fSMadhavan Venkataraman 
85587a18d3fSMadhavan Venkataraman 	return (id);
85687a18d3fSMadhavan Venkataraman }
85787a18d3fSMadhavan Venkataraman 
85887a18d3fSMadhavan Venkataraman hrtime_t
85987a18d3fSMadhavan Venkataraman untimeout_generic(callout_id_t id, int nowait)
86087a18d3fSMadhavan Venkataraman {
8617c478bd9Sstevel@tonic-gate 	callout_table_t *ct;
8627c478bd9Sstevel@tonic-gate 	callout_t *cp;
8637c478bd9Sstevel@tonic-gate 	callout_id_t xid;
86487a18d3fSMadhavan Venkataraman 	int hash;
86587a18d3fSMadhavan Venkataraman 	callout_id_t bogus;
8667c478bd9Sstevel@tonic-gate 
86787a18d3fSMadhavan Venkataraman 	ct = &callout_table[CALLOUT_ID_TO_TABLE(id)];
86887a18d3fSMadhavan Venkataraman 	hash = CALLOUT_IDHASH(id);
8697c478bd9Sstevel@tonic-gate 
87087a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
8717c478bd9Sstevel@tonic-gate 
87287a18d3fSMadhavan Venkataraman 	/*
87387a18d3fSMadhavan Venkataraman 	 * Search the ID hash table for the callout.
87487a18d3fSMadhavan Venkataraman 	 */
87587a18d3fSMadhavan Venkataraman 	for (cp = ct->ct_idhash[hash].ch_head; cp; cp = cp->c_idnext) {
8767c478bd9Sstevel@tonic-gate 
87787a18d3fSMadhavan Venkataraman 		xid = cp->c_xid;
8787c478bd9Sstevel@tonic-gate 
87987a18d3fSMadhavan Venkataraman 		/*
88087a18d3fSMadhavan Venkataraman 		 * Match the ID and generation number.
88187a18d3fSMadhavan Venkataraman 		 */
88287a18d3fSMadhavan Venkataraman 		if ((xid & CALLOUT_ID_MASK) != id)
8837c478bd9Sstevel@tonic-gate 			continue;
8847c478bd9Sstevel@tonic-gate 
88587a18d3fSMadhavan Venkataraman 		if ((xid & CALLOUT_EXECUTING) == 0) {
88687a18d3fSMadhavan Venkataraman 			hrtime_t expiration;
88787a18d3fSMadhavan Venkataraman 
88887a18d3fSMadhavan Venkataraman 			/*
88987a18d3fSMadhavan Venkataraman 			 * Delete the callout. If the callout list becomes
89087a18d3fSMadhavan Venkataraman 			 * NULL, we don't remove it from the table. This is
89187a18d3fSMadhavan Venkataraman 			 * so it can be reused. If the empty callout list
89287a18d3fSMadhavan Venkataraman 			 * corresponds to the top of the the callout heap, we
89387a18d3fSMadhavan Venkataraman 			 * don't reprogram the table cyclic here. This is in
89487a18d3fSMadhavan Venkataraman 			 * order to avoid lots of X-calls to the CPU associated
89587a18d3fSMadhavan Venkataraman 			 * with the callout table.
89687a18d3fSMadhavan Venkataraman 			 */
897*07247649SMadhavan Venkataraman 			expiration = cp->c_list->cl_expiration;
89887a18d3fSMadhavan Venkataraman 			CALLOUT_DELETE(ct, cp);
89987a18d3fSMadhavan Venkataraman 			cp->c_idnext = ct->ct_free;
90087a18d3fSMadhavan Venkataraman 			ct->ct_free = cp;
90187a18d3fSMadhavan Venkataraman 			ct->ct_untimeouts_unexpired++;
90287a18d3fSMadhavan Venkataraman 			ct->ct_timeouts_pending--;
90387a18d3fSMadhavan Venkataraman 			mutex_exit(&ct->ct_mutex);
90487a18d3fSMadhavan Venkataraman 
90587a18d3fSMadhavan Venkataraman 			expiration -= gethrtime();
90687a18d3fSMadhavan Venkataraman 			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
90787a18d3fSMadhavan Venkataraman 			    "untimeout:ID %lx hrtime left %llx", id,
90887a18d3fSMadhavan Venkataraman 			    expiration);
90987a18d3fSMadhavan Venkataraman 			return (expiration < 0 ? 0 : expiration);
91087a18d3fSMadhavan Venkataraman 		}
91187a18d3fSMadhavan Venkataraman 
91287a18d3fSMadhavan Venkataraman 		ct->ct_untimeouts_executing++;
9137c478bd9Sstevel@tonic-gate 		/*
9147c478bd9Sstevel@tonic-gate 		 * The callout we want to delete is currently executing.
9157c478bd9Sstevel@tonic-gate 		 * The DDI states that we must wait until the callout
916*07247649SMadhavan Venkataraman 		 * completes before returning, so we block on c_done until the
91787a18d3fSMadhavan Venkataraman 		 * callout ID changes (to the old ID if it's on the freelist,
9187c478bd9Sstevel@tonic-gate 		 * or to a new callout ID if it's in use).  This implicitly
9197c478bd9Sstevel@tonic-gate 		 * assumes that callout structures are persistent (they are).
9207c478bd9Sstevel@tonic-gate 		 */
921*07247649SMadhavan Venkataraman 		if (cp->c_executor == curthread) {
9227c478bd9Sstevel@tonic-gate 			/*
9237c478bd9Sstevel@tonic-gate 			 * The timeout handler called untimeout() on itself.
9247c478bd9Sstevel@tonic-gate 			 * Stupid, but legal.  We can't wait for the timeout
9257c478bd9Sstevel@tonic-gate 			 * to complete without deadlocking, so we just return.
9267c478bd9Sstevel@tonic-gate 			 */
92787a18d3fSMadhavan Venkataraman 			mutex_exit(&ct->ct_mutex);
9287c478bd9Sstevel@tonic-gate 			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
9297c478bd9Sstevel@tonic-gate 			    "untimeout_self:ID %x", id);
9307c478bd9Sstevel@tonic-gate 			return (-1);
9317c478bd9Sstevel@tonic-gate 		}
93287a18d3fSMadhavan Venkataraman 		if (nowait == 0) {
93387a18d3fSMadhavan Venkataraman 			/*
93487a18d3fSMadhavan Venkataraman 			 * We need to wait. Indicate that we are waiting by
935*07247649SMadhavan Venkataraman 			 * incrementing c_waiting. This prevents the executor
936*07247649SMadhavan Venkataraman 			 * from doing a wakeup on c_done if there are no
93787a18d3fSMadhavan Venkataraman 			 * waiters.
93887a18d3fSMadhavan Venkataraman 			 */
93987a18d3fSMadhavan Venkataraman 			while (cp->c_xid == xid) {
940*07247649SMadhavan Venkataraman 				cp->c_waiting = 1;
941*07247649SMadhavan Venkataraman 				cv_wait(&cp->c_done, &ct->ct_mutex);
94287a18d3fSMadhavan Venkataraman 			}
94387a18d3fSMadhavan Venkataraman 		}
94487a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
9457c478bd9Sstevel@tonic-gate 		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
9467c478bd9Sstevel@tonic-gate 		    "untimeout_executing:ID %lx", id);
9477c478bd9Sstevel@tonic-gate 		return (-1);
9487c478bd9Sstevel@tonic-gate 	}
94987a18d3fSMadhavan Venkataraman 	ct->ct_untimeouts_expired++;
9507c478bd9Sstevel@tonic-gate 
95187a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
9527c478bd9Sstevel@tonic-gate 	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
9537c478bd9Sstevel@tonic-gate 	    "untimeout_bogus_id:ID %lx", id);
9547c478bd9Sstevel@tonic-gate 
9557c478bd9Sstevel@tonic-gate 	/*
9567c478bd9Sstevel@tonic-gate 	 * We didn't find the specified callout ID.  This means either
9577c478bd9Sstevel@tonic-gate 	 * (1) the callout already fired, or (2) the caller passed us
9587c478bd9Sstevel@tonic-gate 	 * a bogus value.  Perform a sanity check to detect case (2).
9597c478bd9Sstevel@tonic-gate 	 */
960*07247649SMadhavan Venkataraman 	bogus = (CALLOUT_EXECUTING | CALLOUT_COUNTER_HIGH);
96187a18d3fSMadhavan Venkataraman 	if (((id & bogus) != CALLOUT_COUNTER_HIGH) && (id != 0))
96287a18d3fSMadhavan Venkataraman 		panic("untimeout: impossible timeout id %llx",
96387a18d3fSMadhavan Venkataraman 		    (unsigned long long)id);
9647c478bd9Sstevel@tonic-gate 
9657c478bd9Sstevel@tonic-gate 	return (-1);
9667c478bd9Sstevel@tonic-gate }
9677c478bd9Sstevel@tonic-gate 
96887a18d3fSMadhavan Venkataraman clock_t
96987a18d3fSMadhavan Venkataraman untimeout(timeout_id_t id_arg)
97087a18d3fSMadhavan Venkataraman {
97187a18d3fSMadhavan Venkataraman 	hrtime_t hleft;
97287a18d3fSMadhavan Venkataraman 	clock_t tleft;
97387a18d3fSMadhavan Venkataraman 	callout_id_t id;
97487a18d3fSMadhavan Venkataraman 
97587a18d3fSMadhavan Venkataraman 	id = (ulong_t)id_arg;
97687a18d3fSMadhavan Venkataraman 	hleft = untimeout_generic(id, 0);
97787a18d3fSMadhavan Venkataraman 	if (hleft < 0)
97887a18d3fSMadhavan Venkataraman 		tleft = -1;
97987a18d3fSMadhavan Venkataraman 	else if (hleft == 0)
98087a18d3fSMadhavan Venkataraman 		tleft = 0;
98187a18d3fSMadhavan Venkataraman 	else
98287a18d3fSMadhavan Venkataraman 		tleft = NSEC_TO_TICK(hleft);
98387a18d3fSMadhavan Venkataraman 
98487a18d3fSMadhavan Venkataraman 	return (tleft);
98587a18d3fSMadhavan Venkataraman }
98687a18d3fSMadhavan Venkataraman 
9877c478bd9Sstevel@tonic-gate /*
98887a18d3fSMadhavan Venkataraman  * Convenience function to untimeout a timeout with a full ID with default
98987a18d3fSMadhavan Venkataraman  * parameters.
99087a18d3fSMadhavan Venkataraman  */
99187a18d3fSMadhavan Venkataraman clock_t
99287a18d3fSMadhavan Venkataraman untimeout_default(callout_id_t id, int nowait)
99387a18d3fSMadhavan Venkataraman {
99487a18d3fSMadhavan Venkataraman 	hrtime_t hleft;
99587a18d3fSMadhavan Venkataraman 	clock_t tleft;
99687a18d3fSMadhavan Venkataraman 
99787a18d3fSMadhavan Venkataraman 	hleft = untimeout_generic(id, nowait);
99887a18d3fSMadhavan Venkataraman 	if (hleft < 0)
99987a18d3fSMadhavan Venkataraman 		tleft = -1;
100087a18d3fSMadhavan Venkataraman 	else if (hleft == 0)
100187a18d3fSMadhavan Venkataraman 		tleft = 0;
100287a18d3fSMadhavan Venkataraman 	else
100387a18d3fSMadhavan Venkataraman 		tleft = NSEC_TO_TICK(hleft);
100487a18d3fSMadhavan Venkataraman 
100587a18d3fSMadhavan Venkataraman 	return (tleft);
100687a18d3fSMadhavan Venkataraman }
100787a18d3fSMadhavan Venkataraman 
100887a18d3fSMadhavan Venkataraman /*
100987a18d3fSMadhavan Venkataraman  * Expire all the callouts queued in the specified callout list.
10107c478bd9Sstevel@tonic-gate  */
10117c478bd9Sstevel@tonic-gate static void
101287a18d3fSMadhavan Venkataraman callout_list_expire(callout_table_t *ct, callout_list_t *cl)
10137c478bd9Sstevel@tonic-gate {
1014*07247649SMadhavan Venkataraman 	callout_t *cp, *cnext;
10157c478bd9Sstevel@tonic-gate 
101687a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
101787a18d3fSMadhavan Venkataraman 	ASSERT(cl != NULL);
10187c478bd9Sstevel@tonic-gate 
1019*07247649SMadhavan Venkataraman 	for (cp = cl->cl_callouts.ch_head; cp != NULL; cp = cnext) {
1020*07247649SMadhavan Venkataraman 		/*
1021*07247649SMadhavan Venkataraman 		 * Multiple executor threads could be running at the same
1022*07247649SMadhavan Venkataraman 		 * time. If this callout is already being executed,
1023*07247649SMadhavan Venkataraman 		 * go on to the next one.
1024*07247649SMadhavan Venkataraman 		 */
1025*07247649SMadhavan Venkataraman 		if (cp->c_xid & CALLOUT_EXECUTING) {
1026*07247649SMadhavan Venkataraman 			cnext = cp->c_clnext;
1027*07247649SMadhavan Venkataraman 			continue;
1028*07247649SMadhavan Venkataraman 		}
102987a18d3fSMadhavan Venkataraman 
1030f635d46aSqiao 		/*
103187a18d3fSMadhavan Venkataraman 		 * Indicate to untimeout() that a callout is
103287a18d3fSMadhavan Venkataraman 		 * being expired by the executor.
1033f635d46aSqiao 		 */
103487a18d3fSMadhavan Venkataraman 		cp->c_xid |= CALLOUT_EXECUTING;
1035*07247649SMadhavan Venkataraman 		cp->c_executor = curthread;
103687a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
103787a18d3fSMadhavan Venkataraman 
10387c478bd9Sstevel@tonic-gate 		DTRACE_PROBE1(callout__start, callout_t *, cp);
10397c478bd9Sstevel@tonic-gate 		(*cp->c_func)(cp->c_arg);
10407c478bd9Sstevel@tonic-gate 		DTRACE_PROBE1(callout__end, callout_t *, cp);
10417c478bd9Sstevel@tonic-gate 
104287a18d3fSMadhavan Venkataraman 		mutex_enter(&ct->ct_mutex);
104387a18d3fSMadhavan Venkataraman 
104487a18d3fSMadhavan Venkataraman 		ct->ct_expirations++;
104587a18d3fSMadhavan Venkataraman 		ct->ct_timeouts_pending--;
10467c478bd9Sstevel@tonic-gate 		/*
1047*07247649SMadhavan Venkataraman 		 * Indicate completion for c_done.
10487c478bd9Sstevel@tonic-gate 		 */
104987a18d3fSMadhavan Venkataraman 		cp->c_xid &= ~CALLOUT_EXECUTING;
1050*07247649SMadhavan Venkataraman 		cp->c_executor = NULL;
1051*07247649SMadhavan Venkataraman 		cnext = cp->c_clnext;
1052f635d46aSqiao 
10537c478bd9Sstevel@tonic-gate 		/*
105487a18d3fSMadhavan Venkataraman 		 * Delete callout from ID hash table and the callout
105587a18d3fSMadhavan Venkataraman 		 * list, return to freelist, and tell any untimeout() that
105687a18d3fSMadhavan Venkataraman 		 * cares that we're done.
10577c478bd9Sstevel@tonic-gate 		 */
105887a18d3fSMadhavan Venkataraman 		CALLOUT_DELETE(ct, cp);
105987a18d3fSMadhavan Venkataraman 		cp->c_idnext = ct->ct_free;
106087a18d3fSMadhavan Venkataraman 		ct->ct_free = cp;
106187a18d3fSMadhavan Venkataraman 
1062*07247649SMadhavan Venkataraman 		if (cp->c_waiting) {
1063*07247649SMadhavan Venkataraman 			cp->c_waiting = 0;
1064*07247649SMadhavan Venkataraman 			cv_broadcast(&cp->c_done);
10657c478bd9Sstevel@tonic-gate 		}
106687a18d3fSMadhavan Venkataraman 	}
10677c478bd9Sstevel@tonic-gate }
10687c478bd9Sstevel@tonic-gate 
10697c478bd9Sstevel@tonic-gate /*
107087a18d3fSMadhavan Venkataraman  * Execute all expired callout lists for a callout table.
10717c478bd9Sstevel@tonic-gate  */
10727c478bd9Sstevel@tonic-gate static void
107387a18d3fSMadhavan Venkataraman callout_expire(callout_table_t *ct)
10747c478bd9Sstevel@tonic-gate {
107587a18d3fSMadhavan Venkataraman 	callout_list_t *cl, *clnext;
1076f635d46aSqiao 
107787a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
10787c478bd9Sstevel@tonic-gate 
107987a18d3fSMadhavan Venkataraman 	for (cl = ct->ct_expired.ch_head; (cl != NULL); cl = clnext) {
1080f635d46aSqiao 		/*
108187a18d3fSMadhavan Venkataraman 		 * Expire all the callouts in this callout list.
108287a18d3fSMadhavan Venkataraman 		 */
108387a18d3fSMadhavan Venkataraman 		callout_list_expire(ct, cl);
108487a18d3fSMadhavan Venkataraman 
1085*07247649SMadhavan Venkataraman 		clnext = cl->cl_next;
1086*07247649SMadhavan Venkataraman 		if (cl->cl_callouts.ch_head == NULL) {
108787a18d3fSMadhavan Venkataraman 			/*
108887a18d3fSMadhavan Venkataraman 			 * Free the callout list.
108987a18d3fSMadhavan Venkataraman 			 */
109087a18d3fSMadhavan Venkataraman 			CALLOUT_LIST_DELETE(ct->ct_expired, cl);
109187a18d3fSMadhavan Venkataraman 			cl->cl_next = ct->ct_lfree;
109287a18d3fSMadhavan Venkataraman 			ct->ct_lfree = cl;
109387a18d3fSMadhavan Venkataraman 		}
109487a18d3fSMadhavan Venkataraman 	}
1095*07247649SMadhavan Venkataraman }
109687a18d3fSMadhavan Venkataraman 
109787a18d3fSMadhavan Venkataraman /*
109887a18d3fSMadhavan Venkataraman  * The cyclic handlers below process callouts in two steps:
109987a18d3fSMadhavan Venkataraman  *
110087a18d3fSMadhavan Venkataraman  *	1. Find all expired callout lists and queue them in a separate
110187a18d3fSMadhavan Venkataraman  *	   list of expired callouts.
110287a18d3fSMadhavan Venkataraman  *	2. Execute the expired callout lists.
110387a18d3fSMadhavan Venkataraman  *
110487a18d3fSMadhavan Venkataraman  * This is done for two reasons:
110587a18d3fSMadhavan Venkataraman  *
110687a18d3fSMadhavan Venkataraman  *	1. We want to quickly find the next earliest expiration to program
110787a18d3fSMadhavan Venkataraman  *	   the cyclic to and reprogram it. We can do this right at the end
110887a18d3fSMadhavan Venkataraman  *	   of step 1.
110987a18d3fSMadhavan Venkataraman  *	2. The realtime cyclic handler expires callouts in place. However,
111087a18d3fSMadhavan Venkataraman  *	   for normal callouts, callouts are expired by a taskq thread.
111187a18d3fSMadhavan Venkataraman  *	   So, it is simpler and more robust to have the taskq thread just
111287a18d3fSMadhavan Venkataraman  *	   do step 2.
111387a18d3fSMadhavan Venkataraman  */
111487a18d3fSMadhavan Venkataraman 
111587a18d3fSMadhavan Venkataraman /*
111687a18d3fSMadhavan Venkataraman  * Realtime callout cyclic handler.
11177c478bd9Sstevel@tonic-gate  */
11187c478bd9Sstevel@tonic-gate void
111987a18d3fSMadhavan Venkataraman callout_realtime(callout_table_t *ct)
11207c478bd9Sstevel@tonic-gate {
112187a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
112287a18d3fSMadhavan Venkataraman 	callout_heap_delete(ct);
112387a18d3fSMadhavan Venkataraman 	callout_expire(ct);
112487a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
112587a18d3fSMadhavan Venkataraman }
11267c478bd9Sstevel@tonic-gate 
112787a18d3fSMadhavan Venkataraman void
112887a18d3fSMadhavan Venkataraman callout_execute(callout_table_t *ct)
112987a18d3fSMadhavan Venkataraman {
113087a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
113187a18d3fSMadhavan Venkataraman 	callout_expire(ct);
113287a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
113387a18d3fSMadhavan Venkataraman }
11347c478bd9Sstevel@tonic-gate 
113587a18d3fSMadhavan Venkataraman /*
113687a18d3fSMadhavan Venkataraman  * Normal callout cyclic handler.
113787a18d3fSMadhavan Venkataraman  */
113887a18d3fSMadhavan Venkataraman void
113987a18d3fSMadhavan Venkataraman callout_normal(callout_table_t *ct)
114087a18d3fSMadhavan Venkataraman {
1141*07247649SMadhavan Venkataraman 	int i, exec;
114287a18d3fSMadhavan Venkataraman 
114387a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
114487a18d3fSMadhavan Venkataraman 	callout_heap_delete(ct);
1145*07247649SMadhavan Venkataraman 	CALLOUT_EXEC_COMPUTE(ct, exec);
114687a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
114787a18d3fSMadhavan Venkataraman 
1148*07247649SMadhavan Venkataraman 	for (i = 0; i < exec; i++) {
114987a18d3fSMadhavan Venkataraman 		ASSERT(ct->ct_taskq != NULL);
115087a18d3fSMadhavan Venkataraman 		(void) taskq_dispatch(ct->ct_taskq,
115187a18d3fSMadhavan Venkataraman 		    (task_func_t *)callout_execute, ct, TQ_NOSLEEP);
115287a18d3fSMadhavan Venkataraman 	}
115387a18d3fSMadhavan Venkataraman }
115487a18d3fSMadhavan Venkataraman 
115587a18d3fSMadhavan Venkataraman /*
115687a18d3fSMadhavan Venkataraman  * Suspend callout processing.
115787a18d3fSMadhavan Venkataraman  */
115887a18d3fSMadhavan Venkataraman static void
115987a18d3fSMadhavan Venkataraman callout_suspend(void)
116087a18d3fSMadhavan Venkataraman {
116187a18d3fSMadhavan Venkataraman 	int t, f;
116287a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
116387a18d3fSMadhavan Venkataraman 
116487a18d3fSMadhavan Venkataraman 	/*
116587a18d3fSMadhavan Venkataraman 	 * Traverse every callout table in the system and suspend callout
116687a18d3fSMadhavan Venkataraman 	 * processing.
116787a18d3fSMadhavan Venkataraman 	 *
116887a18d3fSMadhavan Venkataraman 	 * We need to suspend all the tables (including the inactive ones)
116987a18d3fSMadhavan Venkataraman 	 * so that if a table is made active while the suspend is still on,
117087a18d3fSMadhavan Venkataraman 	 * the table remains suspended.
117187a18d3fSMadhavan Venkataraman 	 */
117287a18d3fSMadhavan Venkataraman 	for (f = 0; f < max_ncpus; f++) {
117387a18d3fSMadhavan Venkataraman 		for (t = 0; t < CALLOUT_NTYPES; t++) {
117487a18d3fSMadhavan Venkataraman 			ct = &callout_table[CALLOUT_TABLE(t, f)];
117587a18d3fSMadhavan Venkataraman 
117687a18d3fSMadhavan Venkataraman 			mutex_enter(&ct->ct_mutex);
1177454ab202SMadhavan Venkataraman 			ct->ct_suspend++;
117887a18d3fSMadhavan Venkataraman 			if (ct->ct_cyclic == CYCLIC_NONE) {
117987a18d3fSMadhavan Venkataraman 				mutex_exit(&ct->ct_mutex);
118087a18d3fSMadhavan Venkataraman 				continue;
118187a18d3fSMadhavan Venkataraman 			}
1182454ab202SMadhavan Venkataraman 			if (ct->ct_suspend == 1)
1183454ab202SMadhavan Venkataraman 				(void) cyclic_reprogram(ct->ct_cyclic,
1184454ab202SMadhavan Venkataraman 				    CY_INFINITY);
118587a18d3fSMadhavan Venkataraman 			mutex_exit(&ct->ct_mutex);
118687a18d3fSMadhavan Venkataraman 		}
118787a18d3fSMadhavan Venkataraman 	}
118887a18d3fSMadhavan Venkataraman }
118987a18d3fSMadhavan Venkataraman 
119087a18d3fSMadhavan Venkataraman static void
119187a18d3fSMadhavan Venkataraman callout_adjust(callout_table_t *ct, hrtime_t delta)
119287a18d3fSMadhavan Venkataraman {
119387a18d3fSMadhavan Venkataraman 	int hash, newhash;
119487a18d3fSMadhavan Venkataraman 	hrtime_t expiration;
119587a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
119687a18d3fSMadhavan Venkataraman 	callout_hash_t list;
119787a18d3fSMadhavan Venkataraman 
119887a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
119987a18d3fSMadhavan Venkataraman 
120087a18d3fSMadhavan Venkataraman 	/*
120187a18d3fSMadhavan Venkataraman 	 * In order to adjust the expirations, we null out the heap. Then,
120287a18d3fSMadhavan Venkataraman 	 * we reinsert adjusted expirations in the heap. Keeps it simple.
120387a18d3fSMadhavan Venkataraman 	 * Note that since the CALLOUT_TABLE_SUSPENDED flag is set by the
120487a18d3fSMadhavan Venkataraman 	 * caller, the heap insert does not result in cyclic reprogramming.
120587a18d3fSMadhavan Venkataraman 	 */
120687a18d3fSMadhavan Venkataraman 	ct->ct_heap_num = 0;
120787a18d3fSMadhavan Venkataraman 
120887a18d3fSMadhavan Venkataraman 	/*
120987a18d3fSMadhavan Venkataraman 	 * First, remove all the callout lists from the table and string them
121087a18d3fSMadhavan Venkataraman 	 * in a list.
121187a18d3fSMadhavan Venkataraman 	 */
121287a18d3fSMadhavan Venkataraman 	list.ch_head = list.ch_tail = NULL;
121387a18d3fSMadhavan Venkataraman 	for (hash = 0; hash < CALLOUT_BUCKETS; hash++) {
121487a18d3fSMadhavan Venkataraman 		while ((cl = ct->ct_clhash[hash].ch_head) != NULL) {
121587a18d3fSMadhavan Venkataraman 			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
121687a18d3fSMadhavan Venkataraman 			CALLOUT_LIST_APPEND(list, cl);
121787a18d3fSMadhavan Venkataraman 		}
121887a18d3fSMadhavan Venkataraman 	}
121987a18d3fSMadhavan Venkataraman 
122087a18d3fSMadhavan Venkataraman 	/*
122187a18d3fSMadhavan Venkataraman 	 * Now, traverse the callout lists and adjust their expirations.
122287a18d3fSMadhavan Venkataraman 	 */
122387a18d3fSMadhavan Venkataraman 	while ((cl = list.ch_head) != NULL) {
122487a18d3fSMadhavan Venkataraman 		CALLOUT_LIST_DELETE(list, cl);
122587a18d3fSMadhavan Venkataraman 		/*
122687a18d3fSMadhavan Venkataraman 		 * Set the new expiration and reinsert in the right
122787a18d3fSMadhavan Venkataraman 		 * hash bucket.
122887a18d3fSMadhavan Venkataraman 		 */
122987a18d3fSMadhavan Venkataraman 		expiration = cl->cl_expiration;
123087a18d3fSMadhavan Venkataraman 		expiration += delta;
123187a18d3fSMadhavan Venkataraman 		cl->cl_expiration = expiration;
123287a18d3fSMadhavan Venkataraman 		newhash = CALLOUT_CLHASH(expiration);
123387a18d3fSMadhavan Venkataraman 		CALLOUT_LIST_INSERT(ct->ct_clhash[newhash], cl);
123487a18d3fSMadhavan Venkataraman 		callout_heap_insert(ct, expiration);
123587a18d3fSMadhavan Venkataraman 	}
123687a18d3fSMadhavan Venkataraman }
123787a18d3fSMadhavan Venkataraman 
123887a18d3fSMadhavan Venkataraman /*
123987a18d3fSMadhavan Venkataraman  * Resume callout processing.
124087a18d3fSMadhavan Venkataraman  */
124187a18d3fSMadhavan Venkataraman static void
124287a18d3fSMadhavan Venkataraman callout_resume(hrtime_t delta)
124387a18d3fSMadhavan Venkataraman {
124487a18d3fSMadhavan Venkataraman 	hrtime_t exp;
124587a18d3fSMadhavan Venkataraman 	int t, f;
124687a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
124787a18d3fSMadhavan Venkataraman 
124887a18d3fSMadhavan Venkataraman 	/*
124987a18d3fSMadhavan Venkataraman 	 * Traverse every callout table in the system and resume callout
125087a18d3fSMadhavan Venkataraman 	 * processing. For active tables, perform any hrtime adjustments
125187a18d3fSMadhavan Venkataraman 	 * necessary.
125287a18d3fSMadhavan Venkataraman 	 */
125387a18d3fSMadhavan Venkataraman 	for (f = 0; f < max_ncpus; f++) {
125487a18d3fSMadhavan Venkataraman 		for (t = 0; t < CALLOUT_NTYPES; t++) {
125587a18d3fSMadhavan Venkataraman 			ct = &callout_table[CALLOUT_TABLE(t, f)];
125687a18d3fSMadhavan Venkataraman 
125787a18d3fSMadhavan Venkataraman 			mutex_enter(&ct->ct_mutex);
125887a18d3fSMadhavan Venkataraman 			if (ct->ct_cyclic == CYCLIC_NONE) {
1259454ab202SMadhavan Venkataraman 				ct->ct_suspend--;
126087a18d3fSMadhavan Venkataraman 				mutex_exit(&ct->ct_mutex);
126187a18d3fSMadhavan Venkataraman 				continue;
126287a18d3fSMadhavan Venkataraman 			}
126387a18d3fSMadhavan Venkataraman 
126487a18d3fSMadhavan Venkataraman 			if (delta)
126587a18d3fSMadhavan Venkataraman 				callout_adjust(ct, delta);
126687a18d3fSMadhavan Venkataraman 
1267454ab202SMadhavan Venkataraman 			ct->ct_suspend--;
1268454ab202SMadhavan Venkataraman 			if (ct->ct_suspend == 0) {
126987a18d3fSMadhavan Venkataraman 				/*
1270454ab202SMadhavan Venkataraman 				 * If the expired list is non-empty, then have
1271454ab202SMadhavan Venkataraman 				 * the cyclic expire immediately. Else, program
1272454ab202SMadhavan Venkataraman 				 * the cyclic based on the heap.
127387a18d3fSMadhavan Venkataraman 				 */
127487a18d3fSMadhavan Venkataraman 				if (ct->ct_expired.ch_head != NULL)
127587a18d3fSMadhavan Venkataraman 					exp = gethrtime();
127687a18d3fSMadhavan Venkataraman 				else if (ct->ct_heap_num > 0)
127787a18d3fSMadhavan Venkataraman 					exp = ct->ct_heap[0];
127887a18d3fSMadhavan Venkataraman 				else
127987a18d3fSMadhavan Venkataraman 					exp = 0;
128087a18d3fSMadhavan Venkataraman 				if (exp != 0)
1281454ab202SMadhavan Venkataraman 					(void) cyclic_reprogram(ct->ct_cyclic,
1282454ab202SMadhavan Venkataraman 					    exp);
1283454ab202SMadhavan Venkataraman 			}
128487a18d3fSMadhavan Venkataraman 			mutex_exit(&ct->ct_mutex);
128587a18d3fSMadhavan Venkataraman 		}
128687a18d3fSMadhavan Venkataraman 	}
12877c478bd9Sstevel@tonic-gate }
12887c478bd9Sstevel@tonic-gate 
12897c478bd9Sstevel@tonic-gate /*
12907c478bd9Sstevel@tonic-gate  * Callback handler used by CPR to stop and resume callouts.
12917c478bd9Sstevel@tonic-gate  */
12927c478bd9Sstevel@tonic-gate /*ARGSUSED*/
12937c478bd9Sstevel@tonic-gate static boolean_t
12947c478bd9Sstevel@tonic-gate callout_cpr_callb(void *arg, int code)
12957c478bd9Sstevel@tonic-gate {
129687a18d3fSMadhavan Venkataraman 	if (code == CB_CODE_CPR_CHKPT)
129787a18d3fSMadhavan Venkataraman 		callout_suspend();
129887a18d3fSMadhavan Venkataraman 	else
129987a18d3fSMadhavan Venkataraman 		callout_resume(0);
130087a18d3fSMadhavan Venkataraman 
13017c478bd9Sstevel@tonic-gate 	return (B_TRUE);
13027c478bd9Sstevel@tonic-gate }
13037c478bd9Sstevel@tonic-gate 
13047c478bd9Sstevel@tonic-gate /*
130587a18d3fSMadhavan Venkataraman  * Callback handler invoked when the debugger is entered or exited.
13067c478bd9Sstevel@tonic-gate  */
130787a18d3fSMadhavan Venkataraman /*ARGSUSED*/
130887a18d3fSMadhavan Venkataraman static boolean_t
130987a18d3fSMadhavan Venkataraman callout_debug_callb(void *arg, int code)
13107c478bd9Sstevel@tonic-gate {
131187a18d3fSMadhavan Venkataraman 	hrtime_t delta;
1312f635d46aSqiao 
1313f635d46aSqiao 	/*
131487a18d3fSMadhavan Venkataraman 	 * When the system enters the debugger. make a note of the hrtime.
131587a18d3fSMadhavan Venkataraman 	 * When it is resumed, compute how long the system was in the
131687a18d3fSMadhavan Venkataraman 	 * debugger. This interval should not be counted for callouts.
1317f635d46aSqiao 	 */
131887a18d3fSMadhavan Venkataraman 	if (code == 0) {
131987a18d3fSMadhavan Venkataraman 		callout_suspend();
132087a18d3fSMadhavan Venkataraman 		callout_debug_hrtime = gethrtime();
132187a18d3fSMadhavan Venkataraman 	} else {
132287a18d3fSMadhavan Venkataraman 		delta = gethrtime() - callout_debug_hrtime;
132387a18d3fSMadhavan Venkataraman 		callout_resume(delta);
132487a18d3fSMadhavan Venkataraman 	}
1325f635d46aSqiao 
132687a18d3fSMadhavan Venkataraman 	return (B_TRUE);
132787a18d3fSMadhavan Venkataraman }
132887a18d3fSMadhavan Venkataraman 
132987a18d3fSMadhavan Venkataraman /*
1330*07247649SMadhavan Venkataraman  * Move the absolute hrestime callouts to the expired list. Then program the
1331*07247649SMadhavan Venkataraman  * table's cyclic to expire immediately so that the callouts can be executed
133287a18d3fSMadhavan Venkataraman  * immediately.
133387a18d3fSMadhavan Venkataraman  */
133487a18d3fSMadhavan Venkataraman static void
133587a18d3fSMadhavan Venkataraman callout_hrestime_one(callout_table_t *ct)
133687a18d3fSMadhavan Venkataraman {
1337*07247649SMadhavan Venkataraman 	callout_list_t *cl, *clnext;
1338*07247649SMadhavan Venkataraman 	int hash, flags;
133987a18d3fSMadhavan Venkataraman 
134087a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
134187a18d3fSMadhavan Venkataraman 	if (ct->ct_heap_num == 0) {
134287a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
134387a18d3fSMadhavan Venkataraman 		return;
134487a18d3fSMadhavan Venkataraman 	}
134587a18d3fSMadhavan Venkataraman 
1346*07247649SMadhavan Venkataraman 	flags = CALLOUT_LIST_FLAGS;
134787a18d3fSMadhavan Venkataraman 	for (hash = 0; hash < CALLOUT_BUCKETS; hash++) {
1348*07247649SMadhavan Venkataraman 		for (cl = ct->ct_clhash[hash].ch_head; cl; cl = clnext) {
1349*07247649SMadhavan Venkataraman 			clnext = cl->cl_next;
1350*07247649SMadhavan Venkataraman 			if (cl->cl_flags == flags) {
1351*07247649SMadhavan Venkataraman 				CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
1352*07247649SMadhavan Venkataraman 				CALLOUT_LIST_APPEND(ct->ct_expired, cl);
135387a18d3fSMadhavan Venkataraman 			}
135487a18d3fSMadhavan Venkataraman 		}
135587a18d3fSMadhavan Venkataraman 	}
135687a18d3fSMadhavan Venkataraman 
1357*07247649SMadhavan Venkataraman 	if ((ct->ct_expired.ch_head != NULL) && (ct->ct_suspend == 0))
135887a18d3fSMadhavan Venkataraman 		(void) cyclic_reprogram(ct->ct_cyclic, gethrtime());
1359*07247649SMadhavan Venkataraman 
136087a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
136187a18d3fSMadhavan Venkataraman }
136287a18d3fSMadhavan Venkataraman 
136387a18d3fSMadhavan Venkataraman /*
136487a18d3fSMadhavan Venkataraman  * This function is called whenever system time (hrestime) is changed
136587a18d3fSMadhavan Venkataraman  * explicitly. All the HRESTIME callouts must be expired at once.
136687a18d3fSMadhavan Venkataraman  */
136787a18d3fSMadhavan Venkataraman /*ARGSUSED*/
136887a18d3fSMadhavan Venkataraman void
136987a18d3fSMadhavan Venkataraman callout_hrestime(void)
137087a18d3fSMadhavan Venkataraman {
137187a18d3fSMadhavan Venkataraman 	int t, f;
137287a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
137387a18d3fSMadhavan Venkataraman 
137487a18d3fSMadhavan Venkataraman 	/*
137587a18d3fSMadhavan Venkataraman 	 * Traverse every callout table in the system and process the hrestime
137687a18d3fSMadhavan Venkataraman 	 * callouts therein.
137787a18d3fSMadhavan Venkataraman 	 *
137887a18d3fSMadhavan Venkataraman 	 * We look at all the tables because we don't know which ones were
137987a18d3fSMadhavan Venkataraman 	 * onlined and offlined in the past. The offlined tables may still
138087a18d3fSMadhavan Venkataraman 	 * have active cyclics processing timers somewhere.
138187a18d3fSMadhavan Venkataraman 	 */
138287a18d3fSMadhavan Venkataraman 	for (f = 0; f < max_ncpus; f++) {
138387a18d3fSMadhavan Venkataraman 		for (t = 0; t < CALLOUT_NTYPES; t++) {
138487a18d3fSMadhavan Venkataraman 			ct = &callout_table[CALLOUT_TABLE(t, f)];
138587a18d3fSMadhavan Venkataraman 			callout_hrestime_one(ct);
138687a18d3fSMadhavan Venkataraman 		}
138787a18d3fSMadhavan Venkataraman 	}
138887a18d3fSMadhavan Venkataraman }
138987a18d3fSMadhavan Venkataraman 
139087a18d3fSMadhavan Venkataraman /*
139187a18d3fSMadhavan Venkataraman  * Create the hash tables for this callout table.
139287a18d3fSMadhavan Venkataraman  */
139387a18d3fSMadhavan Venkataraman static void
139487a18d3fSMadhavan Venkataraman callout_hash_init(callout_table_t *ct)
139587a18d3fSMadhavan Venkataraman {
139687a18d3fSMadhavan Venkataraman 	size_t size;
139787a18d3fSMadhavan Venkataraman 
139887a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
139987a18d3fSMadhavan Venkataraman 	ASSERT((ct->ct_idhash == NULL) && (ct->ct_clhash == NULL));
140087a18d3fSMadhavan Venkataraman 
140187a18d3fSMadhavan Venkataraman 	size = sizeof (callout_hash_t) * CALLOUT_BUCKETS;
140287a18d3fSMadhavan Venkataraman 	ct->ct_idhash = kmem_zalloc(size, KM_SLEEP);
140387a18d3fSMadhavan Venkataraman 	ct->ct_clhash = kmem_zalloc(size, KM_SLEEP);
140487a18d3fSMadhavan Venkataraman }
140587a18d3fSMadhavan Venkataraman 
140687a18d3fSMadhavan Venkataraman /*
140787a18d3fSMadhavan Venkataraman  * Create per-callout table kstats.
140887a18d3fSMadhavan Venkataraman  */
140987a18d3fSMadhavan Venkataraman static void
141087a18d3fSMadhavan Venkataraman callout_kstat_init(callout_table_t *ct)
141187a18d3fSMadhavan Venkataraman {
141287a18d3fSMadhavan Venkataraman 	callout_stat_type_t stat;
141387a18d3fSMadhavan Venkataraman 	kstat_t *ct_kstats;
141487a18d3fSMadhavan Venkataraman 	int ndx;
141587a18d3fSMadhavan Venkataraman 
141687a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
141787a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_kstats == NULL);
141887a18d3fSMadhavan Venkataraman 
141987a18d3fSMadhavan Venkataraman 	ndx = ct - callout_table;
142087a18d3fSMadhavan Venkataraman 	ct_kstats = kstat_create("unix", ndx, "callout",
142187a18d3fSMadhavan Venkataraman 	    "misc", KSTAT_TYPE_NAMED, CALLOUT_NUM_STATS, KSTAT_FLAG_VIRTUAL);
142287a18d3fSMadhavan Venkataraman 
142387a18d3fSMadhavan Venkataraman 	if (ct_kstats == NULL) {
142487a18d3fSMadhavan Venkataraman 		cmn_err(CE_WARN, "kstat_create for callout table %p failed",
142587a18d3fSMadhavan Venkataraman 		    (void *)ct);
142687a18d3fSMadhavan Venkataraman 	} else {
142787a18d3fSMadhavan Venkataraman 		ct_kstats->ks_data = ct->ct_kstat_data;
142887a18d3fSMadhavan Venkataraman 		for (stat = 0; stat < CALLOUT_NUM_STATS; stat++)
142987a18d3fSMadhavan Venkataraman 			kstat_named_init(&ct->ct_kstat_data[stat],
143087a18d3fSMadhavan Venkataraman 			    callout_kstat_names[stat], KSTAT_DATA_INT64);
143187a18d3fSMadhavan Venkataraman 		ct->ct_kstats = ct_kstats;
143287a18d3fSMadhavan Venkataraman 		kstat_install(ct_kstats);
143387a18d3fSMadhavan Venkataraman 	}
143487a18d3fSMadhavan Venkataraman }
143587a18d3fSMadhavan Venkataraman 
143687a18d3fSMadhavan Venkataraman static void
143787a18d3fSMadhavan Venkataraman callout_cyclic_init(callout_table_t *ct)
143887a18d3fSMadhavan Venkataraman {
143987a18d3fSMadhavan Venkataraman 	cyc_handler_t hdlr;
144087a18d3fSMadhavan Venkataraman 	cyc_time_t when;
144187a18d3fSMadhavan Venkataraman 	processorid_t seqid;
144287a18d3fSMadhavan Venkataraman 	int t;
144387a18d3fSMadhavan Venkataraman 
144487a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
144587a18d3fSMadhavan Venkataraman 
144687a18d3fSMadhavan Venkataraman 	t = CALLOUT_TABLE_TYPE(ct);
144787a18d3fSMadhavan Venkataraman 	seqid = CALLOUT_TABLE_SEQID(ct);
144887a18d3fSMadhavan Venkataraman 
144987a18d3fSMadhavan Venkataraman 	/*
145087a18d3fSMadhavan Venkataraman 	 * Create the taskq thread if the table type is normal.
145187a18d3fSMadhavan Venkataraman 	 * Realtime tables are handled at PIL1 by a softint
145287a18d3fSMadhavan Venkataraman 	 * handler.
145387a18d3fSMadhavan Venkataraman 	 */
14547c478bd9Sstevel@tonic-gate 	if (t == CALLOUT_NORMAL) {
145587a18d3fSMadhavan Venkataraman 		ASSERT(ct->ct_taskq == NULL);
14567c478bd9Sstevel@tonic-gate 		/*
14577c478bd9Sstevel@tonic-gate 		 * Each callout thread consumes exactly one
14587c478bd9Sstevel@tonic-gate 		 * task structure while active.  Therefore,
14597c478bd9Sstevel@tonic-gate 		 * prepopulating with 2 * CALLOUT_THREADS tasks
14607c478bd9Sstevel@tonic-gate 		 * ensures that there's at least one task per
14617c478bd9Sstevel@tonic-gate 		 * thread that's either scheduled or on the
14627c478bd9Sstevel@tonic-gate 		 * freelist.  In turn, this guarantees that
14637c478bd9Sstevel@tonic-gate 		 * taskq_dispatch() will always either succeed
14647c478bd9Sstevel@tonic-gate 		 * (because there's a free task structure) or
14657c478bd9Sstevel@tonic-gate 		 * be unnecessary (because "callout_excute(ct)"
14667c478bd9Sstevel@tonic-gate 		 * has already scheduled).
14677c478bd9Sstevel@tonic-gate 		 */
14687c478bd9Sstevel@tonic-gate 		ct->ct_taskq =
146987a18d3fSMadhavan Venkataraman 		    taskq_create_instance("callout_taskq", seqid,
14707c478bd9Sstevel@tonic-gate 		    CALLOUT_THREADS, maxclsyspri,
14717c478bd9Sstevel@tonic-gate 		    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS,
14727c478bd9Sstevel@tonic-gate 		    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
14737c478bd9Sstevel@tonic-gate 	}
147487a18d3fSMadhavan Venkataraman 
147587a18d3fSMadhavan Venkataraman 	/*
147687a18d3fSMadhavan Venkataraman 	 * callouts can only be created in a table whose
147787a18d3fSMadhavan Venkataraman 	 * cyclic has been initialized.
147887a18d3fSMadhavan Venkataraman 	 */
147987a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap_num == 0);
148087a18d3fSMadhavan Venkataraman 
148187a18d3fSMadhavan Venkataraman 	/*
148287a18d3fSMadhavan Venkataraman 	 * Create the callout table cyclics.
1483*07247649SMadhavan Venkataraman 	 *
1484*07247649SMadhavan Venkataraman 	 * The realtime cyclic handler executes at low PIL. The normal cyclic
1485*07247649SMadhavan Venkataraman 	 * handler executes at lock PIL. This is because there are cases
1486*07247649SMadhavan Venkataraman 	 * where code can block at PIL > 1 waiting for a normal callout handler
1487*07247649SMadhavan Venkataraman 	 * to unblock it directly or indirectly. If the normal cyclic were to
1488*07247649SMadhavan Venkataraman 	 * be executed at low PIL, it could get blocked out by the waiter
1489*07247649SMadhavan Venkataraman 	 * and cause a deadlock.
149087a18d3fSMadhavan Venkataraman 	 */
149187a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_cyclic == CYCLIC_NONE);
149287a18d3fSMadhavan Venkataraman 
149387a18d3fSMadhavan Venkataraman 	hdlr.cyh_func = (cyc_func_t)CALLOUT_CYCLIC_HANDLER(t);
1494*07247649SMadhavan Venkataraman 	if (ct->ct_type == CALLOUT_REALTIME)
1495*07247649SMadhavan Venkataraman 		hdlr.cyh_level = callout_realtime_level;
1496*07247649SMadhavan Venkataraman 	else
1497*07247649SMadhavan Venkataraman 		hdlr.cyh_level = callout_normal_level;
149887a18d3fSMadhavan Venkataraman 	hdlr.cyh_arg = ct;
149987a18d3fSMadhavan Venkataraman 	when.cyt_when = CY_INFINITY;
150087a18d3fSMadhavan Venkataraman 	when.cyt_interval = CY_INFINITY;
150187a18d3fSMadhavan Venkataraman 
150287a18d3fSMadhavan Venkataraman 	ct->ct_cyclic = cyclic_add(&hdlr, &when);
150387a18d3fSMadhavan Venkataraman }
150487a18d3fSMadhavan Venkataraman 
150587a18d3fSMadhavan Venkataraman void
150687a18d3fSMadhavan Venkataraman callout_cpu_online(cpu_t *cp)
150787a18d3fSMadhavan Venkataraman {
150887a18d3fSMadhavan Venkataraman 	lgrp_handle_t hand;
150987a18d3fSMadhavan Venkataraman 	callout_cache_t *cache;
151087a18d3fSMadhavan Venkataraman 	char s[KMEM_CACHE_NAMELEN];
151187a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
151287a18d3fSMadhavan Venkataraman 	processorid_t seqid;
151387a18d3fSMadhavan Venkataraman 	int t;
151487a18d3fSMadhavan Venkataraman 
151587a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&cpu_lock));
151687a18d3fSMadhavan Venkataraman 
151787a18d3fSMadhavan Venkataraman 	/*
151887a18d3fSMadhavan Venkataraman 	 * Locate the cache corresponding to the onlined CPU's lgroup.
151987a18d3fSMadhavan Venkataraman 	 * Note that access to callout_caches is protected by cpu_lock.
152087a18d3fSMadhavan Venkataraman 	 */
152187a18d3fSMadhavan Venkataraman 	hand = lgrp_plat_cpu_to_hand(cp->cpu_id);
152287a18d3fSMadhavan Venkataraman 	for (cache = callout_caches; cache != NULL; cache = cache->cc_next) {
152387a18d3fSMadhavan Venkataraman 		if (cache->cc_hand == hand)
152487a18d3fSMadhavan Venkataraman 			break;
152587a18d3fSMadhavan Venkataraman 	}
152687a18d3fSMadhavan Venkataraman 
152787a18d3fSMadhavan Venkataraman 	/*
152887a18d3fSMadhavan Venkataraman 	 * If not found, create one. The caches are never destroyed.
152987a18d3fSMadhavan Venkataraman 	 */
153087a18d3fSMadhavan Venkataraman 	if (cache == NULL) {
153187a18d3fSMadhavan Venkataraman 		cache = kmem_alloc(sizeof (callout_cache_t), KM_SLEEP);
153287a18d3fSMadhavan Venkataraman 		cache->cc_hand = hand;
153387a18d3fSMadhavan Venkataraman 		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_cache%lx",
153487a18d3fSMadhavan Venkataraman 		    (long)hand);
153587a18d3fSMadhavan Venkataraman 		cache->cc_cache = kmem_cache_create(s, sizeof (callout_t),
153687a18d3fSMadhavan Venkataraman 		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
153787a18d3fSMadhavan Venkataraman 		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_lcache%lx",
153887a18d3fSMadhavan Venkataraman 		    (long)hand);
153987a18d3fSMadhavan Venkataraman 		cache->cc_lcache = kmem_cache_create(s, sizeof (callout_list_t),
154087a18d3fSMadhavan Venkataraman 		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
154187a18d3fSMadhavan Venkataraman 		cache->cc_next = callout_caches;
154287a18d3fSMadhavan Venkataraman 		callout_caches = cache;
154387a18d3fSMadhavan Venkataraman 	}
154487a18d3fSMadhavan Venkataraman 
154587a18d3fSMadhavan Venkataraman 	seqid = cp->cpu_seqid;
154687a18d3fSMadhavan Venkataraman 
154787a18d3fSMadhavan Venkataraman 	for (t = 0; t < CALLOUT_NTYPES; t++) {
154887a18d3fSMadhavan Venkataraman 		ct = &callout_table[CALLOUT_TABLE(t, seqid)];
154987a18d3fSMadhavan Venkataraman 
155087a18d3fSMadhavan Venkataraman 		mutex_enter(&ct->ct_mutex);
155187a18d3fSMadhavan Venkataraman 		/*
155287a18d3fSMadhavan Venkataraman 		 * Store convinience pointers to the kmem caches
155387a18d3fSMadhavan Venkataraman 		 * in the callout table. These assignments should always be
155487a18d3fSMadhavan Venkataraman 		 * done as callout tables can map to different physical
155587a18d3fSMadhavan Venkataraman 		 * CPUs each time.
155687a18d3fSMadhavan Venkataraman 		 */
155787a18d3fSMadhavan Venkataraman 		ct->ct_cache = cache->cc_cache;
155887a18d3fSMadhavan Venkataraman 		ct->ct_lcache = cache->cc_lcache;
155987a18d3fSMadhavan Venkataraman 
156087a18d3fSMadhavan Venkataraman 		/*
156187a18d3fSMadhavan Venkataraman 		 * We use the heap pointer to check if stuff has been
156287a18d3fSMadhavan Venkataraman 		 * initialized for this callout table.
156387a18d3fSMadhavan Venkataraman 		 */
156487a18d3fSMadhavan Venkataraman 		if (ct->ct_heap == NULL) {
156587a18d3fSMadhavan Venkataraman 			callout_heap_init(ct);
156687a18d3fSMadhavan Venkataraman 			callout_hash_init(ct);
156787a18d3fSMadhavan Venkataraman 			callout_kstat_init(ct);
156887a18d3fSMadhavan Venkataraman 			callout_cyclic_init(ct);
156987a18d3fSMadhavan Venkataraman 		}
157087a18d3fSMadhavan Venkataraman 
157187a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
157287a18d3fSMadhavan Venkataraman 
157387a18d3fSMadhavan Venkataraman 		/*
1574454ab202SMadhavan Venkataraman 		 * Move the cyclic to this CPU by doing a bind.
157587a18d3fSMadhavan Venkataraman 		 */
157687a18d3fSMadhavan Venkataraman 		cyclic_bind(ct->ct_cyclic, cp, NULL);
1577454ab202SMadhavan Venkataraman 	}
1578454ab202SMadhavan Venkataraman }
1579454ab202SMadhavan Venkataraman 
1580454ab202SMadhavan Venkataraman void
1581454ab202SMadhavan Venkataraman callout_cpu_offline(cpu_t *cp)
1582454ab202SMadhavan Venkataraman {
1583454ab202SMadhavan Venkataraman 	callout_table_t *ct;
1584454ab202SMadhavan Venkataraman 	processorid_t seqid;
1585454ab202SMadhavan Venkataraman 	int t;
1586454ab202SMadhavan Venkataraman 
1587454ab202SMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&cpu_lock));
1588454ab202SMadhavan Venkataraman 
1589454ab202SMadhavan Venkataraman 	seqid = cp->cpu_seqid;
1590454ab202SMadhavan Venkataraman 
1591454ab202SMadhavan Venkataraman 	for (t = 0; t < CALLOUT_NTYPES; t++) {
1592454ab202SMadhavan Venkataraman 		ct = &callout_table[CALLOUT_TABLE(t, seqid)];
1593454ab202SMadhavan Venkataraman 
1594454ab202SMadhavan Venkataraman 		/*
1595454ab202SMadhavan Venkataraman 		 * Unbind the cyclic. This will allow the cyclic subsystem
1596454ab202SMadhavan Venkataraman 		 * to juggle the cyclic during CPU offline.
1597454ab202SMadhavan Venkataraman 		 */
159887a18d3fSMadhavan Venkataraman 		cyclic_bind(ct->ct_cyclic, NULL, NULL);
15997c478bd9Sstevel@tonic-gate 	}
16007c478bd9Sstevel@tonic-gate }
160187a18d3fSMadhavan Venkataraman 
160287a18d3fSMadhavan Venkataraman /*
160387a18d3fSMadhavan Venkataraman  * This is called to perform per-CPU initialization for slave CPUs at
160487a18d3fSMadhavan Venkataraman  * boot time.
160587a18d3fSMadhavan Venkataraman  */
160687a18d3fSMadhavan Venkataraman void
160787a18d3fSMadhavan Venkataraman callout_mp_init(void)
160887a18d3fSMadhavan Venkataraman {
160987a18d3fSMadhavan Venkataraman 	cpu_t *cp;
161087a18d3fSMadhavan Venkataraman 
161187a18d3fSMadhavan Venkataraman 	mutex_enter(&cpu_lock);
161287a18d3fSMadhavan Venkataraman 
161387a18d3fSMadhavan Venkataraman 	cp = cpu_active;
161487a18d3fSMadhavan Venkataraman 	do {
161587a18d3fSMadhavan Venkataraman 		callout_cpu_online(cp);
161687a18d3fSMadhavan Venkataraman 	} while ((cp = cp->cpu_next_onln) != cpu_active);
161787a18d3fSMadhavan Venkataraman 
161887a18d3fSMadhavan Venkataraman 	mutex_exit(&cpu_lock);
161987a18d3fSMadhavan Venkataraman }
162087a18d3fSMadhavan Venkataraman 
162187a18d3fSMadhavan Venkataraman /*
162287a18d3fSMadhavan Venkataraman  * Initialize all callout tables.  Called at boot time just before clkstart().
162387a18d3fSMadhavan Venkataraman  */
162487a18d3fSMadhavan Venkataraman void
162587a18d3fSMadhavan Venkataraman callout_init(void)
162687a18d3fSMadhavan Venkataraman {
162787a18d3fSMadhavan Venkataraman 	int f, t;
162887a18d3fSMadhavan Venkataraman 	size_t size;
162987a18d3fSMadhavan Venkataraman 	int table_id;
163087a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
163187a18d3fSMadhavan Venkataraman 	long bits, fanout;
163287a18d3fSMadhavan Venkataraman 	uintptr_t buf;
163387a18d3fSMadhavan Venkataraman 
163487a18d3fSMadhavan Venkataraman 	/*
163587a18d3fSMadhavan Venkataraman 	 * Initialize callout globals.
163687a18d3fSMadhavan Venkataraman 	 */
163787a18d3fSMadhavan Venkataraman 	bits = 0;
163887a18d3fSMadhavan Venkataraman 	for (fanout = 1; (fanout < max_ncpus); fanout <<= 1)
163987a18d3fSMadhavan Venkataraman 		bits++;
164087a18d3fSMadhavan Venkataraman 	callout_table_bits = CALLOUT_TYPE_BITS + bits;
164187a18d3fSMadhavan Venkataraman 	callout_table_mask = (1 << callout_table_bits) - 1;
164287a18d3fSMadhavan Venkataraman 	callout_counter_low = 1 << CALLOUT_COUNTER_SHIFT;
164387a18d3fSMadhavan Venkataraman 	callout_longterm = TICK_TO_NSEC(CALLOUT_LONGTERM_TICKS);
1644454ab202SMadhavan Venkataraman 	callout_max_ticks = CALLOUT_MAX_TICKS;
164587a18d3fSMadhavan Venkataraman 
164687a18d3fSMadhavan Venkataraman 	/*
164787a18d3fSMadhavan Venkataraman 	 * Because of the variability in timing behavior across systems with
164887a18d3fSMadhavan Venkataraman 	 * different architectures, we cannot allow arbitrarily low
164987a18d3fSMadhavan Venkataraman 	 * resolutions. The minimum resolution has to be determined in a
165087a18d3fSMadhavan Venkataraman 	 * platform-specific way. Until then, we define a blanket minimum
165187a18d3fSMadhavan Venkataraman 	 * resolution for callouts of CALLOUT_MIN_RESOLUTION.
165287a18d3fSMadhavan Venkataraman 	 *
165387a18d3fSMadhavan Venkataraman 	 * If, in the future, someone requires lower resolution timers, they
165487a18d3fSMadhavan Venkataraman 	 * can do one of two things:
165587a18d3fSMadhavan Venkataraman 	 *
165687a18d3fSMadhavan Venkataraman 	 *	- Define a lower value for callout_min_resolution. This would
165787a18d3fSMadhavan Venkataraman 	 *	  affect all clients of the callout subsystem. If this done
165887a18d3fSMadhavan Venkataraman 	 *	  via /etc/system, then no code changes are required and it
165987a18d3fSMadhavan Venkataraman 	 *	  would affect only that customer.
166087a18d3fSMadhavan Venkataraman 	 *
166187a18d3fSMadhavan Venkataraman 	 *	- Define a flag to be passed to timeout creation that allows
166287a18d3fSMadhavan Venkataraman 	 *	  the lower resolution. This involves code changes. But it
166387a18d3fSMadhavan Venkataraman 	 *	  would affect only the calling module. It is the developer's
166487a18d3fSMadhavan Venkataraman 	 *	  responsibility to test on all systems and make sure that
166587a18d3fSMadhavan Venkataraman 	 *	  everything works.
166687a18d3fSMadhavan Venkataraman 	 */
166787a18d3fSMadhavan Venkataraman 	if (callout_min_resolution <= 0)
166887a18d3fSMadhavan Venkataraman 		callout_min_resolution = CALLOUT_MIN_RESOLUTION;
166987a18d3fSMadhavan Venkataraman 
167087a18d3fSMadhavan Venkataraman 	/*
167187a18d3fSMadhavan Venkataraman 	 * Allocate all the callout tables based on max_ncpus. We have chosen
167287a18d3fSMadhavan Venkataraman 	 * to do boot-time allocation instead of dynamic allocation because:
167387a18d3fSMadhavan Venkataraman 	 *
167487a18d3fSMadhavan Venkataraman 	 *	- the size of the callout tables is not too large.
167587a18d3fSMadhavan Venkataraman 	 *	- there are race conditions involved in making this dynamic.
167687a18d3fSMadhavan Venkataraman 	 *	- the hash tables that go with the callout tables consume
167787a18d3fSMadhavan Venkataraman 	 *	  most of the memory and they are only allocated in
167887a18d3fSMadhavan Venkataraman 	 *	  callout_cpu_online().
167987a18d3fSMadhavan Venkataraman 	 *
168087a18d3fSMadhavan Venkataraman 	 * Each CPU has two tables that are consecutive in the array. The first
168187a18d3fSMadhavan Venkataraman 	 * one is for realtime callouts and the second one is for normal ones.
168287a18d3fSMadhavan Venkataraman 	 *
168387a18d3fSMadhavan Venkataraman 	 * We do this alignment dance to make sure that callout table
168487a18d3fSMadhavan Venkataraman 	 * structures will always be on a cache line boundary.
168587a18d3fSMadhavan Venkataraman 	 */
168687a18d3fSMadhavan Venkataraman 	size = sizeof (callout_table_t) * CALLOUT_NTYPES * max_ncpus;
168787a18d3fSMadhavan Venkataraman 	size += CALLOUT_ALIGN;
168887a18d3fSMadhavan Venkataraman 	buf = (uintptr_t)kmem_zalloc(size, KM_SLEEP);
168987a18d3fSMadhavan Venkataraman 	callout_table = (callout_table_t *)P2ROUNDUP(buf, CALLOUT_ALIGN);
169087a18d3fSMadhavan Venkataraman 
169187a18d3fSMadhavan Venkataraman 	size = sizeof (kstat_named_t) * CALLOUT_NUM_STATS;
169287a18d3fSMadhavan Venkataraman 	/*
169387a18d3fSMadhavan Venkataraman 	 * Now, initialize the tables for all the CPUs.
169487a18d3fSMadhavan Venkataraman 	 */
169587a18d3fSMadhavan Venkataraman 	for (f = 0; f < max_ncpus; f++) {
169687a18d3fSMadhavan Venkataraman 		for (t = 0; t < CALLOUT_NTYPES; t++) {
169787a18d3fSMadhavan Venkataraman 			table_id = CALLOUT_TABLE(t, f);
169887a18d3fSMadhavan Venkataraman 			ct = &callout_table[table_id];
1699454ab202SMadhavan Venkataraman 			ct->ct_type = t;
170087a18d3fSMadhavan Venkataraman 			mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
170187a18d3fSMadhavan Venkataraman 			/*
170287a18d3fSMadhavan Venkataraman 			 * Precompute the base IDs for long and short-term
170387a18d3fSMadhavan Venkataraman 			 * legacy IDs. This makes ID generation during
170487a18d3fSMadhavan Venkataraman 			 * timeout() fast.
170587a18d3fSMadhavan Venkataraman 			 */
170687a18d3fSMadhavan Venkataraman 			ct->ct_short_id = CALLOUT_SHORT_ID(table_id);
170787a18d3fSMadhavan Venkataraman 			ct->ct_long_id = CALLOUT_LONG_ID(table_id);
170887a18d3fSMadhavan Venkataraman 			/*
170987a18d3fSMadhavan Venkataraman 			 * Precompute the base ID for generation-based IDs.
171087a18d3fSMadhavan Venkataraman 			 * Note that when the first ID gets allocated, the
171187a18d3fSMadhavan Venkataraman 			 * ID will wrap. This will cause the generation
171287a18d3fSMadhavan Venkataraman 			 * number to be incremented to 1.
171387a18d3fSMadhavan Venkataraman 			 */
171487a18d3fSMadhavan Venkataraman 			ct->ct_gen_id = CALLOUT_SHORT_ID(table_id);
171587a18d3fSMadhavan Venkataraman 			/*
171687a18d3fSMadhavan Venkataraman 			 * Initialize the cyclic as NONE. This will get set
171787a18d3fSMadhavan Venkataraman 			 * during CPU online. This is so that partially
171887a18d3fSMadhavan Venkataraman 			 * populated systems will only have the required
171987a18d3fSMadhavan Venkataraman 			 * number of cyclics, not more.
172087a18d3fSMadhavan Venkataraman 			 */
172187a18d3fSMadhavan Venkataraman 			ct->ct_cyclic = CYCLIC_NONE;
172287a18d3fSMadhavan Venkataraman 			ct->ct_kstat_data = kmem_zalloc(size, KM_SLEEP);
172387a18d3fSMadhavan Venkataraman 		}
172487a18d3fSMadhavan Venkataraman 	}
172587a18d3fSMadhavan Venkataraman 
172687a18d3fSMadhavan Venkataraman 	/*
172787a18d3fSMadhavan Venkataraman 	 * Add the callback for CPR. This is called during checkpoint
172887a18d3fSMadhavan Venkataraman 	 * resume to suspend and resume callouts.
172987a18d3fSMadhavan Venkataraman 	 */
173087a18d3fSMadhavan Venkataraman 	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT,
173187a18d3fSMadhavan Venkataraman 	    "callout_cpr");
173287a18d3fSMadhavan Venkataraman 	(void) callb_add(callout_debug_callb, 0, CB_CL_ENTER_DEBUGGER,
173387a18d3fSMadhavan Venkataraman 	    "callout_debug");
173487a18d3fSMadhavan Venkataraman 
173587a18d3fSMadhavan Venkataraman 	/*
173687a18d3fSMadhavan Venkataraman 	 * Call the per-CPU initialization function for the boot CPU. This
173787a18d3fSMadhavan Venkataraman 	 * is done here because the function is not called automatically for
173887a18d3fSMadhavan Venkataraman 	 * the boot CPU from the CPU online/offline hooks. Note that the
173987a18d3fSMadhavan Venkataraman 	 * CPU lock is taken here because of convention.
174087a18d3fSMadhavan Venkataraman 	 */
174187a18d3fSMadhavan Venkataraman 	mutex_enter(&cpu_lock);
174287a18d3fSMadhavan Venkataraman 	callout_boot_ct = &callout_table[CALLOUT_TABLE(0, CPU->cpu_seqid)];
174387a18d3fSMadhavan Venkataraman 	callout_cpu_online(CPU);
174487a18d3fSMadhavan Venkataraman 	mutex_exit(&cpu_lock);
17457c478bd9Sstevel@tonic-gate }
1746