xref: /illumos-gate/usr/src/uts/common/os/callout.c (revision 87a18d3f)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5f635d46aSqiao  * Common Development and Distribution License (the "License").
6f635d46aSqiao  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22f635d46aSqiao  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #include <sys/callo.h>
277c478bd9Sstevel@tonic-gate #include <sys/param.h>
287c478bd9Sstevel@tonic-gate #include <sys/types.h>
297c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
307c478bd9Sstevel@tonic-gate #include <sys/thread.h>
317c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
32*87a18d3fSMadhavan Venkataraman #include <sys/kmem_impl.h>
337c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
347c478bd9Sstevel@tonic-gate #include <sys/callb.h>
357c478bd9Sstevel@tonic-gate #include <sys/debug.h>
367c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
377c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
387c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
397c478bd9Sstevel@tonic-gate 
407c478bd9Sstevel@tonic-gate /*
417c478bd9Sstevel@tonic-gate  * Callout tables.  See timeout(9F) for details.
427c478bd9Sstevel@tonic-gate  */
43*87a18d3fSMadhavan Venkataraman static hrtime_t callout_debug_hrtime;		/* debugger entry time */
44*87a18d3fSMadhavan Venkataraman static int callout_min_resolution;		/* Minimum resolution */
45*87a18d3fSMadhavan Venkataraman static callout_table_t *callout_boot_ct;	/* Boot CPU's callout tables */
46*87a18d3fSMadhavan Venkataraman static hrtime_t callout_longterm;		/* longterm nanoseconds */
47*87a18d3fSMadhavan Venkataraman static ulong_t callout_counter_low;		/* callout ID increment */
48*87a18d3fSMadhavan Venkataraman static ulong_t callout_table_bits;		/* number of table bits in ID */
49*87a18d3fSMadhavan Venkataraman static ulong_t callout_table_mask;		/* mask for the table bits */
50*87a18d3fSMadhavan Venkataraman static callout_cache_t *callout_caches;		/* linked list of caches */
51*87a18d3fSMadhavan Venkataraman #pragma align 64(callout_table)
52*87a18d3fSMadhavan Venkataraman static callout_table_t *callout_table;		/* global callout table array */
537c478bd9Sstevel@tonic-gate 
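/*
 * These kstat names presumably correspond one-to-one with the per-table
 * statistics updated in this file: ct_timeouts, ct_timeouts_pending,
 * ct_untimeouts_unexpired, ct_untimeouts_executing, ct_untimeouts_expired,
 * ct_expirations and ct_allocations.
 */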
54*87a18d3fSMadhavan Venkataraman static char *callout_kstat_names[] = {
55*87a18d3fSMadhavan Venkataraman 	"callout_timeouts",
56*87a18d3fSMadhavan Venkataraman 	"callout_timeouts_pending",
57*87a18d3fSMadhavan Venkataraman 	"callout_untimeouts_unexpired",
58*87a18d3fSMadhavan Venkataraman 	"callout_untimeouts_executing",
59*87a18d3fSMadhavan Venkataraman 	"callout_untimeouts_expired",
60*87a18d3fSMadhavan Venkataraman 	"callout_expirations",
61*87a18d3fSMadhavan Venkataraman 	"callout_allocations",
62*87a18d3fSMadhavan Venkataraman };
63*87a18d3fSMadhavan Venkataraman 
64*87a18d3fSMadhavan Venkataraman #define	CALLOUT_HASH_INSERT(hash, cp, cnext, cprev)	\
657c478bd9Sstevel@tonic-gate {							\
66*87a18d3fSMadhavan Venkataraman 	callout_hash_t *hashp = &(hash);		\
67*87a18d3fSMadhavan Venkataraman 							\
687c478bd9Sstevel@tonic-gate 	cp->cprev = NULL;				\
69*87a18d3fSMadhavan Venkataraman 	cp->cnext = hashp->ch_head;			\
70*87a18d3fSMadhavan Venkataraman 	if (hashp->ch_head == NULL)			\
71*87a18d3fSMadhavan Venkataraman 		hashp->ch_tail = cp;			\
727c478bd9Sstevel@tonic-gate 	else						\
73*87a18d3fSMadhavan Venkataraman 		cp->cnext->cprev = cp;			\
74*87a18d3fSMadhavan Venkataraman 	hashp->ch_head = cp;				\
757c478bd9Sstevel@tonic-gate }
767c478bd9Sstevel@tonic-gate 
77*87a18d3fSMadhavan Venkataraman #define	CALLOUT_HASH_APPEND(hash, cp, cnext, cprev)	\
78*87a18d3fSMadhavan Venkataraman {							\
79*87a18d3fSMadhavan Venkataraman 	callout_hash_t *hashp = &(hash);		\
80*87a18d3fSMadhavan Venkataraman 							\
81*87a18d3fSMadhavan Venkataraman 	cp->cnext = NULL;				\
82*87a18d3fSMadhavan Venkataraman 	cp->cprev = hashp->ch_tail;			\
83*87a18d3fSMadhavan Venkataraman 	if (hashp->ch_tail == NULL)			\
84*87a18d3fSMadhavan Venkataraman 		hashp->ch_head = cp;			\
85*87a18d3fSMadhavan Venkataraman 	else						\
86*87a18d3fSMadhavan Venkataraman 		cp->cprev->cnext = cp;			\
87*87a18d3fSMadhavan Venkataraman 	hashp->ch_tail = cp;				\
88*87a18d3fSMadhavan Venkataraman }
89*87a18d3fSMadhavan Venkataraman 
90*87a18d3fSMadhavan Venkataraman #define	CALLOUT_HASH_DELETE(hash, cp, cnext, cprev)	\
91*87a18d3fSMadhavan Venkataraman {							\
92*87a18d3fSMadhavan Venkataraman 	callout_hash_t *hashp = &(hash);		\
93*87a18d3fSMadhavan Venkataraman 							\
94*87a18d3fSMadhavan Venkataraman 	if (cp->cnext == NULL)				\
95*87a18d3fSMadhavan Venkataraman 		hashp->ch_tail = cp->cprev;		\
96*87a18d3fSMadhavan Venkataraman 	else						\
97*87a18d3fSMadhavan Venkataraman 		cp->cnext->cprev = cp->cprev;		\
98*87a18d3fSMadhavan Venkataraman 	if (cp->cprev == NULL)				\
99*87a18d3fSMadhavan Venkataraman 		hashp->ch_head = cp->cnext;		\
100*87a18d3fSMadhavan Venkataraman 	else						\
101*87a18d3fSMadhavan Venkataraman 		cp->cprev->cnext = cp->cnext;		\
102*87a18d3fSMadhavan Venkataraman }
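
/*
 * In the hash macros above, the "cnext" and "cprev" arguments are literal
 * member names substituted by the preprocessor.  The same primitives thus
 * serve both callout_t links (c_idnext/c_idprev, c_clnext/c_clprev) and
 * callout_list_t links (cl_next/cl_prev), as the wrappers below show.
 */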
103*87a18d3fSMadhavan Venkataraman 
104*87a18d3fSMadhavan Venkataraman /*
105*87a18d3fSMadhavan Venkataraman  * These definitions help us queue callouts and callout lists. Here is
106*87a18d3fSMadhavan Venkataraman  * the queueing rationale:
107*87a18d3fSMadhavan Venkataraman  *
108*87a18d3fSMadhavan Venkataraman  *	- callouts are queued in a FIFO manner in the ID hash table.
109*87a18d3fSMadhavan Venkataraman  *	  TCP timers are typically cancelled in the same order that they
110*87a18d3fSMadhavan Venkataraman  *	  were issued. The FIFO queueing shortens the search for a callout
111*87a18d3fSMadhavan Venkataraman  *	  during untimeout().
112*87a18d3fSMadhavan Venkataraman  *
113*87a18d3fSMadhavan Venkataraman  *	- callouts are queued in a FIFO manner in their callout lists.
114*87a18d3fSMadhavan Venkataraman  *	  This ensures that the callouts are executed in the same order that
115*87a18d3fSMadhavan Venkataraman  *	  they were queued. This is fair. Plus, it helps to make each
116*87a18d3fSMadhavan Venkataraman  *	  callout expiration timely. It also favors cancellations.
117*87a18d3fSMadhavan Venkataraman  *
118*87a18d3fSMadhavan Venkataraman  *	- callout lists are queued in a LIFO manner in the callout list hash
119*87a18d3fSMadhavan Venkataraman  *	  table. This ensures that long term timers stay at the rear of the
120*87a18d3fSMadhavan Venkataraman  *	  hash lists.
121*87a18d3fSMadhavan Venkataraman  *
122*87a18d3fSMadhavan Venkataraman  *	- callout lists are queued in a FIFO manner in the expired callouts
123*87a18d3fSMadhavan Venkataraman  *	  list. This ensures that callout lists are executed in the order
124*87a18d3fSMadhavan Venkataraman  *	  of expiration.
125*87a18d3fSMadhavan Venkataraman  */
126*87a18d3fSMadhavan Venkataraman #define	CALLOUT_APPEND(ct, cp)						\
127*87a18d3fSMadhavan Venkataraman 	CALLOUT_HASH_APPEND(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)],	\
128*87a18d3fSMadhavan Venkataraman 		cp, c_idnext, c_idprev);				\
129*87a18d3fSMadhavan Venkataraman 	CALLOUT_HASH_APPEND(cp->c_list->cl_callouts, cp, c_clnext, c_clprev)
130*87a18d3fSMadhavan Venkataraman 
131*87a18d3fSMadhavan Venkataraman #define	CALLOUT_DELETE(ct, cp)						\
132*87a18d3fSMadhavan Venkataraman 	CALLOUT_HASH_DELETE(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)],	\
133*87a18d3fSMadhavan Venkataraman 		cp, c_idnext, c_idprev);				\
134*87a18d3fSMadhavan Venkataraman 	CALLOUT_HASH_DELETE(cp->c_list->cl_callouts, cp, c_clnext, c_clprev)
135*87a18d3fSMadhavan Venkataraman 
136*87a18d3fSMadhavan Venkataraman #define	CALLOUT_LIST_INSERT(hash, cl)				\
137*87a18d3fSMadhavan Venkataraman 	CALLOUT_HASH_INSERT(hash, cl, cl_next, cl_prev)
138*87a18d3fSMadhavan Venkataraman 
139*87a18d3fSMadhavan Venkataraman #define	CALLOUT_LIST_APPEND(hash, cl)				\
140*87a18d3fSMadhavan Venkataraman 	CALLOUT_HASH_APPEND(hash, cl, cl_next, cl_prev)
141*87a18d3fSMadhavan Venkataraman 
142*87a18d3fSMadhavan Venkataraman #define	CALLOUT_LIST_DELETE(hash, cl)				\
143*87a18d3fSMadhavan Venkataraman 	CALLOUT_HASH_DELETE(hash, cl, cl_next, cl_prev)
1447c478bd9Sstevel@tonic-gate 
1457c478bd9Sstevel@tonic-gate /*
1467c478bd9Sstevel@tonic-gate  * Allocate a callout structure.  We try quite hard because we
1477c478bd9Sstevel@tonic-gate  * can't sleep, and if we can't do the allocation, we're toast.
148*87a18d3fSMadhavan Venkataraman  * Failing that, we fall back to a KM_PANIC allocation. Note that we never
149*87a18d3fSMadhavan Venkataraman  * deallocate a callout. See untimeout() for the reasoning.
1507c478bd9Sstevel@tonic-gate  */
1517c478bd9Sstevel@tonic-gate static callout_t *
1527c478bd9Sstevel@tonic-gate callout_alloc(callout_table_t *ct)
1537c478bd9Sstevel@tonic-gate {
154*87a18d3fSMadhavan Venkataraman 	size_t size;
155*87a18d3fSMadhavan Venkataraman 	callout_t *cp;
1567c478bd9Sstevel@tonic-gate 
157*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
158*87a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
159*87a18d3fSMadhavan Venkataraman 
160*87a18d3fSMadhavan Venkataraman 	cp = kmem_cache_alloc(ct->ct_cache, KM_NOSLEEP);
161*87a18d3fSMadhavan Venkataraman 	if (cp == NULL) {
162*87a18d3fSMadhavan Venkataraman 		size = sizeof (callout_t);
163*87a18d3fSMadhavan Venkataraman 		cp = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
164*87a18d3fSMadhavan Venkataraman 	}
165*87a18d3fSMadhavan Venkataraman 	cp->c_xid = 0;
166*87a18d3fSMadhavan Venkataraman 
167*87a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
168*87a18d3fSMadhavan Venkataraman 	ct->ct_allocations++;
1697c478bd9Sstevel@tonic-gate 	return (cp);
1707c478bd9Sstevel@tonic-gate }
1717c478bd9Sstevel@tonic-gate 
1727c478bd9Sstevel@tonic-gate /*
173*87a18d3fSMadhavan Venkataraman  * Allocate a callout list structure.  We try quite hard because we
174*87a18d3fSMadhavan Venkataraman  * can't sleep, and if we can't do the allocation, we're toast.
175*87a18d3fSMadhavan Venkataraman  * Failing that, we fall back to a KM_PANIC allocation. Note that we never
176*87a18d3fSMadhavan Venkataraman  * deallocate a callout list.
1777c478bd9Sstevel@tonic-gate  */
178*87a18d3fSMadhavan Venkataraman static void
179*87a18d3fSMadhavan Venkataraman callout_list_alloc(callout_table_t *ct)
1807c478bd9Sstevel@tonic-gate {
181*87a18d3fSMadhavan Venkataraman 	size_t size;
182*87a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
183*87a18d3fSMadhavan Venkataraman 
184*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
185*87a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
186*87a18d3fSMadhavan Venkataraman 
187*87a18d3fSMadhavan Venkataraman 	cl = kmem_cache_alloc(ct->ct_lcache, KM_NOSLEEP);
188*87a18d3fSMadhavan Venkataraman 	if (cl == NULL) {
189*87a18d3fSMadhavan Venkataraman 		size = sizeof (callout_list_t);
190*87a18d3fSMadhavan Venkataraman 		cl = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
191*87a18d3fSMadhavan Venkataraman 	}
192*87a18d3fSMadhavan Venkataraman 	bzero(cl, sizeof (callout_list_t));
193*87a18d3fSMadhavan Venkataraman 
194*87a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
195*87a18d3fSMadhavan Venkataraman 	cl->cl_next = ct->ct_lfree;
196*87a18d3fSMadhavan Venkataraman 	ct->ct_lfree = cl;
197*87a18d3fSMadhavan Venkataraman }
198*87a18d3fSMadhavan Venkataraman 
199*87a18d3fSMadhavan Venkataraman /*
200*87a18d3fSMadhavan Venkataraman  * Find the callout list that corresponds to an expiration. There can
201*87a18d3fSMadhavan Venkataraman  * be only one.
202*87a18d3fSMadhavan Venkataraman  */
203*87a18d3fSMadhavan Venkataraman static callout_list_t *
204*87a18d3fSMadhavan Venkataraman callout_list_get(callout_table_t *ct, hrtime_t expiration, int hash)
205*87a18d3fSMadhavan Venkataraman {
206*87a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
207*87a18d3fSMadhavan Venkataraman 
208*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
209*87a18d3fSMadhavan Venkataraman 
210*87a18d3fSMadhavan Venkataraman 	for (cl = ct->ct_clhash[hash].ch_head; (cl != NULL); cl = cl->cl_next) {
211*87a18d3fSMadhavan Venkataraman 		if (cl->cl_expiration == expiration)
212*87a18d3fSMadhavan Venkataraman 			return (cl);
213*87a18d3fSMadhavan Venkataraman 	}
214*87a18d3fSMadhavan Venkataraman 
215*87a18d3fSMadhavan Venkataraman 	return (NULL);
216*87a18d3fSMadhavan Venkataraman }
217*87a18d3fSMadhavan Venkataraman 
218*87a18d3fSMadhavan Venkataraman /*
219*87a18d3fSMadhavan Venkataraman  * Find the callout list that corresponds to an expiration. There can
220*87a18d3fSMadhavan Venkataraman  * be only one. If the callout list is empty, free it. Else, return it.
221*87a18d3fSMadhavan Venkataraman  */
222*87a18d3fSMadhavan Venkataraman static callout_list_t *
223*87a18d3fSMadhavan Venkataraman callout_list_check(callout_table_t *ct, hrtime_t expiration, int hash)
224*87a18d3fSMadhavan Venkataraman {
225*87a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
226*87a18d3fSMadhavan Venkataraman 
227*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
228*87a18d3fSMadhavan Venkataraman 
229*87a18d3fSMadhavan Venkataraman 	cl = callout_list_get(ct, expiration, hash);
230*87a18d3fSMadhavan Venkataraman 	if (cl != NULL) {
231*87a18d3fSMadhavan Venkataraman 		if (cl->cl_callouts.ch_head != NULL) {
232*87a18d3fSMadhavan Venkataraman 			/*
233*87a18d3fSMadhavan Venkataraman 			 * There is exactly one callout list for every
234*87a18d3fSMadhavan Venkataraman 			 * unique expiration. So, we are done.
235*87a18d3fSMadhavan Venkataraman 			 */
236*87a18d3fSMadhavan Venkataraman 			return (cl);
237*87a18d3fSMadhavan Venkataraman 		}
238*87a18d3fSMadhavan Venkataraman 
239*87a18d3fSMadhavan Venkataraman 		CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
240*87a18d3fSMadhavan Venkataraman 		cl->cl_next = ct->ct_lfree;
241*87a18d3fSMadhavan Venkataraman 		ct->ct_lfree = cl;
242*87a18d3fSMadhavan Venkataraman 	}
243*87a18d3fSMadhavan Venkataraman 
244*87a18d3fSMadhavan Venkataraman 	return (NULL);
245*87a18d3fSMadhavan Venkataraman }
246*87a18d3fSMadhavan Venkataraman 
247*87a18d3fSMadhavan Venkataraman /*
248*87a18d3fSMadhavan Venkataraman  * Initialize a callout table's heap, if necessary. Preallocate some free
249*87a18d3fSMadhavan Venkataraman  * entries so we don't have to check for NULL elsewhere.
250*87a18d3fSMadhavan Venkataraman  */
251*87a18d3fSMadhavan Venkataraman static void
252*87a18d3fSMadhavan Venkataraman callout_heap_init(callout_table_t *ct)
253*87a18d3fSMadhavan Venkataraman {
254*87a18d3fSMadhavan Venkataraman 	size_t size;
255*87a18d3fSMadhavan Venkataraman 
256*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
257*87a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap == NULL);
258*87a18d3fSMadhavan Venkataraman 
259*87a18d3fSMadhavan Venkataraman 	ct->ct_heap_num = 0;
260*87a18d3fSMadhavan Venkataraman 	ct->ct_heap_max = CALLOUT_CHUNK;
261*87a18d3fSMadhavan Venkataraman 	size = sizeof (hrtime_t) * CALLOUT_CHUNK;
262*87a18d3fSMadhavan Venkataraman 	ct->ct_heap = kmem_alloc(size, KM_SLEEP);
263*87a18d3fSMadhavan Venkataraman }
264*87a18d3fSMadhavan Venkataraman 
265*87a18d3fSMadhavan Venkataraman /*
266*87a18d3fSMadhavan Venkataraman  * Reallocate the heap. We try quite hard because we can't sleep, and if
267*87a18d3fSMadhavan Venkataraman  * we can't do the allocation, we're toast. Failing that, we fall back to a
268*87a18d3fSMadhavan Venkataraman  * KM_PANIC allocation. Note that the heap only expands; it never contracts.
269*87a18d3fSMadhavan Venkataraman  */
270*87a18d3fSMadhavan Venkataraman static void
271*87a18d3fSMadhavan Venkataraman callout_heap_expand(callout_table_t *ct)
272*87a18d3fSMadhavan Venkataraman {
273*87a18d3fSMadhavan Venkataraman 	size_t max, size, osize;
274*87a18d3fSMadhavan Venkataraman 	hrtime_t *heap;
275*87a18d3fSMadhavan Venkataraman 
276*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
277*87a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap_num <= ct->ct_heap_max);
278*87a18d3fSMadhavan Venkataraman 
279*87a18d3fSMadhavan Venkataraman 	while (ct->ct_heap_num == ct->ct_heap_max) {
280*87a18d3fSMadhavan Venkataraman 		max = ct->ct_heap_max;
281*87a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
282*87a18d3fSMadhavan Venkataraman 
283*87a18d3fSMadhavan Venkataraman 		osize = sizeof (hrtime_t) * max;
284*87a18d3fSMadhavan Venkataraman 		size = sizeof (hrtime_t) * (max + CALLOUT_CHUNK);
285*87a18d3fSMadhavan Venkataraman 		heap = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
286*87a18d3fSMadhavan Venkataraman 
287*87a18d3fSMadhavan Venkataraman 		mutex_enter(&ct->ct_mutex);
288*87a18d3fSMadhavan Venkataraman 		if (max < ct->ct_heap_max) {
289*87a18d3fSMadhavan Venkataraman 			/*
290*87a18d3fSMadhavan Venkataraman 			 * Someone beat us to the allocation. Free what we
291*87a18d3fSMadhavan Venkataraman 			 * just allocated and proceed.
292*87a18d3fSMadhavan Venkataraman 			 */
293*87a18d3fSMadhavan Venkataraman 			kmem_free(heap, size);
294*87a18d3fSMadhavan Venkataraman 			continue;
295*87a18d3fSMadhavan Venkataraman 		}
296*87a18d3fSMadhavan Venkataraman 
297*87a18d3fSMadhavan Venkataraman 		bcopy(ct->ct_heap, heap, osize);
298*87a18d3fSMadhavan Venkataraman 		kmem_free(ct->ct_heap, osize);
299*87a18d3fSMadhavan Venkataraman 		ct->ct_heap = heap;
300*87a18d3fSMadhavan Venkataraman 		ct->ct_heap_max = size / sizeof (hrtime_t);
301*87a18d3fSMadhavan Venkataraman 	}
302*87a18d3fSMadhavan Venkataraman }
303*87a18d3fSMadhavan Venkataraman 
304*87a18d3fSMadhavan Venkataraman /*
305*87a18d3fSMadhavan Venkataraman  * Move an expiration from the bottom of the heap to its correct place
306*87a18d3fSMadhavan Venkataraman  * in the heap. If we reached the root doing this, return 1. Else,
307*87a18d3fSMadhavan Venkataraman  * return 0.
308*87a18d3fSMadhavan Venkataraman  */
309*87a18d3fSMadhavan Venkataraman static int
310*87a18d3fSMadhavan Venkataraman callout_upheap(callout_table_t *ct)
311*87a18d3fSMadhavan Venkataraman {
312*87a18d3fSMadhavan Venkataraman 	int current, parent;
313*87a18d3fSMadhavan Venkataraman 	hrtime_t *heap, current_expiration, parent_expiration;
314*87a18d3fSMadhavan Venkataraman 
315*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
316*87a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap_num >= 1);
317*87a18d3fSMadhavan Venkataraman 
318*87a18d3fSMadhavan Venkataraman 	if (ct->ct_heap_num == 1) {
319*87a18d3fSMadhavan Venkataraman 		return (1);
320*87a18d3fSMadhavan Venkataraman 	}
321*87a18d3fSMadhavan Venkataraman 
322*87a18d3fSMadhavan Venkataraman 	heap = ct->ct_heap;
323*87a18d3fSMadhavan Venkataraman 	current = ct->ct_heap_num - 1;
324*87a18d3fSMadhavan Venkataraman 
325*87a18d3fSMadhavan Venkataraman 	for (;;) {
326*87a18d3fSMadhavan Venkataraman 		parent = CALLOUT_HEAP_PARENT(current);
327*87a18d3fSMadhavan Venkataraman 		current_expiration = heap[current];
328*87a18d3fSMadhavan Venkataraman 		parent_expiration = heap[parent];
329*87a18d3fSMadhavan Venkataraman 
330*87a18d3fSMadhavan Venkataraman 		/*
331*87a18d3fSMadhavan Venkataraman 		 * We have an expiration later than our parent; we're done.
332*87a18d3fSMadhavan Venkataraman 		 */
333*87a18d3fSMadhavan Venkataraman 		if (current_expiration >= parent_expiration) {
334*87a18d3fSMadhavan Venkataraman 			return (0);
335*87a18d3fSMadhavan Venkataraman 		}
336*87a18d3fSMadhavan Venkataraman 
337*87a18d3fSMadhavan Venkataraman 		/*
338*87a18d3fSMadhavan Venkataraman 		 * We need to swap with our parent, and continue up the heap.
339*87a18d3fSMadhavan Venkataraman 		 */
340*87a18d3fSMadhavan Venkataraman 		heap[parent] = current_expiration;
341*87a18d3fSMadhavan Venkataraman 		heap[current] = parent_expiration;
342*87a18d3fSMadhavan Venkataraman 
343*87a18d3fSMadhavan Venkataraman 		/*
344*87a18d3fSMadhavan Venkataraman 		 * If we just reached the root, we're done.
345*87a18d3fSMadhavan Venkataraman 		 */
346*87a18d3fSMadhavan Venkataraman 		if (parent == 0) {
347*87a18d3fSMadhavan Venkataraman 			return (1);
348*87a18d3fSMadhavan Venkataraman 		}
349*87a18d3fSMadhavan Venkataraman 
350*87a18d3fSMadhavan Venkataraman 		current = parent;
351*87a18d3fSMadhavan Venkataraman 	}
352*87a18d3fSMadhavan Venkataraman 	/*NOTREACHED*/
353*87a18d3fSMadhavan Venkataraman }
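
/*
 * For reference, the heap index macros used above presumably follow the
 * usual zero-based binary-heap convention (their actual definitions live
 * in <sys/callo.h>):
 *
 *	CALLOUT_HEAP_PARENT(i)	(((i) - 1) / 2)
 *	CALLOUT_HEAP_LEFT(i)	((i) * 2 + 1)
 *	CALLOUT_HEAP_RIGHT(i)	((i) * 2 + 2)
 *
 * Example: upheaping a new expiration of 10 into the heap {20, 30, 40}
 * starts it at index 3, swaps it with its parent at index 1 (30) and then
 * with the root (20), leaving {10, 20, 40, 30} and returning 1 so the
 * caller knows the earliest expiration changed.
 */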
354*87a18d3fSMadhavan Venkataraman 
355*87a18d3fSMadhavan Venkataraman /*
356*87a18d3fSMadhavan Venkataraman  * Insert a new, unique expiration into a callout table's heap.
357*87a18d3fSMadhavan Venkataraman  */
358*87a18d3fSMadhavan Venkataraman static void
359*87a18d3fSMadhavan Venkataraman callout_heap_insert(callout_table_t *ct, hrtime_t expiration)
360*87a18d3fSMadhavan Venkataraman {
361*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
362*87a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap_num < ct->ct_heap_max);
363*87a18d3fSMadhavan Venkataraman 
364*87a18d3fSMadhavan Venkataraman 	/*
365*87a18d3fSMadhavan Venkataraman 	 * First, copy the expiration to the bottom of the heap.
366*87a18d3fSMadhavan Venkataraman 	 */
367*87a18d3fSMadhavan Venkataraman 	ct->ct_heap[ct->ct_heap_num] = expiration;
368*87a18d3fSMadhavan Venkataraman 	ct->ct_heap_num++;
369*87a18d3fSMadhavan Venkataraman 
370*87a18d3fSMadhavan Venkataraman 	/*
371*87a18d3fSMadhavan Venkataraman 	 * Now, perform an upheap operation. If we reached the root, then
372*87a18d3fSMadhavan Venkataraman 	 * the cyclic needs to be reprogrammed as we have an earlier
373*87a18d3fSMadhavan Venkataraman 	 * expiration.
374*87a18d3fSMadhavan Venkataraman 	 *
375*87a18d3fSMadhavan Venkataraman 	 * Also, during the CPR suspend phase, do not reprogram the cyclic.
376*87a18d3fSMadhavan Venkataraman 	 * We don't want any callout activity. When the CPR resume phase is
377*87a18d3fSMadhavan Venkataraman 	 * entered, the cyclic will be programmed for the earliest expiration
378*87a18d3fSMadhavan Venkataraman 	 * in the heap.
379*87a18d3fSMadhavan Venkataraman 	 */
380*87a18d3fSMadhavan Venkataraman 	if (callout_upheap(ct) && !(ct->ct_flags & CALLOUT_TABLE_SUSPENDED))
381*87a18d3fSMadhavan Venkataraman 		(void) cyclic_reprogram(ct->ct_cyclic, expiration);
382*87a18d3fSMadhavan Venkataraman }
383*87a18d3fSMadhavan Venkataraman 
384*87a18d3fSMadhavan Venkataraman /*
385*87a18d3fSMadhavan Venkataraman  * Move an expiration from the top of the heap to its correct place
386*87a18d3fSMadhavan Venkataraman  * in the heap.
387*87a18d3fSMadhavan Venkataraman  */
388*87a18d3fSMadhavan Venkataraman static void
389*87a18d3fSMadhavan Venkataraman callout_downheap(callout_table_t *ct)
390*87a18d3fSMadhavan Venkataraman {
391*87a18d3fSMadhavan Venkataraman 	int left, right, current, nelems;
392*87a18d3fSMadhavan Venkataraman 	hrtime_t *heap, left_expiration, right_expiration, current_expiration;
393*87a18d3fSMadhavan Venkataraman 
394*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
395*87a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap_num >= 1);
396*87a18d3fSMadhavan Venkataraman 
397*87a18d3fSMadhavan Venkataraman 	heap = ct->ct_heap;
398*87a18d3fSMadhavan Venkataraman 	current = 0;
399*87a18d3fSMadhavan Venkataraman 	nelems = ct->ct_heap_num;
400*87a18d3fSMadhavan Venkataraman 
401*87a18d3fSMadhavan Venkataraman 	for (;;) {
402*87a18d3fSMadhavan Venkataraman 		/*
403*87a18d3fSMadhavan Venkataraman 		 * If we don't have a left child (i.e., we're a leaf), we're
404*87a18d3fSMadhavan Venkataraman 		 * done.
405*87a18d3fSMadhavan Venkataraman 		 */
406*87a18d3fSMadhavan Venkataraman 		if ((left = CALLOUT_HEAP_LEFT(current)) >= nelems)
407*87a18d3fSMadhavan Venkataraman 			return;
408*87a18d3fSMadhavan Venkataraman 
409*87a18d3fSMadhavan Venkataraman 		left_expiration = heap[left];
410*87a18d3fSMadhavan Venkataraman 		current_expiration = heap[current];
411*87a18d3fSMadhavan Venkataraman 
412*87a18d3fSMadhavan Venkataraman 		right = CALLOUT_HEAP_RIGHT(current);
413*87a18d3fSMadhavan Venkataraman 
414*87a18d3fSMadhavan Venkataraman 		/*
415*87a18d3fSMadhavan Venkataraman 		 * Even if we don't have a right child, we still need to compare
416*87a18d3fSMadhavan Venkataraman 		 * our expiration against that of our left child.
417*87a18d3fSMadhavan Venkataraman 		 */
418*87a18d3fSMadhavan Venkataraman 		if (right >= nelems)
419*87a18d3fSMadhavan Venkataraman 			goto comp_left;
420*87a18d3fSMadhavan Venkataraman 
421*87a18d3fSMadhavan Venkataraman 		right_expiration = heap[right];
422*87a18d3fSMadhavan Venkataraman 
423*87a18d3fSMadhavan Venkataraman 		/*
424*87a18d3fSMadhavan Venkataraman 		 * We have both a left and a right child.  We need to compare
425*87a18d3fSMadhavan Venkataraman 		 * the expiration of the children to determine which
426*87a18d3fSMadhavan Venkataraman 		 * expires earlier.
427*87a18d3fSMadhavan Venkataraman 		 */
428*87a18d3fSMadhavan Venkataraman 		if (right_expiration < left_expiration) {
429*87a18d3fSMadhavan Venkataraman 			/*
430*87a18d3fSMadhavan Venkataraman 			 * Our right child is the earlier of our children.
431*87a18d3fSMadhavan Venkataraman 			 * We'll now compare our expiration to its expiration.
432*87a18d3fSMadhavan Venkataraman 			 * If ours is the earlier one, we're done.
433*87a18d3fSMadhavan Venkataraman 			 */
434*87a18d3fSMadhavan Venkataraman 			if (current_expiration <= right_expiration)
435*87a18d3fSMadhavan Venkataraman 				return;
436*87a18d3fSMadhavan Venkataraman 
437*87a18d3fSMadhavan Venkataraman 			/*
438*87a18d3fSMadhavan Venkataraman 			 * Our right child expires earlier than we do; swap
439*87a18d3fSMadhavan Venkataraman 			 * with our right child, and descend right.
440*87a18d3fSMadhavan Venkataraman 			 */
441*87a18d3fSMadhavan Venkataraman 			heap[right] = current_expiration;
442*87a18d3fSMadhavan Venkataraman 			heap[current] = right_expiration;
443*87a18d3fSMadhavan Venkataraman 			current = right;
444*87a18d3fSMadhavan Venkataraman 			continue;
445*87a18d3fSMadhavan Venkataraman 		}
446*87a18d3fSMadhavan Venkataraman 
447*87a18d3fSMadhavan Venkataraman comp_left:
448*87a18d3fSMadhavan Venkataraman 		/*
449*87a18d3fSMadhavan Venkataraman 		 * Our left child is the earlier of our children (or we have
450*87a18d3fSMadhavan Venkataraman 		 * no right child).  We'll now compare our expiration
451*87a18d3fSMadhavan Venkataraman 		 * to its expiration. If ours is the earlier one, we're done.
452*87a18d3fSMadhavan Venkataraman 		 */
453*87a18d3fSMadhavan Venkataraman 		if (current_expiration <= left_expiration)
454*87a18d3fSMadhavan Venkataraman 			return;
455*87a18d3fSMadhavan Venkataraman 
456*87a18d3fSMadhavan Venkataraman 		/*
457*87a18d3fSMadhavan Venkataraman 		 * Our left child expires earlier than we do; swap with our
458*87a18d3fSMadhavan Venkataraman 		 * left child, and descend left.
459*87a18d3fSMadhavan Venkataraman 		 */
460*87a18d3fSMadhavan Venkataraman 		heap[left] = current_expiration;
461*87a18d3fSMadhavan Venkataraman 		heap[current] = left_expiration;
462*87a18d3fSMadhavan Venkataraman 		current = left;
463*87a18d3fSMadhavan Venkataraman 	}
464*87a18d3fSMadhavan Venkataraman }
465*87a18d3fSMadhavan Venkataraman 
466*87a18d3fSMadhavan Venkataraman /*
467*87a18d3fSMadhavan Venkataraman  * Delete and handle all past expirations in a callout table's heap.
468*87a18d3fSMadhavan Venkataraman  */
469*87a18d3fSMadhavan Venkataraman static void
470*87a18d3fSMadhavan Venkataraman callout_heap_delete(callout_table_t *ct)
471*87a18d3fSMadhavan Venkataraman {
472*87a18d3fSMadhavan Venkataraman 	hrtime_t now, expiration;
473*87a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
474*87a18d3fSMadhavan Venkataraman 	int hash;
475*87a18d3fSMadhavan Venkataraman 
476*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
477*87a18d3fSMadhavan Venkataraman 
478*87a18d3fSMadhavan Venkataraman 	now = gethrtime();
479*87a18d3fSMadhavan Venkataraman 
480*87a18d3fSMadhavan Venkataraman 	while (ct->ct_heap_num > 0) {
481*87a18d3fSMadhavan Venkataraman 		expiration = ct->ct_heap[0];
482*87a18d3fSMadhavan Venkataraman 		/*
483*87a18d3fSMadhavan Venkataraman 		 * Find the callout list that corresponds to the expiration.
484*87a18d3fSMadhavan Venkataraman 		 * If the callout list is empty, callout_list_check()
485*87a18d3fSMadhavan Venkataraman 		 * will free the callout list and return NULL.
486*87a18d3fSMadhavan Venkataraman 		 */
487*87a18d3fSMadhavan Venkataraman 		hash = CALLOUT_CLHASH(expiration);
488*87a18d3fSMadhavan Venkataraman 		cl = callout_list_check(ct, expiration, hash);
489*87a18d3fSMadhavan Venkataraman 		if (cl != NULL) {
490*87a18d3fSMadhavan Venkataraman 			/*
491*87a18d3fSMadhavan Venkataraman 			 * If the root of the heap expires in the future, we are
492*87a18d3fSMadhavan Venkataraman 			 * done. We are doing this check here instead of at the
493*87a18d3fSMadhavan Venkataraman 			 * beginning because we want to first free all the
494*87a18d3fSMadhavan Venkataraman 			 * empty callout lists at the top of the heap.
495*87a18d3fSMadhavan Venkataraman 			 */
496*87a18d3fSMadhavan Venkataraman 			if (expiration > now)
497*87a18d3fSMadhavan Venkataraman 				break;
498*87a18d3fSMadhavan Venkataraman 
499*87a18d3fSMadhavan Venkataraman 			/*
500*87a18d3fSMadhavan Venkataraman 			 * Move the callout list for this expiration to the
501*87a18d3fSMadhavan Venkataraman 			 * list of expired callout lists. It will be processed
502*87a18d3fSMadhavan Venkataraman 			 * by the callout executor.
503*87a18d3fSMadhavan Venkataraman 			 */
504*87a18d3fSMadhavan Venkataraman 			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
505*87a18d3fSMadhavan Venkataraman 			CALLOUT_LIST_APPEND(ct->ct_expired, cl);
506*87a18d3fSMadhavan Venkataraman 		}
507*87a18d3fSMadhavan Venkataraman 
508*87a18d3fSMadhavan Venkataraman 		/*
509*87a18d3fSMadhavan Venkataraman 		 * Now delete the root. This is done by swapping the root with
510*87a18d3fSMadhavan Venkataraman 		 * the last item in the heap and downheaping the item.
511*87a18d3fSMadhavan Venkataraman 		 */
512*87a18d3fSMadhavan Venkataraman 		ct->ct_heap_num--;
513*87a18d3fSMadhavan Venkataraman 		if (ct->ct_heap_num > 0) {
514*87a18d3fSMadhavan Venkataraman 			ct->ct_heap[0] = ct->ct_heap[ct->ct_heap_num];
515*87a18d3fSMadhavan Venkataraman 			callout_downheap(ct);
516*87a18d3fSMadhavan Venkataraman 		}
517*87a18d3fSMadhavan Venkataraman 	}
518*87a18d3fSMadhavan Venkataraman 
519*87a18d3fSMadhavan Venkataraman 	/*
520*87a18d3fSMadhavan Venkataraman 	 * If this callout table is empty or callouts have been suspended
521*87a18d3fSMadhavan Venkataraman 	 * by CPR, just return. The cyclic has already been programmed to
522*87a18d3fSMadhavan Venkataraman 	 * infinity by the cyclic subsystem.
523*87a18d3fSMadhavan Venkataraman 	 */
524*87a18d3fSMadhavan Venkataraman 	if ((ct->ct_heap_num == 0) || (ct->ct_flags & CALLOUT_TABLE_SUSPENDED))
525*87a18d3fSMadhavan Venkataraman 		return;
526*87a18d3fSMadhavan Venkataraman 
527*87a18d3fSMadhavan Venkataraman 	(void) cyclic_reprogram(ct->ct_cyclic, expiration);
528*87a18d3fSMadhavan Venkataraman }
529*87a18d3fSMadhavan Venkataraman 
530*87a18d3fSMadhavan Venkataraman callout_id_t
531*87a18d3fSMadhavan Venkataraman timeout_generic(int type, void (*func)(void *), void *arg,
532*87a18d3fSMadhavan Venkataraman 	hrtime_t expiration, hrtime_t resolution, int flags)
533*87a18d3fSMadhavan Venkataraman {
534*87a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
5357c478bd9Sstevel@tonic-gate 	callout_t *cp;
5367c478bd9Sstevel@tonic-gate 	callout_id_t id;
537*87a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
538*87a18d3fSMadhavan Venkataraman 	hrtime_t now, interval;
539*87a18d3fSMadhavan Venkataraman 	int hash;
540f635d46aSqiao 
541*87a18d3fSMadhavan Venkataraman 	ASSERT(resolution > 0);
542*87a18d3fSMadhavan Venkataraman 	ASSERT(func != NULL);
5437c478bd9Sstevel@tonic-gate 
544*87a18d3fSMadhavan Venkataraman 	/*
545*87a18d3fSMadhavan Venkataraman 	 * Please see comment about minimum resolution in callout_init().
546*87a18d3fSMadhavan Venkataraman 	 */
547*87a18d3fSMadhavan Venkataraman 	if (resolution < callout_min_resolution)
548*87a18d3fSMadhavan Venkataraman 		resolution = callout_min_resolution;
5497c478bd9Sstevel@tonic-gate 
550*87a18d3fSMadhavan Venkataraman 	/*
551*87a18d3fSMadhavan Venkataraman 	 * We disable kernel preemption so that we remain on the same CPU
552*87a18d3fSMadhavan Venkataraman 	 * throughout. If we needed to reprogram the callout table's cyclic,
553*87a18d3fSMadhavan Venkataraman 	 * we can avoid X-calls if we are on the same CPU.
554*87a18d3fSMadhavan Venkataraman 	 *
555*87a18d3fSMadhavan Venkataraman 	 * Note that callout_alloc() releases and reacquires the callout
556*87a18d3fSMadhavan Venkataraman 	 * table mutex. While reacquiring the mutex, it is possible for us
557*87a18d3fSMadhavan Venkataraman 	 * to go to sleep and later migrate to another CPU. This should be
558*87a18d3fSMadhavan Venkataraman 	 * pretty rare, though.
559*87a18d3fSMadhavan Venkataraman 	 */
560*87a18d3fSMadhavan Venkataraman 	kpreempt_disable();
561*87a18d3fSMadhavan Venkataraman 
562*87a18d3fSMadhavan Venkataraman 	ct = &callout_table[CALLOUT_TABLE(type, CPU->cpu_seqid)];
563*87a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
564*87a18d3fSMadhavan Venkataraman 
565*87a18d3fSMadhavan Venkataraman 	if (ct->ct_cyclic == CYCLIC_NONE) {
566*87a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
567*87a18d3fSMadhavan Venkataraman 		/*
568*87a18d3fSMadhavan Venkataraman 		 * The callout table has not yet been initialized fully.
569*87a18d3fSMadhavan Venkataraman 		 * So, put this one on the boot callout table which is
570*87a18d3fSMadhavan Venkataraman 		 * always initialized.
571*87a18d3fSMadhavan Venkataraman 		 */
572*87a18d3fSMadhavan Venkataraman 		ct = &callout_boot_ct[type];
573*87a18d3fSMadhavan Venkataraman 		mutex_enter(&ct->ct_mutex);
574*87a18d3fSMadhavan Venkataraman 	}
575*87a18d3fSMadhavan Venkataraman 
576*87a18d3fSMadhavan Venkataraman 	if ((cp = ct->ct_free) == NULL)
5777c478bd9Sstevel@tonic-gate 		cp = callout_alloc(ct);
5787c478bd9Sstevel@tonic-gate 	else
579*87a18d3fSMadhavan Venkataraman 		ct->ct_free = cp->c_idnext;
5807c478bd9Sstevel@tonic-gate 
5817c478bd9Sstevel@tonic-gate 	cp->c_func = func;
5827c478bd9Sstevel@tonic-gate 	cp->c_arg = arg;
5837c478bd9Sstevel@tonic-gate 
5847c478bd9Sstevel@tonic-gate 	/*
585*87a18d3fSMadhavan Venkataraman 	 * Compute the expiration hrtime.
586*87a18d3fSMadhavan Venkataraman 	 */
587*87a18d3fSMadhavan Venkataraman 	now = gethrtime();
588*87a18d3fSMadhavan Venkataraman 	if (flags & CALLOUT_FLAG_ABSOLUTE) {
589*87a18d3fSMadhavan Venkataraman 		ASSERT(expiration > 0);
590*87a18d3fSMadhavan Venkataraman 		interval = expiration - now;
591*87a18d3fSMadhavan Venkataraman 	} else {
592*87a18d3fSMadhavan Venkataraman 		interval = expiration;
593*87a18d3fSMadhavan Venkataraman 		expiration += now;
594*87a18d3fSMadhavan Venkataraman 		ASSERT(expiration > 0);
595*87a18d3fSMadhavan Venkataraman 	}
596*87a18d3fSMadhavan Venkataraman 	if (flags & CALLOUT_FLAG_ROUNDUP)
597*87a18d3fSMadhavan Venkataraman 		expiration += resolution - 1;
598*87a18d3fSMadhavan Venkataraman 	expiration = (expiration / resolution) * resolution;
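
	/*
	 * For example (illustrative numbers): with resolution = 10ms, now =
	 * 25ms and a relative expiration of 12ms, the absolute expiration
	 * computed above is 37ms.  Without CALLOUT_FLAG_ROUNDUP the division
	 * quantizes it down to 30ms; with ROUNDUP, adding (resolution - 1)
	 * first makes it quantize up to 40ms.
	 */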
599*87a18d3fSMadhavan Venkataraman 
600*87a18d3fSMadhavan Venkataraman 	/*
601*87a18d3fSMadhavan Venkataraman 	 * Assign an ID to this callout
602*87a18d3fSMadhavan Venkataraman 	 */
603*87a18d3fSMadhavan Venkataraman 	if (flags & CALLOUT_FLAG_32BIT) {
604*87a18d3fSMadhavan Venkataraman 		if (interval > callout_longterm) {
605*87a18d3fSMadhavan Venkataraman 			id = (ct->ct_long_id - callout_counter_low);
606*87a18d3fSMadhavan Venkataraman 			id |= CALLOUT_COUNTER_HIGH;
607*87a18d3fSMadhavan Venkataraman 			ct->ct_long_id = id;
608*87a18d3fSMadhavan Venkataraman 		} else {
609*87a18d3fSMadhavan Venkataraman 			id = (ct->ct_short_id - callout_counter_low);
610*87a18d3fSMadhavan Venkataraman 			id |= CALLOUT_COUNTER_HIGH;
611*87a18d3fSMadhavan Venkataraman 			ct->ct_short_id = id;
612*87a18d3fSMadhavan Venkataraman 		}
613*87a18d3fSMadhavan Venkataraman 	} else {
614*87a18d3fSMadhavan Venkataraman 		id = (ct->ct_gen_id - callout_counter_low);
615*87a18d3fSMadhavan Venkataraman 		if ((id & CALLOUT_COUNTER_HIGH) == 0) {
616*87a18d3fSMadhavan Venkataraman 			id |= CALLOUT_COUNTER_HIGH;
617*87a18d3fSMadhavan Venkataraman 			id += CALLOUT_GENERATION_LOW;
618*87a18d3fSMadhavan Venkataraman 		}
619*87a18d3fSMadhavan Venkataraman 		ct->ct_gen_id = id;
620*87a18d3fSMadhavan Venkataraman 	}
621*87a18d3fSMadhavan Venkataraman 
622*87a18d3fSMadhavan Venkataraman 	cp->c_xid = id;
623*87a18d3fSMadhavan Venkataraman 	if (flags & CALLOUT_FLAG_HRESTIME)
624*87a18d3fSMadhavan Venkataraman 		cp->c_xid |= CALLOUT_HRESTIME;
625*87a18d3fSMadhavan Venkataraman 
626*87a18d3fSMadhavan Venkataraman 	hash = CALLOUT_CLHASH(expiration);
627*87a18d3fSMadhavan Venkataraman 
628*87a18d3fSMadhavan Venkataraman again:
629*87a18d3fSMadhavan Venkataraman 	/*
630*87a18d3fSMadhavan Venkataraman 	 * Try to see if a callout list already exists for this expiration.
631*87a18d3fSMadhavan Venkataraman 	 * Most of the time, this will be the case.
632*87a18d3fSMadhavan Venkataraman 	 */
633*87a18d3fSMadhavan Venkataraman 	cl = callout_list_get(ct, expiration, hash);
634*87a18d3fSMadhavan Venkataraman 	if (cl == NULL) {
635*87a18d3fSMadhavan Venkataraman 		/*
636*87a18d3fSMadhavan Venkataraman 		 * Check if we have enough space in the heap to insert one
637*87a18d3fSMadhavan Venkataraman 		 * expiration. If not, expand the heap.
638*87a18d3fSMadhavan Venkataraman 		 */
639*87a18d3fSMadhavan Venkataraman 		if (ct->ct_heap_num == ct->ct_heap_max) {
640*87a18d3fSMadhavan Venkataraman 			callout_heap_expand(ct);
641*87a18d3fSMadhavan Venkataraman 			/*
642*87a18d3fSMadhavan Venkataraman 			 * In the above call, we drop the lock, allocate and
643*87a18d3fSMadhavan Venkataraman 			 * reacquire the lock. So, we could have been away
644*87a18d3fSMadhavan Venkataraman 			 * for a while. In the meantime, someone could have
645*87a18d3fSMadhavan Venkataraman 			 * inserted a callout list with the same expiration.
646*87a18d3fSMadhavan Venkataraman 			 * So, the best course is to repeat the steps. This
647*87a18d3fSMadhavan Venkataraman 			 * should be an infrequent event.
648*87a18d3fSMadhavan Venkataraman 			 */
649*87a18d3fSMadhavan Venkataraman 			goto again;
650*87a18d3fSMadhavan Venkataraman 		}
651*87a18d3fSMadhavan Venkataraman 
652*87a18d3fSMadhavan Venkataraman 		/*
653*87a18d3fSMadhavan Venkataraman 		 * Check the free list. If we don't find one, we have to
654*87a18d3fSMadhavan Venkataraman 		 * take the slow path and allocate from kmem.
655*87a18d3fSMadhavan Venkataraman 		 */
656*87a18d3fSMadhavan Venkataraman 		if ((cl = ct->ct_lfree) == NULL) {
657*87a18d3fSMadhavan Venkataraman 			callout_list_alloc(ct);
658*87a18d3fSMadhavan Venkataraman 			/*
659*87a18d3fSMadhavan Venkataraman 			 * In the above call, we drop the lock, allocate and
660*87a18d3fSMadhavan Venkataraman 			 * reacquire the lock. So, we could have been away
661*87a18d3fSMadhavan Venkataraman 			 * for a while. In the meantime, someone could have
662*87a18d3fSMadhavan Venkataraman 			 * inserted a callout list with the same expiration.
663*87a18d3fSMadhavan Venkataraman 			 * Plus, the heap could have become full. So, the best
664*87a18d3fSMadhavan Venkataraman 			 * course is to repeat the steps. This should be an
665*87a18d3fSMadhavan Venkataraman 			 * infrequent event.
666*87a18d3fSMadhavan Venkataraman 			 */
667*87a18d3fSMadhavan Venkataraman 			goto again;
668*87a18d3fSMadhavan Venkataraman 		}
669*87a18d3fSMadhavan Venkataraman 		ct->ct_lfree = cl->cl_next;
670*87a18d3fSMadhavan Venkataraman 		cl->cl_expiration = expiration;
671*87a18d3fSMadhavan Venkataraman 
672*87a18d3fSMadhavan Venkataraman 		CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl);
673*87a18d3fSMadhavan Venkataraman 
674*87a18d3fSMadhavan Venkataraman 		/*
675*87a18d3fSMadhavan Venkataraman 		 * This is a new expiration. So, insert it into the heap.
676*87a18d3fSMadhavan Venkataraman 		 * This will also reprogram the cyclic, if the expiration
677*87a18d3fSMadhavan Venkataraman 		 * propagated to the root of the heap.
678*87a18d3fSMadhavan Venkataraman 		 */
679*87a18d3fSMadhavan Venkataraman 		callout_heap_insert(ct, expiration);
680*87a18d3fSMadhavan Venkataraman 	}
681*87a18d3fSMadhavan Venkataraman 	cp->c_list = cl;
682*87a18d3fSMadhavan Venkataraman 	CALLOUT_APPEND(ct, cp);
683*87a18d3fSMadhavan Venkataraman 
684*87a18d3fSMadhavan Venkataraman 	ct->ct_timeouts++;
685*87a18d3fSMadhavan Venkataraman 	ct->ct_timeouts_pending++;
686*87a18d3fSMadhavan Venkataraman 
687*87a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
688*87a18d3fSMadhavan Venkataraman 
689*87a18d3fSMadhavan Venkataraman 	kpreempt_enable();
690*87a18d3fSMadhavan Venkataraman 
691*87a18d3fSMadhavan Venkataraman 	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
692*87a18d3fSMadhavan Venkataraman 	    "timeout:%K(%p) in %llx expiration, cp %p", func, arg, expiration,
693*87a18d3fSMadhavan Venkataraman 	    cp);
694*87a18d3fSMadhavan Venkataraman 
695*87a18d3fSMadhavan Venkataraman 	return (id);
696*87a18d3fSMadhavan Venkataraman }
697*87a18d3fSMadhavan Venkataraman 
698*87a18d3fSMadhavan Venkataraman timeout_id_t
699*87a18d3fSMadhavan Venkataraman timeout(void (*func)(void *), void *arg, clock_t delta)
700*87a18d3fSMadhavan Venkataraman {
701*87a18d3fSMadhavan Venkataraman 	ulong_t id;
702*87a18d3fSMadhavan Venkataraman 
703*87a18d3fSMadhavan Venkataraman 	/*
7047c478bd9Sstevel@tonic-gate 	 * Make sure the callout runs at least 1 tick in the future.
7057c478bd9Sstevel@tonic-gate 	 */
7067c478bd9Sstevel@tonic-gate 	if (delta <= 0)
7077c478bd9Sstevel@tonic-gate 		delta = 1;
7087c478bd9Sstevel@tonic-gate 
709*87a18d3fSMadhavan Venkataraman 	id =  (ulong_t)timeout_generic(CALLOUT_NORMAL, func, arg,
710*87a18d3fSMadhavan Venkataraman 	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);
7117c478bd9Sstevel@tonic-gate 
7127c478bd9Sstevel@tonic-gate 	return ((timeout_id_t)id);
7137c478bd9Sstevel@tonic-gate }
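
/*
 * Illustrative driver-side usage of the legacy timeout()/untimeout()
 * interface above.  This sketch is not part of callout.c; the softc type,
 * the function names and the one-second period are all hypothetical.
 */
#ifdef CALLOUT_USAGE_EXAMPLE
typedef struct xx_softc {
	kmutex_t	xx_lock;
	timeout_id_t	xx_tid;		/* outstanding timeout, or 0 */
} xx_softc_t;

static void
xx_tick(void *arg)
{
	xx_softc_t *sc = arg;

	mutex_enter(&sc->xx_lock);
	/* ... periodic work ... */
	if (sc->xx_tid != 0) {
		/* Still active; rearm for one second later. */
		sc->xx_tid = timeout(xx_tick, sc, drv_usectohz(1000000));
	}
	mutex_exit(&sc->xx_lock);
}

static void
xx_stop(xx_softc_t *sc)
{
	timeout_id_t tid;

	/* Drop xx_lock before untimeout() to avoid deadlock with xx_tick. */
	mutex_enter(&sc->xx_lock);
	tid = sc->xx_tid;
	sc->xx_tid = 0;
	mutex_exit(&sc->xx_lock);

	if (tid != 0)
		(void) untimeout(tid);	/* waits if xx_tick is executing */
}
#endif	/* CALLOUT_USAGE_EXAMPLE */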
7147c478bd9Sstevel@tonic-gate 
715*87a18d3fSMadhavan Venkataraman /*
716*87a18d3fSMadhavan Venkataraman  * Convenience function that creates a normal callout with default parameters
717*87a18d3fSMadhavan Venkataraman  * and returns a full ID.
718*87a18d3fSMadhavan Venkataraman  */
719*87a18d3fSMadhavan Venkataraman callout_id_t
720*87a18d3fSMadhavan Venkataraman timeout_default(void (*func)(void *), void *arg, clock_t delta)
7217c478bd9Sstevel@tonic-gate {
722*87a18d3fSMadhavan Venkataraman 	callout_id_t id;
7237c478bd9Sstevel@tonic-gate 
724*87a18d3fSMadhavan Venkataraman 	/*
725*87a18d3fSMadhavan Venkataraman 	 * Make sure the callout runs at least 1 tick in the future.
726*87a18d3fSMadhavan Venkataraman 	 */
727*87a18d3fSMadhavan Venkataraman 	if (delta <= 0)
728*87a18d3fSMadhavan Venkataraman 		delta = 1;
729*87a18d3fSMadhavan Venkataraman 
730*87a18d3fSMadhavan Venkataraman 	id = timeout_generic(CALLOUT_NORMAL, func, arg, TICK_TO_NSEC(delta),
731*87a18d3fSMadhavan Venkataraman 	    nsec_per_tick, 0);
732*87a18d3fSMadhavan Venkataraman 
733*87a18d3fSMadhavan Venkataraman 	return (id);
7347c478bd9Sstevel@tonic-gate }
7357c478bd9Sstevel@tonic-gate 
7367c478bd9Sstevel@tonic-gate timeout_id_t
7377c478bd9Sstevel@tonic-gate realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
7387c478bd9Sstevel@tonic-gate {
739*87a18d3fSMadhavan Venkataraman 	ulong_t id;
740*87a18d3fSMadhavan Venkataraman 
741*87a18d3fSMadhavan Venkataraman 	/*
742*87a18d3fSMadhavan Venkataraman 	 * Make sure the callout runs at least 1 tick in the future.
743*87a18d3fSMadhavan Venkataraman 	 */
744*87a18d3fSMadhavan Venkataraman 	if (delta <= 0)
745*87a18d3fSMadhavan Venkataraman 		delta = 1;
746*87a18d3fSMadhavan Venkataraman 
747*87a18d3fSMadhavan Venkataraman 	id =  (ulong_t)timeout_generic(CALLOUT_REALTIME, func, arg,
748*87a18d3fSMadhavan Venkataraman 	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);
749*87a18d3fSMadhavan Venkataraman 
750*87a18d3fSMadhavan Venkataraman 	return ((timeout_id_t)id);
7517c478bd9Sstevel@tonic-gate }
7527c478bd9Sstevel@tonic-gate 
753*87a18d3fSMadhavan Venkataraman /*
754*87a18d3fSMadhavan Venkataraman  * Convenience function that creates a realtime callout with default parameters
755*87a18d3fSMadhavan Venkataraman  * and returns a full ID.
756*87a18d3fSMadhavan Venkataraman  */
757*87a18d3fSMadhavan Venkataraman callout_id_t
758*87a18d3fSMadhavan Venkataraman realtime_timeout_default(void (*func)(void *), void *arg, clock_t delta)
7597c478bd9Sstevel@tonic-gate {
760*87a18d3fSMadhavan Venkataraman 	callout_id_t id;
761*87a18d3fSMadhavan Venkataraman 
762*87a18d3fSMadhavan Venkataraman 	/*
763*87a18d3fSMadhavan Venkataraman 	 * Make sure the callout runs at least 1 tick in the future.
764*87a18d3fSMadhavan Venkataraman 	 */
765*87a18d3fSMadhavan Venkataraman 	if (delta <= 0)
766*87a18d3fSMadhavan Venkataraman 		delta = 1;
767*87a18d3fSMadhavan Venkataraman 
768*87a18d3fSMadhavan Venkataraman 	id = timeout_generic(CALLOUT_REALTIME, func, arg, TICK_TO_NSEC(delta),
769*87a18d3fSMadhavan Venkataraman 	    nsec_per_tick, 0);
770*87a18d3fSMadhavan Venkataraman 
771*87a18d3fSMadhavan Venkataraman 	return (id);
772*87a18d3fSMadhavan Venkataraman }
773*87a18d3fSMadhavan Venkataraman 
774*87a18d3fSMadhavan Venkataraman hrtime_t
775*87a18d3fSMadhavan Venkataraman untimeout_generic(callout_id_t id, int nowait)
776*87a18d3fSMadhavan Venkataraman {
7777c478bd9Sstevel@tonic-gate 	callout_table_t *ct;
7787c478bd9Sstevel@tonic-gate 	callout_t *cp;
7797c478bd9Sstevel@tonic-gate 	callout_id_t xid;
780*87a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
781*87a18d3fSMadhavan Venkataraman 	int hash;
782*87a18d3fSMadhavan Venkataraman 	callout_id_t bogus;
7837c478bd9Sstevel@tonic-gate 
784*87a18d3fSMadhavan Venkataraman 	ct = &callout_table[CALLOUT_ID_TO_TABLE(id)];
785*87a18d3fSMadhavan Venkataraman 	hash = CALLOUT_IDHASH(id);
7867c478bd9Sstevel@tonic-gate 
787*87a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
7887c478bd9Sstevel@tonic-gate 
789*87a18d3fSMadhavan Venkataraman 	/*
790*87a18d3fSMadhavan Venkataraman 	 * Search the ID hash table for the callout.
791*87a18d3fSMadhavan Venkataraman 	 */
792*87a18d3fSMadhavan Venkataraman 	for (cp = ct->ct_idhash[hash].ch_head; cp; cp = cp->c_idnext) {
7937c478bd9Sstevel@tonic-gate 
794*87a18d3fSMadhavan Venkataraman 		xid = cp->c_xid;
7957c478bd9Sstevel@tonic-gate 
796*87a18d3fSMadhavan Venkataraman 		/*
797*87a18d3fSMadhavan Venkataraman 		 * Match the ID and generation number.
798*87a18d3fSMadhavan Venkataraman 		 */
799*87a18d3fSMadhavan Venkataraman 		if ((xid & CALLOUT_ID_MASK) != id)
8007c478bd9Sstevel@tonic-gate 			continue;
8017c478bd9Sstevel@tonic-gate 
802*87a18d3fSMadhavan Venkataraman 		cl = cp->c_list;
803*87a18d3fSMadhavan Venkataraman 		if ((xid & CALLOUT_EXECUTING) == 0) {
804*87a18d3fSMadhavan Venkataraman 			hrtime_t expiration;
805*87a18d3fSMadhavan Venkataraman 
806*87a18d3fSMadhavan Venkataraman 			/*
807*87a18d3fSMadhavan Venkataraman 			 * Delete the callout. If the callout list becomes
808*87a18d3fSMadhavan Venkataraman 			 * NULL, we don't remove it from the table. This is
809*87a18d3fSMadhavan Venkataraman 			 * so it can be reused. If the empty callout list
810*87a18d3fSMadhavan Venkataraman 			 * corresponds to the top of the callout heap, we
811*87a18d3fSMadhavan Venkataraman 			 * don't reprogram the table cyclic here. This is in
812*87a18d3fSMadhavan Venkataraman 			 * order to avoid lots of X-calls to the CPU associated
813*87a18d3fSMadhavan Venkataraman 			 * with the callout table.
814*87a18d3fSMadhavan Venkataraman 			 */
815*87a18d3fSMadhavan Venkataraman 			expiration = cl->cl_expiration;
816*87a18d3fSMadhavan Venkataraman 			CALLOUT_DELETE(ct, cp);
817*87a18d3fSMadhavan Venkataraman 			cp->c_idnext = ct->ct_free;
818*87a18d3fSMadhavan Venkataraman 			ct->ct_free = cp;
819*87a18d3fSMadhavan Venkataraman 			ct->ct_untimeouts_unexpired++;
820*87a18d3fSMadhavan Venkataraman 			ct->ct_timeouts_pending--;
821*87a18d3fSMadhavan Venkataraman 			mutex_exit(&ct->ct_mutex);
822*87a18d3fSMadhavan Venkataraman 
823*87a18d3fSMadhavan Venkataraman 			expiration -= gethrtime();
824*87a18d3fSMadhavan Venkataraman 			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
825*87a18d3fSMadhavan Venkataraman 			    "untimeout:ID %lx hrtime left %llx", id,
826*87a18d3fSMadhavan Venkataraman 			    expiration);
827*87a18d3fSMadhavan Venkataraman 			return (expiration < 0 ? 0 : expiration);
828*87a18d3fSMadhavan Venkataraman 		}
829*87a18d3fSMadhavan Venkataraman 
830*87a18d3fSMadhavan Venkataraman 		ct->ct_untimeouts_executing++;
8317c478bd9Sstevel@tonic-gate 		/*
8327c478bd9Sstevel@tonic-gate 		 * The callout we want to delete is currently executing.
8337c478bd9Sstevel@tonic-gate 		 * The DDI states that we must wait until the callout
834*87a18d3fSMadhavan Venkataraman 		 * completes before returning, so we block on cl_done until the
835*87a18d3fSMadhavan Venkataraman 		 * callout ID changes (to the old ID if it's on the freelist,
8367c478bd9Sstevel@tonic-gate 		 * or to a new callout ID if it's in use).  This implicitly
8377c478bd9Sstevel@tonic-gate 		 * assumes that callout structures are persistent (they are).
8387c478bd9Sstevel@tonic-gate 		 */
839*87a18d3fSMadhavan Venkataraman 		if (cl->cl_executor == curthread) {
8407c478bd9Sstevel@tonic-gate 			/*
8417c478bd9Sstevel@tonic-gate 			 * The timeout handler called untimeout() on itself.
8427c478bd9Sstevel@tonic-gate 			 * Stupid, but legal.  We can't wait for the timeout
8437c478bd9Sstevel@tonic-gate 			 * to complete without deadlocking, so we just return.
8447c478bd9Sstevel@tonic-gate 			 */
845*87a18d3fSMadhavan Venkataraman 			mutex_exit(&ct->ct_mutex);
8467c478bd9Sstevel@tonic-gate 			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
8477c478bd9Sstevel@tonic-gate 			    "untimeout_self:ID %x", id);
8487c478bd9Sstevel@tonic-gate 			return (-1);
8497c478bd9Sstevel@tonic-gate 		}
850*87a18d3fSMadhavan Venkataraman 		if (nowait == 0) {
851*87a18d3fSMadhavan Venkataraman 			/*
852*87a18d3fSMadhavan Venkataraman 			 * We need to wait. Indicate that we are waiting by
853*87a18d3fSMadhavan Venkataraman 			 * setting cl_waiting. This prevents the executor
854*87a18d3fSMadhavan Venkataraman 			 * from doing a wakeup on cl_done if there are no
855*87a18d3fSMadhavan Venkataraman 			 * waiters.
856*87a18d3fSMadhavan Venkataraman 			 */
857*87a18d3fSMadhavan Venkataraman 			while (cp->c_xid == xid) {
858*87a18d3fSMadhavan Venkataraman 				cl->cl_waiting = 1;
859*87a18d3fSMadhavan Venkataraman 				cv_wait(&cl->cl_done, &ct->ct_mutex);
860*87a18d3fSMadhavan Venkataraman 			}
861*87a18d3fSMadhavan Venkataraman 		}
862*87a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
8637c478bd9Sstevel@tonic-gate 		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
8647c478bd9Sstevel@tonic-gate 		    "untimeout_executing:ID %lx", id);
8657c478bd9Sstevel@tonic-gate 		return (-1);
8667c478bd9Sstevel@tonic-gate 	}
867*87a18d3fSMadhavan Venkataraman 	ct->ct_untimeouts_expired++;
8687c478bd9Sstevel@tonic-gate 
869*87a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
8707c478bd9Sstevel@tonic-gate 	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
8717c478bd9Sstevel@tonic-gate 	    "untimeout_bogus_id:ID %lx", id);
8727c478bd9Sstevel@tonic-gate 
8737c478bd9Sstevel@tonic-gate 	/*
8747c478bd9Sstevel@tonic-gate 	 * We didn't find the specified callout ID.  This means either
8757c478bd9Sstevel@tonic-gate 	 * (1) the callout already fired, or (2) the caller passed us
8767c478bd9Sstevel@tonic-gate 	 * a bogus value.  Perform a sanity check to detect case (2).
8777c478bd9Sstevel@tonic-gate 	 */
878*87a18d3fSMadhavan Venkataraman 	bogus = (CALLOUT_EXECUTING | CALLOUT_HRESTIME | CALLOUT_COUNTER_HIGH);
879*87a18d3fSMadhavan Venkataraman 	if (((id & bogus) != CALLOUT_COUNTER_HIGH) && (id != 0))
880*87a18d3fSMadhavan Venkataraman 		panic("untimeout: impossible timeout id %llx",
881*87a18d3fSMadhavan Venkataraman 		    (unsigned long long)id);
8827c478bd9Sstevel@tonic-gate 
8837c478bd9Sstevel@tonic-gate 	return (-1);
8847c478bd9Sstevel@tonic-gate }
8857c478bd9Sstevel@tonic-gate 
886*87a18d3fSMadhavan Venkataraman clock_t
887*87a18d3fSMadhavan Venkataraman untimeout(timeout_id_t id_arg)
888*87a18d3fSMadhavan Venkataraman {
889*87a18d3fSMadhavan Venkataraman 	hrtime_t hleft;
890*87a18d3fSMadhavan Venkataraman 	clock_t tleft;
891*87a18d3fSMadhavan Venkataraman 	callout_id_t id;
892*87a18d3fSMadhavan Venkataraman 
893*87a18d3fSMadhavan Venkataraman 	id = (ulong_t)id_arg;
894*87a18d3fSMadhavan Venkataraman 	hleft = untimeout_generic(id, 0);
895*87a18d3fSMadhavan Venkataraman 	if (hleft < 0)
896*87a18d3fSMadhavan Venkataraman 		tleft = -1;
897*87a18d3fSMadhavan Venkataraman 	else if (hleft == 0)
898*87a18d3fSMadhavan Venkataraman 		tleft = 0;
899*87a18d3fSMadhavan Venkataraman 	else
900*87a18d3fSMadhavan Venkataraman 		tleft = NSEC_TO_TICK(hleft);
901*87a18d3fSMadhavan Venkataraman 
902*87a18d3fSMadhavan Venkataraman 	return (tleft);
903*87a18d3fSMadhavan Venkataraman }
904*87a18d3fSMadhavan Venkataraman 
9057c478bd9Sstevel@tonic-gate /*
906*87a18d3fSMadhavan Venkataraman  * Convenience function to untimeout a timeout with a full ID with default
907*87a18d3fSMadhavan Venkataraman  * parameters.
908*87a18d3fSMadhavan Venkataraman  */
909*87a18d3fSMadhavan Venkataraman clock_t
910*87a18d3fSMadhavan Venkataraman untimeout_default(callout_id_t id, int nowait)
911*87a18d3fSMadhavan Venkataraman {
912*87a18d3fSMadhavan Venkataraman 	hrtime_t hleft;
913*87a18d3fSMadhavan Venkataraman 	clock_t tleft;
914*87a18d3fSMadhavan Venkataraman 
915*87a18d3fSMadhavan Venkataraman 	hleft = untimeout_generic(id, nowait);
916*87a18d3fSMadhavan Venkataraman 	if (hleft < 0)
917*87a18d3fSMadhavan Venkataraman 		tleft = -1;
918*87a18d3fSMadhavan Venkataraman 	else if (hleft == 0)
919*87a18d3fSMadhavan Venkataraman 		tleft = 0;
920*87a18d3fSMadhavan Venkataraman 	else
921*87a18d3fSMadhavan Venkataraman 		tleft = NSEC_TO_TICK(hleft);
922*87a18d3fSMadhavan Venkataraman 
923*87a18d3fSMadhavan Venkataraman 	return (tleft);
924*87a18d3fSMadhavan Venkataraman }
925*87a18d3fSMadhavan Venkataraman 
926*87a18d3fSMadhavan Venkataraman /*
927*87a18d3fSMadhavan Venkataraman  * Expire all the callouts queued in the specified callout list.
9287c478bd9Sstevel@tonic-gate  */
9297c478bd9Sstevel@tonic-gate static void
930*87a18d3fSMadhavan Venkataraman callout_list_expire(callout_table_t *ct, callout_list_t *cl)
9317c478bd9Sstevel@tonic-gate {
9327c478bd9Sstevel@tonic-gate 	callout_t *cp;
9337c478bd9Sstevel@tonic-gate 
934*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
935*87a18d3fSMadhavan Venkataraman 	ASSERT(cl != NULL);
9367c478bd9Sstevel@tonic-gate 
937*87a18d3fSMadhavan Venkataraman 	cl->cl_executor = curthread;
938*87a18d3fSMadhavan Venkataraman 
939*87a18d3fSMadhavan Venkataraman 	while ((cp = cl->cl_callouts.ch_head) != NULL) {
940f635d46aSqiao 		/*
941*87a18d3fSMadhavan Venkataraman 		 * Indicate to untimeout() that a callout is
942*87a18d3fSMadhavan Venkataraman 		 * being expired by the executor.
943f635d46aSqiao 		 */
944*87a18d3fSMadhavan Venkataraman 		cp->c_xid |= CALLOUT_EXECUTING;
945*87a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
946*87a18d3fSMadhavan Venkataraman 
9477c478bd9Sstevel@tonic-gate 		DTRACE_PROBE1(callout__start, callout_t *, cp);
9487c478bd9Sstevel@tonic-gate 		(*cp->c_func)(cp->c_arg);
9497c478bd9Sstevel@tonic-gate 		DTRACE_PROBE1(callout__end, callout_t *, cp);
9507c478bd9Sstevel@tonic-gate 
951*87a18d3fSMadhavan Venkataraman 		mutex_enter(&ct->ct_mutex);
952*87a18d3fSMadhavan Venkataraman 
953*87a18d3fSMadhavan Venkataraman 		ct->ct_expirations++;
954*87a18d3fSMadhavan Venkataraman 		ct->ct_timeouts_pending--;
9557c478bd9Sstevel@tonic-gate 		/*
956*87a18d3fSMadhavan Venkataraman 		 * Indicate completion for cl_done.
9577c478bd9Sstevel@tonic-gate 		 */
958*87a18d3fSMadhavan Venkataraman 		cp->c_xid &= ~CALLOUT_EXECUTING;
959f635d46aSqiao 
9607c478bd9Sstevel@tonic-gate 		/*
961*87a18d3fSMadhavan Venkataraman 		 * Delete callout from ID hash table and the callout
962*87a18d3fSMadhavan Venkataraman 		 * list, return to freelist, and tell any untimeout() that
963*87a18d3fSMadhavan Venkataraman 		 * cares that we're done.
9647c478bd9Sstevel@tonic-gate 		 */
965*87a18d3fSMadhavan Venkataraman 		CALLOUT_DELETE(ct, cp);
966*87a18d3fSMadhavan Venkataraman 		cp->c_idnext = ct->ct_free;
967*87a18d3fSMadhavan Venkataraman 		ct->ct_free = cp;
968*87a18d3fSMadhavan Venkataraman 
969*87a18d3fSMadhavan Venkataraman 		if (cl->cl_waiting) {
970*87a18d3fSMadhavan Venkataraman 			cl->cl_waiting = 0;
971*87a18d3fSMadhavan Venkataraman 			cv_broadcast(&cl->cl_done);
9727c478bd9Sstevel@tonic-gate 		}
973*87a18d3fSMadhavan Venkataraman 	}
974*87a18d3fSMadhavan Venkataraman 
975*87a18d3fSMadhavan Venkataraman 	cl->cl_executor = NULL;
9767c478bd9Sstevel@tonic-gate }
9777c478bd9Sstevel@tonic-gate 
9787c478bd9Sstevel@tonic-gate /*
979*87a18d3fSMadhavan Venkataraman  * Execute all expired callout lists for a callout table.
9807c478bd9Sstevel@tonic-gate  */
9817c478bd9Sstevel@tonic-gate static void
982*87a18d3fSMadhavan Venkataraman callout_expire(callout_table_t *ct)
9837c478bd9Sstevel@tonic-gate {
984*87a18d3fSMadhavan Venkataraman 	callout_list_t *cl, *clnext;
985f635d46aSqiao 
986*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
9877c478bd9Sstevel@tonic-gate 
988*87a18d3fSMadhavan Venkataraman 	for (cl = ct->ct_expired.ch_head; (cl != NULL); cl = clnext) {
989f635d46aSqiao 		/*
990*87a18d3fSMadhavan Venkataraman 		 * Multiple executor threads could be running at the same
991*87a18d3fSMadhavan Venkataraman 		 * time. Each callout list is processed by only one thread.
992*87a18d3fSMadhavan Venkataraman 		 * If this callout list is already being processed by another
993*87a18d3fSMadhavan Venkataraman 		 * executor, go on to the next one.
994f635d46aSqiao 		 */
995*87a18d3fSMadhavan Venkataraman 		if (cl->cl_executor != NULL) {
996*87a18d3fSMadhavan Venkataraman 			clnext = cl->cl_next;
9977c478bd9Sstevel@tonic-gate 			continue;
9987c478bd9Sstevel@tonic-gate 		}
9997c478bd9Sstevel@tonic-gate 
10007c478bd9Sstevel@tonic-gate 		/*
1001*87a18d3fSMadhavan Venkataraman 		 * Expire all the callouts in this callout list.
1002*87a18d3fSMadhavan Venkataraman 		 */
1003*87a18d3fSMadhavan Venkataraman 		callout_list_expire(ct, cl);
1004*87a18d3fSMadhavan Venkataraman 
1005*87a18d3fSMadhavan Venkataraman 		/*
1006*87a18d3fSMadhavan Venkataraman 		 * Free the callout list.
1007*87a18d3fSMadhavan Venkataraman 		 */
1008*87a18d3fSMadhavan Venkataraman 		clnext = cl->cl_next;
1009*87a18d3fSMadhavan Venkataraman 		CALLOUT_LIST_DELETE(ct->ct_expired, cl);
1010*87a18d3fSMadhavan Venkataraman 		cl->cl_next = ct->ct_lfree;
1011*87a18d3fSMadhavan Venkataraman 		ct->ct_lfree = cl;
1012*87a18d3fSMadhavan Venkataraman 	}
1013*87a18d3fSMadhavan Venkataraman }
1014*87a18d3fSMadhavan Venkataraman 
1015*87a18d3fSMadhavan Venkataraman /*
1016*87a18d3fSMadhavan Venkataraman  * The cyclic handlers below process callouts in two steps:
1017*87a18d3fSMadhavan Venkataraman  *
1018*87a18d3fSMadhavan Venkataraman  *	1. Find all expired callout lists and queue them in a separate
1019*87a18d3fSMadhavan Venkataraman  *	   list of expired callouts.
1020*87a18d3fSMadhavan Venkataraman  *	2. Execute the expired callout lists.
1021*87a18d3fSMadhavan Venkataraman  *
1022*87a18d3fSMadhavan Venkataraman  * This is done for two reasons:
1023*87a18d3fSMadhavan Venkataraman  *
1024*87a18d3fSMadhavan Venkataraman  *	1. We want to quickly find the next earliest expiration and
1025*87a18d3fSMadhavan Venkataraman  *	   reprogram the cyclic to it. We can do this right at the end
1026*87a18d3fSMadhavan Venkataraman  *	   of step 1.
1027*87a18d3fSMadhavan Venkataraman  *	2. The realtime cyclic handler expires callouts in place. Normal
1028*87a18d3fSMadhavan Venkataraman  *	   callouts, however, are expired by a taskq thread.
1029*87a18d3fSMadhavan Venkataraman  *	   So, it is simpler and more robust to have the taskq thread just
1030*87a18d3fSMadhavan Venkataraman  *	   do step 2.
1031*87a18d3fSMadhavan Venkataraman  */
1032*87a18d3fSMadhavan Venkataraman 
1033*87a18d3fSMadhavan Venkataraman /*
1034*87a18d3fSMadhavan Venkataraman  * Realtime callout cyclic handler.
10357c478bd9Sstevel@tonic-gate  */
10367c478bd9Sstevel@tonic-gate void
1037*87a18d3fSMadhavan Venkataraman callout_realtime(callout_table_t *ct)
10387c478bd9Sstevel@tonic-gate {
1039*87a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
1040*87a18d3fSMadhavan Venkataraman 	callout_heap_delete(ct);
1041*87a18d3fSMadhavan Venkataraman 	callout_expire(ct);
1042*87a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
1043*87a18d3fSMadhavan Venkataraman }
10447c478bd9Sstevel@tonic-gate 
1045*87a18d3fSMadhavan Venkataraman void
1046*87a18d3fSMadhavan Venkataraman callout_execute(callout_table_t *ct)
1047*87a18d3fSMadhavan Venkataraman {
1048*87a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
1049*87a18d3fSMadhavan Venkataraman 	callout_expire(ct);
1050*87a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
1051*87a18d3fSMadhavan Venkataraman }
10527c478bd9Sstevel@tonic-gate 
1053*87a18d3fSMadhavan Venkataraman /*
1054*87a18d3fSMadhavan Venkataraman  * Normal callout cyclic handler.
1055*87a18d3fSMadhavan Venkataraman  */
1056*87a18d3fSMadhavan Venkataraman void
1057*87a18d3fSMadhavan Venkataraman callout_normal(callout_table_t *ct)
1058*87a18d3fSMadhavan Venkataraman {
1059*87a18d3fSMadhavan Venkataraman 	int exec;
1060*87a18d3fSMadhavan Venkataraman 
1061*87a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
1062*87a18d3fSMadhavan Venkataraman 	callout_heap_delete(ct);
1063*87a18d3fSMadhavan Venkataraman 	exec = (ct->ct_expired.ch_head != NULL);
1064*87a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
1065*87a18d3fSMadhavan Venkataraman 
1066*87a18d3fSMadhavan Venkataraman 	if (exec) {
1067*87a18d3fSMadhavan Venkataraman 		ASSERT(ct->ct_taskq != NULL);
1068*87a18d3fSMadhavan Venkataraman 		(void) taskq_dispatch(ct->ct_taskq,
1069*87a18d3fSMadhavan Venkataraman 		    (task_func_t *)callout_execute, ct, TQ_NOSLEEP);
1070*87a18d3fSMadhavan Venkataraman 	}
1071*87a18d3fSMadhavan Venkataraman }
1072*87a18d3fSMadhavan Venkataraman 
1073*87a18d3fSMadhavan Venkataraman /*
1074*87a18d3fSMadhavan Venkataraman  * Suspend callout processing.
1075*87a18d3fSMadhavan Venkataraman  */
1076*87a18d3fSMadhavan Venkataraman static void
1077*87a18d3fSMadhavan Venkataraman callout_suspend(void)
1078*87a18d3fSMadhavan Venkataraman {
1079*87a18d3fSMadhavan Venkataraman 	int t, f;
1080*87a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
1081*87a18d3fSMadhavan Venkataraman 
1082*87a18d3fSMadhavan Venkataraman 	/*
1083*87a18d3fSMadhavan Venkataraman 	 * Traverse every callout table in the system and suspend callout
1084*87a18d3fSMadhavan Venkataraman 	 * processing.
1085*87a18d3fSMadhavan Venkataraman 	 *
1086*87a18d3fSMadhavan Venkataraman 	 * We need to suspend all the tables (including the inactive ones)
1087*87a18d3fSMadhavan Venkataraman 	 * so that if a table is made active while the suspend is still on,
1088*87a18d3fSMadhavan Venkataraman 	 * the table remains suspended.
1089*87a18d3fSMadhavan Venkataraman 	 */
1090*87a18d3fSMadhavan Venkataraman 	for (f = 0; f < max_ncpus; f++) {
1091*87a18d3fSMadhavan Venkataraman 		for (t = 0; t < CALLOUT_NTYPES; t++) {
1092*87a18d3fSMadhavan Venkataraman 			ct = &callout_table[CALLOUT_TABLE(t, f)];
1093*87a18d3fSMadhavan Venkataraman 
1094*87a18d3fSMadhavan Venkataraman 			mutex_enter(&ct->ct_mutex);
1095*87a18d3fSMadhavan Venkataraman 			ct->ct_flags |= CALLOUT_TABLE_SUSPENDED;
1096*87a18d3fSMadhavan Venkataraman 			if (ct->ct_cyclic == CYCLIC_NONE) {
1097*87a18d3fSMadhavan Venkataraman 				mutex_exit(&ct->ct_mutex);
1098*87a18d3fSMadhavan Venkataraman 				continue;
1099*87a18d3fSMadhavan Venkataraman 			}
1100*87a18d3fSMadhavan Venkataraman 			(void) cyclic_reprogram(ct->ct_cyclic, CY_INFINITY);
1101*87a18d3fSMadhavan Venkataraman 			mutex_exit(&ct->ct_mutex);
1102*87a18d3fSMadhavan Venkataraman 		}
1103*87a18d3fSMadhavan Venkataraman 	}
1104*87a18d3fSMadhavan Venkataraman }
1105*87a18d3fSMadhavan Venkataraman 
1106*87a18d3fSMadhavan Venkataraman static void
1107*87a18d3fSMadhavan Venkataraman callout_adjust(callout_table_t *ct, hrtime_t delta)
1108*87a18d3fSMadhavan Venkataraman {
1109*87a18d3fSMadhavan Venkataraman 	int hash, newhash;
1110*87a18d3fSMadhavan Venkataraman 	hrtime_t expiration;
1111*87a18d3fSMadhavan Venkataraman 	callout_list_t *cl;
1112*87a18d3fSMadhavan Venkataraman 	callout_hash_t list;
1113*87a18d3fSMadhavan Venkataraman 
1114*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1115*87a18d3fSMadhavan Venkataraman 
1116*87a18d3fSMadhavan Venkataraman 	/*
1117*87a18d3fSMadhavan Venkataraman 	 * In order to adjust the expirations, we null out the heap. Then,
1118*87a18d3fSMadhavan Venkataraman 	 * we reinsert adjusted expirations in the heap. Keeps it simple.
1119*87a18d3fSMadhavan Venkataraman 	 * Note that since the CALLOUT_TABLE_SUSPENDED flag is set by the
1120*87a18d3fSMadhavan Venkataraman 	 * caller, the heap insert does not result in cyclic reprogramming.
1121*87a18d3fSMadhavan Venkataraman 	 */
1122*87a18d3fSMadhavan Venkataraman 	ct->ct_heap_num = 0;
1123*87a18d3fSMadhavan Venkataraman 
1124*87a18d3fSMadhavan Venkataraman 	/*
1125*87a18d3fSMadhavan Venkataraman 	 * First, remove all the callout lists from the table and string them
1126*87a18d3fSMadhavan Venkataraman 	 * in a list.
1127*87a18d3fSMadhavan Venkataraman 	 */
1128*87a18d3fSMadhavan Venkataraman 	list.ch_head = list.ch_tail = NULL;
1129*87a18d3fSMadhavan Venkataraman 	for (hash = 0; hash < CALLOUT_BUCKETS; hash++) {
1130*87a18d3fSMadhavan Venkataraman 		while ((cl = ct->ct_clhash[hash].ch_head) != NULL) {
1131*87a18d3fSMadhavan Venkataraman 			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
1132*87a18d3fSMadhavan Venkataraman 			CALLOUT_LIST_APPEND(list, cl);
1133*87a18d3fSMadhavan Venkataraman 		}
1134*87a18d3fSMadhavan Venkataraman 	}
1135*87a18d3fSMadhavan Venkataraman 
1136*87a18d3fSMadhavan Venkataraman 	/*
1137*87a18d3fSMadhavan Venkataraman 	 * Now, traverse the callout lists and adjust their expirations.
1138*87a18d3fSMadhavan Venkataraman 	 */
1139*87a18d3fSMadhavan Venkataraman 	while ((cl = list.ch_head) != NULL) {
1140*87a18d3fSMadhavan Venkataraman 		CALLOUT_LIST_DELETE(list, cl);
1141*87a18d3fSMadhavan Venkataraman 		/*
1142*87a18d3fSMadhavan Venkataraman 		 * Set the new expiration and reinsert in the right
1143*87a18d3fSMadhavan Venkataraman 		 * hash bucket.
1144*87a18d3fSMadhavan Venkataraman 		 */
1145*87a18d3fSMadhavan Venkataraman 		expiration = cl->cl_expiration;
1146*87a18d3fSMadhavan Venkataraman 		expiration += delta;
1147*87a18d3fSMadhavan Venkataraman 		cl->cl_expiration = expiration;
1148*87a18d3fSMadhavan Venkataraman 		newhash = CALLOUT_CLHASH(expiration);
1149*87a18d3fSMadhavan Venkataraman 		CALLOUT_LIST_INSERT(ct->ct_clhash[newhash], cl);
1150*87a18d3fSMadhavan Venkataraman 		callout_heap_insert(ct, expiration);
1151*87a18d3fSMadhavan Venkataraman 	}
1152*87a18d3fSMadhavan Venkataraman }
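
/*
 * Worked example of the adjustment above (numbers purely illustrative):
 * suppose the system sat in the debugger for 2 seconds, so callout_resume()
 * passes delta = 2 * NANOSEC.  A callout list that was due at hrtime T is
 * relabelled cl_expiration = T + 2 * NANOSEC, hashed into the bucket for the
 * new expiration and reinserted into the heap.  The callout therefore still
 * fires the same amount of running time after it was scheduled, instead of
 * being treated as 2 seconds overdue and firing immediately on resume.
 */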
1153*87a18d3fSMadhavan Venkataraman 
1154*87a18d3fSMadhavan Venkataraman /*
1155*87a18d3fSMadhavan Venkataraman  * Resume callout processing.
1156*87a18d3fSMadhavan Venkataraman  */
1157*87a18d3fSMadhavan Venkataraman static void
1158*87a18d3fSMadhavan Venkataraman callout_resume(hrtime_t delta)
1159*87a18d3fSMadhavan Venkataraman {
1160*87a18d3fSMadhavan Venkataraman 	hrtime_t exp;
1161*87a18d3fSMadhavan Venkataraman 	int t, f;
1162*87a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
1163*87a18d3fSMadhavan Venkataraman 
1164*87a18d3fSMadhavan Venkataraman 	/*
1165*87a18d3fSMadhavan Venkataraman 	 * Traverse every callout table in the system and resume callout
1166*87a18d3fSMadhavan Venkataraman 	 * processing. For active tables, perform any hrtime adjustments
1167*87a18d3fSMadhavan Venkataraman 	 * necessary.
1168*87a18d3fSMadhavan Venkataraman 	 */
1169*87a18d3fSMadhavan Venkataraman 	for (f = 0; f < max_ncpus; f++) {
1170*87a18d3fSMadhavan Venkataraman 		for (t = 0; t < CALLOUT_NTYPES; t++) {
1171*87a18d3fSMadhavan Venkataraman 			ct = &callout_table[CALLOUT_TABLE(t, f)];
1172*87a18d3fSMadhavan Venkataraman 
1173*87a18d3fSMadhavan Venkataraman 			mutex_enter(&ct->ct_mutex);
1174*87a18d3fSMadhavan Venkataraman 			if (ct->ct_cyclic == CYCLIC_NONE) {
1175*87a18d3fSMadhavan Venkataraman 				ct->ct_flags &= ~CALLOUT_TABLE_SUSPENDED;
1176*87a18d3fSMadhavan Venkataraman 				mutex_exit(&ct->ct_mutex);
1177*87a18d3fSMadhavan Venkataraman 				continue;
1178*87a18d3fSMadhavan Venkataraman 			}
1179*87a18d3fSMadhavan Venkataraman 
1180*87a18d3fSMadhavan Venkataraman 			if (delta)
1181*87a18d3fSMadhavan Venkataraman 				callout_adjust(ct, delta);
1182*87a18d3fSMadhavan Venkataraman 
1183*87a18d3fSMadhavan Venkataraman 			ct->ct_flags &= ~CALLOUT_TABLE_SUSPENDED;
1184*87a18d3fSMadhavan Venkataraman 
1185*87a18d3fSMadhavan Venkataraman 			/*
1186*87a18d3fSMadhavan Venkataraman 			 * If the expired list is non-empty, then have the
1187*87a18d3fSMadhavan Venkataraman 			 * cyclic expire immediately. Else, program the
1188*87a18d3fSMadhavan Venkataraman 			 * cyclic based on the heap.
1189*87a18d3fSMadhavan Venkataraman 			 */
1190*87a18d3fSMadhavan Venkataraman 			if (ct->ct_expired.ch_head != NULL)
1191*87a18d3fSMadhavan Venkataraman 				exp = gethrtime();
1192*87a18d3fSMadhavan Venkataraman 			else if (ct->ct_heap_num > 0)
1193*87a18d3fSMadhavan Venkataraman 				exp = ct->ct_heap[0];
1194*87a18d3fSMadhavan Venkataraman 			else
1195*87a18d3fSMadhavan Venkataraman 				exp = 0;
1196*87a18d3fSMadhavan Venkataraman 			if (exp != 0)
1197*87a18d3fSMadhavan Venkataraman 				(void) cyclic_reprogram(ct->ct_cyclic, exp);
1198*87a18d3fSMadhavan Venkataraman 			mutex_exit(&ct->ct_mutex);
1199*87a18d3fSMadhavan Venkataraman 		}
1200*87a18d3fSMadhavan Venkataraman 	}
12017c478bd9Sstevel@tonic-gate }
12027c478bd9Sstevel@tonic-gate 
12037c478bd9Sstevel@tonic-gate /*
12047c478bd9Sstevel@tonic-gate  * Callback handler used by CPR to stop and resume callouts.
12057c478bd9Sstevel@tonic-gate  */
12067c478bd9Sstevel@tonic-gate /*ARGSUSED*/
12077c478bd9Sstevel@tonic-gate static boolean_t
12087c478bd9Sstevel@tonic-gate callout_cpr_callb(void *arg, int code)
12097c478bd9Sstevel@tonic-gate {
1210*87a18d3fSMadhavan Venkataraman 	if (code == CB_CODE_CPR_CHKPT)
1211*87a18d3fSMadhavan Venkataraman 		callout_suspend();
1212*87a18d3fSMadhavan Venkataraman 	else
1213*87a18d3fSMadhavan Venkataraman 		callout_resume(0);
1214*87a18d3fSMadhavan Venkataraman 
12157c478bd9Sstevel@tonic-gate 	return (B_TRUE);
12167c478bd9Sstevel@tonic-gate }
12177c478bd9Sstevel@tonic-gate 
12187c478bd9Sstevel@tonic-gate /*
1219*87a18d3fSMadhavan Venkataraman  * Callback handler invoked when the debugger is entered or exited.
12207c478bd9Sstevel@tonic-gate  */
1221*87a18d3fSMadhavan Venkataraman /*ARGSUSED*/
1222*87a18d3fSMadhavan Venkataraman static boolean_t
1223*87a18d3fSMadhavan Venkataraman callout_debug_callb(void *arg, int code)
12247c478bd9Sstevel@tonic-gate {
1225*87a18d3fSMadhavan Venkataraman 	hrtime_t delta;
1226f635d46aSqiao 
1227f635d46aSqiao 	/*
1228*87a18d3fSMadhavan Venkataraman 	 * When the system enters the debugger, make a note of the hrtime.
1229*87a18d3fSMadhavan Venkataraman 	 * When it is resumed, compute how long the system was in the
1230*87a18d3fSMadhavan Venkataraman 	 * debugger. This interval should not be counted for callouts.
1231f635d46aSqiao 	 */
1232*87a18d3fSMadhavan Venkataraman 	if (code == 0) {
1233*87a18d3fSMadhavan Venkataraman 		callout_suspend();
1234*87a18d3fSMadhavan Venkataraman 		callout_debug_hrtime = gethrtime();
1235*87a18d3fSMadhavan Venkataraman 	} else {
1236*87a18d3fSMadhavan Venkataraman 		delta = gethrtime() - callout_debug_hrtime;
1237*87a18d3fSMadhavan Venkataraman 		callout_resume(delta);
1238*87a18d3fSMadhavan Venkataraman 	}
1239f635d46aSqiao 
1240*87a18d3fSMadhavan Venkataraman 	return (B_TRUE);
1241*87a18d3fSMadhavan Venkataraman }
1242*87a18d3fSMadhavan Venkataraman 
1243*87a18d3fSMadhavan Venkataraman /*
1244*87a18d3fSMadhavan Venkataraman  * Move the hrestime callouts to the expired list. Then program the table's
1245*87a18d3fSMadhavan Venkataraman  * cyclic to fire immediately so that the callouts are executed right
1246*87a18d3fSMadhavan Venkataraman  * away.
1247*87a18d3fSMadhavan Venkataraman  */
1248*87a18d3fSMadhavan Venkataraman static void
1249*87a18d3fSMadhavan Venkataraman callout_hrestime_one(callout_table_t *ct)
1250*87a18d3fSMadhavan Venkataraman {
1251*87a18d3fSMadhavan Venkataraman 	callout_list_t *cl, *ecl;
1252*87a18d3fSMadhavan Venkataraman 	callout_t *cp;
1253*87a18d3fSMadhavan Venkataraman 	int hash;
1254*87a18d3fSMadhavan Venkataraman 
1255*87a18d3fSMadhavan Venkataraman 	mutex_enter(&ct->ct_mutex);
1256*87a18d3fSMadhavan Venkataraman 	if (ct->ct_heap_num == 0) {
1257*87a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
1258*87a18d3fSMadhavan Venkataraman 		return;
1259*87a18d3fSMadhavan Venkataraman 	}
1260*87a18d3fSMadhavan Venkataraman 
1261*87a18d3fSMadhavan Venkataraman 	if (ct->ct_lfree == NULL)
1262*87a18d3fSMadhavan Venkataraman 		callout_list_alloc(ct);
1263*87a18d3fSMadhavan Venkataraman 	ecl = ct->ct_lfree;
1264*87a18d3fSMadhavan Venkataraman 	ct->ct_lfree = ecl->cl_next;
1265*87a18d3fSMadhavan Venkataraman 
1266*87a18d3fSMadhavan Venkataraman 	for (hash = 0; hash < CALLOUT_BUCKETS; hash++) {
1267*87a18d3fSMadhavan Venkataraman 		for (cl = ct->ct_clhash[hash].ch_head; cl; cl = cl->cl_next) {
1268*87a18d3fSMadhavan Venkataraman 			for (cp = cl->cl_callouts.ch_head; cp;
1269*87a18d3fSMadhavan Venkataraman 			    cp = cp->c_clnext) {
1270*87a18d3fSMadhavan Venkataraman 				if ((cp->c_xid & CALLOUT_HRESTIME) == 0)
1271*87a18d3fSMadhavan Venkataraman 					continue;
1272*87a18d3fSMadhavan Venkataraman 				CALLOUT_HASH_DELETE(cl->cl_callouts, cp,
1273*87a18d3fSMadhavan Venkataraman 				    c_clnext, c_clprev);
1274*87a18d3fSMadhavan Venkataraman 				cp->c_list = ecl;
1275*87a18d3fSMadhavan Venkataraman 				CALLOUT_HASH_APPEND(ecl->cl_callouts, cp,
1276*87a18d3fSMadhavan Venkataraman 				    c_clnext, c_clprev);
1277*87a18d3fSMadhavan Venkataraman 			}
1278*87a18d3fSMadhavan Venkataraman 		}
1279*87a18d3fSMadhavan Venkataraman 	}
1280*87a18d3fSMadhavan Venkataraman 
1281*87a18d3fSMadhavan Venkataraman 	if (ecl->cl_callouts.ch_head != NULL) {
1282*87a18d3fSMadhavan Venkataraman 		CALLOUT_LIST_APPEND(ct->ct_expired, ecl);
1283*87a18d3fSMadhavan Venkataraman 		if (!(ct->ct_flags & CALLOUT_TABLE_SUSPENDED))
1284*87a18d3fSMadhavan Venkataraman 			(void) cyclic_reprogram(ct->ct_cyclic, gethrtime());
1285*87a18d3fSMadhavan Venkataraman 	} else {
1286*87a18d3fSMadhavan Venkataraman 		ecl->cl_next = ct->ct_lfree;
1287*87a18d3fSMadhavan Venkataraman 		ct->ct_lfree = ecl;
1288*87a18d3fSMadhavan Venkataraman 	}
1289*87a18d3fSMadhavan Venkataraman 	mutex_exit(&ct->ct_mutex);
1290*87a18d3fSMadhavan Venkataraman }
1291*87a18d3fSMadhavan Venkataraman 
1292*87a18d3fSMadhavan Venkataraman /*
1293*87a18d3fSMadhavan Venkataraman  * This function is called whenever system time (hrestime) is changed
1294*87a18d3fSMadhavan Venkataraman  * explicitly. All the HRESTIME callouts must be expired at once.
1295*87a18d3fSMadhavan Venkataraman  */
1296*87a18d3fSMadhavan Venkataraman /*ARGSUSED*/
1297*87a18d3fSMadhavan Venkataraman void
1298*87a18d3fSMadhavan Venkataraman callout_hrestime(void)
1299*87a18d3fSMadhavan Venkataraman {
1300*87a18d3fSMadhavan Venkataraman 	int t, f;
1301*87a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
1302*87a18d3fSMadhavan Venkataraman 
1303*87a18d3fSMadhavan Venkataraman 	/*
1304*87a18d3fSMadhavan Venkataraman 	 * Traverse every callout table in the system and process the hrestime
1305*87a18d3fSMadhavan Venkataraman 	 * callouts therein.
1306*87a18d3fSMadhavan Venkataraman 	 *
1307*87a18d3fSMadhavan Venkataraman 	 * We look at all the tables because we don't know which ones were
1308*87a18d3fSMadhavan Venkataraman 	 * onlined and offlined in the past. The offlined tables may still
1309*87a18d3fSMadhavan Venkataraman 	 * have active cyclics processing timers somewhere.
1310*87a18d3fSMadhavan Venkataraman 	 */
1311*87a18d3fSMadhavan Venkataraman 	for (f = 0; f < max_ncpus; f++) {
1312*87a18d3fSMadhavan Venkataraman 		for (t = 0; t < CALLOUT_NTYPES; t++) {
1313*87a18d3fSMadhavan Venkataraman 			ct = &callout_table[CALLOUT_TABLE(t, f)];
1314*87a18d3fSMadhavan Venkataraman 			callout_hrestime_one(ct);
1315*87a18d3fSMadhavan Venkataraman 		}
1316*87a18d3fSMadhavan Venkataraman 	}
1317*87a18d3fSMadhavan Venkataraman }
1318*87a18d3fSMadhavan Venkataraman 
1319*87a18d3fSMadhavan Venkataraman /*
1320*87a18d3fSMadhavan Venkataraman  * Create the hash tables for this callout table.
1321*87a18d3fSMadhavan Venkataraman  */
1322*87a18d3fSMadhavan Venkataraman static void
1323*87a18d3fSMadhavan Venkataraman callout_hash_init(callout_table_t *ct)
1324*87a18d3fSMadhavan Venkataraman {
1325*87a18d3fSMadhavan Venkataraman 	size_t size;
1326*87a18d3fSMadhavan Venkataraman 
1327*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1328*87a18d3fSMadhavan Venkataraman 	ASSERT((ct->ct_idhash == NULL) && (ct->ct_clhash == NULL));
1329*87a18d3fSMadhavan Venkataraman 
1330*87a18d3fSMadhavan Venkataraman 	size = sizeof (callout_hash_t) * CALLOUT_BUCKETS;
1331*87a18d3fSMadhavan Venkataraman 	ct->ct_idhash = kmem_zalloc(size, KM_SLEEP);
1332*87a18d3fSMadhavan Venkataraman 	ct->ct_clhash = kmem_zalloc(size, KM_SLEEP);
1333*87a18d3fSMadhavan Venkataraman }
1334*87a18d3fSMadhavan Venkataraman 
1335*87a18d3fSMadhavan Venkataraman /*
1336*87a18d3fSMadhavan Venkataraman  * Create per-callout table kstats.
1337*87a18d3fSMadhavan Venkataraman  */
1338*87a18d3fSMadhavan Venkataraman static void
1339*87a18d3fSMadhavan Venkataraman callout_kstat_init(callout_table_t *ct)
1340*87a18d3fSMadhavan Venkataraman {
1341*87a18d3fSMadhavan Venkataraman 	callout_stat_type_t stat;
1342*87a18d3fSMadhavan Venkataraman 	kstat_t *ct_kstats;
1343*87a18d3fSMadhavan Venkataraman 	int ndx;
1344*87a18d3fSMadhavan Venkataraman 
1345*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1346*87a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_kstats == NULL);
1347*87a18d3fSMadhavan Venkataraman 
1348*87a18d3fSMadhavan Venkataraman 	ndx = ct - callout_table;
1349*87a18d3fSMadhavan Venkataraman 	ct_kstats = kstat_create("unix", ndx, "callout",
1350*87a18d3fSMadhavan Venkataraman 	    "misc", KSTAT_TYPE_NAMED, CALLOUT_NUM_STATS, KSTAT_FLAG_VIRTUAL);
1351*87a18d3fSMadhavan Venkataraman 
1352*87a18d3fSMadhavan Venkataraman 	if (ct_kstats == NULL) {
1353*87a18d3fSMadhavan Venkataraman 		cmn_err(CE_WARN, "kstat_create for callout table %p failed",
1354*87a18d3fSMadhavan Venkataraman 		    (void *)ct);
1355*87a18d3fSMadhavan Venkataraman 	} else {
1356*87a18d3fSMadhavan Venkataraman 		ct_kstats->ks_data = ct->ct_kstat_data;
1357*87a18d3fSMadhavan Venkataraman 		for (stat = 0; stat < CALLOUT_NUM_STATS; stat++)
1358*87a18d3fSMadhavan Venkataraman 			kstat_named_init(&ct->ct_kstat_data[stat],
1359*87a18d3fSMadhavan Venkataraman 			    callout_kstat_names[stat], KSTAT_DATA_INT64);
1360*87a18d3fSMadhavan Venkataraman 		ct->ct_kstats = ct_kstats;
1361*87a18d3fSMadhavan Venkataraman 		kstat_install(ct_kstats);
1362*87a18d3fSMadhavan Venkataraman 	}
1363*87a18d3fSMadhavan Venkataraman }
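
/*
 * Usage note (illustrative, not a new interface): because the kstats above
 * are created with module "unix", name "callout" and one instance per
 * callout table, they can be read from userland with kstat(1M), e.g.
 * "kstat -m unix -n callout".  The statistic names themselves come from
 * callout_kstat_names[].
 */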
1364*87a18d3fSMadhavan Venkataraman 
1365*87a18d3fSMadhavan Venkataraman static void
1366*87a18d3fSMadhavan Venkataraman callout_cyclic_init(callout_table_t *ct)
1367*87a18d3fSMadhavan Venkataraman {
1368*87a18d3fSMadhavan Venkataraman 	cyc_handler_t hdlr;
1369*87a18d3fSMadhavan Venkataraman 	cyc_time_t when;
1370*87a18d3fSMadhavan Venkataraman 	processorid_t seqid;
1371*87a18d3fSMadhavan Venkataraman 	int t;
1372*87a18d3fSMadhavan Venkataraman 
1373*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
1374*87a18d3fSMadhavan Venkataraman 
1375*87a18d3fSMadhavan Venkataraman 	t = CALLOUT_TABLE_TYPE(ct);
1376*87a18d3fSMadhavan Venkataraman 	seqid = CALLOUT_TABLE_SEQID(ct);
1377*87a18d3fSMadhavan Venkataraman 
1378*87a18d3fSMadhavan Venkataraman 	/*
1379*87a18d3fSMadhavan Venkataraman 	 * Create the taskq thread if the table type is normal.
1380*87a18d3fSMadhavan Venkataraman 	 * Realtime tables are handled at PIL1 by a softint
1381*87a18d3fSMadhavan Venkataraman 	 * handler.
1382*87a18d3fSMadhavan Venkataraman 	 */
13837c478bd9Sstevel@tonic-gate 	if (t == CALLOUT_NORMAL) {
1384*87a18d3fSMadhavan Venkataraman 		ASSERT(ct->ct_taskq == NULL);
13857c478bd9Sstevel@tonic-gate 		/*
13867c478bd9Sstevel@tonic-gate 		 * Each callout thread consumes exactly one
13877c478bd9Sstevel@tonic-gate 		 * task structure while active.  Therefore,
13887c478bd9Sstevel@tonic-gate 		 * prepopulating with 2 * CALLOUT_THREADS tasks
13897c478bd9Sstevel@tonic-gate 		 * ensures that there's at least one task per
13907c478bd9Sstevel@tonic-gate 		 * thread that's either scheduled or on the
13917c478bd9Sstevel@tonic-gate 		 * freelist.  In turn, this guarantees that
13927c478bd9Sstevel@tonic-gate 		 * taskq_dispatch() will always either succeed
13937c478bd9Sstevel@tonic-gate 		 * (because there's a free task structure) or
13947c478bd9Sstevel@tonic-gate 		 * be unnecessary (because "callout_execute(ct)"
13957c478bd9Sstevel@tonic-gate 		 * has already been scheduled).
13967c478bd9Sstevel@tonic-gate 		 */
13977c478bd9Sstevel@tonic-gate 		ct->ct_taskq =
1398*87a18d3fSMadhavan Venkataraman 		    taskq_create_instance("callout_taskq", seqid,
13997c478bd9Sstevel@tonic-gate 		    CALLOUT_THREADS, maxclsyspri,
14007c478bd9Sstevel@tonic-gate 		    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS,
14017c478bd9Sstevel@tonic-gate 		    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
14027c478bd9Sstevel@tonic-gate 	}
1403*87a18d3fSMadhavan Venkataraman 
1404*87a18d3fSMadhavan Venkataraman 	/*
1405*87a18d3fSMadhavan Venkataraman 	 * Callouts can only be created in a table whose
1406*87a18d3fSMadhavan Venkataraman 	 * cyclic has been initialized.
1407*87a18d3fSMadhavan Venkataraman 	 */
1408*87a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_heap_num == 0);
1409*87a18d3fSMadhavan Venkataraman 
1410*87a18d3fSMadhavan Venkataraman 	/*
1411*87a18d3fSMadhavan Venkataraman 	 * Create the callout table cyclics.
1412*87a18d3fSMadhavan Venkataraman 	 */
1413*87a18d3fSMadhavan Venkataraman 	ASSERT(ct->ct_cyclic == CYCLIC_NONE);
1414*87a18d3fSMadhavan Venkataraman 
1415*87a18d3fSMadhavan Venkataraman 	/*
1416*87a18d3fSMadhavan Venkataraman 	 * Ideally, the handlers for CALLOUT_REALTIME and CALLOUT_NORMAL should
1417*87a18d3fSMadhavan Venkataraman 	 * be run at CY_LOW_LEVEL. But some callers illegally invoke delay(9F)
1418*87a18d3fSMadhavan Venkataraman 	 * from PIL > 0, and delay(9F) uses normal callouts. To avoid a
1419*87a18d3fSMadhavan Venkataraman 	 * deadlock, we run the normal handler at LOCK level. When the
1420*87a18d3fSMadhavan Venkataraman 	 * delay(9F) issue is fixed, this
1421*87a18d3fSMadhavan Venkataraman 	 * should be fixed as well.
1422*87a18d3fSMadhavan Venkataraman 	 */
1423*87a18d3fSMadhavan Venkataraman 	hdlr.cyh_func = (cyc_func_t)CALLOUT_CYCLIC_HANDLER(t);
1424*87a18d3fSMadhavan Venkataraman 	hdlr.cyh_level = (t == CALLOUT_REALTIME) ? CY_LOW_LEVEL : CY_LOCK_LEVEL;
1425*87a18d3fSMadhavan Venkataraman 	hdlr.cyh_arg = ct;
1426*87a18d3fSMadhavan Venkataraman 	when.cyt_when = CY_INFINITY;
1427*87a18d3fSMadhavan Venkataraman 	when.cyt_interval = CY_INFINITY;
1428*87a18d3fSMadhavan Venkataraman 
1429*87a18d3fSMadhavan Venkataraman 	ct->ct_cyclic = cyclic_add(&hdlr, &when);
1430*87a18d3fSMadhavan Venkataraman }
1431*87a18d3fSMadhavan Venkataraman 
1432*87a18d3fSMadhavan Venkataraman void
1433*87a18d3fSMadhavan Venkataraman callout_cpu_online(cpu_t *cp)
1434*87a18d3fSMadhavan Venkataraman {
1435*87a18d3fSMadhavan Venkataraman 	lgrp_handle_t hand;
1436*87a18d3fSMadhavan Venkataraman 	callout_cache_t *cache;
1437*87a18d3fSMadhavan Venkataraman 	char s[KMEM_CACHE_NAMELEN];
1438*87a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
1439*87a18d3fSMadhavan Venkataraman 	processorid_t seqid;
1440*87a18d3fSMadhavan Venkataraman 	int t;
1441*87a18d3fSMadhavan Venkataraman 
1442*87a18d3fSMadhavan Venkataraman 	ASSERT(MUTEX_HELD(&cpu_lock));
1443*87a18d3fSMadhavan Venkataraman 
1444*87a18d3fSMadhavan Venkataraman 	/*
1445*87a18d3fSMadhavan Venkataraman 	 * Locate the cache corresponding to the onlined CPU's lgroup.
1446*87a18d3fSMadhavan Venkataraman 	 * Note that access to callout_caches is protected by cpu_lock.
1447*87a18d3fSMadhavan Venkataraman 	 */
1448*87a18d3fSMadhavan Venkataraman 	hand = lgrp_plat_cpu_to_hand(cp->cpu_id);
1449*87a18d3fSMadhavan Venkataraman 	for (cache = callout_caches; cache != NULL; cache = cache->cc_next) {
1450*87a18d3fSMadhavan Venkataraman 		if (cache->cc_hand == hand)
1451*87a18d3fSMadhavan Venkataraman 			break;
1452*87a18d3fSMadhavan Venkataraman 	}
1453*87a18d3fSMadhavan Venkataraman 
1454*87a18d3fSMadhavan Venkataraman 	/*
1455*87a18d3fSMadhavan Venkataraman 	 * If not found, create one. The caches are never destroyed.
1456*87a18d3fSMadhavan Venkataraman 	 */
1457*87a18d3fSMadhavan Venkataraman 	if (cache == NULL) {
1458*87a18d3fSMadhavan Venkataraman 		cache = kmem_alloc(sizeof (callout_cache_t), KM_SLEEP);
1459*87a18d3fSMadhavan Venkataraman 		cache->cc_hand = hand;
1460*87a18d3fSMadhavan Venkataraman 		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_cache%lx",
1461*87a18d3fSMadhavan Venkataraman 		    (long)hand);
1462*87a18d3fSMadhavan Venkataraman 		cache->cc_cache = kmem_cache_create(s, sizeof (callout_t),
1463*87a18d3fSMadhavan Venkataraman 		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
1464*87a18d3fSMadhavan Venkataraman 		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_lcache%lx",
1465*87a18d3fSMadhavan Venkataraman 		    (long)hand);
1466*87a18d3fSMadhavan Venkataraman 		cache->cc_lcache = kmem_cache_create(s, sizeof (callout_list_t),
1467*87a18d3fSMadhavan Venkataraman 		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
1468*87a18d3fSMadhavan Venkataraman 		cache->cc_next = callout_caches;
1469*87a18d3fSMadhavan Venkataraman 		callout_caches = cache;
1470*87a18d3fSMadhavan Venkataraman 	}
1471*87a18d3fSMadhavan Venkataraman 
1472*87a18d3fSMadhavan Venkataraman 	seqid = cp->cpu_seqid;
1473*87a18d3fSMadhavan Venkataraman 
1474*87a18d3fSMadhavan Venkataraman 	for (t = 0; t < CALLOUT_NTYPES; t++) {
1475*87a18d3fSMadhavan Venkataraman 		ct = &callout_table[CALLOUT_TABLE(t, seqid)];
1476*87a18d3fSMadhavan Venkataraman 
1477*87a18d3fSMadhavan Venkataraman 		mutex_enter(&ct->ct_mutex);
1478*87a18d3fSMadhavan Venkataraman 		/*
1479*87a18d3fSMadhavan Venkataraman 		 * Store convenience pointers to the kmem caches
1480*87a18d3fSMadhavan Venkataraman 		 * in the callout table. These assignments should always be
1481*87a18d3fSMadhavan Venkataraman 		 * done as callout tables can map to different physical
1482*87a18d3fSMadhavan Venkataraman 		 * CPUs each time.
1483*87a18d3fSMadhavan Venkataraman 		 */
1484*87a18d3fSMadhavan Venkataraman 		ct->ct_cache = cache->cc_cache;
1485*87a18d3fSMadhavan Venkataraman 		ct->ct_lcache = cache->cc_lcache;
1486*87a18d3fSMadhavan Venkataraman 
1487*87a18d3fSMadhavan Venkataraman 		/*
1488*87a18d3fSMadhavan Venkataraman 		 * We use the heap pointer to check whether this callout
1489*87a18d3fSMadhavan Venkataraman 		 * table has already been initialized.
1490*87a18d3fSMadhavan Venkataraman 		 */
1491*87a18d3fSMadhavan Venkataraman 		if (ct->ct_heap == NULL) {
1492*87a18d3fSMadhavan Venkataraman 			callout_heap_init(ct);
1493*87a18d3fSMadhavan Venkataraman 			callout_hash_init(ct);
1494*87a18d3fSMadhavan Venkataraman 			callout_kstat_init(ct);
1495*87a18d3fSMadhavan Venkataraman 			callout_cyclic_init(ct);
1496*87a18d3fSMadhavan Venkataraman 		}
1497*87a18d3fSMadhavan Venkataraman 
1498*87a18d3fSMadhavan Venkataraman 		mutex_exit(&ct->ct_mutex);
1499*87a18d3fSMadhavan Venkataraman 
1500*87a18d3fSMadhavan Venkataraman 		/*
1501*87a18d3fSMadhavan Venkataraman 		 * Move the cyclic to this CPU by doing a bind. Then unbind
1502*87a18d3fSMadhavan Venkataraman 		 * the cyclic. This will allow the cyclic subsystem to juggle
1503*87a18d3fSMadhavan Venkataraman 		 * the cyclic during CPU offline.
1504*87a18d3fSMadhavan Venkataraman 		 */
1505*87a18d3fSMadhavan Venkataraman 		cyclic_bind(ct->ct_cyclic, cp, NULL);
1506*87a18d3fSMadhavan Venkataraman 		cyclic_bind(ct->ct_cyclic, NULL, NULL);
15077c478bd9Sstevel@tonic-gate 	}
15087c478bd9Sstevel@tonic-gate }
1509*87a18d3fSMadhavan Venkataraman 
1510*87a18d3fSMadhavan Venkataraman /*
1511*87a18d3fSMadhavan Venkataraman  * This is called to perform per-CPU initialization for slave CPUs at
1512*87a18d3fSMadhavan Venkataraman  * boot time.
1513*87a18d3fSMadhavan Venkataraman  */
1514*87a18d3fSMadhavan Venkataraman void
1515*87a18d3fSMadhavan Venkataraman callout_mp_init(void)
1516*87a18d3fSMadhavan Venkataraman {
1517*87a18d3fSMadhavan Venkataraman 	cpu_t *cp;
1518*87a18d3fSMadhavan Venkataraman 
1519*87a18d3fSMadhavan Venkataraman 	mutex_enter(&cpu_lock);
1520*87a18d3fSMadhavan Venkataraman 
1521*87a18d3fSMadhavan Venkataraman 	cp = cpu_active;
1522*87a18d3fSMadhavan Venkataraman 	do {
1523*87a18d3fSMadhavan Venkataraman 		callout_cpu_online(cp);
1524*87a18d3fSMadhavan Venkataraman 	} while ((cp = cp->cpu_next_onln) != cpu_active);
1525*87a18d3fSMadhavan Venkataraman 
1526*87a18d3fSMadhavan Venkataraman 	mutex_exit(&cpu_lock);
1527*87a18d3fSMadhavan Venkataraman }
1528*87a18d3fSMadhavan Venkataraman 
1529*87a18d3fSMadhavan Venkataraman /*
1530*87a18d3fSMadhavan Venkataraman  * Initialize all callout tables.  Called at boot time just before clkstart().
1531*87a18d3fSMadhavan Venkataraman  */
1532*87a18d3fSMadhavan Venkataraman void
1533*87a18d3fSMadhavan Venkataraman callout_init(void)
1534*87a18d3fSMadhavan Venkataraman {
1535*87a18d3fSMadhavan Venkataraman 	int f, t;
1536*87a18d3fSMadhavan Venkataraman 	size_t size;
1537*87a18d3fSMadhavan Venkataraman 	int table_id;
1538*87a18d3fSMadhavan Venkataraman 	callout_table_t *ct;
1539*87a18d3fSMadhavan Venkataraman 	long bits, fanout;
1540*87a18d3fSMadhavan Venkataraman 	uintptr_t buf;
1541*87a18d3fSMadhavan Venkataraman 
1542*87a18d3fSMadhavan Venkataraman 	/*
1543*87a18d3fSMadhavan Venkataraman 	 * Initialize callout globals.
1544*87a18d3fSMadhavan Venkataraman 	 */
1545*87a18d3fSMadhavan Venkataraman 	bits = 0;
1546*87a18d3fSMadhavan Venkataraman 	for (fanout = 1; (fanout < max_ncpus); fanout <<= 1)
1547*87a18d3fSMadhavan Venkataraman 		bits++;
1548*87a18d3fSMadhavan Venkataraman 	callout_table_bits = CALLOUT_TYPE_BITS + bits;
1549*87a18d3fSMadhavan Venkataraman 	callout_table_mask = (1 << callout_table_bits) - 1;
1550*87a18d3fSMadhavan Venkataraman 	callout_counter_low = 1 << CALLOUT_COUNTER_SHIFT;
1551*87a18d3fSMadhavan Venkataraman 	callout_longterm = TICK_TO_NSEC(CALLOUT_LONGTERM_TICKS);
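	/*
	 * Worked example of the sizing above (max_ncpus = 64 is only an
	 * illustration): the loop walks fanout through 1, 2, 4, 8, 16 and
	 * 32, leaving bits = 6.  callout_table_bits is then 6 plus the bits
	 * that encode the callout type, and callout_table_mask extracts
	 * those low-order table bits from a legacy timeout ID.
	 */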
1552*87a18d3fSMadhavan Venkataraman 
1553*87a18d3fSMadhavan Venkataraman 	/*
1554*87a18d3fSMadhavan Venkataraman 	 * Because of the variability in timing behavior across systems with
1555*87a18d3fSMadhavan Venkataraman 	 * different architectures, we cannot allow arbitrarily low
1556*87a18d3fSMadhavan Venkataraman 	 * resolutions. The minimum resolution has to be determined in a
1557*87a18d3fSMadhavan Venkataraman 	 * platform-specific way. Until then, we define a blanket minimum
1558*87a18d3fSMadhavan Venkataraman 	 * resolution for callouts of CALLOUT_MIN_RESOLUTION.
1559*87a18d3fSMadhavan Venkataraman 	 *
1560*87a18d3fSMadhavan Venkataraman 	 * If, in the future, someone requires lower resolution timers, they
1561*87a18d3fSMadhavan Venkataraman 	 * can do one of two things:
1562*87a18d3fSMadhavan Venkataraman 	 *
1563*87a18d3fSMadhavan Venkataraman 	 *	- Define a lower value for callout_min_resolution. This would
1564*87a18d3fSMadhavan Venkataraman 	 *	  affect all clients of the callout subsystem. If this is done
1565*87a18d3fSMadhavan Venkataraman 	 *	  via /etc/system, then no code changes are required and it
1566*87a18d3fSMadhavan Venkataraman 	 *	  would affect only that customer.
1567*87a18d3fSMadhavan Venkataraman 	 *
1568*87a18d3fSMadhavan Venkataraman 	 *	- Define a flag to be passed to timeout creation that allows
1569*87a18d3fSMadhavan Venkataraman 	 *	  the lower resolution. This involves code changes. But it
1570*87a18d3fSMadhavan Venkataraman 	 *	  would affect only the calling module. It is the developer's
1571*87a18d3fSMadhavan Venkataraman 	 *	  responsibility to test on all systems and make sure that
1572*87a18d3fSMadhavan Venkataraman 	 *	  everything works.
1573*87a18d3fSMadhavan Venkataraman 	 */
1574*87a18d3fSMadhavan Venkataraman 	if (callout_min_resolution <= 0)
1575*87a18d3fSMadhavan Venkataraman 		callout_min_resolution = CALLOUT_MIN_RESOLUTION;
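
	/*
	 * For example (hypothetical placeholder value; the units are those
	 * of CALLOUT_MIN_RESOLUTION), the /etc/system tuning mentioned in
	 * the comment above would take the form:
	 *
	 *	set callout_min_resolution = <value>
	 */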
1576*87a18d3fSMadhavan Venkataraman 
1577*87a18d3fSMadhavan Venkataraman 	/*
1578*87a18d3fSMadhavan Venkataraman 	 * Allocate all the callout tables based on max_ncpus. We have chosen
1579*87a18d3fSMadhavan Venkataraman 	 * to do boot-time allocation instead of dynamic allocation because:
1580*87a18d3fSMadhavan Venkataraman 	 *
1581*87a18d3fSMadhavan Venkataraman 	 *	- the size of the callout tables is not too large.
1582*87a18d3fSMadhavan Venkataraman 	 *	- there are race conditions involved in making this dynamic.
1583*87a18d3fSMadhavan Venkataraman 	 *	- the hash tables that go with the callout tables consume
1584*87a18d3fSMadhavan Venkataraman 	 *	  most of the memory and they are only allocated in
1585*87a18d3fSMadhavan Venkataraman 	 *	  callout_cpu_online().
1586*87a18d3fSMadhavan Venkataraman 	 *
1587*87a18d3fSMadhavan Venkataraman 	 * Each CPU has two tables that are consecutive in the array. The first
1588*87a18d3fSMadhavan Venkataraman 	 * one is for realtime callouts and the second one is for normal ones.
1589*87a18d3fSMadhavan Venkataraman 	 *
1590*87a18d3fSMadhavan Venkataraman 	 * We do this alignment dance to make sure that callout table
1591*87a18d3fSMadhavan Venkataraman 	 * structures will always be on a cache line boundary.
1592*87a18d3fSMadhavan Venkataraman 	 */
1593*87a18d3fSMadhavan Venkataraman 	size = sizeof (callout_table_t) * CALLOUT_NTYPES * max_ncpus;
1594*87a18d3fSMadhavan Venkataraman 	size += CALLOUT_ALIGN;
1595*87a18d3fSMadhavan Venkataraman 	buf = (uintptr_t)kmem_zalloc(size, KM_SLEEP);
1596*87a18d3fSMadhavan Venkataraman 	callout_table = (callout_table_t *)P2ROUNDUP(buf, CALLOUT_ALIGN);
1597*87a18d3fSMadhavan Venkataraman 
1598*87a18d3fSMadhavan Venkataraman 	size = sizeof (kstat_named_t) * CALLOUT_NUM_STATS;
1599*87a18d3fSMadhavan Venkataraman 	/*
1600*87a18d3fSMadhavan Venkataraman 	 * Now, initialize the tables for all the CPUs.
1601*87a18d3fSMadhavan Venkataraman 	 */
1602*87a18d3fSMadhavan Venkataraman 	for (f = 0; f < max_ncpus; f++) {
1603*87a18d3fSMadhavan Venkataraman 		for (t = 0; t < CALLOUT_NTYPES; t++) {
1604*87a18d3fSMadhavan Venkataraman 			table_id = CALLOUT_TABLE(t, f);
1605*87a18d3fSMadhavan Venkataraman 			ct = &callout_table[table_id];
1606*87a18d3fSMadhavan Venkataraman 			mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1607*87a18d3fSMadhavan Venkataraman 			/*
1608*87a18d3fSMadhavan Venkataraman 			 * Precompute the base IDs for long and short-term
1609*87a18d3fSMadhavan Venkataraman 			 * legacy IDs. This makes ID generation during
1610*87a18d3fSMadhavan Venkataraman 			 * timeout() fast.
1611*87a18d3fSMadhavan Venkataraman 			 */
1612*87a18d3fSMadhavan Venkataraman 			ct->ct_short_id = CALLOUT_SHORT_ID(table_id);
1613*87a18d3fSMadhavan Venkataraman 			ct->ct_long_id = CALLOUT_LONG_ID(table_id);
1614*87a18d3fSMadhavan Venkataraman 			/*
1615*87a18d3fSMadhavan Venkataraman 			 * Precompute the base ID for generation-based IDs.
1616*87a18d3fSMadhavan Venkataraman 			 * Note that when the first ID gets allocated, the
1617*87a18d3fSMadhavan Venkataraman 			 * ID will wrap. This will cause the generation
1618*87a18d3fSMadhavan Venkataraman 			 * number to be incremented to 1.
1619*87a18d3fSMadhavan Venkataraman 			 */
1620*87a18d3fSMadhavan Venkataraman 			ct->ct_gen_id = CALLOUT_SHORT_ID(table_id);
1621*87a18d3fSMadhavan Venkataraman 			/*
1622*87a18d3fSMadhavan Venkataraman 			 * Initialize the cyclic as NONE. This will get set
1623*87a18d3fSMadhavan Venkataraman 			 * during CPU online. This is so that partially
1624*87a18d3fSMadhavan Venkataraman 			 * populated systems will only have the required
1625*87a18d3fSMadhavan Venkataraman 			 * number of cyclics, not more.
1626*87a18d3fSMadhavan Venkataraman 			 */
1627*87a18d3fSMadhavan Venkataraman 			ct->ct_cyclic = CYCLIC_NONE;
1628*87a18d3fSMadhavan Venkataraman 			ct->ct_kstat_data = kmem_zalloc(size, KM_SLEEP);
1629*87a18d3fSMadhavan Venkataraman 		}
1630*87a18d3fSMadhavan Venkataraman 	}
1631*87a18d3fSMadhavan Venkataraman 
1632*87a18d3fSMadhavan Venkataraman 	/*
1633*87a18d3fSMadhavan Venkataraman 	 * Add the callback for CPR. This is called during checkpoint and
1634*87a18d3fSMadhavan Venkataraman 	 * resume to suspend and resume callouts.
1635*87a18d3fSMadhavan Venkataraman 	 */
1636*87a18d3fSMadhavan Venkataraman 	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT,
1637*87a18d3fSMadhavan Venkataraman 	    "callout_cpr");
1638*87a18d3fSMadhavan Venkataraman 	(void) callb_add(callout_debug_callb, 0, CB_CL_ENTER_DEBUGGER,
1639*87a18d3fSMadhavan Venkataraman 	    "callout_debug");
1640*87a18d3fSMadhavan Venkataraman 
1641*87a18d3fSMadhavan Venkataraman 	/*
1642*87a18d3fSMadhavan Venkataraman 	 * Call the per-CPU initialization function for the boot CPU. This
1643*87a18d3fSMadhavan Venkataraman 	 * is done here because the function is not called automatically for
1644*87a18d3fSMadhavan Venkataraman 	 * the boot CPU from the CPU online/offline hooks. Note that the
1645*87a18d3fSMadhavan Venkataraman 	 * CPU lock is taken here by convention.
1646*87a18d3fSMadhavan Venkataraman 	 */
1647*87a18d3fSMadhavan Venkataraman 	mutex_enter(&cpu_lock);
1648*87a18d3fSMadhavan Venkataraman 	callout_boot_ct = &callout_table[CALLOUT_TABLE(0, CPU->cpu_seqid)];
1649*87a18d3fSMadhavan Venkataraman 	callout_cpu_online(CPU);
1650*87a18d3fSMadhavan Venkataraman 	mutex_exit(&cpu_lock);
16517c478bd9Sstevel@tonic-gate }
1652