17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5f635d46aSqiao * Common Development and Distribution License (the "License"). 6f635d46aSqiao * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22454ab202SMadhavan Venkataraman * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate #include <sys/callo.h> 277c478bd9Sstevel@tonic-gate #include <sys/param.h> 287c478bd9Sstevel@tonic-gate #include <sys/types.h> 297c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 307c478bd9Sstevel@tonic-gate #include <sys/thread.h> 317c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 3287a18d3fSMadhavan Venkataraman #include <sys/kmem_impl.h> 337c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 347c478bd9Sstevel@tonic-gate #include <sys/callb.h> 357c478bd9Sstevel@tonic-gate #include <sys/debug.h> 367c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 377c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 387c478bd9Sstevel@tonic-gate #include <sys/sdt.h> 397c478bd9Sstevel@tonic-gate 407c478bd9Sstevel@tonic-gate /* 417c478bd9Sstevel@tonic-gate * Callout tables. See timeout(9F) for details. 427c478bd9Sstevel@tonic-gate */ 43*51b32bddSMadhavan Venkataraman static int callout_threads; /* callout normal threads */ 4487a18d3fSMadhavan Venkataraman static hrtime_t callout_debug_hrtime; /* debugger entry time */ 45*51b32bddSMadhavan Venkataraman static int callout_min_reap; /* callout minimum reap count */ 46*51b32bddSMadhavan Venkataraman static int callout_tolerance; /* callout hires tolerance */ 4787a18d3fSMadhavan Venkataraman static callout_table_t *callout_boot_ct; /* Boot CPU's callout tables */ 48454ab202SMadhavan Venkataraman static clock_t callout_max_ticks; /* max interval */ 4987a18d3fSMadhavan Venkataraman static hrtime_t callout_longterm; /* longterm nanoseconds */ 5087a18d3fSMadhavan Venkataraman static ulong_t callout_counter_low; /* callout ID increment */ 5187a18d3fSMadhavan Venkataraman static ulong_t callout_table_bits; /* number of table bits in ID */ 5287a18d3fSMadhavan Venkataraman static ulong_t callout_table_mask; /* mask for the table bits */ 5387a18d3fSMadhavan Venkataraman static callout_cache_t *callout_caches; /* linked list of caches */ 5487a18d3fSMadhavan 
Venkataraman #pragma align 64(callout_table) 5587a18d3fSMadhavan Venkataraman static callout_table_t *callout_table; /* global callout table array */ 567c478bd9Sstevel@tonic-gate 5707247649SMadhavan Venkataraman /* 5807247649SMadhavan Venkataraman * We run normal callouts from PIL 10. This means that no other handler that 5907247649SMadhavan Venkataraman * runs at PIL 10 is allowed to wait for normal callouts directly or indirectly 6007247649SMadhavan Venkataraman * as it will cause a deadlock. This has always been an unwritten rule. 6107247649SMadhavan Venkataraman * We are making it explicit here. 6207247649SMadhavan Venkataraman */ 63*51b32bddSMadhavan Venkataraman static volatile int callout_realtime_level = CY_LOW_LEVEL; 64*51b32bddSMadhavan Venkataraman static volatile int callout_normal_level = CY_LOCK_LEVEL; 6507247649SMadhavan Venkataraman 6687a18d3fSMadhavan Venkataraman static char *callout_kstat_names[] = { 6787a18d3fSMadhavan Venkataraman "callout_timeouts", 6887a18d3fSMadhavan Venkataraman "callout_timeouts_pending", 6987a18d3fSMadhavan Venkataraman "callout_untimeouts_unexpired", 7087a18d3fSMadhavan Venkataraman "callout_untimeouts_executing", 7187a18d3fSMadhavan Venkataraman "callout_untimeouts_expired", 7287a18d3fSMadhavan Venkataraman "callout_expirations", 7387a18d3fSMadhavan Venkataraman "callout_allocations", 74*51b32bddSMadhavan Venkataraman "callout_cleanups", 7587a18d3fSMadhavan Venkataraman }; 7687a18d3fSMadhavan Venkataraman 77*51b32bddSMadhavan Venkataraman static hrtime_t callout_heap_process(callout_table_t *, hrtime_t, int); 78*51b32bddSMadhavan Venkataraman 7987a18d3fSMadhavan Venkataraman #define CALLOUT_HASH_INSERT(hash, cp, cnext, cprev) \ 807c478bd9Sstevel@tonic-gate { \ 8187a18d3fSMadhavan Venkataraman callout_hash_t *hashp = &(hash); \ 8287a18d3fSMadhavan Venkataraman \ 837c478bd9Sstevel@tonic-gate cp->cprev = NULL; \ 8487a18d3fSMadhavan Venkataraman cp->cnext = hashp->ch_head; \ 8587a18d3fSMadhavan Venkataraman if 
(hashp->ch_head == NULL) \ 8687a18d3fSMadhavan Venkataraman hashp->ch_tail = cp; \ 877c478bd9Sstevel@tonic-gate else \ 8887a18d3fSMadhavan Venkataraman cp->cnext->cprev = cp; \ 8987a18d3fSMadhavan Venkataraman hashp->ch_head = cp; \ 907c478bd9Sstevel@tonic-gate } 917c478bd9Sstevel@tonic-gate 9287a18d3fSMadhavan Venkataraman #define CALLOUT_HASH_APPEND(hash, cp, cnext, cprev) \ 9387a18d3fSMadhavan Venkataraman { \ 9487a18d3fSMadhavan Venkataraman callout_hash_t *hashp = &(hash); \ 9587a18d3fSMadhavan Venkataraman \ 9687a18d3fSMadhavan Venkataraman cp->cnext = NULL; \ 9787a18d3fSMadhavan Venkataraman cp->cprev = hashp->ch_tail; \ 9887a18d3fSMadhavan Venkataraman if (hashp->ch_tail == NULL) \ 9987a18d3fSMadhavan Venkataraman hashp->ch_head = cp; \ 10087a18d3fSMadhavan Venkataraman else \ 10187a18d3fSMadhavan Venkataraman cp->cprev->cnext = cp; \ 10287a18d3fSMadhavan Venkataraman hashp->ch_tail = cp; \ 10387a18d3fSMadhavan Venkataraman } 10487a18d3fSMadhavan Venkataraman 10587a18d3fSMadhavan Venkataraman #define CALLOUT_HASH_DELETE(hash, cp, cnext, cprev) \ 10687a18d3fSMadhavan Venkataraman { \ 10787a18d3fSMadhavan Venkataraman callout_hash_t *hashp = &(hash); \ 10887a18d3fSMadhavan Venkataraman \ 10987a18d3fSMadhavan Venkataraman if (cp->cnext == NULL) \ 11087a18d3fSMadhavan Venkataraman hashp->ch_tail = cp->cprev; \ 11187a18d3fSMadhavan Venkataraman else \ 11287a18d3fSMadhavan Venkataraman cp->cnext->cprev = cp->cprev; \ 11387a18d3fSMadhavan Venkataraman if (cp->cprev == NULL) \ 11487a18d3fSMadhavan Venkataraman hashp->ch_head = cp->cnext; \ 11587a18d3fSMadhavan Venkataraman else \ 11687a18d3fSMadhavan Venkataraman cp->cprev->cnext = cp->cnext; \ 11787a18d3fSMadhavan Venkataraman } 11887a18d3fSMadhavan Venkataraman 11987a18d3fSMadhavan Venkataraman /* 12087a18d3fSMadhavan Venkataraman * These definitions help us queue callouts and callout lists. 
Here is 12187a18d3fSMadhavan Venkataraman * the queueing rationale: 12287a18d3fSMadhavan Venkataraman * 12387a18d3fSMadhavan Venkataraman * - callouts are queued in a FIFO manner in the ID hash table. 12487a18d3fSMadhavan Venkataraman * TCP timers are typically cancelled in the same order that they 12587a18d3fSMadhavan Venkataraman * were issued. The FIFO queueing shortens the search for a callout 12687a18d3fSMadhavan Venkataraman * during untimeout(). 12787a18d3fSMadhavan Venkataraman * 12887a18d3fSMadhavan Venkataraman * - callouts are queued in a FIFO manner in their callout lists. 12987a18d3fSMadhavan Venkataraman * This ensures that the callouts are executed in the same order that 13087a18d3fSMadhavan Venkataraman * they were queued. This is fair. Plus, it helps to make each 13187a18d3fSMadhavan Venkataraman * callout expiration timely. It also favors cancellations. 13287a18d3fSMadhavan Venkataraman * 133*51b32bddSMadhavan Venkataraman * - callout lists are queued in the following manner in the callout 134*51b32bddSMadhavan Venkataraman * hash table buckets: 135*51b32bddSMadhavan Venkataraman * 136*51b32bddSMadhavan Venkataraman * - appended, if the callout list is a 1-nanosecond resolution 137*51b32bddSMadhavan Venkataraman * callout list. When a callout is created, we first look for 138*51b32bddSMadhavan Venkataraman * a callout list that has the same expiration so we can avoid 139*51b32bddSMadhavan Venkataraman * allocating a callout list and inserting the expiration into 140*51b32bddSMadhavan Venkataraman * the heap. However, we do not want to look at 1-nanosecond 141*51b32bddSMadhavan Venkataraman * resolution callout lists as we will seldom find a match in 142*51b32bddSMadhavan Venkataraman * them. Keeping these callout lists in the rear of the hash 143*51b32bddSMadhavan Venkataraman * buckets allows us to skip these during the lookup. 
144*51b32bddSMadhavan Venkataraman * 145*51b32bddSMadhavan Venkataraman * - inserted at the beginning, if the callout list is not a 146*51b32bddSMadhavan Venkataraman * 1-nanosecond resolution callout list. This also has the 147*51b32bddSMadhavan Venkataraman * side-effect of keeping the long term timers away from the 148*51b32bddSMadhavan Venkataraman * front of the buckets. 14987a18d3fSMadhavan Venkataraman * 15087a18d3fSMadhavan Venkataraman * - callout lists are queued in a FIFO manner in the expired callouts 15187a18d3fSMadhavan Venkataraman * list. This ensures that callout lists are executed in the order 15287a18d3fSMadhavan Venkataraman * of expiration. 15387a18d3fSMadhavan Venkataraman */ 15487a18d3fSMadhavan Venkataraman #define CALLOUT_APPEND(ct, cp) \ 15587a18d3fSMadhavan Venkataraman CALLOUT_HASH_APPEND(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)], \ 15687a18d3fSMadhavan Venkataraman cp, c_idnext, c_idprev); \ 15787a18d3fSMadhavan Venkataraman CALLOUT_HASH_APPEND(cp->c_list->cl_callouts, cp, c_clnext, c_clprev) 15887a18d3fSMadhavan Venkataraman 15987a18d3fSMadhavan Venkataraman #define CALLOUT_DELETE(ct, cp) \ 16087a18d3fSMadhavan Venkataraman CALLOUT_HASH_DELETE(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)], \ 16187a18d3fSMadhavan Venkataraman cp, c_idnext, c_idprev); \ 16287a18d3fSMadhavan Venkataraman CALLOUT_HASH_DELETE(cp->c_list->cl_callouts, cp, c_clnext, c_clprev) 16387a18d3fSMadhavan Venkataraman 16487a18d3fSMadhavan Venkataraman #define CALLOUT_LIST_INSERT(hash, cl) \ 16587a18d3fSMadhavan Venkataraman CALLOUT_HASH_INSERT(hash, cl, cl_next, cl_prev) 16687a18d3fSMadhavan Venkataraman 16787a18d3fSMadhavan Venkataraman #define CALLOUT_LIST_APPEND(hash, cl) \ 16887a18d3fSMadhavan Venkataraman CALLOUT_HASH_APPEND(hash, cl, cl_next, cl_prev) 16987a18d3fSMadhavan Venkataraman 17087a18d3fSMadhavan Venkataraman #define CALLOUT_LIST_DELETE(hash, cl) \ 17187a18d3fSMadhavan Venkataraman CALLOUT_HASH_DELETE(hash, cl, cl_next, cl_prev) 1727c478bd9Sstevel@tonic-gate 
1737c478bd9Sstevel@tonic-gate /* 17407247649SMadhavan Venkataraman * For normal callouts, there is a deadlock scenario if two callouts that 17507247649SMadhavan Venkataraman * have an inter-dependency end up on the same callout list. To break the 17607247649SMadhavan Venkataraman * deadlock, you need two taskq threads running in parallel. We compute 17707247649SMadhavan Venkataraman * the number of taskq threads here using a bunch of conditions to make 17807247649SMadhavan Venkataraman * it optimal for the common case. This is an ugly hack, but one that is 17907247649SMadhavan Venkataraman * necessary (sigh). 18007247649SMadhavan Venkataraman */ 18107247649SMadhavan Venkataraman #define CALLOUT_THRESHOLD 100000000 18207247649SMadhavan Venkataraman #define CALLOUT_EXEC_COMPUTE(ct, exec) \ 18307247649SMadhavan Venkataraman { \ 18407247649SMadhavan Venkataraman callout_list_t *cl; \ 18507247649SMadhavan Venkataraman \ 18607247649SMadhavan Venkataraman cl = ct->ct_expired.ch_head; \ 18707247649SMadhavan Venkataraman if (cl == NULL) { \ 18807247649SMadhavan Venkataraman /* \ 18907247649SMadhavan Venkataraman * If the expired list is NULL, there is nothing to \ 19007247649SMadhavan Venkataraman * process. \ 19107247649SMadhavan Venkataraman */ \ 19207247649SMadhavan Venkataraman exec = 0; \ 19307247649SMadhavan Venkataraman } else if ((cl->cl_next == NULL) && \ 19407247649SMadhavan Venkataraman (cl->cl_callouts.ch_head == cl->cl_callouts.ch_tail)) { \ 19507247649SMadhavan Venkataraman /* \ 19607247649SMadhavan Venkataraman * If there is only one callout list and it contains \ 19707247649SMadhavan Venkataraman * only one callout, there is no need for two threads. 
\ 19807247649SMadhavan Venkataraman */ \ 19907247649SMadhavan Venkataraman exec = 1; \ 20007247649SMadhavan Venkataraman } else if ((ct->ct_heap_num == 0) || \ 201*51b32bddSMadhavan Venkataraman (ct->ct_heap[0].ch_expiration > gethrtime() + CALLOUT_THRESHOLD)) {\ 20207247649SMadhavan Venkataraman /* \ 20307247649SMadhavan Venkataraman * If the heap has become empty, we need two threads as \ 20407247649SMadhavan Venkataraman * there is no one to kick off the second thread in the \ 20507247649SMadhavan Venkataraman * future. If the heap is not empty and the top of the \ 20607247649SMadhavan Venkataraman * heap does not expire in the near future, we need two \ 20707247649SMadhavan Venkataraman * threads. \ 20807247649SMadhavan Venkataraman */ \ 20907247649SMadhavan Venkataraman exec = 2; \ 21007247649SMadhavan Venkataraman } else { \ 21107247649SMadhavan Venkataraman /* \ 21207247649SMadhavan Venkataraman * We have multiple callouts to process. But the cyclic \ 21307247649SMadhavan Venkataraman * will fire in the near future. So, we only need one \ 21407247649SMadhavan Venkataraman * thread for now. \ 21507247649SMadhavan Venkataraman */ \ 21607247649SMadhavan Venkataraman exec = 1; \ 21707247649SMadhavan Venkataraman } \ 21807247649SMadhavan Venkataraman } 21907247649SMadhavan Venkataraman 22007247649SMadhavan Venkataraman /* 221*51b32bddSMadhavan Venkataraman * Macro to swap two heap items. 222*51b32bddSMadhavan Venkataraman */ 223*51b32bddSMadhavan Venkataraman #define CALLOUT_SWAP(h1, h2) \ 224*51b32bddSMadhavan Venkataraman { \ 225*51b32bddSMadhavan Venkataraman callout_heap_t tmp; \ 226*51b32bddSMadhavan Venkataraman \ 227*51b32bddSMadhavan Venkataraman tmp = *h1; \ 228*51b32bddSMadhavan Venkataraman *h1 = *h2; \ 229*51b32bddSMadhavan Venkataraman *h2 = tmp; \ 230*51b32bddSMadhavan Venkataraman } 231*51b32bddSMadhavan Venkataraman 232*51b32bddSMadhavan Venkataraman /* 233*51b32bddSMadhavan Venkataraman * Macro to free a callout list. 
234*51b32bddSMadhavan Venkataraman */ 235*51b32bddSMadhavan Venkataraman #define CALLOUT_LIST_FREE(ct, cl) \ 236*51b32bddSMadhavan Venkataraman { \ 237*51b32bddSMadhavan Venkataraman cl->cl_next = ct->ct_lfree; \ 238*51b32bddSMadhavan Venkataraman ct->ct_lfree = cl; \ 239*51b32bddSMadhavan Venkataraman cl->cl_flags |= CALLOUT_LIST_FLAG_FREE; \ 240*51b32bddSMadhavan Venkataraman } 241*51b32bddSMadhavan Venkataraman 242*51b32bddSMadhavan Venkataraman /* 2437c478bd9Sstevel@tonic-gate * Allocate a callout structure. We try quite hard because we 2447c478bd9Sstevel@tonic-gate * can't sleep, and if we can't do the allocation, we're toast. 24587a18d3fSMadhavan Venkataraman * Failing all, we try a KM_PANIC allocation. Note that we never 24687a18d3fSMadhavan Venkataraman * deallocate a callout. See untimeout() for the reasoning. 2477c478bd9Sstevel@tonic-gate */ 2487c478bd9Sstevel@tonic-gate static callout_t * 2497c478bd9Sstevel@tonic-gate callout_alloc(callout_table_t *ct) 2507c478bd9Sstevel@tonic-gate { 25187a18d3fSMadhavan Venkataraman size_t size; 25287a18d3fSMadhavan Venkataraman callout_t *cp; 2537c478bd9Sstevel@tonic-gate 25487a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 25587a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 25687a18d3fSMadhavan Venkataraman 25787a18d3fSMadhavan Venkataraman cp = kmem_cache_alloc(ct->ct_cache, KM_NOSLEEP); 25887a18d3fSMadhavan Venkataraman if (cp == NULL) { 25987a18d3fSMadhavan Venkataraman size = sizeof (callout_t); 26087a18d3fSMadhavan Venkataraman cp = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC); 26187a18d3fSMadhavan Venkataraman } 26287a18d3fSMadhavan Venkataraman cp->c_xid = 0; 26307247649SMadhavan Venkataraman cp->c_executor = NULL; 26407247649SMadhavan Venkataraman cv_init(&cp->c_done, NULL, CV_DEFAULT, NULL); 26507247649SMadhavan Venkataraman cp->c_waiting = 0; 26687a18d3fSMadhavan Venkataraman 26787a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 26887a18d3fSMadhavan Venkataraman 
ct->ct_allocations++; 2697c478bd9Sstevel@tonic-gate return (cp); 2707c478bd9Sstevel@tonic-gate } 2717c478bd9Sstevel@tonic-gate 2727c478bd9Sstevel@tonic-gate /* 27387a18d3fSMadhavan Venkataraman * Allocate a callout list structure. We try quite hard because we 27487a18d3fSMadhavan Venkataraman * can't sleep, and if we can't do the allocation, we're toast. 27587a18d3fSMadhavan Venkataraman * Failing all, we try a KM_PANIC allocation. Note that we never 27687a18d3fSMadhavan Venkataraman * deallocate a callout list. 2777c478bd9Sstevel@tonic-gate */ 27887a18d3fSMadhavan Venkataraman static void 27987a18d3fSMadhavan Venkataraman callout_list_alloc(callout_table_t *ct) 2807c478bd9Sstevel@tonic-gate { 28187a18d3fSMadhavan Venkataraman size_t size; 28287a18d3fSMadhavan Venkataraman callout_list_t *cl; 28387a18d3fSMadhavan Venkataraman 28487a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 28587a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 28687a18d3fSMadhavan Venkataraman 28787a18d3fSMadhavan Venkataraman cl = kmem_cache_alloc(ct->ct_lcache, KM_NOSLEEP); 28887a18d3fSMadhavan Venkataraman if (cl == NULL) { 28987a18d3fSMadhavan Venkataraman size = sizeof (callout_list_t); 29087a18d3fSMadhavan Venkataraman cl = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC); 29187a18d3fSMadhavan Venkataraman } 29287a18d3fSMadhavan Venkataraman bzero(cl, sizeof (callout_list_t)); 29387a18d3fSMadhavan Venkataraman 29487a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 295*51b32bddSMadhavan Venkataraman CALLOUT_LIST_FREE(ct, cl); 29687a18d3fSMadhavan Venkataraman } 29787a18d3fSMadhavan Venkataraman 29887a18d3fSMadhavan Venkataraman /* 299*51b32bddSMadhavan Venkataraman * Find a callout list that corresponds to an expiration and matching flags. 
30087a18d3fSMadhavan Venkataraman */ 30187a18d3fSMadhavan Venkataraman static callout_list_t * 30207247649SMadhavan Venkataraman callout_list_get(callout_table_t *ct, hrtime_t expiration, int flags, int hash) 30387a18d3fSMadhavan Venkataraman { 30487a18d3fSMadhavan Venkataraman callout_list_t *cl; 305*51b32bddSMadhavan Venkataraman int clflags; 30687a18d3fSMadhavan Venkataraman 30787a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 30887a18d3fSMadhavan Venkataraman 309*51b32bddSMadhavan Venkataraman if (flags & CALLOUT_LIST_FLAG_NANO) { 310*51b32bddSMadhavan Venkataraman /* 311*51b32bddSMadhavan Venkataraman * This is a 1-nanosecond resolution callout. We will rarely 312*51b32bddSMadhavan Venkataraman * find a match for this. So, bail out. 313*51b32bddSMadhavan Venkataraman */ 314*51b32bddSMadhavan Venkataraman return (NULL); 315*51b32bddSMadhavan Venkataraman } 316*51b32bddSMadhavan Venkataraman 317*51b32bddSMadhavan Venkataraman clflags = (CALLOUT_LIST_FLAG_ABSOLUTE | CALLOUT_LIST_FLAG_HRESTIME); 31887a18d3fSMadhavan Venkataraman for (cl = ct->ct_clhash[hash].ch_head; (cl != NULL); cl = cl->cl_next) { 319*51b32bddSMadhavan Venkataraman /* 320*51b32bddSMadhavan Venkataraman * If we have reached a 1-nanosecond resolution callout list, 321*51b32bddSMadhavan Venkataraman * we don't have much hope of finding a match in this hash 322*51b32bddSMadhavan Venkataraman * bucket. So, just bail out. 
323*51b32bddSMadhavan Venkataraman */ 324*51b32bddSMadhavan Venkataraman if (cl->cl_flags & CALLOUT_LIST_FLAG_NANO) 325*51b32bddSMadhavan Venkataraman return (NULL); 326*51b32bddSMadhavan Venkataraman 32707247649SMadhavan Venkataraman if ((cl->cl_expiration == expiration) && 328*51b32bddSMadhavan Venkataraman ((cl->cl_flags & clflags) == (flags & clflags))) 32987a18d3fSMadhavan Venkataraman return (cl); 33087a18d3fSMadhavan Venkataraman } 33187a18d3fSMadhavan Venkataraman 33287a18d3fSMadhavan Venkataraman return (NULL); 33387a18d3fSMadhavan Venkataraman } 33487a18d3fSMadhavan Venkataraman 33587a18d3fSMadhavan Venkataraman /* 33687a18d3fSMadhavan Venkataraman * Initialize a callout table's heap, if necessary. Preallocate some free 33787a18d3fSMadhavan Venkataraman * entries so we don't have to check for NULL elsewhere. 33887a18d3fSMadhavan Venkataraman */ 33987a18d3fSMadhavan Venkataraman static void 34087a18d3fSMadhavan Venkataraman callout_heap_init(callout_table_t *ct) 34187a18d3fSMadhavan Venkataraman { 34287a18d3fSMadhavan Venkataraman size_t size; 34387a18d3fSMadhavan Venkataraman 34487a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 34587a18d3fSMadhavan Venkataraman ASSERT(ct->ct_heap == NULL); 34687a18d3fSMadhavan Venkataraman 34787a18d3fSMadhavan Venkataraman ct->ct_heap_num = 0; 34887a18d3fSMadhavan Venkataraman ct->ct_heap_max = CALLOUT_CHUNK; 349*51b32bddSMadhavan Venkataraman size = sizeof (callout_heap_t) * CALLOUT_CHUNK; 35087a18d3fSMadhavan Venkataraman ct->ct_heap = kmem_alloc(size, KM_SLEEP); 35187a18d3fSMadhavan Venkataraman } 35287a18d3fSMadhavan Venkataraman 35387a18d3fSMadhavan Venkataraman /* 35487a18d3fSMadhavan Venkataraman * Reallocate the heap. We try quite hard because we can't sleep, and if 35587a18d3fSMadhavan Venkataraman * we can't do the allocation, we're toast. Failing all, we try a KM_PANIC 35687a18d3fSMadhavan Venkataraman * allocation. Note that the heap only expands, it never contracts. 
35787a18d3fSMadhavan Venkataraman */ 35887a18d3fSMadhavan Venkataraman static void 35987a18d3fSMadhavan Venkataraman callout_heap_expand(callout_table_t *ct) 36087a18d3fSMadhavan Venkataraman { 36187a18d3fSMadhavan Venkataraman size_t max, size, osize; 362*51b32bddSMadhavan Venkataraman callout_heap_t *heap; 36387a18d3fSMadhavan Venkataraman 36487a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 36587a18d3fSMadhavan Venkataraman ASSERT(ct->ct_heap_num <= ct->ct_heap_max); 36687a18d3fSMadhavan Venkataraman 36787a18d3fSMadhavan Venkataraman while (ct->ct_heap_num == ct->ct_heap_max) { 36887a18d3fSMadhavan Venkataraman max = ct->ct_heap_max; 36987a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 37087a18d3fSMadhavan Venkataraman 371*51b32bddSMadhavan Venkataraman osize = sizeof (callout_heap_t) * max; 372*51b32bddSMadhavan Venkataraman size = sizeof (callout_heap_t) * (max + CALLOUT_CHUNK); 37387a18d3fSMadhavan Venkataraman heap = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC); 37487a18d3fSMadhavan Venkataraman 37587a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 37687a18d3fSMadhavan Venkataraman if (max < ct->ct_heap_max) { 37787a18d3fSMadhavan Venkataraman /* 37887a18d3fSMadhavan Venkataraman * Someone beat us to the allocation. Free what we 37987a18d3fSMadhavan Venkataraman * just allocated and proceed. 
38087a18d3fSMadhavan Venkataraman */ 38187a18d3fSMadhavan Venkataraman kmem_free(heap, size); 38287a18d3fSMadhavan Venkataraman continue; 38387a18d3fSMadhavan Venkataraman } 38487a18d3fSMadhavan Venkataraman 38587a18d3fSMadhavan Venkataraman bcopy(ct->ct_heap, heap, osize); 38687a18d3fSMadhavan Venkataraman kmem_free(ct->ct_heap, osize); 38787a18d3fSMadhavan Venkataraman ct->ct_heap = heap; 388*51b32bddSMadhavan Venkataraman ct->ct_heap_max = size / sizeof (callout_heap_t); 38987a18d3fSMadhavan Venkataraman } 39087a18d3fSMadhavan Venkataraman } 39187a18d3fSMadhavan Venkataraman 39287a18d3fSMadhavan Venkataraman /* 39387a18d3fSMadhavan Venkataraman * Move an expiration from the bottom of the heap to its correct place 39487a18d3fSMadhavan Venkataraman * in the heap. If we reached the root doing this, return 1. Else, 39587a18d3fSMadhavan Venkataraman * return 0. 39687a18d3fSMadhavan Venkataraman */ 39787a18d3fSMadhavan Venkataraman static int 39887a18d3fSMadhavan Venkataraman callout_upheap(callout_table_t *ct) 39987a18d3fSMadhavan Venkataraman { 40087a18d3fSMadhavan Venkataraman int current, parent; 401*51b32bddSMadhavan Venkataraman callout_heap_t *heap, *hcurrent, *hparent; 40287a18d3fSMadhavan Venkataraman 40387a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 40487a18d3fSMadhavan Venkataraman ASSERT(ct->ct_heap_num >= 1); 40587a18d3fSMadhavan Venkataraman 40687a18d3fSMadhavan Venkataraman if (ct->ct_heap_num == 1) { 40787a18d3fSMadhavan Venkataraman return (1); 40887a18d3fSMadhavan Venkataraman } 40987a18d3fSMadhavan Venkataraman 41087a18d3fSMadhavan Venkataraman heap = ct->ct_heap; 41187a18d3fSMadhavan Venkataraman current = ct->ct_heap_num - 1; 41287a18d3fSMadhavan Venkataraman 41387a18d3fSMadhavan Venkataraman for (;;) { 41487a18d3fSMadhavan Venkataraman parent = CALLOUT_HEAP_PARENT(current); 415*51b32bddSMadhavan Venkataraman hparent = &heap[parent]; 416*51b32bddSMadhavan Venkataraman hcurrent = &heap[current]; 41787a18d3fSMadhavan Venkataraman 
41887a18d3fSMadhavan Venkataraman /* 41987a18d3fSMadhavan Venkataraman * We have an expiration later than our parent; we're done. 42087a18d3fSMadhavan Venkataraman */ 421*51b32bddSMadhavan Venkataraman if (hcurrent->ch_expiration >= hparent->ch_expiration) { 42287a18d3fSMadhavan Venkataraman return (0); 42387a18d3fSMadhavan Venkataraman } 42487a18d3fSMadhavan Venkataraman 42587a18d3fSMadhavan Venkataraman /* 42687a18d3fSMadhavan Venkataraman * We need to swap with our parent, and continue up the heap. 42787a18d3fSMadhavan Venkataraman */ 428*51b32bddSMadhavan Venkataraman CALLOUT_SWAP(hparent, hcurrent); 42987a18d3fSMadhavan Venkataraman 43087a18d3fSMadhavan Venkataraman /* 43187a18d3fSMadhavan Venkataraman * If we just reached the root, we're done. 43287a18d3fSMadhavan Venkataraman */ 43387a18d3fSMadhavan Venkataraman if (parent == 0) { 43487a18d3fSMadhavan Venkataraman return (1); 43587a18d3fSMadhavan Venkataraman } 43687a18d3fSMadhavan Venkataraman 43787a18d3fSMadhavan Venkataraman current = parent; 43887a18d3fSMadhavan Venkataraman } 43987a18d3fSMadhavan Venkataraman /*NOTREACHED*/ 44087a18d3fSMadhavan Venkataraman } 44187a18d3fSMadhavan Venkataraman 44287a18d3fSMadhavan Venkataraman /* 443*51b32bddSMadhavan Venkataraman * Insert a new heap item into a callout table's heap. 44487a18d3fSMadhavan Venkataraman */ 44587a18d3fSMadhavan Venkataraman static void 446*51b32bddSMadhavan Venkataraman callout_heap_insert(callout_table_t *ct, callout_list_t *cl) 44787a18d3fSMadhavan Venkataraman { 44887a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 44987a18d3fSMadhavan Venkataraman ASSERT(ct->ct_heap_num < ct->ct_heap_max); 45087a18d3fSMadhavan Venkataraman 45187a18d3fSMadhavan Venkataraman /* 452*51b32bddSMadhavan Venkataraman * First, copy the expiration and callout list pointer to the bottom 453*51b32bddSMadhavan Venkataraman * of the heap. 
45487a18d3fSMadhavan Venkataraman */ 455*51b32bddSMadhavan Venkataraman ct->ct_heap[ct->ct_heap_num].ch_expiration = cl->cl_expiration; 456*51b32bddSMadhavan Venkataraman ct->ct_heap[ct->ct_heap_num].ch_list = cl; 45787a18d3fSMadhavan Venkataraman ct->ct_heap_num++; 45887a18d3fSMadhavan Venkataraman 45987a18d3fSMadhavan Venkataraman /* 46087a18d3fSMadhavan Venkataraman * Now, perform an upheap operation. If we reached the root, then 46187a18d3fSMadhavan Venkataraman * the cyclic needs to be reprogrammed as we have an earlier 46287a18d3fSMadhavan Venkataraman * expiration. 46387a18d3fSMadhavan Venkataraman * 46487a18d3fSMadhavan Venkataraman * Also, during the CPR suspend phase, do not reprogram the cyclic. 46587a18d3fSMadhavan Venkataraman * We don't want any callout activity. When the CPR resume phase is 46687a18d3fSMadhavan Venkataraman * entered, the cyclic will be programmed for the earliest expiration 46787a18d3fSMadhavan Venkataraman * in the heap. 46887a18d3fSMadhavan Venkataraman */ 469454ab202SMadhavan Venkataraman if (callout_upheap(ct) && (ct->ct_suspend == 0)) 470*51b32bddSMadhavan Venkataraman (void) cyclic_reprogram(ct->ct_cyclic, cl->cl_expiration); 47187a18d3fSMadhavan Venkataraman } 47287a18d3fSMadhavan Venkataraman 47387a18d3fSMadhavan Venkataraman /* 47487a18d3fSMadhavan Venkataraman * Move an expiration from the top of the heap to its correct place 47587a18d3fSMadhavan Venkataraman * in the heap. 
 */
static void
callout_downheap(callout_table_t *ct)
{
        int current, left, right, nelems;
        callout_heap_t *heap, *hleft, *hright, *hcurrent;

        ASSERT(MUTEX_HELD(&ct->ct_mutex));
        ASSERT(ct->ct_heap_num >= 1);

        /*
         * Classic binary-heap sift-down, starting at the root (index 0).
         * The heap is ordered by ch_expiration (earliest at the root).
         */
        heap = ct->ct_heap;
        current = 0;
        nelems = ct->ct_heap_num;

        for (;;) {
                /*
                 * If we don't have a left child (i.e., we're a leaf), we're
                 * done.
                 */
                if ((left = CALLOUT_HEAP_LEFT(current)) >= nelems)
                        return;

                hleft = &heap[left];
                hcurrent = &heap[current];

                right = CALLOUT_HEAP_RIGHT(current);

                /*
                 * Even if we don't have a right child, we still need to compare
                 * our expiration against that of our left child.
                 */
                if (right >= nelems)
                        goto comp_left;

                hright = &heap[right];

                /*
                 * We have both a left and a right child.  We need to compare
                 * the expiration of the children to determine which
                 * expires earlier.
                 */
                if (hright->ch_expiration < hleft->ch_expiration) {
                        /*
                         * Our right child is the earlier of our children.
                         * We'll now compare our expiration to its expiration.
                         * If ours is the earlier one, we're done.
                         */
                        if (hcurrent->ch_expiration <= hright->ch_expiration)
                                return;

                        /*
                         * Our right child expires earlier than we do; swap
                         * with our right child, and descend right.
                         */
                        CALLOUT_SWAP(hright, hcurrent);
                        current = right;
                        continue;
                }

comp_left:
                /*
                 * Our left child is the earlier of our children (or we have
                 * no right child).  We'll now compare our expiration
                 * to its expiration.  If ours is the earlier one, we're done.
                 */
                if (hcurrent->ch_expiration <= hleft->ch_expiration)
                        return;

                /*
                 * Our left child expires earlier than we do; swap with our
                 * left child, and descend left.
                 */
                CALLOUT_SWAP(hleft, hcurrent);
                current = left;
        }
}

/*
 * Delete and handle all past expirations in a callout table's heap.
 */
static void
callout_heap_delete(callout_table_t *ct)
{
        hrtime_t now, expiration, next;
        callout_list_t *cl;
        callout_heap_t *heap;
        int hash;

        ASSERT(MUTEX_HELD(&ct->ct_mutex));

        if (CALLOUT_CLEANUP(ct)) {
                /*
                 * There are too many heap elements pointing to empty callout
                 * lists. Clean them out.  The return value (new expiration)
                 * is deliberately ignored: the loop below re-reads the root
                 * and the cyclic is reprogrammed at the bottom of this
                 * function anyway.
                 */
                (void) callout_heap_process(ct, 0, 0);
        }

        now = gethrtime();
        heap = ct->ct_heap;

        while (ct->ct_heap_num > 0) {
                /* The root of the heap is always the earliest expiration. */
                expiration = heap->ch_expiration;
                hash = CALLOUT_CLHASH(expiration);
                cl = heap->ch_list;
                ASSERT(expiration == cl->cl_expiration);

                if (cl->cl_callouts.ch_head == NULL) {
                        /*
                         * If the callout list is empty, reap it.
                         * Decrement the reap count.
                         */
                        CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
                        CALLOUT_LIST_FREE(ct, cl);
                        ct->ct_nreap--;
                } else {
                        /*
                         * If the root of the heap expires in the future,
                         * bail out.
                         */
                        if (expiration > now)
                                break;

                        /*
                         * Move the callout list for this expiration to the
                         * list of expired callout lists. It will be processed
                         * by the callout executor.
                         */
                        CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
                        CALLOUT_LIST_APPEND(ct->ct_expired, cl);
                }

                /*
                 * Now delete the root. This is done by swapping the root with
                 * the last item in the heap and downheaping the item.
                 */
                ct->ct_heap_num--;
                if (ct->ct_heap_num > 0) {
                        heap[0] = heap[ct->ct_heap_num];
                        callout_downheap(ct);
                }
        }

        /*
         * If this callout table is empty or callouts have been suspended,
         * just return. The cyclic has already been programmed to
         * infinity by the cyclic subsystem.
         */
        if ((ct->ct_heap_num == 0) || (ct->ct_suspend > 0))
                return;

        /*
         * Note: at this point the loop above can only have exited via the
         * "expires in the future" break, so 'expiration' holds the current
         * root's (future) expiration time.
         *
         * If the top expirations are within callout_tolerance of each other,
         * delay the cyclic expire so that they can be processed together.
         * This is to prevent high resolution timers from swamping the system
         * with cyclic activity.
         */
        if (ct->ct_heap_num > 2) {
                next = expiration + callout_tolerance;
                if ((heap[1].ch_expiration < next) ||
                    (heap[2].ch_expiration < next))
                        expiration = next;
        }

        (void) cyclic_reprogram(ct->ct_cyclic, expiration);
}

/*
 * There are some situations when the entire heap is walked and processed.
 * This function is called to do the processing. These are the situations:
 *
 * 1. When the reap count reaches its threshold, the heap has to be cleared
 *    of all empty callout lists.
 *
 * 2. When the system enters and exits KMDB/OBP, all entries in the heap
 *    need to be adjusted by the interval spent in KMDB/OBP.
 *
 * 3. When system time is changed, the heap has to be scanned for
 *    absolute hrestime timers. These need to be removed from the heap
 *    and expired immediately.
 *
 * In cases 2 and 3, it is a good idea to do 1 as well since we are
 * scanning the heap anyway.
 *
 * If the root gets changed and/or callout lists are expired, return the
 * new expiration to the caller so he can reprogram the cyclic accordingly.
 */
static hrtime_t
callout_heap_process(callout_table_t *ct, hrtime_t delta, int timechange)
{
        callout_heap_t *heap;
        callout_list_t *cl, *rootcl;
        hrtime_t expiration, now;
        int i, hash, clflags, expired;
        ulong_t num;

        ASSERT(MUTEX_HELD(&ct->ct_mutex));

        if (ct->ct_heap_num == 0)
                return (0);

        /* Account a cleanup pass only if there is actually reaping to do. */
        if (ct->ct_nreap > 0)
                ct->ct_cleanups++;

        heap = ct->ct_heap;
        /* Remember the root's list so we can tell later if the root moved. */
        rootcl = heap->ch_list;

        /*
         * We walk the heap from the top to the bottom. If we encounter
         * a heap item that points to an empty callout list, we clean
         * it out. If we encounter a hrestime entry that must be removed,
         * again we clean it out. Otherwise, we apply any adjustments needed
         * to an element.
         *
         * During the walk, we also compact the heap from the bottom and
         * reconstruct the heap using upheap operations. This is very
         * efficient if the number of elements to be cleaned is greater than
         * or equal to half the heap. This is the common case.
         *
         * Even in the non-common case, the upheap operations should be short
         * as the entries below generally tend to be bigger than the entries
         * above.
         */
        num = ct->ct_heap_num;
        /*
         * Reset the element count; surviving entries are re-inserted one at
         * a time below, rebuilding the heap in place.
         */
        ct->ct_heap_num = 0;
        clflags = (CALLOUT_LIST_FLAG_HRESTIME | CALLOUT_LIST_FLAG_ABSOLUTE);
        now = gethrtime();
        expired = 0;
        /*
         * NOTE(review): 'i' is int while 'num' is ulong_t, so this is a
         * signed/unsigned comparison - confirm heap sizes stay below
         * INT_MAX (they should, given ct_heap_max).
         */
        for (i = 0; i < num; i++) {
                cl = heap[i].ch_list;
                /*
                 * If the callout list is empty, delete the heap element and
                 * free the callout list.
                 */
                if (cl->cl_callouts.ch_head == NULL) {
                        hash = CALLOUT_CLHASH(cl->cl_expiration);
                        CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
                        CALLOUT_LIST_FREE(ct, cl);
                        continue;
                }

                /*
                 * Delete the heap element and expire the callout list, if
                 * one of the following is true:
                 *	- the callout list has expired
                 *	- the callout list is an absolute hrestime one and
                 *	  there has been a system time change
                 */
                if ((cl->cl_expiration <= now) ||
                    (timechange && ((cl->cl_flags & clflags) == clflags))) {
                        hash = CALLOUT_CLHASH(cl->cl_expiration);
                        CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
                        CALLOUT_LIST_APPEND(ct->ct_expired, cl);
                        expired = 1;
                        continue;
                }

                /*
                 * Apply adjustments, if any. Adjustments are applied after
                 * the system returns from KMDB or OBP. They are only applied
                 * to relative callout lists.
                 */
                if (delta && !(cl->cl_flags & CALLOUT_LIST_FLAG_ABSOLUTE)) {
                        /* Rehash: the expiration (hash key) is changing. */
                        hash = CALLOUT_CLHASH(cl->cl_expiration);
                        CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
                        expiration = cl->cl_expiration + delta;
                        if (expiration <= 0)
                                expiration = CY_INFINITY;
                        heap[i].ch_expiration = expiration;
                        cl->cl_expiration = expiration;
                        hash = CALLOUT_CLHASH(cl->cl_expiration);
                        if (cl->cl_flags & CALLOUT_LIST_FLAG_NANO) {
                                CALLOUT_LIST_APPEND(ct->ct_clhash[hash], cl);
                        } else {
                                CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl);
                        }
                }

                /*
                 * This element survives: compact it to the next free slot
                 * and restore the heap property by sifting it up.
                 */
                heap[ct->ct_heap_num] = heap[i];
                ct->ct_heap_num++;
                (void) callout_upheap(ct);
        }

        /* All empty lists have been reaped in the pass above. */
        ct->ct_nreap = 0;

        /*
         * Tell the caller what to reprogram the cyclic to:
         *	- now, if anything was expired (fire immediately)
         *	- infinity, if the heap is now empty
         *	- the new root's expiration, if the root changed
         *	- 0 (no reprogram needed), if the root is unchanged
         */
        if (expired)
                expiration = gethrtime();
        else if (ct->ct_heap_num == 0)
                expiration = CY_INFINITY;
        else if (rootcl != heap->ch_list)
                expiration = heap->ch_expiration;
        else
                expiration = 0;

        return (expiration);
}

/*
 * Common function used to create normal and realtime callouts.
 *
 * Realtime callouts are handled at CY_LOW_PIL by a cyclic handler. So,
 * there is one restriction on a realtime callout handler - it should not
 * directly or indirectly acquire cpu_lock. CPU offline waits for pending
 * cyclic handlers to complete while holding cpu_lock. So, if a realtime
 * callout handler were to try to get cpu_lock, there would be a deadlock
 * during CPU offline.
 *
 * 'expiration' is absolute or relative nanoseconds depending on
 * CALLOUT_FLAG_ABSOLUTE in 'flags'; 'resolution' (> 0, in nanoseconds)
 * is the granularity the expiration is aligned to.
 */
callout_id_t
timeout_generic(int type, void (*func)(void *), void *arg,
    hrtime_t expiration, hrtime_t resolution, int flags)
{
        callout_table_t *ct;
        callout_t *cp;
        callout_id_t id;
        callout_list_t *cl;
        hrtime_t now, interval, rexpiration;
        int hash, clflags;

        ASSERT(resolution > 0);
        ASSERT(func != NULL);

        /*
         * We get the current hrtime right upfront so that latencies in
         * this function do not affect the accuracy of the callout.
         */
        now = gethrtime();

        /*
         * We disable kernel preemption so that we remain on the same CPU
         * throughout. If we needed to reprogram the callout table's cyclic,
         * we can avoid X-calls if we are on the same CPU.
         *
         * Note that callout_alloc() releases and reacquires the callout
         * table mutex. While reacquiring the mutex, it is possible for us
         * to go to sleep and later migrate to another CPU. This should be
         * pretty rare, though.
         */
        kpreempt_disable();

        ct = &callout_table[CALLOUT_TABLE(type, CPU->cpu_seqid)];
        mutex_enter(&ct->ct_mutex);

        if (ct->ct_cyclic == CYCLIC_NONE) {
                mutex_exit(&ct->ct_mutex);
                /*
                 * The callout table has not yet been initialized fully.
                 * So, put this one on the boot callout table which is
                 * always initialized.
                 */
                ct = &callout_boot_ct[type];
                mutex_enter(&ct->ct_mutex);
        }

        if (CALLOUT_CLEANUP(ct)) {
                /*
                 * There are too many heap elements pointing to empty callout
                 * lists. Clean them out.  A non-zero return means the root
                 * changed or something expired, so the cyclic must be
                 * reprogrammed (unless callouts are suspended).
                 */
                rexpiration = callout_heap_process(ct, 0, 0);
                if ((rexpiration != 0) && (ct->ct_suspend == 0))
                        (void) cyclic_reprogram(ct->ct_cyclic, rexpiration);
        }

        /* Grab a callout structure from the free list, or allocate one. */
        if ((cp = ct->ct_free) == NULL)
                cp = callout_alloc(ct);
        else
                ct->ct_free = cp->c_idnext;

        cp->c_func = func;
        cp->c_arg = arg;

        /*
         * Compute the expiration hrtime.
         */
        if (flags & CALLOUT_FLAG_ABSOLUTE) {
                interval = expiration - now;
        } else {
                interval = expiration;
                expiration += now;
        }

        if (resolution > 1) {
                /*
                 * Align expiration to the specified resolution.
                 */
                if (flags & CALLOUT_FLAG_ROUNDUP)
                        expiration += resolution - 1;
                expiration = (expiration / resolution) * resolution;
        }

        if (expiration <= 0) {
                /*
                 * expiration hrtime overflow has occurred. Just set the
                 * expiration to infinity.
                 */
                expiration = CY_INFINITY;
        }

        /*
         * Assign an ID to this callout.  Legacy (32-bit) IDs come from the
         * short or long counter depending on the interval; full IDs come
         * from the generation counter.
         */
        if (flags & CALLOUT_FLAG_32BIT) {
                if (interval > callout_longterm) {
                        id = (ct->ct_long_id - callout_counter_low);
                        id |= CALLOUT_COUNTER_HIGH;
                        ct->ct_long_id = id;
                } else {
                        id = (ct->ct_short_id - callout_counter_low);
                        id |= CALLOUT_COUNTER_HIGH;
                        ct->ct_short_id = id;
                }
        } else {
                id = (ct->ct_gen_id - callout_counter_low);
                if ((id & CALLOUT_COUNTER_HIGH) == 0) {
                        id |= CALLOUT_COUNTER_HIGH;
                        id += CALLOUT_GENERATION_LOW;
                }
                ct->ct_gen_id = id;
        }

        cp->c_xid = id;

        clflags = 0;
        if (flags & CALLOUT_FLAG_ABSOLUTE)
                clflags |= CALLOUT_LIST_FLAG_ABSOLUTE;
        if (flags & CALLOUT_FLAG_HRESTIME)
                clflags |= CALLOUT_LIST_FLAG_HRESTIME;
        if (resolution == 1)
                clflags |= CALLOUT_LIST_FLAG_NANO;
        hash = CALLOUT_CLHASH(expiration);

again:
        /*
         * Try to see if a callout list already exists for this expiration.
         */
        cl = callout_list_get(ct, expiration, clflags, hash);
        if (cl == NULL) {
                /*
                 * Check if we have enough space in the heap to insert one
                 * expiration. If not, expand the heap.
                 */
                if (ct->ct_heap_num == ct->ct_heap_max) {
                        callout_heap_expand(ct);
                        /*
                         * In the above call, we drop the lock, allocate and
                         * reacquire the lock. So, we could have been away
                         * for a while. In the meantime, someone could have
                         * inserted a callout list with the same expiration.
                         * So, the best course is to repeat the steps. This
                         * should be an infrequent event.
                         */
                        goto again;
                }

                /*
                 * Check the free list. If we don't find one, we have to
                 * take the slow path and allocate from kmem.
                 */
                if ((cl = ct->ct_lfree) == NULL) {
                        callout_list_alloc(ct);
                        /*
                         * In the above call, we drop the lock, allocate and
                         * reacquire the lock. So, we could have been away
                         * for a while. In the meantime, someone could have
                         * inserted a callout list with the same expiration.
                         * Plus, the heap could have become full. So, the best
                         * course is to repeat the steps. This should be an
                         * infrequent event.
                         */
                        goto again;
                }
                ct->ct_lfree = cl->cl_next;
                cl->cl_expiration = expiration;
                cl->cl_flags = clflags;

                /*
                 * NANO-resolution lists are appended rather than
                 * insertion-sorted into the hash chain - presumably ordering
                 * within the chain is not required for them; confirm against
                 * callout_list_get().
                 */
                if (clflags & CALLOUT_LIST_FLAG_NANO) {
                        CALLOUT_LIST_APPEND(ct->ct_clhash[hash], cl);
                } else {
                        CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl);
                }

                /*
                 * This is a new expiration. So, insert it into the heap.
                 * This will also reprogram the cyclic, if the expiration
                 * propagated to the root of the heap.
                 */
                callout_heap_insert(ct, cl);
        } else {
                /*
                 * If the callout list was empty, untimeout_generic() would
                 * have incremented a reap count. Decrement the reap count
                 * as we are going to insert a callout into this list.
                 */
                if (cl->cl_callouts.ch_head == NULL)
                        ct->ct_nreap--;
        }
        cp->c_list = cl;
        CALLOUT_APPEND(ct, cp);

        ct->ct_timeouts++;
        ct->ct_timeouts_pending++;

        mutex_exit(&ct->ct_mutex);

        kpreempt_enable();

        TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
            "timeout:%K(%p) in %llx expiration, cp %p", func, arg, expiration,
            cp);

        return (id);
}

timeout_id_t
timeout(void (*func)(void *), void *arg, clock_t delta)
{
        ulong_t id;

        /*
         * Make sure the callout
         * runs at least 1 tick in the future, and clamp the interval to
         * callout_max_ticks.
         */
        if (delta <= 0)
                delta = 1;
        else if (delta > callout_max_ticks)
                delta = callout_max_ticks;

        /*
         * The full callout ID is truncated to a ulong_t to match the
         * legacy timeout_id_t interface (CALLOUT_LEGACY requests a
         * legacy-compatible ID from timeout_generic()).
         */
        id = (ulong_t)timeout_generic(CALLOUT_NORMAL, func, arg,
            TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);

        return ((timeout_id_t)id);
}

/*
 * Convenience function that creates a normal callout with default parameters
 * and returns a full ID.
 */
callout_id_t
timeout_default(void (*func)(void *), void *arg, clock_t delta)
{
        callout_id_t id;

        /*
         * Make sure the callout runs at least 1 tick in the future.
         */
        if (delta <= 0)
                delta = 1;
        else if (delta > callout_max_ticks)
                delta = callout_max_ticks;

        id = timeout_generic(CALLOUT_NORMAL, func, arg, TICK_TO_NSEC(delta),
            nsec_per_tick, 0);

        return (id);
}

timeout_id_t
realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
{
        ulong_t id;

        /*
         * Make sure the callout runs at least 1 tick in the future.
         */
        if (delta <= 0)
                delta = 1;
        else if (delta > callout_max_ticks)
                delta = callout_max_ticks;

        /* Legacy interface: the full ID is truncated to a ulong_t. */
        id = (ulong_t)timeout_generic(CALLOUT_REALTIME, func, arg,
            TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);

        return ((timeout_id_t)id);
}

/*
 * Convenience function that creates a realtime callout with default parameters
 * and returns a full ID.
 */
callout_id_t
realtime_timeout_default(void (*func)(void *), void *arg, clock_t delta)
{
        callout_id_t id;

        /*
         * Make sure the callout runs at least 1 tick in the future.
         */
        if (delta <= 0)
                delta = 1;
        else if (delta > callout_max_ticks)
                delta = callout_max_ticks;

        id = timeout_generic(CALLOUT_REALTIME, func, arg, TICK_TO_NSEC(delta),
            nsec_per_tick, 0);

        return (id);
}

/*
 * Cancel the callout identified by 'id'.  For an unexpired callout, the
 * remaining time (in nanoseconds) until it would have fired is returned.
 * NOTE(review): behavior for an executing callout (and the 'nowait'
 * parameter) is handled further below - see untimeout(9F) for the contract.
 */
hrtime_t
untimeout_generic(callout_id_t id, int nowait)
{
        callout_table_t *ct;
        callout_t *cp;
        callout_id_t xid;
        callout_list_t *cl;
        int hash;
        callout_id_t bogus;

        ct = &callout_table[CALLOUT_ID_TO_TABLE(id)];
        hash = CALLOUT_IDHASH(id);

        mutex_enter(&ct->ct_mutex);

        /*
         * Search the ID hash table for the callout.
109087a18d3fSMadhavan Venkataraman */ 109187a18d3fSMadhavan Venkataraman for (cp = ct->ct_idhash[hash].ch_head; cp; cp = cp->c_idnext) { 10927c478bd9Sstevel@tonic-gate 109387a18d3fSMadhavan Venkataraman xid = cp->c_xid; 10947c478bd9Sstevel@tonic-gate 109587a18d3fSMadhavan Venkataraman /* 109687a18d3fSMadhavan Venkataraman * Match the ID and generation number. 109787a18d3fSMadhavan Venkataraman */ 109887a18d3fSMadhavan Venkataraman if ((xid & CALLOUT_ID_MASK) != id) 10997c478bd9Sstevel@tonic-gate continue; 11007c478bd9Sstevel@tonic-gate 110187a18d3fSMadhavan Venkataraman if ((xid & CALLOUT_EXECUTING) == 0) { 110287a18d3fSMadhavan Venkataraman hrtime_t expiration; 110387a18d3fSMadhavan Venkataraman 110487a18d3fSMadhavan Venkataraman /* 110587a18d3fSMadhavan Venkataraman * Delete the callout. If the callout list becomes 110687a18d3fSMadhavan Venkataraman * NULL, we don't remove it from the table. This is 110787a18d3fSMadhavan Venkataraman * so it can be reused. If the empty callout list 110887a18d3fSMadhavan Venkataraman * corresponds to the top of the the callout heap, we 110987a18d3fSMadhavan Venkataraman * don't reprogram the table cyclic here. This is in 111087a18d3fSMadhavan Venkataraman * order to avoid lots of X-calls to the CPU associated 111187a18d3fSMadhavan Venkataraman * with the callout table. 
111287a18d3fSMadhavan Venkataraman */ 1113*51b32bddSMadhavan Venkataraman cl = cp->c_list; 1114*51b32bddSMadhavan Venkataraman expiration = cl->cl_expiration; 111587a18d3fSMadhavan Venkataraman CALLOUT_DELETE(ct, cp); 111687a18d3fSMadhavan Venkataraman cp->c_idnext = ct->ct_free; 111787a18d3fSMadhavan Venkataraman ct->ct_free = cp; 1118*51b32bddSMadhavan Venkataraman cp->c_xid |= CALLOUT_FREE; 111987a18d3fSMadhavan Venkataraman ct->ct_untimeouts_unexpired++; 112087a18d3fSMadhavan Venkataraman ct->ct_timeouts_pending--; 1121*51b32bddSMadhavan Venkataraman 1122*51b32bddSMadhavan Venkataraman /* 1123*51b32bddSMadhavan Venkataraman * If the callout list has become empty, it needs 1124*51b32bddSMadhavan Venkataraman * to be cleaned along with its heap entry. Increment 1125*51b32bddSMadhavan Venkataraman * a reap count. 1126*51b32bddSMadhavan Venkataraman */ 1127*51b32bddSMadhavan Venkataraman if (cl->cl_callouts.ch_head == NULL) 1128*51b32bddSMadhavan Venkataraman ct->ct_nreap++; 112987a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 113087a18d3fSMadhavan Venkataraman 113187a18d3fSMadhavan Venkataraman expiration -= gethrtime(); 113287a18d3fSMadhavan Venkataraman TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT, 113387a18d3fSMadhavan Venkataraman "untimeout:ID %lx hrtime left %llx", id, 113487a18d3fSMadhavan Venkataraman expiration); 113587a18d3fSMadhavan Venkataraman return (expiration < 0 ? 0 : expiration); 113687a18d3fSMadhavan Venkataraman } 113787a18d3fSMadhavan Venkataraman 113887a18d3fSMadhavan Venkataraman ct->ct_untimeouts_executing++; 11397c478bd9Sstevel@tonic-gate /* 11407c478bd9Sstevel@tonic-gate * The callout we want to delete is currently executing. 
11417c478bd9Sstevel@tonic-gate * The DDI states that we must wait until the callout 114207247649SMadhavan Venkataraman * completes before returning, so we block on c_done until the 114387a18d3fSMadhavan Venkataraman * callout ID changes (to the old ID if it's on the freelist, 11447c478bd9Sstevel@tonic-gate * or to a new callout ID if it's in use). This implicitly 11457c478bd9Sstevel@tonic-gate * assumes that callout structures are persistent (they are). 11467c478bd9Sstevel@tonic-gate */ 114707247649SMadhavan Venkataraman if (cp->c_executor == curthread) { 11487c478bd9Sstevel@tonic-gate /* 11497c478bd9Sstevel@tonic-gate * The timeout handler called untimeout() on itself. 11507c478bd9Sstevel@tonic-gate * Stupid, but legal. We can't wait for the timeout 11517c478bd9Sstevel@tonic-gate * to complete without deadlocking, so we just return. 11527c478bd9Sstevel@tonic-gate */ 115387a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 11547c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF, 11557c478bd9Sstevel@tonic-gate "untimeout_self:ID %x", id); 11567c478bd9Sstevel@tonic-gate return (-1); 11577c478bd9Sstevel@tonic-gate } 115887a18d3fSMadhavan Venkataraman if (nowait == 0) { 115987a18d3fSMadhavan Venkataraman /* 116087a18d3fSMadhavan Venkataraman * We need to wait. Indicate that we are waiting by 116107247649SMadhavan Venkataraman * incrementing c_waiting. This prevents the executor 116207247649SMadhavan Venkataraman * from doing a wakeup on c_done if there are no 116387a18d3fSMadhavan Venkataraman * waiters. 
116487a18d3fSMadhavan Venkataraman */ 116587a18d3fSMadhavan Venkataraman while (cp->c_xid == xid) { 116607247649SMadhavan Venkataraman cp->c_waiting = 1; 116707247649SMadhavan Venkataraman cv_wait(&cp->c_done, &ct->ct_mutex); 116887a18d3fSMadhavan Venkataraman } 116987a18d3fSMadhavan Venkataraman } 117087a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 11717c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING, 11727c478bd9Sstevel@tonic-gate "untimeout_executing:ID %lx", id); 11737c478bd9Sstevel@tonic-gate return (-1); 11747c478bd9Sstevel@tonic-gate } 117587a18d3fSMadhavan Venkataraman ct->ct_untimeouts_expired++; 11767c478bd9Sstevel@tonic-gate 117787a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 11787c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID, 11797c478bd9Sstevel@tonic-gate "untimeout_bogus_id:ID %lx", id); 11807c478bd9Sstevel@tonic-gate 11817c478bd9Sstevel@tonic-gate /* 11827c478bd9Sstevel@tonic-gate * We didn't find the specified callout ID. This means either 11837c478bd9Sstevel@tonic-gate * (1) the callout already fired, or (2) the caller passed us 11847c478bd9Sstevel@tonic-gate * a bogus value. Perform a sanity check to detect case (2). 
11857c478bd9Sstevel@tonic-gate */ 1186*51b32bddSMadhavan Venkataraman bogus = (CALLOUT_ID_FLAGS | CALLOUT_COUNTER_HIGH); 118787a18d3fSMadhavan Venkataraman if (((id & bogus) != CALLOUT_COUNTER_HIGH) && (id != 0)) 118887a18d3fSMadhavan Venkataraman panic("untimeout: impossible timeout id %llx", 118987a18d3fSMadhavan Venkataraman (unsigned long long)id); 11907c478bd9Sstevel@tonic-gate 11917c478bd9Sstevel@tonic-gate return (-1); 11927c478bd9Sstevel@tonic-gate } 11937c478bd9Sstevel@tonic-gate 119487a18d3fSMadhavan Venkataraman clock_t 119587a18d3fSMadhavan Venkataraman untimeout(timeout_id_t id_arg) 119687a18d3fSMadhavan Venkataraman { 119787a18d3fSMadhavan Venkataraman hrtime_t hleft; 119887a18d3fSMadhavan Venkataraman clock_t tleft; 119987a18d3fSMadhavan Venkataraman callout_id_t id; 120087a18d3fSMadhavan Venkataraman 120187a18d3fSMadhavan Venkataraman id = (ulong_t)id_arg; 120287a18d3fSMadhavan Venkataraman hleft = untimeout_generic(id, 0); 120387a18d3fSMadhavan Venkataraman if (hleft < 0) 120487a18d3fSMadhavan Venkataraman tleft = -1; 120587a18d3fSMadhavan Venkataraman else if (hleft == 0) 120687a18d3fSMadhavan Venkataraman tleft = 0; 120787a18d3fSMadhavan Venkataraman else 120887a18d3fSMadhavan Venkataraman tleft = NSEC_TO_TICK(hleft); 120987a18d3fSMadhavan Venkataraman 121087a18d3fSMadhavan Venkataraman return (tleft); 121187a18d3fSMadhavan Venkataraman } 121287a18d3fSMadhavan Venkataraman 12137c478bd9Sstevel@tonic-gate /* 121487a18d3fSMadhavan Venkataraman * Convenience function to untimeout a timeout with a full ID with default 121587a18d3fSMadhavan Venkataraman * parameters. 
 * parameters.
 */
clock_t
untimeout_default(callout_id_t id, int nowait)
{
	hrtime_t hleft;
	clock_t tleft;

	hleft = untimeout_generic(id, nowait);
	if (hleft < 0)
		tleft = -1;
	else if (hleft == 0)
		tleft = 0;
	else
		tleft = NSEC_TO_TICK(hleft);

	return (tleft);
}

/*
 * Expire all the callouts queued in the specified callout list.
 *
 * Called and returns with ct_mutex held; the lock is dropped around each
 * handler invocation, so the list can change while a handler runs. This is
 * why cnext is (re)read only after the lock has been reacquired.
 */
static void
callout_list_expire(callout_table_t *ct, callout_list_t *cl)
{
	callout_t *cp, *cnext;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(cl != NULL);

	for (cp = cl->cl_callouts.ch_head; cp != NULL; cp = cnext) {
		/*
		 * Multiple executor threads could be running at the same
		 * time. If this callout is already being executed,
		 * go on to the next one.
		 */
		if (cp->c_xid & CALLOUT_EXECUTING) {
			cnext = cp->c_clnext;
			continue;
		}

		/*
		 * Indicate to untimeout() that a callout is
		 * being expired by the executor.
		 */
		cp->c_xid |= CALLOUT_EXECUTING;
		cp->c_executor = curthread;
		mutex_exit(&ct->ct_mutex);

		DTRACE_PROBE1(callout__start, callout_t *, cp);
		(*cp->c_func)(cp->c_arg);
		DTRACE_PROBE1(callout__end, callout_t *, cp);

		mutex_enter(&ct->ct_mutex);

		ct->ct_expirations++;
		ct->ct_timeouts_pending--;
		/*
		 * Indicate completion for c_done.
		 */
		cp->c_xid &= ~CALLOUT_EXECUTING;
		cp->c_executor = NULL;
		cnext = cp->c_clnext;

		/*
		 * Delete callout from ID hash table and the callout
		 * list, return to freelist, and tell any untimeout() that
		 * cares that we're done.
		 */
		CALLOUT_DELETE(ct, cp);
		cp->c_idnext = ct->ct_free;
		ct->ct_free = cp;
		cp->c_xid |= CALLOUT_FREE;

		if (cp->c_waiting) {
			cp->c_waiting = 0;
			cv_broadcast(&cp->c_done);
		}
	}
}

/*
 * Execute all expired callout lists for a callout table.
 */
static void
callout_expire(callout_table_t *ct)
{
	callout_list_t *cl, *clnext;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	for (cl = ct->ct_expired.ch_head; (cl != NULL); cl = clnext) {
		/*
		 * Expire all the callouts in this callout list.
		 */
		callout_list_expire(ct, cl);

		clnext = cl->cl_next;
		if (cl->cl_callouts.ch_head == NULL) {
			/*
			 * Free the callout list.
			 */
			CALLOUT_LIST_DELETE(ct->ct_expired, cl);
			CALLOUT_LIST_FREE(ct, cl);
		}
	}
}

/*
 * The cyclic handlers below process callouts in two steps:
 *
 *	1. Find all expired callout lists and queue them in a separate
 *	   list of expired callouts.
 *	2. Execute the expired callout lists.
 *
 * This is done for two reasons:
 *
 *	1. We want to quickly find the next earliest expiration to program
 *	   the cyclic to and reprogram it. We can do this right at the end
 *	   of step 1.
 *	2. The realtime cyclic handler expires callouts in place. However,
 *	   for normal callouts, callouts are expired by a taskq thread.
 *	   So, it is simpler and more robust to have the taskq thread just
 *	   do step 2.
 */

/*
 * Realtime callout cyclic handler.
 */
void
callout_realtime(callout_table_t *ct)
{
	mutex_enter(&ct->ct_mutex);
	callout_heap_delete(ct);
	callout_expire(ct);
	mutex_exit(&ct->ct_mutex);
}

/*
 * Taskq handler that executes the expired callout lists of a normal
 * callout table. Dispatched from callout_normal() below.
 */
void
callout_execute(callout_table_t *ct)
{
	mutex_enter(&ct->ct_mutex);
	callout_expire(ct);
	mutex_exit(&ct->ct_mutex);
}

/*
 * Normal callout cyclic handler.
 */
void
callout_normal(callout_table_t *ct)
{
	int i, exec;

	mutex_enter(&ct->ct_mutex);
	callout_heap_delete(ct);
	CALLOUT_EXEC_COMPUTE(ct, exec);
	mutex_exit(&ct->ct_mutex);

	/*
	 * Hand the actual expiration work (step 2) to the table's taskq.
	 */
	for (i = 0; i < exec; i++) {
		ASSERT(ct->ct_taskq != NULL);
		(void) taskq_dispatch(ct->ct_taskq,
		    (task_func_t *)callout_execute, ct, TQ_NOSLEEP);
	}
}

/*
 * Suspend callout processing.
 */
static void
callout_suspend(void)
{
	int t, f;
	callout_table_t *ct;

	/*
	 * Traverse every callout table in the system and suspend callout
	 * processing.
	 *
	 * We need to suspend all the tables (including the inactive ones)
	 * so that if a table is made active while the suspend is still on,
	 * the table remains suspended.
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			ct = &callout_table[CALLOUT_TABLE(t, f)];

			mutex_enter(&ct->ct_mutex);
			ct->ct_suspend++;
			if (ct->ct_cyclic == CYCLIC_NONE) {
				mutex_exit(&ct->ct_mutex);
				continue;
			}
			/*
			 * Only the first suspender needs to push the cyclic
			 * out to infinity; nested suspends just count.
			 */
			if (ct->ct_suspend == 1)
				(void) cyclic_reprogram(ct->ct_cyclic,
				    CY_INFINITY);
			mutex_exit(&ct->ct_mutex);
		}
	}
}

/*
 * Resume callout processing.
 */
static void
callout_resume(hrtime_t delta, int timechange)
{
	hrtime_t exp;
	int t, f;
	callout_table_t *ct;

	/*
	 * Traverse every callout table in the system and resume callout
	 * processing. For active tables, perform any hrtime adjustments
	 * necessary.
	 *
	 * 'delta' is the hrtime interval to add to heap expirations (e.g.
	 * time spent in the debugger); 'timechange' indicates that hrestime
	 * changed and absolute hrestime callouts must be processed.
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			ct = &callout_table[CALLOUT_TABLE(t, f)];

			mutex_enter(&ct->ct_mutex);
			if (ct->ct_cyclic == CYCLIC_NONE) {
				ct->ct_suspend--;
				mutex_exit(&ct->ct_mutex);
				continue;
			}

			/*
			 * If a delta is specified, adjust the expirations in
			 * the heap by delta. Also, if the caller indicates
			 * a timechange, process that. This step also cleans
			 * out any empty callout lists that might happen to
			 * be there.
			 */
			(void) callout_heap_process(ct, delta, timechange);

			ct->ct_suspend--;
			if (ct->ct_suspend == 0) {
				/*
				 * If the expired list is non-empty, then have
				 * the cyclic expire immediately. Else, program
				 * the cyclic based on the heap.
				 */
				if (ct->ct_expired.ch_head != NULL)
					exp = gethrtime();
				else if (ct->ct_heap_num > 0)
					exp = ct->ct_heap[0].ch_expiration;
				else
					exp = 0;
				if (exp != 0)
					(void) cyclic_reprogram(ct->ct_cyclic,
					    exp);
			}

			mutex_exit(&ct->ct_mutex);
		}
	}
}

/*
 * Callback handler used by CPR to stop and resume callouts.
 * The cyclic subsystem saves and restores hrtime during CPR.
 * That is why callout_resume() is called with a 0 delta.
 * Although hrtime is the same, hrestime (system time) has
 * progressed during CPR. So, we have to indicate a time change
 * to expire the absolute hrestime timers.
 */
/*ARGSUSED*/
static boolean_t
callout_cpr_callb(void *arg, int code)
{
	if (code == CB_CODE_CPR_CHKPT)
		callout_suspend();
	else
		callout_resume(0, 1);

	return (B_TRUE);
}

/*
 * Callback handler invoked when the debugger is entered or exited.
 */
/*ARGSUSED*/
static boolean_t
callout_debug_callb(void *arg, int code)
{
	hrtime_t delta;

	/*
	 * When the system enters the debugger, make a note of the hrtime.
	 * When it is resumed, compute how long the system was in the
	 * debugger. This interval should not be counted for callouts.
	 */
	if (code == 0) {
		callout_suspend();
		callout_debug_hrtime = gethrtime();
	} else {
		delta = gethrtime() - callout_debug_hrtime;
		callout_resume(delta, 0);
	}

	return (B_TRUE);
}

/*
 * Move the absolute hrestime callouts to the expired list. Then program the
 * table's cyclic to expire immediately so that the callouts can be executed
 * immediately.
 */
static void
callout_hrestime_one(callout_table_t *ct)
{
	hrtime_t expiration;

	mutex_enter(&ct->ct_mutex);
	if (ct->ct_heap_num == 0) {
		mutex_exit(&ct->ct_mutex);
		return;
	}

	/*
	 * Walk the heap and process all the absolute hrestime entries.
	 */
	expiration = callout_heap_process(ct, 0, 1);

	if ((expiration != 0) && (ct->ct_suspend == 0))
		(void) cyclic_reprogram(ct->ct_cyclic, expiration);

	mutex_exit(&ct->ct_mutex);
}

/*
 * This function is called whenever system time (hrestime) is changed
 * explicitly. All the HRESTIME callouts must be expired at once.
 */
/*ARGSUSED*/
void
callout_hrestime(void)
{
	int t, f;
	callout_table_t *ct;

	/*
	 * Traverse every callout table in the system and process the hrestime
	 * callouts therein.
	 *
	 * We look at all the tables because we don't know which ones were
	 * onlined and offlined in the past. The offlined tables may still
	 * have active cyclics processing timers somewhere.
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			ct = &callout_table[CALLOUT_TABLE(t, f)];
			callout_hrestime_one(ct);
		}
	}
}

/*
 * Create the hash tables for this callout table.
 */
static void
callout_hash_init(callout_table_t *ct)
{
	size_t size;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT((ct->ct_idhash == NULL) && (ct->ct_clhash == NULL));

	size = sizeof (callout_hash_t) * CALLOUT_BUCKETS;
	ct->ct_idhash = kmem_zalloc(size, KM_SLEEP);
	ct->ct_clhash = kmem_zalloc(size, KM_SLEEP);
}

/*
 * Create per-callout table kstats.
159187a18d3fSMadhavan Venkataraman */ 159287a18d3fSMadhavan Venkataraman static void 159387a18d3fSMadhavan Venkataraman callout_kstat_init(callout_table_t *ct) 159487a18d3fSMadhavan Venkataraman { 159587a18d3fSMadhavan Venkataraman callout_stat_type_t stat; 159687a18d3fSMadhavan Venkataraman kstat_t *ct_kstats; 159787a18d3fSMadhavan Venkataraman int ndx; 159887a18d3fSMadhavan Venkataraman 159987a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 160087a18d3fSMadhavan Venkataraman ASSERT(ct->ct_kstats == NULL); 160187a18d3fSMadhavan Venkataraman 160287a18d3fSMadhavan Venkataraman ndx = ct - callout_table; 160387a18d3fSMadhavan Venkataraman ct_kstats = kstat_create("unix", ndx, "callout", 160487a18d3fSMadhavan Venkataraman "misc", KSTAT_TYPE_NAMED, CALLOUT_NUM_STATS, KSTAT_FLAG_VIRTUAL); 160587a18d3fSMadhavan Venkataraman 160687a18d3fSMadhavan Venkataraman if (ct_kstats == NULL) { 160787a18d3fSMadhavan Venkataraman cmn_err(CE_WARN, "kstat_create for callout table %p failed", 160887a18d3fSMadhavan Venkataraman (void *)ct); 160987a18d3fSMadhavan Venkataraman } else { 161087a18d3fSMadhavan Venkataraman ct_kstats->ks_data = ct->ct_kstat_data; 161187a18d3fSMadhavan Venkataraman for (stat = 0; stat < CALLOUT_NUM_STATS; stat++) 161287a18d3fSMadhavan Venkataraman kstat_named_init(&ct->ct_kstat_data[stat], 161387a18d3fSMadhavan Venkataraman callout_kstat_names[stat], KSTAT_DATA_INT64); 161487a18d3fSMadhavan Venkataraman ct->ct_kstats = ct_kstats; 161587a18d3fSMadhavan Venkataraman kstat_install(ct_kstats); 161687a18d3fSMadhavan Venkataraman } 161787a18d3fSMadhavan Venkataraman } 161887a18d3fSMadhavan Venkataraman 161987a18d3fSMadhavan Venkataraman static void 162087a18d3fSMadhavan Venkataraman callout_cyclic_init(callout_table_t *ct) 162187a18d3fSMadhavan Venkataraman { 162287a18d3fSMadhavan Venkataraman cyc_handler_t hdlr; 162387a18d3fSMadhavan Venkataraman cyc_time_t when; 162487a18d3fSMadhavan Venkataraman processorid_t seqid; 162587a18d3fSMadhavan Venkataraman 
int t; 162687a18d3fSMadhavan Venkataraman 162787a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 162887a18d3fSMadhavan Venkataraman 162987a18d3fSMadhavan Venkataraman t = CALLOUT_TABLE_TYPE(ct); 163087a18d3fSMadhavan Venkataraman seqid = CALLOUT_TABLE_SEQID(ct); 163187a18d3fSMadhavan Venkataraman 163287a18d3fSMadhavan Venkataraman /* 163387a18d3fSMadhavan Venkataraman * Create the taskq thread if the table type is normal. 163487a18d3fSMadhavan Venkataraman * Realtime tables are handled at PIL1 by a softint 163587a18d3fSMadhavan Venkataraman * handler. 163687a18d3fSMadhavan Venkataraman */ 16377c478bd9Sstevel@tonic-gate if (t == CALLOUT_NORMAL) { 163887a18d3fSMadhavan Venkataraman ASSERT(ct->ct_taskq == NULL); 16397c478bd9Sstevel@tonic-gate /* 16407c478bd9Sstevel@tonic-gate * Each callout thread consumes exactly one 16417c478bd9Sstevel@tonic-gate * task structure while active. Therefore, 1642*51b32bddSMadhavan Venkataraman * prepopulating with 2 * callout_threads tasks 16437c478bd9Sstevel@tonic-gate * ensures that there's at least one task per 16447c478bd9Sstevel@tonic-gate * thread that's either scheduled or on the 16457c478bd9Sstevel@tonic-gate * freelist. In turn, this guarantees that 16467c478bd9Sstevel@tonic-gate * taskq_dispatch() will always either succeed 16477c478bd9Sstevel@tonic-gate * (because there's a free task structure) or 16487c478bd9Sstevel@tonic-gate * be unnecessary (because "callout_excute(ct)" 16497c478bd9Sstevel@tonic-gate * has already scheduled). 
16507c478bd9Sstevel@tonic-gate */ 16517c478bd9Sstevel@tonic-gate ct->ct_taskq = 165287a18d3fSMadhavan Venkataraman taskq_create_instance("callout_taskq", seqid, 1653*51b32bddSMadhavan Venkataraman callout_threads, maxclsyspri, 1654*51b32bddSMadhavan Venkataraman 2 * callout_threads, 2 * callout_threads, 16557c478bd9Sstevel@tonic-gate TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 16567c478bd9Sstevel@tonic-gate } 165787a18d3fSMadhavan Venkataraman 165887a18d3fSMadhavan Venkataraman /* 165987a18d3fSMadhavan Venkataraman * callouts can only be created in a table whose 166087a18d3fSMadhavan Venkataraman * cyclic has been initialized. 166187a18d3fSMadhavan Venkataraman */ 166287a18d3fSMadhavan Venkataraman ASSERT(ct->ct_heap_num == 0); 166387a18d3fSMadhavan Venkataraman 166487a18d3fSMadhavan Venkataraman /* 166587a18d3fSMadhavan Venkataraman * Create the callout table cyclics. 166607247649SMadhavan Venkataraman * 166707247649SMadhavan Venkataraman * The realtime cyclic handler executes at low PIL. The normal cyclic 166807247649SMadhavan Venkataraman * handler executes at lock PIL. This is because there are cases 166907247649SMadhavan Venkataraman * where code can block at PIL > 1 waiting for a normal callout handler 167007247649SMadhavan Venkataraman * to unblock it directly or indirectly. If the normal cyclic were to 167107247649SMadhavan Venkataraman * be executed at low PIL, it could get blocked out by the waiter 167207247649SMadhavan Venkataraman * and cause a deadlock. 
167387a18d3fSMadhavan Venkataraman */ 167487a18d3fSMadhavan Venkataraman ASSERT(ct->ct_cyclic == CYCLIC_NONE); 167587a18d3fSMadhavan Venkataraman 167687a18d3fSMadhavan Venkataraman hdlr.cyh_func = (cyc_func_t)CALLOUT_CYCLIC_HANDLER(t); 167707247649SMadhavan Venkataraman if (ct->ct_type == CALLOUT_REALTIME) 167807247649SMadhavan Venkataraman hdlr.cyh_level = callout_realtime_level; 167907247649SMadhavan Venkataraman else 168007247649SMadhavan Venkataraman hdlr.cyh_level = callout_normal_level; 168187a18d3fSMadhavan Venkataraman hdlr.cyh_arg = ct; 168287a18d3fSMadhavan Venkataraman when.cyt_when = CY_INFINITY; 168387a18d3fSMadhavan Venkataraman when.cyt_interval = CY_INFINITY; 168487a18d3fSMadhavan Venkataraman 168587a18d3fSMadhavan Venkataraman ct->ct_cyclic = cyclic_add(&hdlr, &when); 168687a18d3fSMadhavan Venkataraman } 168787a18d3fSMadhavan Venkataraman 168887a18d3fSMadhavan Venkataraman void 168987a18d3fSMadhavan Venkataraman callout_cpu_online(cpu_t *cp) 169087a18d3fSMadhavan Venkataraman { 169187a18d3fSMadhavan Venkataraman lgrp_handle_t hand; 169287a18d3fSMadhavan Venkataraman callout_cache_t *cache; 169387a18d3fSMadhavan Venkataraman char s[KMEM_CACHE_NAMELEN]; 169487a18d3fSMadhavan Venkataraman callout_table_t *ct; 169587a18d3fSMadhavan Venkataraman processorid_t seqid; 169687a18d3fSMadhavan Venkataraman int t; 169787a18d3fSMadhavan Venkataraman 169887a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&cpu_lock)); 169987a18d3fSMadhavan Venkataraman 170087a18d3fSMadhavan Venkataraman /* 170187a18d3fSMadhavan Venkataraman * Locate the cache corresponding to the onlined CPU's lgroup. 170287a18d3fSMadhavan Venkataraman * Note that access to callout_caches is protected by cpu_lock. 
170387a18d3fSMadhavan Venkataraman */ 170487a18d3fSMadhavan Venkataraman hand = lgrp_plat_cpu_to_hand(cp->cpu_id); 170587a18d3fSMadhavan Venkataraman for (cache = callout_caches; cache != NULL; cache = cache->cc_next) { 170687a18d3fSMadhavan Venkataraman if (cache->cc_hand == hand) 170787a18d3fSMadhavan Venkataraman break; 170887a18d3fSMadhavan Venkataraman } 170987a18d3fSMadhavan Venkataraman 171087a18d3fSMadhavan Venkataraman /* 171187a18d3fSMadhavan Venkataraman * If not found, create one. The caches are never destroyed. 171287a18d3fSMadhavan Venkataraman */ 171387a18d3fSMadhavan Venkataraman if (cache == NULL) { 171487a18d3fSMadhavan Venkataraman cache = kmem_alloc(sizeof (callout_cache_t), KM_SLEEP); 171587a18d3fSMadhavan Venkataraman cache->cc_hand = hand; 171687a18d3fSMadhavan Venkataraman (void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_cache%lx", 171787a18d3fSMadhavan Venkataraman (long)hand); 171887a18d3fSMadhavan Venkataraman cache->cc_cache = kmem_cache_create(s, sizeof (callout_t), 171987a18d3fSMadhavan Venkataraman CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); 172087a18d3fSMadhavan Venkataraman (void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_lcache%lx", 172187a18d3fSMadhavan Venkataraman (long)hand); 172287a18d3fSMadhavan Venkataraman cache->cc_lcache = kmem_cache_create(s, sizeof (callout_list_t), 172387a18d3fSMadhavan Venkataraman CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); 172487a18d3fSMadhavan Venkataraman cache->cc_next = callout_caches; 172587a18d3fSMadhavan Venkataraman callout_caches = cache; 172687a18d3fSMadhavan Venkataraman } 172787a18d3fSMadhavan Venkataraman 172887a18d3fSMadhavan Venkataraman seqid = cp->cpu_seqid; 172987a18d3fSMadhavan Venkataraman 173087a18d3fSMadhavan Venkataraman for (t = 0; t < CALLOUT_NTYPES; t++) { 173187a18d3fSMadhavan Venkataraman ct = &callout_table[CALLOUT_TABLE(t, seqid)]; 173287a18d3fSMadhavan Venkataraman 173387a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 173487a18d3fSMadhavan Venkataraman 
/* 173587a18d3fSMadhavan Venkataraman * Store convinience pointers to the kmem caches 173687a18d3fSMadhavan Venkataraman * in the callout table. These assignments should always be 173787a18d3fSMadhavan Venkataraman * done as callout tables can map to different physical 173887a18d3fSMadhavan Venkataraman * CPUs each time. 173987a18d3fSMadhavan Venkataraman */ 174087a18d3fSMadhavan Venkataraman ct->ct_cache = cache->cc_cache; 174187a18d3fSMadhavan Venkataraman ct->ct_lcache = cache->cc_lcache; 174287a18d3fSMadhavan Venkataraman 174387a18d3fSMadhavan Venkataraman /* 174487a18d3fSMadhavan Venkataraman * We use the heap pointer to check if stuff has been 174587a18d3fSMadhavan Venkataraman * initialized for this callout table. 174687a18d3fSMadhavan Venkataraman */ 174787a18d3fSMadhavan Venkataraman if (ct->ct_heap == NULL) { 174887a18d3fSMadhavan Venkataraman callout_heap_init(ct); 174987a18d3fSMadhavan Venkataraman callout_hash_init(ct); 175087a18d3fSMadhavan Venkataraman callout_kstat_init(ct); 175187a18d3fSMadhavan Venkataraman callout_cyclic_init(ct); 175287a18d3fSMadhavan Venkataraman } 175387a18d3fSMadhavan Venkataraman 175487a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 175587a18d3fSMadhavan Venkataraman 175687a18d3fSMadhavan Venkataraman /* 1757454ab202SMadhavan Venkataraman * Move the cyclic to this CPU by doing a bind. 
175887a18d3fSMadhavan Venkataraman */ 175987a18d3fSMadhavan Venkataraman cyclic_bind(ct->ct_cyclic, cp, NULL); 1760454ab202SMadhavan Venkataraman } 1761454ab202SMadhavan Venkataraman } 1762454ab202SMadhavan Venkataraman 1763454ab202SMadhavan Venkataraman void 1764454ab202SMadhavan Venkataraman callout_cpu_offline(cpu_t *cp) 1765454ab202SMadhavan Venkataraman { 1766454ab202SMadhavan Venkataraman callout_table_t *ct; 1767454ab202SMadhavan Venkataraman processorid_t seqid; 1768454ab202SMadhavan Venkataraman int t; 1769454ab202SMadhavan Venkataraman 1770454ab202SMadhavan Venkataraman ASSERT(MUTEX_HELD(&cpu_lock)); 1771454ab202SMadhavan Venkataraman 1772454ab202SMadhavan Venkataraman seqid = cp->cpu_seqid; 1773454ab202SMadhavan Venkataraman 1774454ab202SMadhavan Venkataraman for (t = 0; t < CALLOUT_NTYPES; t++) { 1775454ab202SMadhavan Venkataraman ct = &callout_table[CALLOUT_TABLE(t, seqid)]; 1776454ab202SMadhavan Venkataraman 1777454ab202SMadhavan Venkataraman /* 1778454ab202SMadhavan Venkataraman * Unbind the cyclic. This will allow the cyclic subsystem 1779454ab202SMadhavan Venkataraman * to juggle the cyclic during CPU offline. 1780454ab202SMadhavan Venkataraman */ 178187a18d3fSMadhavan Venkataraman cyclic_bind(ct->ct_cyclic, NULL, NULL); 17827c478bd9Sstevel@tonic-gate } 17837c478bd9Sstevel@tonic-gate } 178487a18d3fSMadhavan Venkataraman 178587a18d3fSMadhavan Venkataraman /* 178687a18d3fSMadhavan Venkataraman * This is called to perform per-CPU initialization for slave CPUs at 178787a18d3fSMadhavan Venkataraman * boot time. 
178887a18d3fSMadhavan Venkataraman */ 178987a18d3fSMadhavan Venkataraman void 179087a18d3fSMadhavan Venkataraman callout_mp_init(void) 179187a18d3fSMadhavan Venkataraman { 179287a18d3fSMadhavan Venkataraman cpu_t *cp; 179387a18d3fSMadhavan Venkataraman 179487a18d3fSMadhavan Venkataraman mutex_enter(&cpu_lock); 179587a18d3fSMadhavan Venkataraman 179687a18d3fSMadhavan Venkataraman cp = cpu_active; 179787a18d3fSMadhavan Venkataraman do { 179887a18d3fSMadhavan Venkataraman callout_cpu_online(cp); 179987a18d3fSMadhavan Venkataraman } while ((cp = cp->cpu_next_onln) != cpu_active); 180087a18d3fSMadhavan Venkataraman 180187a18d3fSMadhavan Venkataraman mutex_exit(&cpu_lock); 180287a18d3fSMadhavan Venkataraman } 180387a18d3fSMadhavan Venkataraman 180487a18d3fSMadhavan Venkataraman /* 180587a18d3fSMadhavan Venkataraman * Initialize all callout tables. Called at boot time just before clkstart(). 180687a18d3fSMadhavan Venkataraman */ 180787a18d3fSMadhavan Venkataraman void 180887a18d3fSMadhavan Venkataraman callout_init(void) 180987a18d3fSMadhavan Venkataraman { 181087a18d3fSMadhavan Venkataraman int f, t; 181187a18d3fSMadhavan Venkataraman size_t size; 181287a18d3fSMadhavan Venkataraman int table_id; 181387a18d3fSMadhavan Venkataraman callout_table_t *ct; 181487a18d3fSMadhavan Venkataraman long bits, fanout; 181587a18d3fSMadhavan Venkataraman uintptr_t buf; 181687a18d3fSMadhavan Venkataraman 181787a18d3fSMadhavan Venkataraman /* 181887a18d3fSMadhavan Venkataraman * Initialize callout globals. 
181987a18d3fSMadhavan Venkataraman */ 182087a18d3fSMadhavan Venkataraman bits = 0; 182187a18d3fSMadhavan Venkataraman for (fanout = 1; (fanout < max_ncpus); fanout <<= 1) 182287a18d3fSMadhavan Venkataraman bits++; 182387a18d3fSMadhavan Venkataraman callout_table_bits = CALLOUT_TYPE_BITS + bits; 182487a18d3fSMadhavan Venkataraman callout_table_mask = (1 << callout_table_bits) - 1; 182587a18d3fSMadhavan Venkataraman callout_counter_low = 1 << CALLOUT_COUNTER_SHIFT; 182687a18d3fSMadhavan Venkataraman callout_longterm = TICK_TO_NSEC(CALLOUT_LONGTERM_TICKS); 1827454ab202SMadhavan Venkataraman callout_max_ticks = CALLOUT_MAX_TICKS; 1828*51b32bddSMadhavan Venkataraman if (callout_min_reap == 0) 1829*51b32bddSMadhavan Venkataraman callout_min_reap = CALLOUT_MIN_REAP; 183087a18d3fSMadhavan Venkataraman 1831*51b32bddSMadhavan Venkataraman if (callout_tolerance <= 0) 1832*51b32bddSMadhavan Venkataraman callout_tolerance = CALLOUT_TOLERANCE; 1833*51b32bddSMadhavan Venkataraman if (callout_threads <= 0) 1834*51b32bddSMadhavan Venkataraman callout_threads = CALLOUT_THREADS; 183587a18d3fSMadhavan Venkataraman 183687a18d3fSMadhavan Venkataraman /* 183787a18d3fSMadhavan Venkataraman * Allocate all the callout tables based on max_ncpus. We have chosen 183887a18d3fSMadhavan Venkataraman * to do boot-time allocation instead of dynamic allocation because: 183987a18d3fSMadhavan Venkataraman * 184087a18d3fSMadhavan Venkataraman * - the size of the callout tables is not too large. 184187a18d3fSMadhavan Venkataraman * - there are race conditions involved in making this dynamic. 184287a18d3fSMadhavan Venkataraman * - the hash tables that go with the callout tables consume 184387a18d3fSMadhavan Venkataraman * most of the memory and they are only allocated in 184487a18d3fSMadhavan Venkataraman * callout_cpu_online(). 184587a18d3fSMadhavan Venkataraman * 184687a18d3fSMadhavan Venkataraman * Each CPU has two tables that are consecutive in the array. 
The first 184787a18d3fSMadhavan Venkataraman * one is for realtime callouts and the second one is for normal ones. 184887a18d3fSMadhavan Venkataraman * 184987a18d3fSMadhavan Venkataraman * We do this alignment dance to make sure that callout table 185087a18d3fSMadhavan Venkataraman * structures will always be on a cache line boundary. 185187a18d3fSMadhavan Venkataraman */ 185287a18d3fSMadhavan Venkataraman size = sizeof (callout_table_t) * CALLOUT_NTYPES * max_ncpus; 185387a18d3fSMadhavan Venkataraman size += CALLOUT_ALIGN; 185487a18d3fSMadhavan Venkataraman buf = (uintptr_t)kmem_zalloc(size, KM_SLEEP); 185587a18d3fSMadhavan Venkataraman callout_table = (callout_table_t *)P2ROUNDUP(buf, CALLOUT_ALIGN); 185687a18d3fSMadhavan Venkataraman 185787a18d3fSMadhavan Venkataraman size = sizeof (kstat_named_t) * CALLOUT_NUM_STATS; 185887a18d3fSMadhavan Venkataraman /* 185987a18d3fSMadhavan Venkataraman * Now, initialize the tables for all the CPUs. 186087a18d3fSMadhavan Venkataraman */ 186187a18d3fSMadhavan Venkataraman for (f = 0; f < max_ncpus; f++) { 186287a18d3fSMadhavan Venkataraman for (t = 0; t < CALLOUT_NTYPES; t++) { 186387a18d3fSMadhavan Venkataraman table_id = CALLOUT_TABLE(t, f); 186487a18d3fSMadhavan Venkataraman ct = &callout_table[table_id]; 1865454ab202SMadhavan Venkataraman ct->ct_type = t; 186687a18d3fSMadhavan Venkataraman mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 186787a18d3fSMadhavan Venkataraman /* 186887a18d3fSMadhavan Venkataraman * Precompute the base IDs for long and short-term 186987a18d3fSMadhavan Venkataraman * legacy IDs. This makes ID generation during 187087a18d3fSMadhavan Venkataraman * timeout() fast. 187187a18d3fSMadhavan Venkataraman */ 187287a18d3fSMadhavan Venkataraman ct->ct_short_id = CALLOUT_SHORT_ID(table_id); 187387a18d3fSMadhavan Venkataraman ct->ct_long_id = CALLOUT_LONG_ID(table_id); 187487a18d3fSMadhavan Venkataraman /* 187587a18d3fSMadhavan Venkataraman * Precompute the base ID for generation-based IDs. 
187687a18d3fSMadhavan Venkataraman * Note that when the first ID gets allocated, the 187787a18d3fSMadhavan Venkataraman * ID will wrap. This will cause the generation 187887a18d3fSMadhavan Venkataraman * number to be incremented to 1. 187987a18d3fSMadhavan Venkataraman */ 188087a18d3fSMadhavan Venkataraman ct->ct_gen_id = CALLOUT_SHORT_ID(table_id); 188187a18d3fSMadhavan Venkataraman /* 188287a18d3fSMadhavan Venkataraman * Initialize the cyclic as NONE. This will get set 188387a18d3fSMadhavan Venkataraman * during CPU online. This is so that partially 188487a18d3fSMadhavan Venkataraman * populated systems will only have the required 188587a18d3fSMadhavan Venkataraman * number of cyclics, not more. 188687a18d3fSMadhavan Venkataraman */ 188787a18d3fSMadhavan Venkataraman ct->ct_cyclic = CYCLIC_NONE; 188887a18d3fSMadhavan Venkataraman ct->ct_kstat_data = kmem_zalloc(size, KM_SLEEP); 188987a18d3fSMadhavan Venkataraman } 189087a18d3fSMadhavan Venkataraman } 189187a18d3fSMadhavan Venkataraman 189287a18d3fSMadhavan Venkataraman /* 189387a18d3fSMadhavan Venkataraman * Add the callback for CPR. This is called during checkpoint 189487a18d3fSMadhavan Venkataraman * resume to suspend and resume callouts. 189587a18d3fSMadhavan Venkataraman */ 189687a18d3fSMadhavan Venkataraman (void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT, 189787a18d3fSMadhavan Venkataraman "callout_cpr"); 189887a18d3fSMadhavan Venkataraman (void) callb_add(callout_debug_callb, 0, CB_CL_ENTER_DEBUGGER, 189987a18d3fSMadhavan Venkataraman "callout_debug"); 190087a18d3fSMadhavan Venkataraman 190187a18d3fSMadhavan Venkataraman /* 190287a18d3fSMadhavan Venkataraman * Call the per-CPU initialization function for the boot CPU. This 190387a18d3fSMadhavan Venkataraman * is done here because the function is not called automatically for 190487a18d3fSMadhavan Venkataraman * the boot CPU from the CPU online/offline hooks. 
Note that the 190587a18d3fSMadhavan Venkataraman * CPU lock is taken here because of convention. 190687a18d3fSMadhavan Venkataraman */ 190787a18d3fSMadhavan Venkataraman mutex_enter(&cpu_lock); 190887a18d3fSMadhavan Venkataraman callout_boot_ct = &callout_table[CALLOUT_TABLE(0, CPU->cpu_seqid)]; 190987a18d3fSMadhavan Venkataraman callout_cpu_online(CPU); 191087a18d3fSMadhavan Venkataraman mutex_exit(&cpu_lock); 19117c478bd9Sstevel@tonic-gate } 1912