17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5f635d46aSqiao * Common Development and Distribution License (the "License"). 6f635d46aSqiao * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22*113d3ed7SMadhavan Venkataraman * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate #include <sys/callo.h> 277c478bd9Sstevel@tonic-gate #include <sys/param.h> 287c478bd9Sstevel@tonic-gate #include <sys/types.h> 297c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 307c478bd9Sstevel@tonic-gate #include <sys/thread.h> 317c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 3287a18d3fSMadhavan Venkataraman #include <sys/kmem_impl.h> 337c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 347c478bd9Sstevel@tonic-gate #include <sys/callb.h> 357c478bd9Sstevel@tonic-gate #include <sys/debug.h> 367c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 377c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 387c478bd9Sstevel@tonic-gate #include <sys/sdt.h> 397c478bd9Sstevel@tonic-gate 407c478bd9Sstevel@tonic-gate /* 417c478bd9Sstevel@tonic-gate * Callout tables. See timeout(9F) for details. 427c478bd9Sstevel@tonic-gate */ 4351b32bddSMadhavan Venkataraman static int callout_threads; /* callout normal threads */ 4487a18d3fSMadhavan Venkataraman static hrtime_t callout_debug_hrtime; /* debugger entry time */ 4551b32bddSMadhavan Venkataraman static int callout_min_reap; /* callout minimum reap count */ 4651b32bddSMadhavan Venkataraman static int callout_tolerance; /* callout hires tolerance */ 4787a18d3fSMadhavan Venkataraman static callout_table_t *callout_boot_ct; /* Boot CPU's callout tables */ 48454ab202SMadhavan Venkataraman static clock_t callout_max_ticks; /* max interval */ 4987a18d3fSMadhavan Venkataraman static hrtime_t callout_longterm; /* longterm nanoseconds */ 5087a18d3fSMadhavan Venkataraman static ulong_t callout_counter_low; /* callout ID increment */ 5187a18d3fSMadhavan Venkataraman static ulong_t callout_table_bits; /* number of table bits in ID */ 5287a18d3fSMadhavan Venkataraman static ulong_t callout_table_mask; /* mask for the table bits */ 5387a18d3fSMadhavan Venkataraman static callout_cache_t *callout_caches; /* linked list of caches */ 5487a18d3fSMadhavan 
Venkataraman #pragma align 64(callout_table) 5587a18d3fSMadhavan Venkataraman static callout_table_t *callout_table; /* global callout table array */ 567c478bd9Sstevel@tonic-gate 5707247649SMadhavan Venkataraman /* 584c06356bSdh142964 * We run 'realtime' callouts at PIL 1 (CY_LOW_LEVEL). For 'normal' 594c06356bSdh142964 * callouts, from PIL 10 (CY_LOCK_LEVEL) we dispatch the callout, 604c06356bSdh142964 * via taskq, to a thread that executes at PIL 0 - so we end up running 614c06356bSdh142964 * 'normal' callouts at PIL 0. 6207247649SMadhavan Venkataraman */ 6351b32bddSMadhavan Venkataraman static volatile int callout_realtime_level = CY_LOW_LEVEL; 6451b32bddSMadhavan Venkataraman static volatile int callout_normal_level = CY_LOCK_LEVEL; 6507247649SMadhavan Venkataraman 6687a18d3fSMadhavan Venkataraman static char *callout_kstat_names[] = { 6787a18d3fSMadhavan Venkataraman "callout_timeouts", 6887a18d3fSMadhavan Venkataraman "callout_timeouts_pending", 6987a18d3fSMadhavan Venkataraman "callout_untimeouts_unexpired", 7087a18d3fSMadhavan Venkataraman "callout_untimeouts_executing", 7187a18d3fSMadhavan Venkataraman "callout_untimeouts_expired", 7287a18d3fSMadhavan Venkataraman "callout_expirations", 7387a18d3fSMadhavan Venkataraman "callout_allocations", 7451b32bddSMadhavan Venkataraman "callout_cleanups", 7587a18d3fSMadhavan Venkataraman }; 7687a18d3fSMadhavan Venkataraman 7751b32bddSMadhavan Venkataraman static hrtime_t callout_heap_process(callout_table_t *, hrtime_t, int); 7851b32bddSMadhavan Venkataraman 7987a18d3fSMadhavan Venkataraman #define CALLOUT_HASH_INSERT(hash, cp, cnext, cprev) \ 807c478bd9Sstevel@tonic-gate { \ 8187a18d3fSMadhavan Venkataraman callout_hash_t *hashp = &(hash); \ 8287a18d3fSMadhavan Venkataraman \ 837c478bd9Sstevel@tonic-gate cp->cprev = NULL; \ 8487a18d3fSMadhavan Venkataraman cp->cnext = hashp->ch_head; \ 8587a18d3fSMadhavan Venkataraman if (hashp->ch_head == NULL) \ 8687a18d3fSMadhavan Venkataraman hashp->ch_tail = cp; \ 
877c478bd9Sstevel@tonic-gate else \ 8887a18d3fSMadhavan Venkataraman cp->cnext->cprev = cp; \ 8987a18d3fSMadhavan Venkataraman hashp->ch_head = cp; \ 907c478bd9Sstevel@tonic-gate } 917c478bd9Sstevel@tonic-gate 9287a18d3fSMadhavan Venkataraman #define CALLOUT_HASH_APPEND(hash, cp, cnext, cprev) \ 9387a18d3fSMadhavan Venkataraman { \ 9487a18d3fSMadhavan Venkataraman callout_hash_t *hashp = &(hash); \ 9587a18d3fSMadhavan Venkataraman \ 9687a18d3fSMadhavan Venkataraman cp->cnext = NULL; \ 9787a18d3fSMadhavan Venkataraman cp->cprev = hashp->ch_tail; \ 9887a18d3fSMadhavan Venkataraman if (hashp->ch_tail == NULL) \ 9987a18d3fSMadhavan Venkataraman hashp->ch_head = cp; \ 10087a18d3fSMadhavan Venkataraman else \ 10187a18d3fSMadhavan Venkataraman cp->cprev->cnext = cp; \ 10287a18d3fSMadhavan Venkataraman hashp->ch_tail = cp; \ 10387a18d3fSMadhavan Venkataraman } 10487a18d3fSMadhavan Venkataraman 10587a18d3fSMadhavan Venkataraman #define CALLOUT_HASH_DELETE(hash, cp, cnext, cprev) \ 10687a18d3fSMadhavan Venkataraman { \ 10787a18d3fSMadhavan Venkataraman callout_hash_t *hashp = &(hash); \ 10887a18d3fSMadhavan Venkataraman \ 10987a18d3fSMadhavan Venkataraman if (cp->cnext == NULL) \ 11087a18d3fSMadhavan Venkataraman hashp->ch_tail = cp->cprev; \ 11187a18d3fSMadhavan Venkataraman else \ 11287a18d3fSMadhavan Venkataraman cp->cnext->cprev = cp->cprev; \ 11387a18d3fSMadhavan Venkataraman if (cp->cprev == NULL) \ 11487a18d3fSMadhavan Venkataraman hashp->ch_head = cp->cnext; \ 11587a18d3fSMadhavan Venkataraman else \ 11687a18d3fSMadhavan Venkataraman cp->cprev->cnext = cp->cnext; \ 11787a18d3fSMadhavan Venkataraman } 11887a18d3fSMadhavan Venkataraman 11987a18d3fSMadhavan Venkataraman /* 12087a18d3fSMadhavan Venkataraman * These definitions help us queue callouts and callout lists. Here is 12187a18d3fSMadhavan Venkataraman * the queueing rationale: 12287a18d3fSMadhavan Venkataraman * 12387a18d3fSMadhavan Venkataraman * - callouts are queued in a FIFO manner in the ID hash table. 
*	  TCP timers are typically cancelled in the same order that they
 *	  were issued. The FIFO queueing shortens the search for a callout
 *	  during untimeout().
 *
 *	- callouts are queued in a FIFO manner in their callout lists.
 *	  This ensures that the callouts are executed in the same order that
 *	  they were queued. This is fair. Plus, it helps to make each
 *	  callout expiration timely. It also favors cancellations.
 *
 *	- callout lists are queued in the following manner in the callout
 *	  hash table buckets:
 *
 *		- appended, if the callout list is a 1-nanosecond resolution
 *		  callout list. When a callout is created, we first look for
 *		  a callout list that has the same expiration so we can avoid
 *		  allocating a callout list and inserting the expiration into
 *		  the heap. However, we do not want to look at 1-nanosecond
 *		  resolution callout lists as we will seldom find a match in
 *		  them. Keeping these callout lists in the rear of the hash
 *		  buckets allows us to skip these during the lookup.
 *
 *		- inserted at the beginning, if the callout list is not a
 *		  1-nanosecond resolution callout list.
This also has the 14751b32bddSMadhavan Venkataraman * side-effect of keeping the long term timers away from the 14851b32bddSMadhavan Venkataraman * front of the buckets. 14987a18d3fSMadhavan Venkataraman * 15087a18d3fSMadhavan Venkataraman * - callout lists are queued in a FIFO manner in the expired callouts 15187a18d3fSMadhavan Venkataraman * list. This ensures that callout lists are executed in the order 15287a18d3fSMadhavan Venkataraman * of expiration. 15387a18d3fSMadhavan Venkataraman */ 15487a18d3fSMadhavan Venkataraman #define CALLOUT_APPEND(ct, cp) \ 15587a18d3fSMadhavan Venkataraman CALLOUT_HASH_APPEND(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)], \ 15687a18d3fSMadhavan Venkataraman cp, c_idnext, c_idprev); \ 15787a18d3fSMadhavan Venkataraman CALLOUT_HASH_APPEND(cp->c_list->cl_callouts, cp, c_clnext, c_clprev) 15887a18d3fSMadhavan Venkataraman 15987a18d3fSMadhavan Venkataraman #define CALLOUT_DELETE(ct, cp) \ 16087a18d3fSMadhavan Venkataraman CALLOUT_HASH_DELETE(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)], \ 16187a18d3fSMadhavan Venkataraman cp, c_idnext, c_idprev); \ 16287a18d3fSMadhavan Venkataraman CALLOUT_HASH_DELETE(cp->c_list->cl_callouts, cp, c_clnext, c_clprev) 16387a18d3fSMadhavan Venkataraman 16487a18d3fSMadhavan Venkataraman #define CALLOUT_LIST_INSERT(hash, cl) \ 16587a18d3fSMadhavan Venkataraman CALLOUT_HASH_INSERT(hash, cl, cl_next, cl_prev) 16687a18d3fSMadhavan Venkataraman 16787a18d3fSMadhavan Venkataraman #define CALLOUT_LIST_APPEND(hash, cl) \ 16887a18d3fSMadhavan Venkataraman CALLOUT_HASH_APPEND(hash, cl, cl_next, cl_prev) 16987a18d3fSMadhavan Venkataraman 17087a18d3fSMadhavan Venkataraman #define CALLOUT_LIST_DELETE(hash, cl) \ 17187a18d3fSMadhavan Venkataraman CALLOUT_HASH_DELETE(hash, cl, cl_next, cl_prev) 1727c478bd9Sstevel@tonic-gate 1737c478bd9Sstevel@tonic-gate /* 17407247649SMadhavan Venkataraman * For normal callouts, there is a deadlock scenario if two callouts that 17507247649SMadhavan Venkataraman * have an inter-dependency end up 
on the same callout list. To break the 17607247649SMadhavan Venkataraman * deadlock, you need two taskq threads running in parallel. We compute 17707247649SMadhavan Venkataraman * the number of taskq threads here using a bunch of conditions to make 17807247649SMadhavan Venkataraman * it optimal for the common case. This is an ugly hack, but one that is 17907247649SMadhavan Venkataraman * necessary (sigh). 18007247649SMadhavan Venkataraman */ 18107247649SMadhavan Venkataraman #define CALLOUT_THRESHOLD 100000000 18207247649SMadhavan Venkataraman #define CALLOUT_EXEC_COMPUTE(ct, exec) \ 18307247649SMadhavan Venkataraman { \ 18407247649SMadhavan Venkataraman callout_list_t *cl; \ 18507247649SMadhavan Venkataraman \ 18607247649SMadhavan Venkataraman cl = ct->ct_expired.ch_head; \ 18707247649SMadhavan Venkataraman if (cl == NULL) { \ 18807247649SMadhavan Venkataraman /* \ 18907247649SMadhavan Venkataraman * If the expired list is NULL, there is nothing to \ 19007247649SMadhavan Venkataraman * process. \ 19107247649SMadhavan Venkataraman */ \ 19207247649SMadhavan Venkataraman exec = 0; \ 19307247649SMadhavan Venkataraman } else if ((cl->cl_next == NULL) && \ 19407247649SMadhavan Venkataraman (cl->cl_callouts.ch_head == cl->cl_callouts.ch_tail)) { \ 19507247649SMadhavan Venkataraman /* \ 19607247649SMadhavan Venkataraman * If there is only one callout list and it contains \ 19707247649SMadhavan Venkataraman * only one callout, there is no need for two threads. 
\ 19807247649SMadhavan Venkataraman */ \ 19907247649SMadhavan Venkataraman exec = 1; \ 20007247649SMadhavan Venkataraman } else if ((ct->ct_heap_num == 0) || \ 20151b32bddSMadhavan Venkataraman (ct->ct_heap[0].ch_expiration > gethrtime() + CALLOUT_THRESHOLD)) {\ 20207247649SMadhavan Venkataraman /* \ 20307247649SMadhavan Venkataraman * If the heap has become empty, we need two threads as \ 20407247649SMadhavan Venkataraman * there is no one to kick off the second thread in the \ 20507247649SMadhavan Venkataraman * future. If the heap is not empty and the top of the \ 20607247649SMadhavan Venkataraman * heap does not expire in the near future, we need two \ 20707247649SMadhavan Venkataraman * threads. \ 20807247649SMadhavan Venkataraman */ \ 20907247649SMadhavan Venkataraman exec = 2; \ 21007247649SMadhavan Venkataraman } else { \ 21107247649SMadhavan Venkataraman /* \ 21207247649SMadhavan Venkataraman * We have multiple callouts to process. But the cyclic \ 21307247649SMadhavan Venkataraman * will fire in the near future. So, we only need one \ 21407247649SMadhavan Venkataraman * thread for now. \ 21507247649SMadhavan Venkataraman */ \ 21607247649SMadhavan Venkataraman exec = 1; \ 21707247649SMadhavan Venkataraman } \ 21807247649SMadhavan Venkataraman } 21907247649SMadhavan Venkataraman 22007247649SMadhavan Venkataraman /* 22151b32bddSMadhavan Venkataraman * Macro to swap two heap items. 22251b32bddSMadhavan Venkataraman */ 22351b32bddSMadhavan Venkataraman #define CALLOUT_SWAP(h1, h2) \ 22451b32bddSMadhavan Venkataraman { \ 22551b32bddSMadhavan Venkataraman callout_heap_t tmp; \ 22651b32bddSMadhavan Venkataraman \ 22751b32bddSMadhavan Venkataraman tmp = *h1; \ 22851b32bddSMadhavan Venkataraman *h1 = *h2; \ 22951b32bddSMadhavan Venkataraman *h2 = tmp; \ 23051b32bddSMadhavan Venkataraman } 23151b32bddSMadhavan Venkataraman 23251b32bddSMadhavan Venkataraman /* 23351b32bddSMadhavan Venkataraman * Macro to free a callout list. 
23451b32bddSMadhavan Venkataraman */ 23551b32bddSMadhavan Venkataraman #define CALLOUT_LIST_FREE(ct, cl) \ 23651b32bddSMadhavan Venkataraman { \ 23751b32bddSMadhavan Venkataraman cl->cl_next = ct->ct_lfree; \ 23851b32bddSMadhavan Venkataraman ct->ct_lfree = cl; \ 23951b32bddSMadhavan Venkataraman cl->cl_flags |= CALLOUT_LIST_FLAG_FREE; \ 24051b32bddSMadhavan Venkataraman } 24151b32bddSMadhavan Venkataraman 24251b32bddSMadhavan Venkataraman /* 2437c478bd9Sstevel@tonic-gate * Allocate a callout structure. We try quite hard because we 2447c478bd9Sstevel@tonic-gate * can't sleep, and if we can't do the allocation, we're toast. 24587a18d3fSMadhavan Venkataraman * Failing all, we try a KM_PANIC allocation. Note that we never 24687a18d3fSMadhavan Venkataraman * deallocate a callout. See untimeout() for the reasoning. 2477c478bd9Sstevel@tonic-gate */ 2487c478bd9Sstevel@tonic-gate static callout_t * 2497c478bd9Sstevel@tonic-gate callout_alloc(callout_table_t *ct) 2507c478bd9Sstevel@tonic-gate { 25187a18d3fSMadhavan Venkataraman size_t size; 25287a18d3fSMadhavan Venkataraman callout_t *cp; 2537c478bd9Sstevel@tonic-gate 25487a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 25587a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 25687a18d3fSMadhavan Venkataraman 25787a18d3fSMadhavan Venkataraman cp = kmem_cache_alloc(ct->ct_cache, KM_NOSLEEP); 25887a18d3fSMadhavan Venkataraman if (cp == NULL) { 25987a18d3fSMadhavan Venkataraman size = sizeof (callout_t); 26087a18d3fSMadhavan Venkataraman cp = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC); 26187a18d3fSMadhavan Venkataraman } 26287a18d3fSMadhavan Venkataraman cp->c_xid = 0; 26307247649SMadhavan Venkataraman cp->c_executor = NULL; 26407247649SMadhavan Venkataraman cv_init(&cp->c_done, NULL, CV_DEFAULT, NULL); 26507247649SMadhavan Venkataraman cp->c_waiting = 0; 26687a18d3fSMadhavan Venkataraman 26787a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 26887a18d3fSMadhavan Venkataraman 
ct->ct_allocations++; 2697c478bd9Sstevel@tonic-gate return (cp); 2707c478bd9Sstevel@tonic-gate } 2717c478bd9Sstevel@tonic-gate 2727c478bd9Sstevel@tonic-gate /* 27387a18d3fSMadhavan Venkataraman * Allocate a callout list structure. We try quite hard because we 27487a18d3fSMadhavan Venkataraman * can't sleep, and if we can't do the allocation, we're toast. 27587a18d3fSMadhavan Venkataraman * Failing all, we try a KM_PANIC allocation. Note that we never 27687a18d3fSMadhavan Venkataraman * deallocate a callout list. 2777c478bd9Sstevel@tonic-gate */ 27887a18d3fSMadhavan Venkataraman static void 27987a18d3fSMadhavan Venkataraman callout_list_alloc(callout_table_t *ct) 2807c478bd9Sstevel@tonic-gate { 28187a18d3fSMadhavan Venkataraman size_t size; 28287a18d3fSMadhavan Venkataraman callout_list_t *cl; 28387a18d3fSMadhavan Venkataraman 28487a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 28587a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 28687a18d3fSMadhavan Venkataraman 28787a18d3fSMadhavan Venkataraman cl = kmem_cache_alloc(ct->ct_lcache, KM_NOSLEEP); 28887a18d3fSMadhavan Venkataraman if (cl == NULL) { 28987a18d3fSMadhavan Venkataraman size = sizeof (callout_list_t); 29087a18d3fSMadhavan Venkataraman cl = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC); 29187a18d3fSMadhavan Venkataraman } 29287a18d3fSMadhavan Venkataraman bzero(cl, sizeof (callout_list_t)); 29387a18d3fSMadhavan Venkataraman 29487a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 29551b32bddSMadhavan Venkataraman CALLOUT_LIST_FREE(ct, cl); 29687a18d3fSMadhavan Venkataraman } 29787a18d3fSMadhavan Venkataraman 29887a18d3fSMadhavan Venkataraman /* 29951b32bddSMadhavan Venkataraman * Find a callout list that corresponds to an expiration and matching flags. 
30087a18d3fSMadhavan Venkataraman */ 30187a18d3fSMadhavan Venkataraman static callout_list_t * 30207247649SMadhavan Venkataraman callout_list_get(callout_table_t *ct, hrtime_t expiration, int flags, int hash) 30387a18d3fSMadhavan Venkataraman { 30487a18d3fSMadhavan Venkataraman callout_list_t *cl; 30551b32bddSMadhavan Venkataraman int clflags; 30687a18d3fSMadhavan Venkataraman 30787a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 30887a18d3fSMadhavan Venkataraman 30951b32bddSMadhavan Venkataraman if (flags & CALLOUT_LIST_FLAG_NANO) { 31051b32bddSMadhavan Venkataraman /* 31151b32bddSMadhavan Venkataraman * This is a 1-nanosecond resolution callout. We will rarely 31251b32bddSMadhavan Venkataraman * find a match for this. So, bail out. 31351b32bddSMadhavan Venkataraman */ 31451b32bddSMadhavan Venkataraman return (NULL); 31551b32bddSMadhavan Venkataraman } 31651b32bddSMadhavan Venkataraman 31751b32bddSMadhavan Venkataraman clflags = (CALLOUT_LIST_FLAG_ABSOLUTE | CALLOUT_LIST_FLAG_HRESTIME); 31887a18d3fSMadhavan Venkataraman for (cl = ct->ct_clhash[hash].ch_head; (cl != NULL); cl = cl->cl_next) { 31951b32bddSMadhavan Venkataraman /* 32051b32bddSMadhavan Venkataraman * If we have reached a 1-nanosecond resolution callout list, 32151b32bddSMadhavan Venkataraman * we don't have much hope of finding a match in this hash 32251b32bddSMadhavan Venkataraman * bucket. So, just bail out. 
32351b32bddSMadhavan Venkataraman */ 32451b32bddSMadhavan Venkataraman if (cl->cl_flags & CALLOUT_LIST_FLAG_NANO) 32551b32bddSMadhavan Venkataraman return (NULL); 32651b32bddSMadhavan Venkataraman 32707247649SMadhavan Venkataraman if ((cl->cl_expiration == expiration) && 32851b32bddSMadhavan Venkataraman ((cl->cl_flags & clflags) == (flags & clflags))) 32987a18d3fSMadhavan Venkataraman return (cl); 33087a18d3fSMadhavan Venkataraman } 33187a18d3fSMadhavan Venkataraman 33287a18d3fSMadhavan Venkataraman return (NULL); 33387a18d3fSMadhavan Venkataraman } 33487a18d3fSMadhavan Venkataraman 33587a18d3fSMadhavan Venkataraman /* 33687a18d3fSMadhavan Venkataraman * Initialize a callout table's heap, if necessary. Preallocate some free 33787a18d3fSMadhavan Venkataraman * entries so we don't have to check for NULL elsewhere. 33887a18d3fSMadhavan Venkataraman */ 33987a18d3fSMadhavan Venkataraman static void 34087a18d3fSMadhavan Venkataraman callout_heap_init(callout_table_t *ct) 34187a18d3fSMadhavan Venkataraman { 34287a18d3fSMadhavan Venkataraman size_t size; 34387a18d3fSMadhavan Venkataraman 34487a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 34587a18d3fSMadhavan Venkataraman ASSERT(ct->ct_heap == NULL); 34687a18d3fSMadhavan Venkataraman 34787a18d3fSMadhavan Venkataraman ct->ct_heap_num = 0; 34887a18d3fSMadhavan Venkataraman ct->ct_heap_max = CALLOUT_CHUNK; 34951b32bddSMadhavan Venkataraman size = sizeof (callout_heap_t) * CALLOUT_CHUNK; 35087a18d3fSMadhavan Venkataraman ct->ct_heap = kmem_alloc(size, KM_SLEEP); 35187a18d3fSMadhavan Venkataraman } 35287a18d3fSMadhavan Venkataraman 35387a18d3fSMadhavan Venkataraman /* 35487a18d3fSMadhavan Venkataraman * Reallocate the heap. We try quite hard because we can't sleep, and if 35587a18d3fSMadhavan Venkataraman * we can't do the allocation, we're toast. Failing all, we try a KM_PANIC 35687a18d3fSMadhavan Venkataraman * allocation. Note that the heap only expands, it never contracts. 
35787a18d3fSMadhavan Venkataraman */ 35887a18d3fSMadhavan Venkataraman static void 35987a18d3fSMadhavan Venkataraman callout_heap_expand(callout_table_t *ct) 36087a18d3fSMadhavan Venkataraman { 36187a18d3fSMadhavan Venkataraman size_t max, size, osize; 36251b32bddSMadhavan Venkataraman callout_heap_t *heap; 36387a18d3fSMadhavan Venkataraman 36487a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 36587a18d3fSMadhavan Venkataraman ASSERT(ct->ct_heap_num <= ct->ct_heap_max); 36687a18d3fSMadhavan Venkataraman 36787a18d3fSMadhavan Venkataraman while (ct->ct_heap_num == ct->ct_heap_max) { 36887a18d3fSMadhavan Venkataraman max = ct->ct_heap_max; 36987a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 37087a18d3fSMadhavan Venkataraman 37151b32bddSMadhavan Venkataraman osize = sizeof (callout_heap_t) * max; 37251b32bddSMadhavan Venkataraman size = sizeof (callout_heap_t) * (max + CALLOUT_CHUNK); 37387a18d3fSMadhavan Venkataraman heap = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC); 37487a18d3fSMadhavan Venkataraman 37587a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 37687a18d3fSMadhavan Venkataraman if (max < ct->ct_heap_max) { 37787a18d3fSMadhavan Venkataraman /* 37887a18d3fSMadhavan Venkataraman * Someone beat us to the allocation. Free what we 37987a18d3fSMadhavan Venkataraman * just allocated and proceed. 
38087a18d3fSMadhavan Venkataraman */ 38187a18d3fSMadhavan Venkataraman kmem_free(heap, size); 38287a18d3fSMadhavan Venkataraman continue; 38387a18d3fSMadhavan Venkataraman } 38487a18d3fSMadhavan Venkataraman 38587a18d3fSMadhavan Venkataraman bcopy(ct->ct_heap, heap, osize); 38687a18d3fSMadhavan Venkataraman kmem_free(ct->ct_heap, osize); 38787a18d3fSMadhavan Venkataraman ct->ct_heap = heap; 38851b32bddSMadhavan Venkataraman ct->ct_heap_max = size / sizeof (callout_heap_t); 38987a18d3fSMadhavan Venkataraman } 39087a18d3fSMadhavan Venkataraman } 39187a18d3fSMadhavan Venkataraman 39287a18d3fSMadhavan Venkataraman /* 39387a18d3fSMadhavan Venkataraman * Move an expiration from the bottom of the heap to its correct place 39487a18d3fSMadhavan Venkataraman * in the heap. If we reached the root doing this, return 1. Else, 39587a18d3fSMadhavan Venkataraman * return 0. 39687a18d3fSMadhavan Venkataraman */ 39787a18d3fSMadhavan Venkataraman static int 39887a18d3fSMadhavan Venkataraman callout_upheap(callout_table_t *ct) 39987a18d3fSMadhavan Venkataraman { 40087a18d3fSMadhavan Venkataraman int current, parent; 40151b32bddSMadhavan Venkataraman callout_heap_t *heap, *hcurrent, *hparent; 40287a18d3fSMadhavan Venkataraman 40387a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 40487a18d3fSMadhavan Venkataraman ASSERT(ct->ct_heap_num >= 1); 40587a18d3fSMadhavan Venkataraman 40687a18d3fSMadhavan Venkataraman if (ct->ct_heap_num == 1) { 40787a18d3fSMadhavan Venkataraman return (1); 40887a18d3fSMadhavan Venkataraman } 40987a18d3fSMadhavan Venkataraman 41087a18d3fSMadhavan Venkataraman heap = ct->ct_heap; 41187a18d3fSMadhavan Venkataraman current = ct->ct_heap_num - 1; 41287a18d3fSMadhavan Venkataraman 41387a18d3fSMadhavan Venkataraman for (;;) { 41487a18d3fSMadhavan Venkataraman parent = CALLOUT_HEAP_PARENT(current); 41551b32bddSMadhavan Venkataraman hparent = &heap[parent]; 41651b32bddSMadhavan Venkataraman hcurrent = &heap[current]; 41787a18d3fSMadhavan Venkataraman 
41887a18d3fSMadhavan Venkataraman /* 41987a18d3fSMadhavan Venkataraman * We have an expiration later than our parent; we're done. 42087a18d3fSMadhavan Venkataraman */ 42151b32bddSMadhavan Venkataraman if (hcurrent->ch_expiration >= hparent->ch_expiration) { 42287a18d3fSMadhavan Venkataraman return (0); 42387a18d3fSMadhavan Venkataraman } 42487a18d3fSMadhavan Venkataraman 42587a18d3fSMadhavan Venkataraman /* 42687a18d3fSMadhavan Venkataraman * We need to swap with our parent, and continue up the heap. 42787a18d3fSMadhavan Venkataraman */ 42851b32bddSMadhavan Venkataraman CALLOUT_SWAP(hparent, hcurrent); 42987a18d3fSMadhavan Venkataraman 43087a18d3fSMadhavan Venkataraman /* 43187a18d3fSMadhavan Venkataraman * If we just reached the root, we're done. 43287a18d3fSMadhavan Venkataraman */ 43387a18d3fSMadhavan Venkataraman if (parent == 0) { 43487a18d3fSMadhavan Venkataraman return (1); 43587a18d3fSMadhavan Venkataraman } 43687a18d3fSMadhavan Venkataraman 43787a18d3fSMadhavan Venkataraman current = parent; 43887a18d3fSMadhavan Venkataraman } 43987a18d3fSMadhavan Venkataraman /*NOTREACHED*/ 44087a18d3fSMadhavan Venkataraman } 44187a18d3fSMadhavan Venkataraman 44287a18d3fSMadhavan Venkataraman /* 44351b32bddSMadhavan Venkataraman * Insert a new heap item into a callout table's heap. 44487a18d3fSMadhavan Venkataraman */ 44587a18d3fSMadhavan Venkataraman static void 44651b32bddSMadhavan Venkataraman callout_heap_insert(callout_table_t *ct, callout_list_t *cl) 44787a18d3fSMadhavan Venkataraman { 44887a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 44987a18d3fSMadhavan Venkataraman ASSERT(ct->ct_heap_num < ct->ct_heap_max); 45087a18d3fSMadhavan Venkataraman 45187a18d3fSMadhavan Venkataraman /* 45251b32bddSMadhavan Venkataraman * First, copy the expiration and callout list pointer to the bottom 45351b32bddSMadhavan Venkataraman * of the heap. 
45487a18d3fSMadhavan Venkataraman */ 45551b32bddSMadhavan Venkataraman ct->ct_heap[ct->ct_heap_num].ch_expiration = cl->cl_expiration; 45651b32bddSMadhavan Venkataraman ct->ct_heap[ct->ct_heap_num].ch_list = cl; 45787a18d3fSMadhavan Venkataraman ct->ct_heap_num++; 45887a18d3fSMadhavan Venkataraman 45987a18d3fSMadhavan Venkataraman /* 46087a18d3fSMadhavan Venkataraman * Now, perform an upheap operation. If we reached the root, then 46187a18d3fSMadhavan Venkataraman * the cyclic needs to be reprogrammed as we have an earlier 46287a18d3fSMadhavan Venkataraman * expiration. 46387a18d3fSMadhavan Venkataraman * 46487a18d3fSMadhavan Venkataraman * Also, during the CPR suspend phase, do not reprogram the cyclic. 46587a18d3fSMadhavan Venkataraman * We don't want any callout activity. When the CPR resume phase is 46687a18d3fSMadhavan Venkataraman * entered, the cyclic will be programmed for the earliest expiration 46787a18d3fSMadhavan Venkataraman * in the heap. 46887a18d3fSMadhavan Venkataraman */ 469454ab202SMadhavan Venkataraman if (callout_upheap(ct) && (ct->ct_suspend == 0)) 47051b32bddSMadhavan Venkataraman (void) cyclic_reprogram(ct->ct_cyclic, cl->cl_expiration); 47187a18d3fSMadhavan Venkataraman } 47287a18d3fSMadhavan Venkataraman 47387a18d3fSMadhavan Venkataraman /* 47487a18d3fSMadhavan Venkataraman * Move an expiration from the top of the heap to its correct place 47587a18d3fSMadhavan Venkataraman * in the heap. 
47687a18d3fSMadhavan Venkataraman */ 47787a18d3fSMadhavan Venkataraman static void 47887a18d3fSMadhavan Venkataraman callout_downheap(callout_table_t *ct) 47987a18d3fSMadhavan Venkataraman { 48051b32bddSMadhavan Venkataraman int current, left, right, nelems; 48151b32bddSMadhavan Venkataraman callout_heap_t *heap, *hleft, *hright, *hcurrent; 48287a18d3fSMadhavan Venkataraman 48387a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 48487a18d3fSMadhavan Venkataraman ASSERT(ct->ct_heap_num >= 1); 48587a18d3fSMadhavan Venkataraman 48687a18d3fSMadhavan Venkataraman heap = ct->ct_heap; 48787a18d3fSMadhavan Venkataraman current = 0; 48887a18d3fSMadhavan Venkataraman nelems = ct->ct_heap_num; 48987a18d3fSMadhavan Venkataraman 49087a18d3fSMadhavan Venkataraman for (;;) { 49187a18d3fSMadhavan Venkataraman /* 49287a18d3fSMadhavan Venkataraman * If we don't have a left child (i.e., we're a leaf), we're 49387a18d3fSMadhavan Venkataraman * done. 49487a18d3fSMadhavan Venkataraman */ 49587a18d3fSMadhavan Venkataraman if ((left = CALLOUT_HEAP_LEFT(current)) >= nelems) 49687a18d3fSMadhavan Venkataraman return; 49787a18d3fSMadhavan Venkataraman 49851b32bddSMadhavan Venkataraman hleft = &heap[left]; 49951b32bddSMadhavan Venkataraman hcurrent = &heap[current]; 50087a18d3fSMadhavan Venkataraman 50187a18d3fSMadhavan Venkataraman right = CALLOUT_HEAP_RIGHT(current); 50287a18d3fSMadhavan Venkataraman 50387a18d3fSMadhavan Venkataraman /* 50487a18d3fSMadhavan Venkataraman * Even if we don't have a right child, we still need to compare 50587a18d3fSMadhavan Venkataraman * our expiration against that of our left child. 
50687a18d3fSMadhavan Venkataraman */ 50787a18d3fSMadhavan Venkataraman if (right >= nelems) 50887a18d3fSMadhavan Venkataraman goto comp_left; 50987a18d3fSMadhavan Venkataraman 51051b32bddSMadhavan Venkataraman hright = &heap[right]; 51187a18d3fSMadhavan Venkataraman 51287a18d3fSMadhavan Venkataraman /* 51387a18d3fSMadhavan Venkataraman * We have both a left and a right child. We need to compare 51487a18d3fSMadhavan Venkataraman * the expiration of the children to determine which 51587a18d3fSMadhavan Venkataraman * expires earlier. 51687a18d3fSMadhavan Venkataraman */ 51751b32bddSMadhavan Venkataraman if (hright->ch_expiration < hleft->ch_expiration) { 51887a18d3fSMadhavan Venkataraman /* 51987a18d3fSMadhavan Venkataraman * Our right child is the earlier of our children. 52087a18d3fSMadhavan Venkataraman * We'll now compare our expiration to its expiration. 52187a18d3fSMadhavan Venkataraman * If ours is the earlier one, we're done. 52287a18d3fSMadhavan Venkataraman */ 52351b32bddSMadhavan Venkataraman if (hcurrent->ch_expiration <= hright->ch_expiration) 52487a18d3fSMadhavan Venkataraman return; 52587a18d3fSMadhavan Venkataraman 52687a18d3fSMadhavan Venkataraman /* 52787a18d3fSMadhavan Venkataraman * Our right child expires earlier than we do; swap 52887a18d3fSMadhavan Venkataraman * with our right child, and descend right. 52987a18d3fSMadhavan Venkataraman */ 53051b32bddSMadhavan Venkataraman CALLOUT_SWAP(hright, hcurrent); 53187a18d3fSMadhavan Venkataraman current = right; 53287a18d3fSMadhavan Venkataraman continue; 53387a18d3fSMadhavan Venkataraman } 53487a18d3fSMadhavan Venkataraman 53587a18d3fSMadhavan Venkataraman comp_left: 53687a18d3fSMadhavan Venkataraman /* 53787a18d3fSMadhavan Venkataraman * Our left child is the earlier of our children (or we have 53887a18d3fSMadhavan Venkataraman * no right child). We'll now compare our expiration 53987a18d3fSMadhavan Venkataraman * to its expiration. If ours is the earlier one, we're done. 
54087a18d3fSMadhavan Venkataraman */ 54151b32bddSMadhavan Venkataraman if (hcurrent->ch_expiration <= hleft->ch_expiration) 54287a18d3fSMadhavan Venkataraman return; 54387a18d3fSMadhavan Venkataraman 54487a18d3fSMadhavan Venkataraman /* 54587a18d3fSMadhavan Venkataraman * Our left child expires earlier than we do; swap with our 54687a18d3fSMadhavan Venkataraman * left child, and descend left. 54787a18d3fSMadhavan Venkataraman */ 54851b32bddSMadhavan Venkataraman CALLOUT_SWAP(hleft, hcurrent); 54987a18d3fSMadhavan Venkataraman current = left; 55087a18d3fSMadhavan Venkataraman } 55187a18d3fSMadhavan Venkataraman } 55287a18d3fSMadhavan Venkataraman 55387a18d3fSMadhavan Venkataraman /* 55487a18d3fSMadhavan Venkataraman * Delete and handle all past expirations in a callout table's heap. 55587a18d3fSMadhavan Venkataraman */ 55687a18d3fSMadhavan Venkataraman static void 55787a18d3fSMadhavan Venkataraman callout_heap_delete(callout_table_t *ct) 55887a18d3fSMadhavan Venkataraman { 55951b32bddSMadhavan Venkataraman hrtime_t now, expiration, next; 56087a18d3fSMadhavan Venkataraman callout_list_t *cl; 56151b32bddSMadhavan Venkataraman callout_heap_t *heap; 56287a18d3fSMadhavan Venkataraman int hash; 56387a18d3fSMadhavan Venkataraman 56487a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 56587a18d3fSMadhavan Venkataraman 56651b32bddSMadhavan Venkataraman if (CALLOUT_CLEANUP(ct)) { 56751b32bddSMadhavan Venkataraman /* 56851b32bddSMadhavan Venkataraman * There are too many heap elements pointing to empty callout 56951b32bddSMadhavan Venkataraman * lists. Clean them out. 
57051b32bddSMadhavan Venkataraman */ 57151b32bddSMadhavan Venkataraman (void) callout_heap_process(ct, 0, 0); 57251b32bddSMadhavan Venkataraman } 57351b32bddSMadhavan Venkataraman 57487a18d3fSMadhavan Venkataraman now = gethrtime(); 57551b32bddSMadhavan Venkataraman heap = ct->ct_heap; 57687a18d3fSMadhavan Venkataraman 57787a18d3fSMadhavan Venkataraman while (ct->ct_heap_num > 0) { 57851b32bddSMadhavan Venkataraman expiration = heap->ch_expiration; 57987a18d3fSMadhavan Venkataraman hash = CALLOUT_CLHASH(expiration); 58051b32bddSMadhavan Venkataraman cl = heap->ch_list; 58151b32bddSMadhavan Venkataraman ASSERT(expiration == cl->cl_expiration); 58251b32bddSMadhavan Venkataraman 58351b32bddSMadhavan Venkataraman if (cl->cl_callouts.ch_head == NULL) { 58487a18d3fSMadhavan Venkataraman /* 58551b32bddSMadhavan Venkataraman * If the callout list is empty, reap it. 58651b32bddSMadhavan Venkataraman * Decrement the reap count. 58751b32bddSMadhavan Venkataraman */ 58851b32bddSMadhavan Venkataraman CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 58951b32bddSMadhavan Venkataraman CALLOUT_LIST_FREE(ct, cl); 59051b32bddSMadhavan Venkataraman ct->ct_nreap--; 59151b32bddSMadhavan Venkataraman } else { 59251b32bddSMadhavan Venkataraman /* 59351b32bddSMadhavan Venkataraman * If the root of the heap expires in the future, 59451b32bddSMadhavan Venkataraman * bail out. 59587a18d3fSMadhavan Venkataraman */ 59687a18d3fSMadhavan Venkataraman if (expiration > now) 59787a18d3fSMadhavan Venkataraman break; 59887a18d3fSMadhavan Venkataraman 59987a18d3fSMadhavan Venkataraman /* 60087a18d3fSMadhavan Venkataraman * Move the callout list for this expiration to the 60187a18d3fSMadhavan Venkataraman * list of expired callout lists. It will be processed 60287a18d3fSMadhavan Venkataraman * by the callout executor. 
60387a18d3fSMadhavan Venkataraman */ 60487a18d3fSMadhavan Venkataraman CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 60587a18d3fSMadhavan Venkataraman CALLOUT_LIST_APPEND(ct->ct_expired, cl); 60687a18d3fSMadhavan Venkataraman } 60787a18d3fSMadhavan Venkataraman 60887a18d3fSMadhavan Venkataraman /* 60987a18d3fSMadhavan Venkataraman * Now delete the root. This is done by swapping the root with 61087a18d3fSMadhavan Venkataraman * the last item in the heap and downheaping the item. 61187a18d3fSMadhavan Venkataraman */ 61287a18d3fSMadhavan Venkataraman ct->ct_heap_num--; 61387a18d3fSMadhavan Venkataraman if (ct->ct_heap_num > 0) { 61451b32bddSMadhavan Venkataraman heap[0] = heap[ct->ct_heap_num]; 61587a18d3fSMadhavan Venkataraman callout_downheap(ct); 61687a18d3fSMadhavan Venkataraman } 61787a18d3fSMadhavan Venkataraman } 61887a18d3fSMadhavan Venkataraman 61987a18d3fSMadhavan Venkataraman /* 62051b32bddSMadhavan Venkataraman * If this callout table is empty or callouts have been suspended, 62151b32bddSMadhavan Venkataraman * just return. The cyclic has already been programmed to 62287a18d3fSMadhavan Venkataraman * infinity by the cyclic subsystem. 62387a18d3fSMadhavan Venkataraman */ 624454ab202SMadhavan Venkataraman if ((ct->ct_heap_num == 0) || (ct->ct_suspend > 0)) 62587a18d3fSMadhavan Venkataraman return; 62687a18d3fSMadhavan Venkataraman 62751b32bddSMadhavan Venkataraman /* 62851b32bddSMadhavan Venkataraman * If the top expirations are within callout_tolerance of each other, 62951b32bddSMadhavan Venkataraman * delay the cyclic expire so that they can be processed together. 63051b32bddSMadhavan Venkataraman * This is to prevent high resolution timers from swamping the system 63151b32bddSMadhavan Venkataraman * with cyclic activity. 
63251b32bddSMadhavan Venkataraman */ 63351b32bddSMadhavan Venkataraman if (ct->ct_heap_num > 2) { 63451b32bddSMadhavan Venkataraman next = expiration + callout_tolerance; 63551b32bddSMadhavan Venkataraman if ((heap[1].ch_expiration < next) || 63651b32bddSMadhavan Venkataraman (heap[2].ch_expiration < next)) 63751b32bddSMadhavan Venkataraman expiration = next; 63851b32bddSMadhavan Venkataraman } 63951b32bddSMadhavan Venkataraman 64087a18d3fSMadhavan Venkataraman (void) cyclic_reprogram(ct->ct_cyclic, expiration); 64187a18d3fSMadhavan Venkataraman } 64287a18d3fSMadhavan Venkataraman 643454ab202SMadhavan Venkataraman /* 64451b32bddSMadhavan Venkataraman * There are some situations when the entire heap is walked and processed. 64551b32bddSMadhavan Venkataraman * This function is called to do the processing. These are the situations: 64651b32bddSMadhavan Venkataraman * 64751b32bddSMadhavan Venkataraman * 1. When the reap count reaches its threshold, the heap has to be cleared 64851b32bddSMadhavan Venkataraman * of all empty callout lists. 64951b32bddSMadhavan Venkataraman * 65051b32bddSMadhavan Venkataraman * 2. When the system enters and exits KMDB/OBP, all entries in the heap 65151b32bddSMadhavan Venkataraman * need to be adjusted by the interval spent in KMDB/OBP. 65251b32bddSMadhavan Venkataraman * 65351b32bddSMadhavan Venkataraman * 3. When system time is changed, the heap has to be scanned for 65451b32bddSMadhavan Venkataraman * absolute hrestime timers. These need to be removed from the heap 65551b32bddSMadhavan Venkataraman * and expired immediately. 65651b32bddSMadhavan Venkataraman * 65751b32bddSMadhavan Venkataraman * In cases 2 and 3, it is a good idea to do 1 as well since we are 65851b32bddSMadhavan Venkataraman * scanning the heap anyway. 
65951b32bddSMadhavan Venkataraman * 66051b32bddSMadhavan Venkataraman * If the root gets changed and/or callout lists are expired, return the 66151b32bddSMadhavan Venkataraman * new expiration to the caller so he can reprogram the cyclic accordingly. 66251b32bddSMadhavan Venkataraman */ 66351b32bddSMadhavan Venkataraman static hrtime_t 66451b32bddSMadhavan Venkataraman callout_heap_process(callout_table_t *ct, hrtime_t delta, int timechange) 66551b32bddSMadhavan Venkataraman { 66651b32bddSMadhavan Venkataraman callout_heap_t *heap; 66751b32bddSMadhavan Venkataraman callout_list_t *cl, *rootcl; 66851b32bddSMadhavan Venkataraman hrtime_t expiration, now; 66951b32bddSMadhavan Venkataraman int i, hash, clflags, expired; 67051b32bddSMadhavan Venkataraman ulong_t num; 67151b32bddSMadhavan Venkataraman 67251b32bddSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 67351b32bddSMadhavan Venkataraman 67451b32bddSMadhavan Venkataraman if (ct->ct_heap_num == 0) 67551b32bddSMadhavan Venkataraman return (0); 67651b32bddSMadhavan Venkataraman 67751b32bddSMadhavan Venkataraman if (ct->ct_nreap > 0) 67851b32bddSMadhavan Venkataraman ct->ct_cleanups++; 67951b32bddSMadhavan Venkataraman 68051b32bddSMadhavan Venkataraman heap = ct->ct_heap; 68151b32bddSMadhavan Venkataraman rootcl = heap->ch_list; 68251b32bddSMadhavan Venkataraman 68351b32bddSMadhavan Venkataraman /* 68451b32bddSMadhavan Venkataraman * We walk the heap from the top to the bottom. If we encounter 68551b32bddSMadhavan Venkataraman * a heap item that points to an empty callout list, we clean 68651b32bddSMadhavan Venkataraman * it out. If we encounter a hrestime entry that must be removed, 68751b32bddSMadhavan Venkataraman * again we clean it out. Otherwise, we apply any adjustments needed 68851b32bddSMadhavan Venkataraman * to an element. 
68951b32bddSMadhavan Venkataraman * 69051b32bddSMadhavan Venkataraman * During the walk, we also compact the heap from the bottom and 69151b32bddSMadhavan Venkataraman * reconstruct the heap using upheap operations. This is very 69251b32bddSMadhavan Venkataraman * efficient if the number of elements to be cleaned is greater than 69351b32bddSMadhavan Venkataraman * or equal to half the heap. This is the common case. 69451b32bddSMadhavan Venkataraman * 69551b32bddSMadhavan Venkataraman * Even in the non-common case, the upheap operations should be short 69651b32bddSMadhavan Venkataraman * as the entries below generally tend to be bigger than the entries 69751b32bddSMadhavan Venkataraman * above. 69851b32bddSMadhavan Venkataraman */ 69951b32bddSMadhavan Venkataraman num = ct->ct_heap_num; 70051b32bddSMadhavan Venkataraman ct->ct_heap_num = 0; 70151b32bddSMadhavan Venkataraman clflags = (CALLOUT_LIST_FLAG_HRESTIME | CALLOUT_LIST_FLAG_ABSOLUTE); 70251b32bddSMadhavan Venkataraman now = gethrtime(); 70351b32bddSMadhavan Venkataraman expired = 0; 70451b32bddSMadhavan Venkataraman for (i = 0; i < num; i++) { 70551b32bddSMadhavan Venkataraman cl = heap[i].ch_list; 70651b32bddSMadhavan Venkataraman /* 70751b32bddSMadhavan Venkataraman * If the callout list is empty, delete the heap element and 70851b32bddSMadhavan Venkataraman * free the callout list. 
70951b32bddSMadhavan Venkataraman */ 71051b32bddSMadhavan Venkataraman if (cl->cl_callouts.ch_head == NULL) { 71151b32bddSMadhavan Venkataraman hash = CALLOUT_CLHASH(cl->cl_expiration); 71251b32bddSMadhavan Venkataraman CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 71351b32bddSMadhavan Venkataraman CALLOUT_LIST_FREE(ct, cl); 71451b32bddSMadhavan Venkataraman continue; 71551b32bddSMadhavan Venkataraman } 71651b32bddSMadhavan Venkataraman 71751b32bddSMadhavan Venkataraman /* 71851b32bddSMadhavan Venkataraman * Delete the heap element and expire the callout list, if 71951b32bddSMadhavan Venkataraman * one of the following is true: 72051b32bddSMadhavan Venkataraman * - the callout list has expired 72151b32bddSMadhavan Venkataraman * - the callout list is an absolute hrestime one and 72251b32bddSMadhavan Venkataraman * there has been a system time change 72351b32bddSMadhavan Venkataraman */ 72451b32bddSMadhavan Venkataraman if ((cl->cl_expiration <= now) || 72551b32bddSMadhavan Venkataraman (timechange && ((cl->cl_flags & clflags) == clflags))) { 72651b32bddSMadhavan Venkataraman hash = CALLOUT_CLHASH(cl->cl_expiration); 72751b32bddSMadhavan Venkataraman CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 72851b32bddSMadhavan Venkataraman CALLOUT_LIST_APPEND(ct->ct_expired, cl); 72951b32bddSMadhavan Venkataraman expired = 1; 73051b32bddSMadhavan Venkataraman continue; 73151b32bddSMadhavan Venkataraman } 73251b32bddSMadhavan Venkataraman 73351b32bddSMadhavan Venkataraman /* 73451b32bddSMadhavan Venkataraman * Apply adjustments, if any. Adjustments are applied after 73551b32bddSMadhavan Venkataraman * the system returns from KMDB or OBP. They are only applied 73651b32bddSMadhavan Venkataraman * to relative callout lists. 
73751b32bddSMadhavan Venkataraman */ 73851b32bddSMadhavan Venkataraman if (delta && !(cl->cl_flags & CALLOUT_LIST_FLAG_ABSOLUTE)) { 73951b32bddSMadhavan Venkataraman hash = CALLOUT_CLHASH(cl->cl_expiration); 74051b32bddSMadhavan Venkataraman CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 74151b32bddSMadhavan Venkataraman expiration = cl->cl_expiration + delta; 74251b32bddSMadhavan Venkataraman if (expiration <= 0) 74351b32bddSMadhavan Venkataraman expiration = CY_INFINITY; 74451b32bddSMadhavan Venkataraman heap[i].ch_expiration = expiration; 74551b32bddSMadhavan Venkataraman cl->cl_expiration = expiration; 74651b32bddSMadhavan Venkataraman hash = CALLOUT_CLHASH(cl->cl_expiration); 74751b32bddSMadhavan Venkataraman if (cl->cl_flags & CALLOUT_LIST_FLAG_NANO) { 74851b32bddSMadhavan Venkataraman CALLOUT_LIST_APPEND(ct->ct_clhash[hash], cl); 74951b32bddSMadhavan Venkataraman } else { 75051b32bddSMadhavan Venkataraman CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl); 75151b32bddSMadhavan Venkataraman } 75251b32bddSMadhavan Venkataraman } 75351b32bddSMadhavan Venkataraman 75451b32bddSMadhavan Venkataraman heap[ct->ct_heap_num] = heap[i]; 75551b32bddSMadhavan Venkataraman ct->ct_heap_num++; 75651b32bddSMadhavan Venkataraman (void) callout_upheap(ct); 75751b32bddSMadhavan Venkataraman } 75851b32bddSMadhavan Venkataraman 75951b32bddSMadhavan Venkataraman ct->ct_nreap = 0; 76051b32bddSMadhavan Venkataraman 76151b32bddSMadhavan Venkataraman if (expired) 76251b32bddSMadhavan Venkataraman expiration = gethrtime(); 76351b32bddSMadhavan Venkataraman else if (ct->ct_heap_num == 0) 76451b32bddSMadhavan Venkataraman expiration = CY_INFINITY; 76551b32bddSMadhavan Venkataraman else if (rootcl != heap->ch_list) 76651b32bddSMadhavan Venkataraman expiration = heap->ch_expiration; 76751b32bddSMadhavan Venkataraman else 76851b32bddSMadhavan Venkataraman expiration = 0; 76951b32bddSMadhavan Venkataraman 77051b32bddSMadhavan Venkataraman return (expiration); 77151b32bddSMadhavan Venkataraman } 
77251b32bddSMadhavan Venkataraman 77351b32bddSMadhavan Venkataraman /* 774454ab202SMadhavan Venkataraman * Common function used to create normal and realtime callouts. 775454ab202SMadhavan Venkataraman * 776454ab202SMadhavan Venkataraman * Realtime callouts are handled at CY_LOW_PIL by a cyclic handler. So, 777454ab202SMadhavan Venkataraman * there is one restriction on a realtime callout handler - it should not 778454ab202SMadhavan Venkataraman * directly or indirectly acquire cpu_lock. CPU offline waits for pending 779454ab202SMadhavan Venkataraman * cyclic handlers to complete while holding cpu_lock. So, if a realtime 780454ab202SMadhavan Venkataraman * callout handler were to try to get cpu_lock, there would be a deadlock 781454ab202SMadhavan Venkataraman * during CPU offline. 782454ab202SMadhavan Venkataraman */ 78387a18d3fSMadhavan Venkataraman callout_id_t 78487a18d3fSMadhavan Venkataraman timeout_generic(int type, void (*func)(void *), void *arg, 78587a18d3fSMadhavan Venkataraman hrtime_t expiration, hrtime_t resolution, int flags) 78687a18d3fSMadhavan Venkataraman { 78787a18d3fSMadhavan Venkataraman callout_table_t *ct; 7887c478bd9Sstevel@tonic-gate callout_t *cp; 7897c478bd9Sstevel@tonic-gate callout_id_t id; 79087a18d3fSMadhavan Venkataraman callout_list_t *cl; 79151b32bddSMadhavan Venkataraman hrtime_t now, interval, rexpiration; 79251b32bddSMadhavan Venkataraman int hash, clflags; 793f635d46aSqiao 79487a18d3fSMadhavan Venkataraman ASSERT(resolution > 0); 79587a18d3fSMadhavan Venkataraman ASSERT(func != NULL); 7967c478bd9Sstevel@tonic-gate 79787a18d3fSMadhavan Venkataraman /* 79851b32bddSMadhavan Venkataraman * We get the current hrtime right upfront so that latencies in 79951b32bddSMadhavan Venkataraman * this function do not affect the accuracy of the callout. 
80087a18d3fSMadhavan Venkataraman */ 80151b32bddSMadhavan Venkataraman now = gethrtime(); 8027c478bd9Sstevel@tonic-gate 80387a18d3fSMadhavan Venkataraman /* 80487a18d3fSMadhavan Venkataraman * We disable kernel preemption so that we remain on the same CPU 80587a18d3fSMadhavan Venkataraman * throughout. If we needed to reprogram the callout table's cyclic, 80687a18d3fSMadhavan Venkataraman * we can avoid X-calls if we are on the same CPU. 80787a18d3fSMadhavan Venkataraman * 80887a18d3fSMadhavan Venkataraman * Note that callout_alloc() releases and reacquires the callout 80987a18d3fSMadhavan Venkataraman * table mutex. While reacquiring the mutex, it is possible for us 81087a18d3fSMadhavan Venkataraman * to go to sleep and later migrate to another CPU. This should be 81187a18d3fSMadhavan Venkataraman * pretty rare, though. 81287a18d3fSMadhavan Venkataraman */ 81387a18d3fSMadhavan Venkataraman kpreempt_disable(); 81487a18d3fSMadhavan Venkataraman 81587a18d3fSMadhavan Venkataraman ct = &callout_table[CALLOUT_TABLE(type, CPU->cpu_seqid)]; 81687a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 81787a18d3fSMadhavan Venkataraman 81887a18d3fSMadhavan Venkataraman if (ct->ct_cyclic == CYCLIC_NONE) { 81987a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 82087a18d3fSMadhavan Venkataraman /* 82187a18d3fSMadhavan Venkataraman * The callout table has not yet been initialized fully. 82287a18d3fSMadhavan Venkataraman * So, put this one on the boot callout table which is 82387a18d3fSMadhavan Venkataraman * always initialized. 
82487a18d3fSMadhavan Venkataraman */ 82587a18d3fSMadhavan Venkataraman ct = &callout_boot_ct[type]; 82687a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 82787a18d3fSMadhavan Venkataraman } 82887a18d3fSMadhavan Venkataraman 82951b32bddSMadhavan Venkataraman if (CALLOUT_CLEANUP(ct)) { 83051b32bddSMadhavan Venkataraman /* 83151b32bddSMadhavan Venkataraman * There are too many heap elements pointing to empty callout 83251b32bddSMadhavan Venkataraman * lists. Clean them out. 83351b32bddSMadhavan Venkataraman */ 83451b32bddSMadhavan Venkataraman rexpiration = callout_heap_process(ct, 0, 0); 83551b32bddSMadhavan Venkataraman if ((rexpiration != 0) && (ct->ct_suspend == 0)) 83651b32bddSMadhavan Venkataraman (void) cyclic_reprogram(ct->ct_cyclic, rexpiration); 83751b32bddSMadhavan Venkataraman } 83851b32bddSMadhavan Venkataraman 83987a18d3fSMadhavan Venkataraman if ((cp = ct->ct_free) == NULL) 8407c478bd9Sstevel@tonic-gate cp = callout_alloc(ct); 8417c478bd9Sstevel@tonic-gate else 84287a18d3fSMadhavan Venkataraman ct->ct_free = cp->c_idnext; 8437c478bd9Sstevel@tonic-gate 8447c478bd9Sstevel@tonic-gate cp->c_func = func; 8457c478bd9Sstevel@tonic-gate cp->c_arg = arg; 8467c478bd9Sstevel@tonic-gate 8477c478bd9Sstevel@tonic-gate /* 84887a18d3fSMadhavan Venkataraman * Compute the expiration hrtime. 84987a18d3fSMadhavan Venkataraman */ 85087a18d3fSMadhavan Venkataraman if (flags & CALLOUT_FLAG_ABSOLUTE) { 85187a18d3fSMadhavan Venkataraman interval = expiration - now; 85287a18d3fSMadhavan Venkataraman } else { 85387a18d3fSMadhavan Venkataraman interval = expiration; 85487a18d3fSMadhavan Venkataraman expiration += now; 85587a18d3fSMadhavan Venkataraman } 85651b32bddSMadhavan Venkataraman 85751b32bddSMadhavan Venkataraman if (resolution > 1) { 85851b32bddSMadhavan Venkataraman /* 85951b32bddSMadhavan Venkataraman * Align expiration to the specified resolution. 
86051b32bddSMadhavan Venkataraman */ 86187a18d3fSMadhavan Venkataraman if (flags & CALLOUT_FLAG_ROUNDUP) 86287a18d3fSMadhavan Venkataraman expiration += resolution - 1; 86387a18d3fSMadhavan Venkataraman expiration = (expiration / resolution) * resolution; 86451b32bddSMadhavan Venkataraman } 86551b32bddSMadhavan Venkataraman 866454ab202SMadhavan Venkataraman if (expiration <= 0) { 867454ab202SMadhavan Venkataraman /* 868454ab202SMadhavan Venkataraman * expiration hrtime overflow has occurred. Just set the 869454ab202SMadhavan Venkataraman * expiration to infinity. 870454ab202SMadhavan Venkataraman */ 871454ab202SMadhavan Venkataraman expiration = CY_INFINITY; 872454ab202SMadhavan Venkataraman } 87387a18d3fSMadhavan Venkataraman 87487a18d3fSMadhavan Venkataraman /* 87587a18d3fSMadhavan Venkataraman * Assign an ID to this callout 87687a18d3fSMadhavan Venkataraman */ 87787a18d3fSMadhavan Venkataraman if (flags & CALLOUT_FLAG_32BIT) { 87887a18d3fSMadhavan Venkataraman if (interval > callout_longterm) { 87987a18d3fSMadhavan Venkataraman id = (ct->ct_long_id - callout_counter_low); 88087a18d3fSMadhavan Venkataraman id |= CALLOUT_COUNTER_HIGH; 88187a18d3fSMadhavan Venkataraman ct->ct_long_id = id; 88287a18d3fSMadhavan Venkataraman } else { 88387a18d3fSMadhavan Venkataraman id = (ct->ct_short_id - callout_counter_low); 88487a18d3fSMadhavan Venkataraman id |= CALLOUT_COUNTER_HIGH; 88587a18d3fSMadhavan Venkataraman ct->ct_short_id = id; 88687a18d3fSMadhavan Venkataraman } 88787a18d3fSMadhavan Venkataraman } else { 88887a18d3fSMadhavan Venkataraman id = (ct->ct_gen_id - callout_counter_low); 88987a18d3fSMadhavan Venkataraman if ((id & CALLOUT_COUNTER_HIGH) == 0) { 89087a18d3fSMadhavan Venkataraman id |= CALLOUT_COUNTER_HIGH; 89187a18d3fSMadhavan Venkataraman id += CALLOUT_GENERATION_LOW; 89287a18d3fSMadhavan Venkataraman } 89387a18d3fSMadhavan Venkataraman ct->ct_gen_id = id; 89487a18d3fSMadhavan Venkataraman } 89587a18d3fSMadhavan Venkataraman 89687a18d3fSMadhavan 
Venkataraman cp->c_xid = id; 89787a18d3fSMadhavan Venkataraman 89851b32bddSMadhavan Venkataraman clflags = 0; 89951b32bddSMadhavan Venkataraman if (flags & CALLOUT_FLAG_ABSOLUTE) 90051b32bddSMadhavan Venkataraman clflags |= CALLOUT_LIST_FLAG_ABSOLUTE; 90151b32bddSMadhavan Venkataraman if (flags & CALLOUT_FLAG_HRESTIME) 90251b32bddSMadhavan Venkataraman clflags |= CALLOUT_LIST_FLAG_HRESTIME; 90351b32bddSMadhavan Venkataraman if (resolution == 1) 90451b32bddSMadhavan Venkataraman clflags |= CALLOUT_LIST_FLAG_NANO; 90587a18d3fSMadhavan Venkataraman hash = CALLOUT_CLHASH(expiration); 90687a18d3fSMadhavan Venkataraman 90787a18d3fSMadhavan Venkataraman again: 90887a18d3fSMadhavan Venkataraman /* 90987a18d3fSMadhavan Venkataraman * Try to see if a callout list already exists for this expiration. 91087a18d3fSMadhavan Venkataraman */ 91151b32bddSMadhavan Venkataraman cl = callout_list_get(ct, expiration, clflags, hash); 91287a18d3fSMadhavan Venkataraman if (cl == NULL) { 91387a18d3fSMadhavan Venkataraman /* 91487a18d3fSMadhavan Venkataraman * Check if we have enough space in the heap to insert one 91587a18d3fSMadhavan Venkataraman * expiration. If not, expand the heap. 91687a18d3fSMadhavan Venkataraman */ 91787a18d3fSMadhavan Venkataraman if (ct->ct_heap_num == ct->ct_heap_max) { 91887a18d3fSMadhavan Venkataraman callout_heap_expand(ct); 91987a18d3fSMadhavan Venkataraman /* 92087a18d3fSMadhavan Venkataraman * In the above call, we drop the lock, allocate and 92187a18d3fSMadhavan Venkataraman * reacquire the lock. So, we could have been away 92287a18d3fSMadhavan Venkataraman * for a while. In the meantime, someone could have 92387a18d3fSMadhavan Venkataraman * inserted a callout list with the same expiration. 92487a18d3fSMadhavan Venkataraman * So, the best course is to repeat the steps. This 92587a18d3fSMadhavan Venkataraman * should be an infrequent event. 
92687a18d3fSMadhavan Venkataraman */ 92787a18d3fSMadhavan Venkataraman goto again; 92887a18d3fSMadhavan Venkataraman } 92987a18d3fSMadhavan Venkataraman 93087a18d3fSMadhavan Venkataraman /* 93187a18d3fSMadhavan Venkataraman * Check the free list. If we don't find one, we have to 93287a18d3fSMadhavan Venkataraman * take the slow path and allocate from kmem. 93387a18d3fSMadhavan Venkataraman */ 93487a18d3fSMadhavan Venkataraman if ((cl = ct->ct_lfree) == NULL) { 93587a18d3fSMadhavan Venkataraman callout_list_alloc(ct); 93687a18d3fSMadhavan Venkataraman /* 93787a18d3fSMadhavan Venkataraman * In the above call, we drop the lock, allocate and 93887a18d3fSMadhavan Venkataraman * reacquire the lock. So, we could have been away 93987a18d3fSMadhavan Venkataraman * for a while. In the meantime, someone could have 94087a18d3fSMadhavan Venkataraman * inserted a callout list with the same expiration. 94187a18d3fSMadhavan Venkataraman * Plus, the heap could have become full. So, the best 94287a18d3fSMadhavan Venkataraman * course is to repeat the steps. This should be an 94387a18d3fSMadhavan Venkataraman * infrequent event. 94487a18d3fSMadhavan Venkataraman */ 94587a18d3fSMadhavan Venkataraman goto again; 94687a18d3fSMadhavan Venkataraman } 94787a18d3fSMadhavan Venkataraman ct->ct_lfree = cl->cl_next; 94887a18d3fSMadhavan Venkataraman cl->cl_expiration = expiration; 94951b32bddSMadhavan Venkataraman cl->cl_flags = clflags; 95087a18d3fSMadhavan Venkataraman 95151b32bddSMadhavan Venkataraman if (clflags & CALLOUT_LIST_FLAG_NANO) { 95251b32bddSMadhavan Venkataraman CALLOUT_LIST_APPEND(ct->ct_clhash[hash], cl); 95351b32bddSMadhavan Venkataraman } else { 95487a18d3fSMadhavan Venkataraman CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl); 95551b32bddSMadhavan Venkataraman } 95687a18d3fSMadhavan Venkataraman 95787a18d3fSMadhavan Venkataraman /* 95887a18d3fSMadhavan Venkataraman * This is a new expiration. So, insert it into the heap. 
95987a18d3fSMadhavan Venkataraman * This will also reprogram the cyclic, if the expiration 96087a18d3fSMadhavan Venkataraman * propagated to the root of the heap. 96187a18d3fSMadhavan Venkataraman */ 96251b32bddSMadhavan Venkataraman callout_heap_insert(ct, cl); 96351b32bddSMadhavan Venkataraman } else { 96451b32bddSMadhavan Venkataraman /* 96551b32bddSMadhavan Venkataraman * If the callout list was empty, untimeout_generic() would 96651b32bddSMadhavan Venkataraman * have incremented a reap count. Decrement the reap count 96751b32bddSMadhavan Venkataraman * as we are going to insert a callout into this list. 96851b32bddSMadhavan Venkataraman */ 96951b32bddSMadhavan Venkataraman if (cl->cl_callouts.ch_head == NULL) 97051b32bddSMadhavan Venkataraman ct->ct_nreap--; 97187a18d3fSMadhavan Venkataraman } 97287a18d3fSMadhavan Venkataraman cp->c_list = cl; 97387a18d3fSMadhavan Venkataraman CALLOUT_APPEND(ct, cp); 97487a18d3fSMadhavan Venkataraman 97587a18d3fSMadhavan Venkataraman ct->ct_timeouts++; 97687a18d3fSMadhavan Venkataraman ct->ct_timeouts_pending++; 97787a18d3fSMadhavan Venkataraman 97887a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 97987a18d3fSMadhavan Venkataraman 98087a18d3fSMadhavan Venkataraman kpreempt_enable(); 98187a18d3fSMadhavan Venkataraman 98287a18d3fSMadhavan Venkataraman TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT, 98387a18d3fSMadhavan Venkataraman "timeout:%K(%p) in %llx expiration, cp %p", func, arg, expiration, 98487a18d3fSMadhavan Venkataraman cp); 98587a18d3fSMadhavan Venkataraman 98687a18d3fSMadhavan Venkataraman return (id); 98787a18d3fSMadhavan Venkataraman } 98887a18d3fSMadhavan Venkataraman 98987a18d3fSMadhavan Venkataraman timeout_id_t 99087a18d3fSMadhavan Venkataraman timeout(void (*func)(void *), void *arg, clock_t delta) 99187a18d3fSMadhavan Venkataraman { 99287a18d3fSMadhavan Venkataraman ulong_t id; 99387a18d3fSMadhavan Venkataraman 99487a18d3fSMadhavan Venkataraman /* 9957c478bd9Sstevel@tonic-gate * Make sure the callout runs at 
least 1 tick in the future. 9967c478bd9Sstevel@tonic-gate */ 9977c478bd9Sstevel@tonic-gate if (delta <= 0) 9987c478bd9Sstevel@tonic-gate delta = 1; 999454ab202SMadhavan Venkataraman else if (delta > callout_max_ticks) 1000454ab202SMadhavan Venkataraman delta = callout_max_ticks; 10017c478bd9Sstevel@tonic-gate 100287a18d3fSMadhavan Venkataraman id = (ulong_t)timeout_generic(CALLOUT_NORMAL, func, arg, 100387a18d3fSMadhavan Venkataraman TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY); 10047c478bd9Sstevel@tonic-gate 10057c478bd9Sstevel@tonic-gate return ((timeout_id_t)id); 10067c478bd9Sstevel@tonic-gate } 10077c478bd9Sstevel@tonic-gate 100887a18d3fSMadhavan Venkataraman /* 100987a18d3fSMadhavan Venkataraman * Convenience function that creates a normal callout with default parameters 101087a18d3fSMadhavan Venkataraman * and returns a full ID. 101187a18d3fSMadhavan Venkataraman */ 101287a18d3fSMadhavan Venkataraman callout_id_t 101387a18d3fSMadhavan Venkataraman timeout_default(void (*func)(void *), void *arg, clock_t delta) 10147c478bd9Sstevel@tonic-gate { 101587a18d3fSMadhavan Venkataraman callout_id_t id; 10167c478bd9Sstevel@tonic-gate 101787a18d3fSMadhavan Venkataraman /* 101887a18d3fSMadhavan Venkataraman * Make sure the callout runs at least 1 tick in the future. 
101987a18d3fSMadhavan Venkataraman */ 102087a18d3fSMadhavan Venkataraman if (delta <= 0) 102187a18d3fSMadhavan Venkataraman delta = 1; 1022454ab202SMadhavan Venkataraman else if (delta > callout_max_ticks) 1023454ab202SMadhavan Venkataraman delta = callout_max_ticks; 102487a18d3fSMadhavan Venkataraman 102587a18d3fSMadhavan Venkataraman id = timeout_generic(CALLOUT_NORMAL, func, arg, TICK_TO_NSEC(delta), 102687a18d3fSMadhavan Venkataraman nsec_per_tick, 0); 102787a18d3fSMadhavan Venkataraman 102887a18d3fSMadhavan Venkataraman return (id); 10297c478bd9Sstevel@tonic-gate } 10307c478bd9Sstevel@tonic-gate 10317c478bd9Sstevel@tonic-gate timeout_id_t 10327c478bd9Sstevel@tonic-gate realtime_timeout(void (*func)(void *), void *arg, clock_t delta) 10337c478bd9Sstevel@tonic-gate { 103487a18d3fSMadhavan Venkataraman ulong_t id; 103587a18d3fSMadhavan Venkataraman 103687a18d3fSMadhavan Venkataraman /* 103787a18d3fSMadhavan Venkataraman * Make sure the callout runs at least 1 tick in the future. 103887a18d3fSMadhavan Venkataraman */ 103987a18d3fSMadhavan Venkataraman if (delta <= 0) 104087a18d3fSMadhavan Venkataraman delta = 1; 1041454ab202SMadhavan Venkataraman else if (delta > callout_max_ticks) 1042454ab202SMadhavan Venkataraman delta = callout_max_ticks; 104387a18d3fSMadhavan Venkataraman 104487a18d3fSMadhavan Venkataraman id = (ulong_t)timeout_generic(CALLOUT_REALTIME, func, arg, 104587a18d3fSMadhavan Venkataraman TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY); 104687a18d3fSMadhavan Venkataraman 104787a18d3fSMadhavan Venkataraman return ((timeout_id_t)id); 10487c478bd9Sstevel@tonic-gate } 10497c478bd9Sstevel@tonic-gate 105087a18d3fSMadhavan Venkataraman /* 105187a18d3fSMadhavan Venkataraman * Convenience function that creates a realtime callout with default parameters 105287a18d3fSMadhavan Venkataraman * and returns a full ID. 
105387a18d3fSMadhavan Venkataraman */ 105487a18d3fSMadhavan Venkataraman callout_id_t 105587a18d3fSMadhavan Venkataraman realtime_timeout_default(void (*func)(void *), void *arg, clock_t delta) 10567c478bd9Sstevel@tonic-gate { 105787a18d3fSMadhavan Venkataraman callout_id_t id; 105887a18d3fSMadhavan Venkataraman 105987a18d3fSMadhavan Venkataraman /* 106087a18d3fSMadhavan Venkataraman * Make sure the callout runs at least 1 tick in the future. 106187a18d3fSMadhavan Venkataraman */ 106287a18d3fSMadhavan Venkataraman if (delta <= 0) 106387a18d3fSMadhavan Venkataraman delta = 1; 1064454ab202SMadhavan Venkataraman else if (delta > callout_max_ticks) 1065454ab202SMadhavan Venkataraman delta = callout_max_ticks; 106687a18d3fSMadhavan Venkataraman 106787a18d3fSMadhavan Venkataraman id = timeout_generic(CALLOUT_REALTIME, func, arg, TICK_TO_NSEC(delta), 106887a18d3fSMadhavan Venkataraman nsec_per_tick, 0); 106987a18d3fSMadhavan Venkataraman 107087a18d3fSMadhavan Venkataraman return (id); 107187a18d3fSMadhavan Venkataraman } 107287a18d3fSMadhavan Venkataraman 107387a18d3fSMadhavan Venkataraman hrtime_t 107487a18d3fSMadhavan Venkataraman untimeout_generic(callout_id_t id, int nowait) 107587a18d3fSMadhavan Venkataraman { 10767c478bd9Sstevel@tonic-gate callout_table_t *ct; 10777c478bd9Sstevel@tonic-gate callout_t *cp; 10787c478bd9Sstevel@tonic-gate callout_id_t xid; 107951b32bddSMadhavan Venkataraman callout_list_t *cl; 108087a18d3fSMadhavan Venkataraman int hash; 108187a18d3fSMadhavan Venkataraman callout_id_t bogus; 10827c478bd9Sstevel@tonic-gate 108387a18d3fSMadhavan Venkataraman ct = &callout_table[CALLOUT_ID_TO_TABLE(id)]; 108487a18d3fSMadhavan Venkataraman hash = CALLOUT_IDHASH(id); 10857c478bd9Sstevel@tonic-gate 108687a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 10877c478bd9Sstevel@tonic-gate 108887a18d3fSMadhavan Venkataraman /* 108987a18d3fSMadhavan Venkataraman * Search the ID hash table for the callout. 
109087a18d3fSMadhavan Venkataraman */ 109187a18d3fSMadhavan Venkataraman for (cp = ct->ct_idhash[hash].ch_head; cp; cp = cp->c_idnext) { 10927c478bd9Sstevel@tonic-gate 109387a18d3fSMadhavan Venkataraman xid = cp->c_xid; 10947c478bd9Sstevel@tonic-gate 109587a18d3fSMadhavan Venkataraman /* 109687a18d3fSMadhavan Venkataraman * Match the ID and generation number. 109787a18d3fSMadhavan Venkataraman */ 109887a18d3fSMadhavan Venkataraman if ((xid & CALLOUT_ID_MASK) != id) 10997c478bd9Sstevel@tonic-gate continue; 11007c478bd9Sstevel@tonic-gate 110187a18d3fSMadhavan Venkataraman if ((xid & CALLOUT_EXECUTING) == 0) { 110287a18d3fSMadhavan Venkataraman hrtime_t expiration; 110387a18d3fSMadhavan Venkataraman 110487a18d3fSMadhavan Venkataraman /* 110587a18d3fSMadhavan Venkataraman * Delete the callout. If the callout list becomes 110687a18d3fSMadhavan Venkataraman * NULL, we don't remove it from the table. This is 110787a18d3fSMadhavan Venkataraman * so it can be reused. If the empty callout list 110887a18d3fSMadhavan Venkataraman * corresponds to the top of the the callout heap, we 110987a18d3fSMadhavan Venkataraman * don't reprogram the table cyclic here. This is in 111087a18d3fSMadhavan Venkataraman * order to avoid lots of X-calls to the CPU associated 111187a18d3fSMadhavan Venkataraman * with the callout table. 
111287a18d3fSMadhavan Venkataraman */ 111351b32bddSMadhavan Venkataraman cl = cp->c_list; 111451b32bddSMadhavan Venkataraman expiration = cl->cl_expiration; 111587a18d3fSMadhavan Venkataraman CALLOUT_DELETE(ct, cp); 111687a18d3fSMadhavan Venkataraman cp->c_idnext = ct->ct_free; 111787a18d3fSMadhavan Venkataraman ct->ct_free = cp; 111851b32bddSMadhavan Venkataraman cp->c_xid |= CALLOUT_FREE; 111987a18d3fSMadhavan Venkataraman ct->ct_untimeouts_unexpired++; 112087a18d3fSMadhavan Venkataraman ct->ct_timeouts_pending--; 112151b32bddSMadhavan Venkataraman 112251b32bddSMadhavan Venkataraman /* 112351b32bddSMadhavan Venkataraman * If the callout list has become empty, it needs 112451b32bddSMadhavan Venkataraman * to be cleaned along with its heap entry. Increment 112551b32bddSMadhavan Venkataraman * a reap count. 112651b32bddSMadhavan Venkataraman */ 112751b32bddSMadhavan Venkataraman if (cl->cl_callouts.ch_head == NULL) 112851b32bddSMadhavan Venkataraman ct->ct_nreap++; 112987a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 113087a18d3fSMadhavan Venkataraman 113187a18d3fSMadhavan Venkataraman expiration -= gethrtime(); 113287a18d3fSMadhavan Venkataraman TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT, 113387a18d3fSMadhavan Venkataraman "untimeout:ID %lx hrtime left %llx", id, 113487a18d3fSMadhavan Venkataraman expiration); 113587a18d3fSMadhavan Venkataraman return (expiration < 0 ? 0 : expiration); 113687a18d3fSMadhavan Venkataraman } 113787a18d3fSMadhavan Venkataraman 113887a18d3fSMadhavan Venkataraman ct->ct_untimeouts_executing++; 11397c478bd9Sstevel@tonic-gate /* 11407c478bd9Sstevel@tonic-gate * The callout we want to delete is currently executing. 
11417c478bd9Sstevel@tonic-gate * The DDI states that we must wait until the callout 114207247649SMadhavan Venkataraman * completes before returning, so we block on c_done until the 114387a18d3fSMadhavan Venkataraman * callout ID changes (to the old ID if it's on the freelist, 11447c478bd9Sstevel@tonic-gate * or to a new callout ID if it's in use). This implicitly 11457c478bd9Sstevel@tonic-gate * assumes that callout structures are persistent (they are). 11467c478bd9Sstevel@tonic-gate */ 114707247649SMadhavan Venkataraman if (cp->c_executor == curthread) { 11487c478bd9Sstevel@tonic-gate /* 11497c478bd9Sstevel@tonic-gate * The timeout handler called untimeout() on itself. 11507c478bd9Sstevel@tonic-gate * Stupid, but legal. We can't wait for the timeout 11517c478bd9Sstevel@tonic-gate * to complete without deadlocking, so we just return. 11527c478bd9Sstevel@tonic-gate */ 115387a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 11547c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF, 11557c478bd9Sstevel@tonic-gate "untimeout_self:ID %x", id); 11567c478bd9Sstevel@tonic-gate return (-1); 11577c478bd9Sstevel@tonic-gate } 115887a18d3fSMadhavan Venkataraman if (nowait == 0) { 115987a18d3fSMadhavan Venkataraman /* 116087a18d3fSMadhavan Venkataraman * We need to wait. Indicate that we are waiting by 116107247649SMadhavan Venkataraman * incrementing c_waiting. This prevents the executor 116207247649SMadhavan Venkataraman * from doing a wakeup on c_done if there are no 116387a18d3fSMadhavan Venkataraman * waiters. 
116487a18d3fSMadhavan Venkataraman */ 116587a18d3fSMadhavan Venkataraman while (cp->c_xid == xid) { 116607247649SMadhavan Venkataraman cp->c_waiting = 1; 116707247649SMadhavan Venkataraman cv_wait(&cp->c_done, &ct->ct_mutex); 116887a18d3fSMadhavan Venkataraman } 116987a18d3fSMadhavan Venkataraman } 117087a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 11717c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING, 11727c478bd9Sstevel@tonic-gate "untimeout_executing:ID %lx", id); 11737c478bd9Sstevel@tonic-gate return (-1); 11747c478bd9Sstevel@tonic-gate } 117587a18d3fSMadhavan Venkataraman ct->ct_untimeouts_expired++; 11767c478bd9Sstevel@tonic-gate 117787a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 11787c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID, 11797c478bd9Sstevel@tonic-gate "untimeout_bogus_id:ID %lx", id); 11807c478bd9Sstevel@tonic-gate 11817c478bd9Sstevel@tonic-gate /* 11827c478bd9Sstevel@tonic-gate * We didn't find the specified callout ID. This means either 11837c478bd9Sstevel@tonic-gate * (1) the callout already fired, or (2) the caller passed us 11847c478bd9Sstevel@tonic-gate * a bogus value. Perform a sanity check to detect case (2). 
11857c478bd9Sstevel@tonic-gate */ 118651b32bddSMadhavan Venkataraman bogus = (CALLOUT_ID_FLAGS | CALLOUT_COUNTER_HIGH); 118787a18d3fSMadhavan Venkataraman if (((id & bogus) != CALLOUT_COUNTER_HIGH) && (id != 0)) 118887a18d3fSMadhavan Venkataraman panic("untimeout: impossible timeout id %llx", 118987a18d3fSMadhavan Venkataraman (unsigned long long)id); 11907c478bd9Sstevel@tonic-gate 11917c478bd9Sstevel@tonic-gate return (-1); 11927c478bd9Sstevel@tonic-gate } 11937c478bd9Sstevel@tonic-gate 119487a18d3fSMadhavan Venkataraman clock_t 119587a18d3fSMadhavan Venkataraman untimeout(timeout_id_t id_arg) 119687a18d3fSMadhavan Venkataraman { 119787a18d3fSMadhavan Venkataraman hrtime_t hleft; 119887a18d3fSMadhavan Venkataraman clock_t tleft; 119987a18d3fSMadhavan Venkataraman callout_id_t id; 120087a18d3fSMadhavan Venkataraman 120187a18d3fSMadhavan Venkataraman id = (ulong_t)id_arg; 120287a18d3fSMadhavan Venkataraman hleft = untimeout_generic(id, 0); 120387a18d3fSMadhavan Venkataraman if (hleft < 0) 120487a18d3fSMadhavan Venkataraman tleft = -1; 120587a18d3fSMadhavan Venkataraman else if (hleft == 0) 120687a18d3fSMadhavan Venkataraman tleft = 0; 120787a18d3fSMadhavan Venkataraman else 120887a18d3fSMadhavan Venkataraman tleft = NSEC_TO_TICK(hleft); 120987a18d3fSMadhavan Venkataraman 121087a18d3fSMadhavan Venkataraman return (tleft); 121187a18d3fSMadhavan Venkataraman } 121287a18d3fSMadhavan Venkataraman 12137c478bd9Sstevel@tonic-gate /* 121487a18d3fSMadhavan Venkataraman * Convenience function to untimeout a timeout with a full ID with default 121587a18d3fSMadhavan Venkataraman * parameters. 
121687a18d3fSMadhavan Venkataraman */ 121787a18d3fSMadhavan Venkataraman clock_t 121887a18d3fSMadhavan Venkataraman untimeout_default(callout_id_t id, int nowait) 121987a18d3fSMadhavan Venkataraman { 122087a18d3fSMadhavan Venkataraman hrtime_t hleft; 122187a18d3fSMadhavan Venkataraman clock_t tleft; 122287a18d3fSMadhavan Venkataraman 122387a18d3fSMadhavan Venkataraman hleft = untimeout_generic(id, nowait); 122487a18d3fSMadhavan Venkataraman if (hleft < 0) 122587a18d3fSMadhavan Venkataraman tleft = -1; 122687a18d3fSMadhavan Venkataraman else if (hleft == 0) 122787a18d3fSMadhavan Venkataraman tleft = 0; 122887a18d3fSMadhavan Venkataraman else 122987a18d3fSMadhavan Venkataraman tleft = NSEC_TO_TICK(hleft); 123087a18d3fSMadhavan Venkataraman 123187a18d3fSMadhavan Venkataraman return (tleft); 123287a18d3fSMadhavan Venkataraman } 123387a18d3fSMadhavan Venkataraman 123487a18d3fSMadhavan Venkataraman /* 123587a18d3fSMadhavan Venkataraman * Expire all the callouts queued in the specified callout list. 12367c478bd9Sstevel@tonic-gate */ 12377c478bd9Sstevel@tonic-gate static void 123887a18d3fSMadhavan Venkataraman callout_list_expire(callout_table_t *ct, callout_list_t *cl) 12397c478bd9Sstevel@tonic-gate { 124007247649SMadhavan Venkataraman callout_t *cp, *cnext; 12417c478bd9Sstevel@tonic-gate 124287a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 124387a18d3fSMadhavan Venkataraman ASSERT(cl != NULL); 12447c478bd9Sstevel@tonic-gate 124507247649SMadhavan Venkataraman for (cp = cl->cl_callouts.ch_head; cp != NULL; cp = cnext) { 124607247649SMadhavan Venkataraman /* 124707247649SMadhavan Venkataraman * Multiple executor threads could be running at the same 124807247649SMadhavan Venkataraman * time. If this callout is already being executed, 124907247649SMadhavan Venkataraman * go on to the next one. 
125007247649SMadhavan Venkataraman */ 125107247649SMadhavan Venkataraman if (cp->c_xid & CALLOUT_EXECUTING) { 125207247649SMadhavan Venkataraman cnext = cp->c_clnext; 125307247649SMadhavan Venkataraman continue; 125407247649SMadhavan Venkataraman } 125587a18d3fSMadhavan Venkataraman 1256f635d46aSqiao /* 125787a18d3fSMadhavan Venkataraman * Indicate to untimeout() that a callout is 125887a18d3fSMadhavan Venkataraman * being expired by the executor. 1259f635d46aSqiao */ 126087a18d3fSMadhavan Venkataraman cp->c_xid |= CALLOUT_EXECUTING; 126107247649SMadhavan Venkataraman cp->c_executor = curthread; 126287a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 126387a18d3fSMadhavan Venkataraman 12647c478bd9Sstevel@tonic-gate DTRACE_PROBE1(callout__start, callout_t *, cp); 12657c478bd9Sstevel@tonic-gate (*cp->c_func)(cp->c_arg); 12667c478bd9Sstevel@tonic-gate DTRACE_PROBE1(callout__end, callout_t *, cp); 12677c478bd9Sstevel@tonic-gate 126887a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 126987a18d3fSMadhavan Venkataraman 127087a18d3fSMadhavan Venkataraman ct->ct_expirations++; 127187a18d3fSMadhavan Venkataraman ct->ct_timeouts_pending--; 12727c478bd9Sstevel@tonic-gate /* 127307247649SMadhavan Venkataraman * Indicate completion for c_done. 12747c478bd9Sstevel@tonic-gate */ 127587a18d3fSMadhavan Venkataraman cp->c_xid &= ~CALLOUT_EXECUTING; 127607247649SMadhavan Venkataraman cp->c_executor = NULL; 127707247649SMadhavan Venkataraman cnext = cp->c_clnext; 1278f635d46aSqiao 12797c478bd9Sstevel@tonic-gate /* 128087a18d3fSMadhavan Venkataraman * Delete callout from ID hash table and the callout 128187a18d3fSMadhavan Venkataraman * list, return to freelist, and tell any untimeout() that 128287a18d3fSMadhavan Venkataraman * cares that we're done. 
12837c478bd9Sstevel@tonic-gate */ 128487a18d3fSMadhavan Venkataraman CALLOUT_DELETE(ct, cp); 128587a18d3fSMadhavan Venkataraman cp->c_idnext = ct->ct_free; 128687a18d3fSMadhavan Venkataraman ct->ct_free = cp; 128751b32bddSMadhavan Venkataraman cp->c_xid |= CALLOUT_FREE; 128887a18d3fSMadhavan Venkataraman 128907247649SMadhavan Venkataraman if (cp->c_waiting) { 129007247649SMadhavan Venkataraman cp->c_waiting = 0; 129107247649SMadhavan Venkataraman cv_broadcast(&cp->c_done); 12927c478bd9Sstevel@tonic-gate } 129387a18d3fSMadhavan Venkataraman } 12947c478bd9Sstevel@tonic-gate } 12957c478bd9Sstevel@tonic-gate 12967c478bd9Sstevel@tonic-gate /* 129787a18d3fSMadhavan Venkataraman * Execute all expired callout lists for a callout table. 12987c478bd9Sstevel@tonic-gate */ 12997c478bd9Sstevel@tonic-gate static void 130087a18d3fSMadhavan Venkataraman callout_expire(callout_table_t *ct) 13017c478bd9Sstevel@tonic-gate { 130287a18d3fSMadhavan Venkataraman callout_list_t *cl, *clnext; 1303f635d46aSqiao 130487a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 13057c478bd9Sstevel@tonic-gate 130687a18d3fSMadhavan Venkataraman for (cl = ct->ct_expired.ch_head; (cl != NULL); cl = clnext) { 1307f635d46aSqiao /* 130887a18d3fSMadhavan Venkataraman * Expire all the callouts in this callout list. 130987a18d3fSMadhavan Venkataraman */ 131087a18d3fSMadhavan Venkataraman callout_list_expire(ct, cl); 131187a18d3fSMadhavan Venkataraman 131207247649SMadhavan Venkataraman clnext = cl->cl_next; 131307247649SMadhavan Venkataraman if (cl->cl_callouts.ch_head == NULL) { 131487a18d3fSMadhavan Venkataraman /* 131587a18d3fSMadhavan Venkataraman * Free the callout list. 
131687a18d3fSMadhavan Venkataraman */ 131787a18d3fSMadhavan Venkataraman CALLOUT_LIST_DELETE(ct->ct_expired, cl); 131851b32bddSMadhavan Venkataraman CALLOUT_LIST_FREE(ct, cl); 131987a18d3fSMadhavan Venkataraman } 132087a18d3fSMadhavan Venkataraman } 132107247649SMadhavan Venkataraman } 132287a18d3fSMadhavan Venkataraman 132387a18d3fSMadhavan Venkataraman /* 132487a18d3fSMadhavan Venkataraman * The cyclic handlers below process callouts in two steps: 132587a18d3fSMadhavan Venkataraman * 132687a18d3fSMadhavan Venkataraman * 1. Find all expired callout lists and queue them in a separate 132787a18d3fSMadhavan Venkataraman * list of expired callouts. 132887a18d3fSMadhavan Venkataraman * 2. Execute the expired callout lists. 132987a18d3fSMadhavan Venkataraman * 133087a18d3fSMadhavan Venkataraman * This is done for two reasons: 133187a18d3fSMadhavan Venkataraman * 133287a18d3fSMadhavan Venkataraman * 1. We want to quickly find the next earliest expiration to program 133387a18d3fSMadhavan Venkataraman * the cyclic to and reprogram it. We can do this right at the end 133487a18d3fSMadhavan Venkataraman * of step 1. 133587a18d3fSMadhavan Venkataraman * 2. The realtime cyclic handler expires callouts in place. However, 133687a18d3fSMadhavan Venkataraman * for normal callouts, callouts are expired by a taskq thread. 133787a18d3fSMadhavan Venkataraman * So, it is simpler and more robust to have the taskq thread just 133887a18d3fSMadhavan Venkataraman * do step 2. 133987a18d3fSMadhavan Venkataraman */ 134087a18d3fSMadhavan Venkataraman 134187a18d3fSMadhavan Venkataraman /* 134287a18d3fSMadhavan Venkataraman * Realtime callout cyclic handler. 
13437c478bd9Sstevel@tonic-gate */ 13447c478bd9Sstevel@tonic-gate void 134587a18d3fSMadhavan Venkataraman callout_realtime(callout_table_t *ct) 13467c478bd9Sstevel@tonic-gate { 134787a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 134887a18d3fSMadhavan Venkataraman callout_heap_delete(ct); 134987a18d3fSMadhavan Venkataraman callout_expire(ct); 135087a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 135187a18d3fSMadhavan Venkataraman } 13527c478bd9Sstevel@tonic-gate 135387a18d3fSMadhavan Venkataraman void 135487a18d3fSMadhavan Venkataraman callout_execute(callout_table_t *ct) 135587a18d3fSMadhavan Venkataraman { 135687a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 135787a18d3fSMadhavan Venkataraman callout_expire(ct); 135887a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 135987a18d3fSMadhavan Venkataraman } 13607c478bd9Sstevel@tonic-gate 136187a18d3fSMadhavan Venkataraman /* 136287a18d3fSMadhavan Venkataraman * Normal callout cyclic handler. 136387a18d3fSMadhavan Venkataraman */ 136487a18d3fSMadhavan Venkataraman void 136587a18d3fSMadhavan Venkataraman callout_normal(callout_table_t *ct) 136687a18d3fSMadhavan Venkataraman { 136707247649SMadhavan Venkataraman int i, exec; 136887a18d3fSMadhavan Venkataraman 136987a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 137087a18d3fSMadhavan Venkataraman callout_heap_delete(ct); 137107247649SMadhavan Venkataraman CALLOUT_EXEC_COMPUTE(ct, exec); 137287a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 137387a18d3fSMadhavan Venkataraman 137407247649SMadhavan Venkataraman for (i = 0; i < exec; i++) { 137587a18d3fSMadhavan Venkataraman ASSERT(ct->ct_taskq != NULL); 137687a18d3fSMadhavan Venkataraman (void) taskq_dispatch(ct->ct_taskq, 137787a18d3fSMadhavan Venkataraman (task_func_t *)callout_execute, ct, TQ_NOSLEEP); 137887a18d3fSMadhavan Venkataraman } 137987a18d3fSMadhavan Venkataraman } 138087a18d3fSMadhavan Venkataraman 138187a18d3fSMadhavan Venkataraman /* 138287a18d3fSMadhavan 
Venkataraman * Suspend callout processing. 138387a18d3fSMadhavan Venkataraman */ 138487a18d3fSMadhavan Venkataraman static void 138587a18d3fSMadhavan Venkataraman callout_suspend(void) 138687a18d3fSMadhavan Venkataraman { 138787a18d3fSMadhavan Venkataraman int t, f; 138887a18d3fSMadhavan Venkataraman callout_table_t *ct; 138987a18d3fSMadhavan Venkataraman 139087a18d3fSMadhavan Venkataraman /* 139187a18d3fSMadhavan Venkataraman * Traverse every callout table in the system and suspend callout 139287a18d3fSMadhavan Venkataraman * processing. 139387a18d3fSMadhavan Venkataraman * 139487a18d3fSMadhavan Venkataraman * We need to suspend all the tables (including the inactive ones) 139587a18d3fSMadhavan Venkataraman * so that if a table is made active while the suspend is still on, 139687a18d3fSMadhavan Venkataraman * the table remains suspended. 139787a18d3fSMadhavan Venkataraman */ 139887a18d3fSMadhavan Venkataraman for (f = 0; f < max_ncpus; f++) { 139987a18d3fSMadhavan Venkataraman for (t = 0; t < CALLOUT_NTYPES; t++) { 140087a18d3fSMadhavan Venkataraman ct = &callout_table[CALLOUT_TABLE(t, f)]; 140187a18d3fSMadhavan Venkataraman 140287a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 1403454ab202SMadhavan Venkataraman ct->ct_suspend++; 140487a18d3fSMadhavan Venkataraman if (ct->ct_cyclic == CYCLIC_NONE) { 140587a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 140687a18d3fSMadhavan Venkataraman continue; 140787a18d3fSMadhavan Venkataraman } 1408454ab202SMadhavan Venkataraman if (ct->ct_suspend == 1) 1409454ab202SMadhavan Venkataraman (void) cyclic_reprogram(ct->ct_cyclic, 1410454ab202SMadhavan Venkataraman CY_INFINITY); 141187a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 141287a18d3fSMadhavan Venkataraman } 141387a18d3fSMadhavan Venkataraman } 141487a18d3fSMadhavan Venkataraman } 141587a18d3fSMadhavan Venkataraman 141687a18d3fSMadhavan Venkataraman /* 141787a18d3fSMadhavan Venkataraman * Resume callout processing. 
141887a18d3fSMadhavan Venkataraman */ 141987a18d3fSMadhavan Venkataraman static void 142051b32bddSMadhavan Venkataraman callout_resume(hrtime_t delta, int timechange) 142187a18d3fSMadhavan Venkataraman { 142287a18d3fSMadhavan Venkataraman hrtime_t exp; 142387a18d3fSMadhavan Venkataraman int t, f; 142487a18d3fSMadhavan Venkataraman callout_table_t *ct; 142587a18d3fSMadhavan Venkataraman 142687a18d3fSMadhavan Venkataraman /* 142787a18d3fSMadhavan Venkataraman * Traverse every callout table in the system and resume callout 142887a18d3fSMadhavan Venkataraman * processing. For active tables, perform any hrtime adjustments 142987a18d3fSMadhavan Venkataraman * necessary. 143087a18d3fSMadhavan Venkataraman */ 143187a18d3fSMadhavan Venkataraman for (f = 0; f < max_ncpus; f++) { 143287a18d3fSMadhavan Venkataraman for (t = 0; t < CALLOUT_NTYPES; t++) { 143387a18d3fSMadhavan Venkataraman ct = &callout_table[CALLOUT_TABLE(t, f)]; 143487a18d3fSMadhavan Venkataraman 143587a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 143687a18d3fSMadhavan Venkataraman if (ct->ct_cyclic == CYCLIC_NONE) { 1437454ab202SMadhavan Venkataraman ct->ct_suspend--; 143887a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 143987a18d3fSMadhavan Venkataraman continue; 144087a18d3fSMadhavan Venkataraman } 144187a18d3fSMadhavan Venkataraman 144251b32bddSMadhavan Venkataraman /* 144351b32bddSMadhavan Venkataraman * If a delta is specified, adjust the expirations in 144451b32bddSMadhavan Venkataraman * the heap by delta. Also, if the caller indicates 144551b32bddSMadhavan Venkataraman * a timechange, process that. This step also cleans 144651b32bddSMadhavan Venkataraman * out any empty callout lists that might happen to 144751b32bddSMadhavan Venkataraman * be there. 
144851b32bddSMadhavan Venkataraman */ 144951b32bddSMadhavan Venkataraman (void) callout_heap_process(ct, delta, timechange); 145087a18d3fSMadhavan Venkataraman 1451454ab202SMadhavan Venkataraman ct->ct_suspend--; 1452454ab202SMadhavan Venkataraman if (ct->ct_suspend == 0) { 145387a18d3fSMadhavan Venkataraman /* 1454454ab202SMadhavan Venkataraman * If the expired list is non-empty, then have 1455454ab202SMadhavan Venkataraman * the cyclic expire immediately. Else, program 1456454ab202SMadhavan Venkataraman * the cyclic based on the heap. 145787a18d3fSMadhavan Venkataraman */ 145887a18d3fSMadhavan Venkataraman if (ct->ct_expired.ch_head != NULL) 145987a18d3fSMadhavan Venkataraman exp = gethrtime(); 146087a18d3fSMadhavan Venkataraman else if (ct->ct_heap_num > 0) 146151b32bddSMadhavan Venkataraman exp = ct->ct_heap[0].ch_expiration; 146287a18d3fSMadhavan Venkataraman else 146387a18d3fSMadhavan Venkataraman exp = 0; 146487a18d3fSMadhavan Venkataraman if (exp != 0) 1465454ab202SMadhavan Venkataraman (void) cyclic_reprogram(ct->ct_cyclic, 1466454ab202SMadhavan Venkataraman exp); 1467454ab202SMadhavan Venkataraman } 146851b32bddSMadhavan Venkataraman 146987a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 147087a18d3fSMadhavan Venkataraman } 147187a18d3fSMadhavan Venkataraman } 14727c478bd9Sstevel@tonic-gate } 14737c478bd9Sstevel@tonic-gate 14747c478bd9Sstevel@tonic-gate /* 14757c478bd9Sstevel@tonic-gate * Callback handler used by CPR to stop and resume callouts. 147651b32bddSMadhavan Venkataraman * The cyclic subsystem saves and restores hrtime during CPR. 147751b32bddSMadhavan Venkataraman * That is why callout_resume() is called with a 0 delta. 147851b32bddSMadhavan Venkataraman * Although hrtime is the same, hrestime (system time) has 147951b32bddSMadhavan Venkataraman * progressed during CPR. So, we have to indicate a time change 148051b32bddSMadhavan Venkataraman * to expire the absolute hrestime timers. 
14817c478bd9Sstevel@tonic-gate */ 14827c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 14837c478bd9Sstevel@tonic-gate static boolean_t 14847c478bd9Sstevel@tonic-gate callout_cpr_callb(void *arg, int code) 14857c478bd9Sstevel@tonic-gate { 148687a18d3fSMadhavan Venkataraman if (code == CB_CODE_CPR_CHKPT) 148787a18d3fSMadhavan Venkataraman callout_suspend(); 148887a18d3fSMadhavan Venkataraman else 148951b32bddSMadhavan Venkataraman callout_resume(0, 1); 149087a18d3fSMadhavan Venkataraman 14917c478bd9Sstevel@tonic-gate return (B_TRUE); 14927c478bd9Sstevel@tonic-gate } 14937c478bd9Sstevel@tonic-gate 14947c478bd9Sstevel@tonic-gate /* 149587a18d3fSMadhavan Venkataraman * Callback handler invoked when the debugger is entered or exited. 14967c478bd9Sstevel@tonic-gate */ 149787a18d3fSMadhavan Venkataraman /*ARGSUSED*/ 149887a18d3fSMadhavan Venkataraman static boolean_t 149987a18d3fSMadhavan Venkataraman callout_debug_callb(void *arg, int code) 15007c478bd9Sstevel@tonic-gate { 150187a18d3fSMadhavan Venkataraman hrtime_t delta; 1502f635d46aSqiao 1503f635d46aSqiao /* 150487a18d3fSMadhavan Venkataraman * When the system enters the debugger. make a note of the hrtime. 150587a18d3fSMadhavan Venkataraman * When it is resumed, compute how long the system was in the 150687a18d3fSMadhavan Venkataraman * debugger. This interval should not be counted for callouts. 
1507f635d46aSqiao */ 150887a18d3fSMadhavan Venkataraman if (code == 0) { 150987a18d3fSMadhavan Venkataraman callout_suspend(); 151087a18d3fSMadhavan Venkataraman callout_debug_hrtime = gethrtime(); 151187a18d3fSMadhavan Venkataraman } else { 151287a18d3fSMadhavan Venkataraman delta = gethrtime() - callout_debug_hrtime; 151351b32bddSMadhavan Venkataraman callout_resume(delta, 0); 151487a18d3fSMadhavan Venkataraman } 1515f635d46aSqiao 151687a18d3fSMadhavan Venkataraman return (B_TRUE); 151787a18d3fSMadhavan Venkataraman } 151887a18d3fSMadhavan Venkataraman 151987a18d3fSMadhavan Venkataraman /* 152007247649SMadhavan Venkataraman * Move the absolute hrestime callouts to the expired list. Then program the 152107247649SMadhavan Venkataraman * table's cyclic to expire immediately so that the callouts can be executed 152287a18d3fSMadhavan Venkataraman * immediately. 152387a18d3fSMadhavan Venkataraman */ 152487a18d3fSMadhavan Venkataraman static void 152587a18d3fSMadhavan Venkataraman callout_hrestime_one(callout_table_t *ct) 152687a18d3fSMadhavan Venkataraman { 152751b32bddSMadhavan Venkataraman hrtime_t expiration; 152887a18d3fSMadhavan Venkataraman 152987a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 153087a18d3fSMadhavan Venkataraman if (ct->ct_heap_num == 0) { 153187a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 153287a18d3fSMadhavan Venkataraman return; 153387a18d3fSMadhavan Venkataraman } 153487a18d3fSMadhavan Venkataraman 153551b32bddSMadhavan Venkataraman /* 153651b32bddSMadhavan Venkataraman * Walk the heap and process all the absolute hrestime entries. 
153751b32bddSMadhavan Venkataraman */ 153851b32bddSMadhavan Venkataraman expiration = callout_heap_process(ct, 0, 1); 153987a18d3fSMadhavan Venkataraman 154051b32bddSMadhavan Venkataraman if ((expiration != 0) && (ct->ct_suspend == 0)) 154151b32bddSMadhavan Venkataraman (void) cyclic_reprogram(ct->ct_cyclic, expiration); 154207247649SMadhavan Venkataraman 154387a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 154487a18d3fSMadhavan Venkataraman } 154587a18d3fSMadhavan Venkataraman 154687a18d3fSMadhavan Venkataraman /* 154787a18d3fSMadhavan Venkataraman * This function is called whenever system time (hrestime) is changed 154887a18d3fSMadhavan Venkataraman * explicitly. All the HRESTIME callouts must be expired at once. 154987a18d3fSMadhavan Venkataraman */ 155087a18d3fSMadhavan Venkataraman /*ARGSUSED*/ 155187a18d3fSMadhavan Venkataraman void 155287a18d3fSMadhavan Venkataraman callout_hrestime(void) 155387a18d3fSMadhavan Venkataraman { 155487a18d3fSMadhavan Venkataraman int t, f; 155587a18d3fSMadhavan Venkataraman callout_table_t *ct; 155687a18d3fSMadhavan Venkataraman 155787a18d3fSMadhavan Venkataraman /* 155887a18d3fSMadhavan Venkataraman * Traverse every callout table in the system and process the hrestime 155987a18d3fSMadhavan Venkataraman * callouts therein. 156087a18d3fSMadhavan Venkataraman * 156187a18d3fSMadhavan Venkataraman * We look at all the tables because we don't know which ones were 156287a18d3fSMadhavan Venkataraman * onlined and offlined in the past. The offlined tables may still 156387a18d3fSMadhavan Venkataraman * have active cyclics processing timers somewhere. 
156487a18d3fSMadhavan Venkataraman */ 156587a18d3fSMadhavan Venkataraman for (f = 0; f < max_ncpus; f++) { 156687a18d3fSMadhavan Venkataraman for (t = 0; t < CALLOUT_NTYPES; t++) { 156787a18d3fSMadhavan Venkataraman ct = &callout_table[CALLOUT_TABLE(t, f)]; 156887a18d3fSMadhavan Venkataraman callout_hrestime_one(ct); 156987a18d3fSMadhavan Venkataraman } 157087a18d3fSMadhavan Venkataraman } 157187a18d3fSMadhavan Venkataraman } 157287a18d3fSMadhavan Venkataraman 157387a18d3fSMadhavan Venkataraman /* 157487a18d3fSMadhavan Venkataraman * Create the hash tables for this callout table. 157587a18d3fSMadhavan Venkataraman */ 157687a18d3fSMadhavan Venkataraman static void 157787a18d3fSMadhavan Venkataraman callout_hash_init(callout_table_t *ct) 157887a18d3fSMadhavan Venkataraman { 157987a18d3fSMadhavan Venkataraman size_t size; 158087a18d3fSMadhavan Venkataraman 158187a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 158287a18d3fSMadhavan Venkataraman ASSERT((ct->ct_idhash == NULL) && (ct->ct_clhash == NULL)); 158387a18d3fSMadhavan Venkataraman 158487a18d3fSMadhavan Venkataraman size = sizeof (callout_hash_t) * CALLOUT_BUCKETS; 158587a18d3fSMadhavan Venkataraman ct->ct_idhash = kmem_zalloc(size, KM_SLEEP); 158687a18d3fSMadhavan Venkataraman ct->ct_clhash = kmem_zalloc(size, KM_SLEEP); 158787a18d3fSMadhavan Venkataraman } 158887a18d3fSMadhavan Venkataraman 158987a18d3fSMadhavan Venkataraman /* 159087a18d3fSMadhavan Venkataraman * Create per-callout table kstats. 
159187a18d3fSMadhavan Venkataraman */ 159287a18d3fSMadhavan Venkataraman static void 159387a18d3fSMadhavan Venkataraman callout_kstat_init(callout_table_t *ct) 159487a18d3fSMadhavan Venkataraman { 159587a18d3fSMadhavan Venkataraman callout_stat_type_t stat; 159687a18d3fSMadhavan Venkataraman kstat_t *ct_kstats; 159787a18d3fSMadhavan Venkataraman int ndx; 159887a18d3fSMadhavan Venkataraman 159987a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 160087a18d3fSMadhavan Venkataraman ASSERT(ct->ct_kstats == NULL); 160187a18d3fSMadhavan Venkataraman 160287a18d3fSMadhavan Venkataraman ndx = ct - callout_table; 160387a18d3fSMadhavan Venkataraman ct_kstats = kstat_create("unix", ndx, "callout", 160487a18d3fSMadhavan Venkataraman "misc", KSTAT_TYPE_NAMED, CALLOUT_NUM_STATS, KSTAT_FLAG_VIRTUAL); 160587a18d3fSMadhavan Venkataraman 160687a18d3fSMadhavan Venkataraman if (ct_kstats == NULL) { 160787a18d3fSMadhavan Venkataraman cmn_err(CE_WARN, "kstat_create for callout table %p failed", 160887a18d3fSMadhavan Venkataraman (void *)ct); 160987a18d3fSMadhavan Venkataraman } else { 161087a18d3fSMadhavan Venkataraman ct_kstats->ks_data = ct->ct_kstat_data; 161187a18d3fSMadhavan Venkataraman for (stat = 0; stat < CALLOUT_NUM_STATS; stat++) 161287a18d3fSMadhavan Venkataraman kstat_named_init(&ct->ct_kstat_data[stat], 161387a18d3fSMadhavan Venkataraman callout_kstat_names[stat], KSTAT_DATA_INT64); 161487a18d3fSMadhavan Venkataraman ct->ct_kstats = ct_kstats; 161587a18d3fSMadhavan Venkataraman kstat_install(ct_kstats); 161687a18d3fSMadhavan Venkataraman } 161787a18d3fSMadhavan Venkataraman } 161887a18d3fSMadhavan Venkataraman 161987a18d3fSMadhavan Venkataraman static void 162087a18d3fSMadhavan Venkataraman callout_cyclic_init(callout_table_t *ct) 162187a18d3fSMadhavan Venkataraman { 162287a18d3fSMadhavan Venkataraman cyc_handler_t hdlr; 162387a18d3fSMadhavan Venkataraman cyc_time_t when; 162487a18d3fSMadhavan Venkataraman processorid_t seqid; 162587a18d3fSMadhavan Venkataraman 
int t; 1626*113d3ed7SMadhavan Venkataraman cyclic_id_t cyclic; 162787a18d3fSMadhavan Venkataraman 162887a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&ct->ct_mutex)); 162987a18d3fSMadhavan Venkataraman 163087a18d3fSMadhavan Venkataraman t = CALLOUT_TABLE_TYPE(ct); 163187a18d3fSMadhavan Venkataraman seqid = CALLOUT_TABLE_SEQID(ct); 163287a18d3fSMadhavan Venkataraman 163387a18d3fSMadhavan Venkataraman /* 163487a18d3fSMadhavan Venkataraman * Create the taskq thread if the table type is normal. 163587a18d3fSMadhavan Venkataraman * Realtime tables are handled at PIL1 by a softint 163687a18d3fSMadhavan Venkataraman * handler. 163787a18d3fSMadhavan Venkataraman */ 16387c478bd9Sstevel@tonic-gate if (t == CALLOUT_NORMAL) { 163987a18d3fSMadhavan Venkataraman ASSERT(ct->ct_taskq == NULL); 16407c478bd9Sstevel@tonic-gate /* 16417c478bd9Sstevel@tonic-gate * Each callout thread consumes exactly one 16427c478bd9Sstevel@tonic-gate * task structure while active. Therefore, 164351b32bddSMadhavan Venkataraman * prepopulating with 2 * callout_threads tasks 16447c478bd9Sstevel@tonic-gate * ensures that there's at least one task per 16457c478bd9Sstevel@tonic-gate * thread that's either scheduled or on the 16467c478bd9Sstevel@tonic-gate * freelist. In turn, this guarantees that 16477c478bd9Sstevel@tonic-gate * taskq_dispatch() will always either succeed 16487c478bd9Sstevel@tonic-gate * (because there's a free task structure) or 16497c478bd9Sstevel@tonic-gate * be unnecessary (because "callout_excute(ct)" 16507c478bd9Sstevel@tonic-gate * has already scheduled). 
16517c478bd9Sstevel@tonic-gate */ 16527c478bd9Sstevel@tonic-gate ct->ct_taskq = 165387a18d3fSMadhavan Venkataraman taskq_create_instance("callout_taskq", seqid, 165451b32bddSMadhavan Venkataraman callout_threads, maxclsyspri, 165551b32bddSMadhavan Venkataraman 2 * callout_threads, 2 * callout_threads, 16567c478bd9Sstevel@tonic-gate TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 16577c478bd9Sstevel@tonic-gate } 165887a18d3fSMadhavan Venkataraman 165987a18d3fSMadhavan Venkataraman /* 166087a18d3fSMadhavan Venkataraman * callouts can only be created in a table whose 166187a18d3fSMadhavan Venkataraman * cyclic has been initialized. 166287a18d3fSMadhavan Venkataraman */ 166387a18d3fSMadhavan Venkataraman ASSERT(ct->ct_heap_num == 0); 166487a18d3fSMadhavan Venkataraman 166587a18d3fSMadhavan Venkataraman /* 1666*113d3ed7SMadhavan Venkataraman * Drop the mutex before creating the callout cyclics. cyclic_add() 1667*113d3ed7SMadhavan Venkataraman * could potentially expand the cyclic heap. We don't want to be 1668*113d3ed7SMadhavan Venkataraman * holding the callout table mutex in that case. Note that this 1669*113d3ed7SMadhavan Venkataraman * function is called during CPU online. cpu_lock is held at this 1670*113d3ed7SMadhavan Venkataraman * point. So, only one thread can be executing the cyclic add logic 1671*113d3ed7SMadhavan Venkataraman * below at any time. 1672*113d3ed7SMadhavan Venkataraman */ 1673*113d3ed7SMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 1674*113d3ed7SMadhavan Venkataraman 1675*113d3ed7SMadhavan Venkataraman /* 167687a18d3fSMadhavan Venkataraman * Create the callout table cyclics. 167707247649SMadhavan Venkataraman * 167807247649SMadhavan Venkataraman * The realtime cyclic handler executes at low PIL. The normal cyclic 167907247649SMadhavan Venkataraman * handler executes at lock PIL. 
This is because there are cases 168007247649SMadhavan Venkataraman * where code can block at PIL > 1 waiting for a normal callout handler 168107247649SMadhavan Venkataraman * to unblock it directly or indirectly. If the normal cyclic were to 168207247649SMadhavan Venkataraman * be executed at low PIL, it could get blocked out by the waiter 168307247649SMadhavan Venkataraman * and cause a deadlock. 168487a18d3fSMadhavan Venkataraman */ 168587a18d3fSMadhavan Venkataraman ASSERT(ct->ct_cyclic == CYCLIC_NONE); 168687a18d3fSMadhavan Venkataraman 168787a18d3fSMadhavan Venkataraman hdlr.cyh_func = (cyc_func_t)CALLOUT_CYCLIC_HANDLER(t); 168807247649SMadhavan Venkataraman if (ct->ct_type == CALLOUT_REALTIME) 168907247649SMadhavan Venkataraman hdlr.cyh_level = callout_realtime_level; 169007247649SMadhavan Venkataraman else 169107247649SMadhavan Venkataraman hdlr.cyh_level = callout_normal_level; 169287a18d3fSMadhavan Venkataraman hdlr.cyh_arg = ct; 169387a18d3fSMadhavan Venkataraman when.cyt_when = CY_INFINITY; 169487a18d3fSMadhavan Venkataraman when.cyt_interval = CY_INFINITY; 169587a18d3fSMadhavan Venkataraman 1696*113d3ed7SMadhavan Venkataraman cyclic = cyclic_add(&hdlr, &when); 1697*113d3ed7SMadhavan Venkataraman 1698*113d3ed7SMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 1699*113d3ed7SMadhavan Venkataraman ct->ct_cyclic = cyclic; 170087a18d3fSMadhavan Venkataraman } 170187a18d3fSMadhavan Venkataraman 170287a18d3fSMadhavan Venkataraman void 170387a18d3fSMadhavan Venkataraman callout_cpu_online(cpu_t *cp) 170487a18d3fSMadhavan Venkataraman { 170587a18d3fSMadhavan Venkataraman lgrp_handle_t hand; 170687a18d3fSMadhavan Venkataraman callout_cache_t *cache; 170787a18d3fSMadhavan Venkataraman char s[KMEM_CACHE_NAMELEN]; 170887a18d3fSMadhavan Venkataraman callout_table_t *ct; 170987a18d3fSMadhavan Venkataraman processorid_t seqid; 171087a18d3fSMadhavan Venkataraman int t; 171187a18d3fSMadhavan Venkataraman 171287a18d3fSMadhavan Venkataraman ASSERT(MUTEX_HELD(&cpu_lock)); 
171387a18d3fSMadhavan Venkataraman 171487a18d3fSMadhavan Venkataraman /* 171587a18d3fSMadhavan Venkataraman * Locate the cache corresponding to the onlined CPU's lgroup. 171687a18d3fSMadhavan Venkataraman * Note that access to callout_caches is protected by cpu_lock. 171787a18d3fSMadhavan Venkataraman */ 171887a18d3fSMadhavan Venkataraman hand = lgrp_plat_cpu_to_hand(cp->cpu_id); 171987a18d3fSMadhavan Venkataraman for (cache = callout_caches; cache != NULL; cache = cache->cc_next) { 172087a18d3fSMadhavan Venkataraman if (cache->cc_hand == hand) 172187a18d3fSMadhavan Venkataraman break; 172287a18d3fSMadhavan Venkataraman } 172387a18d3fSMadhavan Venkataraman 172487a18d3fSMadhavan Venkataraman /* 172587a18d3fSMadhavan Venkataraman * If not found, create one. The caches are never destroyed. 172687a18d3fSMadhavan Venkataraman */ 172787a18d3fSMadhavan Venkataraman if (cache == NULL) { 172887a18d3fSMadhavan Venkataraman cache = kmem_alloc(sizeof (callout_cache_t), KM_SLEEP); 172987a18d3fSMadhavan Venkataraman cache->cc_hand = hand; 173087a18d3fSMadhavan Venkataraman (void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_cache%lx", 173187a18d3fSMadhavan Venkataraman (long)hand); 173287a18d3fSMadhavan Venkataraman cache->cc_cache = kmem_cache_create(s, sizeof (callout_t), 173387a18d3fSMadhavan Venkataraman CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); 173487a18d3fSMadhavan Venkataraman (void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_lcache%lx", 173587a18d3fSMadhavan Venkataraman (long)hand); 173687a18d3fSMadhavan Venkataraman cache->cc_lcache = kmem_cache_create(s, sizeof (callout_list_t), 173787a18d3fSMadhavan Venkataraman CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); 173887a18d3fSMadhavan Venkataraman cache->cc_next = callout_caches; 173987a18d3fSMadhavan Venkataraman callout_caches = cache; 174087a18d3fSMadhavan Venkataraman } 174187a18d3fSMadhavan Venkataraman 174287a18d3fSMadhavan Venkataraman seqid = cp->cpu_seqid; 174387a18d3fSMadhavan Venkataraman 
174487a18d3fSMadhavan Venkataraman for (t = 0; t < CALLOUT_NTYPES; t++) { 174587a18d3fSMadhavan Venkataraman ct = &callout_table[CALLOUT_TABLE(t, seqid)]; 174687a18d3fSMadhavan Venkataraman 174787a18d3fSMadhavan Venkataraman mutex_enter(&ct->ct_mutex); 174887a18d3fSMadhavan Venkataraman /* 174987a18d3fSMadhavan Venkataraman * Store convinience pointers to the kmem caches 175087a18d3fSMadhavan Venkataraman * in the callout table. These assignments should always be 175187a18d3fSMadhavan Venkataraman * done as callout tables can map to different physical 175287a18d3fSMadhavan Venkataraman * CPUs each time. 175387a18d3fSMadhavan Venkataraman */ 175487a18d3fSMadhavan Venkataraman ct->ct_cache = cache->cc_cache; 175587a18d3fSMadhavan Venkataraman ct->ct_lcache = cache->cc_lcache; 175687a18d3fSMadhavan Venkataraman 175787a18d3fSMadhavan Venkataraman /* 175887a18d3fSMadhavan Venkataraman * We use the heap pointer to check if stuff has been 175987a18d3fSMadhavan Venkataraman * initialized for this callout table. 176087a18d3fSMadhavan Venkataraman */ 176187a18d3fSMadhavan Venkataraman if (ct->ct_heap == NULL) { 176287a18d3fSMadhavan Venkataraman callout_heap_init(ct); 176387a18d3fSMadhavan Venkataraman callout_hash_init(ct); 176487a18d3fSMadhavan Venkataraman callout_kstat_init(ct); 176587a18d3fSMadhavan Venkataraman callout_cyclic_init(ct); 176687a18d3fSMadhavan Venkataraman } 176787a18d3fSMadhavan Venkataraman 176887a18d3fSMadhavan Venkataraman mutex_exit(&ct->ct_mutex); 176987a18d3fSMadhavan Venkataraman 177087a18d3fSMadhavan Venkataraman /* 1771454ab202SMadhavan Venkataraman * Move the cyclic to this CPU by doing a bind. 
177287a18d3fSMadhavan Venkataraman */ 177387a18d3fSMadhavan Venkataraman cyclic_bind(ct->ct_cyclic, cp, NULL); 1774454ab202SMadhavan Venkataraman } 1775454ab202SMadhavan Venkataraman } 1776454ab202SMadhavan Venkataraman 1777454ab202SMadhavan Venkataraman void 1778454ab202SMadhavan Venkataraman callout_cpu_offline(cpu_t *cp) 1779454ab202SMadhavan Venkataraman { 1780454ab202SMadhavan Venkataraman callout_table_t *ct; 1781454ab202SMadhavan Venkataraman processorid_t seqid; 1782454ab202SMadhavan Venkataraman int t; 1783454ab202SMadhavan Venkataraman 1784454ab202SMadhavan Venkataraman ASSERT(MUTEX_HELD(&cpu_lock)); 1785454ab202SMadhavan Venkataraman 1786454ab202SMadhavan Venkataraman seqid = cp->cpu_seqid; 1787454ab202SMadhavan Venkataraman 1788454ab202SMadhavan Venkataraman for (t = 0; t < CALLOUT_NTYPES; t++) { 1789454ab202SMadhavan Venkataraman ct = &callout_table[CALLOUT_TABLE(t, seqid)]; 1790454ab202SMadhavan Venkataraman 1791454ab202SMadhavan Venkataraman /* 1792454ab202SMadhavan Venkataraman * Unbind the cyclic. This will allow the cyclic subsystem 1793454ab202SMadhavan Venkataraman * to juggle the cyclic during CPU offline. 1794454ab202SMadhavan Venkataraman */ 179587a18d3fSMadhavan Venkataraman cyclic_bind(ct->ct_cyclic, NULL, NULL); 17967c478bd9Sstevel@tonic-gate } 17977c478bd9Sstevel@tonic-gate } 179887a18d3fSMadhavan Venkataraman 179987a18d3fSMadhavan Venkataraman /* 180087a18d3fSMadhavan Venkataraman * This is called to perform per-CPU initialization for slave CPUs at 180187a18d3fSMadhavan Venkataraman * boot time. 
180287a18d3fSMadhavan Venkataraman */ 180387a18d3fSMadhavan Venkataraman void 180487a18d3fSMadhavan Venkataraman callout_mp_init(void) 180587a18d3fSMadhavan Venkataraman { 180687a18d3fSMadhavan Venkataraman cpu_t *cp; 180787a18d3fSMadhavan Venkataraman 180887a18d3fSMadhavan Venkataraman mutex_enter(&cpu_lock); 180987a18d3fSMadhavan Venkataraman 181087a18d3fSMadhavan Venkataraman cp = cpu_active; 181187a18d3fSMadhavan Venkataraman do { 181287a18d3fSMadhavan Venkataraman callout_cpu_online(cp); 181387a18d3fSMadhavan Venkataraman } while ((cp = cp->cpu_next_onln) != cpu_active); 181487a18d3fSMadhavan Venkataraman 181587a18d3fSMadhavan Venkataraman mutex_exit(&cpu_lock); 181687a18d3fSMadhavan Venkataraman } 181787a18d3fSMadhavan Venkataraman 181887a18d3fSMadhavan Venkataraman /* 181987a18d3fSMadhavan Venkataraman * Initialize all callout tables. Called at boot time just before clkstart(). 182087a18d3fSMadhavan Venkataraman */ 182187a18d3fSMadhavan Venkataraman void 182287a18d3fSMadhavan Venkataraman callout_init(void) 182387a18d3fSMadhavan Venkataraman { 182487a18d3fSMadhavan Venkataraman int f, t; 182587a18d3fSMadhavan Venkataraman size_t size; 182687a18d3fSMadhavan Venkataraman int table_id; 182787a18d3fSMadhavan Venkataraman callout_table_t *ct; 182887a18d3fSMadhavan Venkataraman long bits, fanout; 182987a18d3fSMadhavan Venkataraman uintptr_t buf; 183087a18d3fSMadhavan Venkataraman 183187a18d3fSMadhavan Venkataraman /* 183287a18d3fSMadhavan Venkataraman * Initialize callout globals. 
183387a18d3fSMadhavan Venkataraman */ 183487a18d3fSMadhavan Venkataraman bits = 0; 183587a18d3fSMadhavan Venkataraman for (fanout = 1; (fanout < max_ncpus); fanout <<= 1) 183687a18d3fSMadhavan Venkataraman bits++; 183787a18d3fSMadhavan Venkataraman callout_table_bits = CALLOUT_TYPE_BITS + bits; 183887a18d3fSMadhavan Venkataraman callout_table_mask = (1 << callout_table_bits) - 1; 183987a18d3fSMadhavan Venkataraman callout_counter_low = 1 << CALLOUT_COUNTER_SHIFT; 184087a18d3fSMadhavan Venkataraman callout_longterm = TICK_TO_NSEC(CALLOUT_LONGTERM_TICKS); 1841454ab202SMadhavan Venkataraman callout_max_ticks = CALLOUT_MAX_TICKS; 184251b32bddSMadhavan Venkataraman if (callout_min_reap == 0) 184351b32bddSMadhavan Venkataraman callout_min_reap = CALLOUT_MIN_REAP; 184487a18d3fSMadhavan Venkataraman 184551b32bddSMadhavan Venkataraman if (callout_tolerance <= 0) 184651b32bddSMadhavan Venkataraman callout_tolerance = CALLOUT_TOLERANCE; 184751b32bddSMadhavan Venkataraman if (callout_threads <= 0) 184851b32bddSMadhavan Venkataraman callout_threads = CALLOUT_THREADS; 184987a18d3fSMadhavan Venkataraman 185087a18d3fSMadhavan Venkataraman /* 185187a18d3fSMadhavan Venkataraman * Allocate all the callout tables based on max_ncpus. We have chosen 185287a18d3fSMadhavan Venkataraman * to do boot-time allocation instead of dynamic allocation because: 185387a18d3fSMadhavan Venkataraman * 185487a18d3fSMadhavan Venkataraman * - the size of the callout tables is not too large. 185587a18d3fSMadhavan Venkataraman * - there are race conditions involved in making this dynamic. 185687a18d3fSMadhavan Venkataraman * - the hash tables that go with the callout tables consume 185787a18d3fSMadhavan Venkataraman * most of the memory and they are only allocated in 185887a18d3fSMadhavan Venkataraman * callout_cpu_online(). 185987a18d3fSMadhavan Venkataraman * 186087a18d3fSMadhavan Venkataraman * Each CPU has two tables that are consecutive in the array. 
The first 186187a18d3fSMadhavan Venkataraman * one is for realtime callouts and the second one is for normal ones. 186287a18d3fSMadhavan Venkataraman * 186387a18d3fSMadhavan Venkataraman * We do this alignment dance to make sure that callout table 186487a18d3fSMadhavan Venkataraman * structures will always be on a cache line boundary. 186587a18d3fSMadhavan Venkataraman */ 186687a18d3fSMadhavan Venkataraman size = sizeof (callout_table_t) * CALLOUT_NTYPES * max_ncpus; 186787a18d3fSMadhavan Venkataraman size += CALLOUT_ALIGN; 186887a18d3fSMadhavan Venkataraman buf = (uintptr_t)kmem_zalloc(size, KM_SLEEP); 186987a18d3fSMadhavan Venkataraman callout_table = (callout_table_t *)P2ROUNDUP(buf, CALLOUT_ALIGN); 187087a18d3fSMadhavan Venkataraman 187187a18d3fSMadhavan Venkataraman size = sizeof (kstat_named_t) * CALLOUT_NUM_STATS; 187287a18d3fSMadhavan Venkataraman /* 187387a18d3fSMadhavan Venkataraman * Now, initialize the tables for all the CPUs. 187487a18d3fSMadhavan Venkataraman */ 187587a18d3fSMadhavan Venkataraman for (f = 0; f < max_ncpus; f++) { 187687a18d3fSMadhavan Venkataraman for (t = 0; t < CALLOUT_NTYPES; t++) { 187787a18d3fSMadhavan Venkataraman table_id = CALLOUT_TABLE(t, f); 187887a18d3fSMadhavan Venkataraman ct = &callout_table[table_id]; 1879454ab202SMadhavan Venkataraman ct->ct_type = t; 188087a18d3fSMadhavan Venkataraman mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 188187a18d3fSMadhavan Venkataraman /* 188287a18d3fSMadhavan Venkataraman * Precompute the base IDs for long and short-term 188387a18d3fSMadhavan Venkataraman * legacy IDs. This makes ID generation during 188487a18d3fSMadhavan Venkataraman * timeout() fast. 188587a18d3fSMadhavan Venkataraman */ 188687a18d3fSMadhavan Venkataraman ct->ct_short_id = CALLOUT_SHORT_ID(table_id); 188787a18d3fSMadhavan Venkataraman ct->ct_long_id = CALLOUT_LONG_ID(table_id); 188887a18d3fSMadhavan Venkataraman /* 188987a18d3fSMadhavan Venkataraman * Precompute the base ID for generation-based IDs. 
189087a18d3fSMadhavan Venkataraman * Note that when the first ID gets allocated, the 189187a18d3fSMadhavan Venkataraman * ID will wrap. This will cause the generation 189287a18d3fSMadhavan Venkataraman * number to be incremented to 1. 189387a18d3fSMadhavan Venkataraman */ 189487a18d3fSMadhavan Venkataraman ct->ct_gen_id = CALLOUT_SHORT_ID(table_id); 189587a18d3fSMadhavan Venkataraman /* 189687a18d3fSMadhavan Venkataraman * Initialize the cyclic as NONE. This will get set 189787a18d3fSMadhavan Venkataraman * during CPU online. This is so that partially 189887a18d3fSMadhavan Venkataraman * populated systems will only have the required 189987a18d3fSMadhavan Venkataraman * number of cyclics, not more. 190087a18d3fSMadhavan Venkataraman */ 190187a18d3fSMadhavan Venkataraman ct->ct_cyclic = CYCLIC_NONE; 190287a18d3fSMadhavan Venkataraman ct->ct_kstat_data = kmem_zalloc(size, KM_SLEEP); 190387a18d3fSMadhavan Venkataraman } 190487a18d3fSMadhavan Venkataraman } 190587a18d3fSMadhavan Venkataraman 190687a18d3fSMadhavan Venkataraman /* 190787a18d3fSMadhavan Venkataraman * Add the callback for CPR. This is called during checkpoint 190887a18d3fSMadhavan Venkataraman * resume to suspend and resume callouts. 190987a18d3fSMadhavan Venkataraman */ 191087a18d3fSMadhavan Venkataraman (void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT, 191187a18d3fSMadhavan Venkataraman "callout_cpr"); 191287a18d3fSMadhavan Venkataraman (void) callb_add(callout_debug_callb, 0, CB_CL_ENTER_DEBUGGER, 191387a18d3fSMadhavan Venkataraman "callout_debug"); 191487a18d3fSMadhavan Venkataraman 191587a18d3fSMadhavan Venkataraman /* 191687a18d3fSMadhavan Venkataraman * Call the per-CPU initialization function for the boot CPU. This 191787a18d3fSMadhavan Venkataraman * is done here because the function is not called automatically for 191887a18d3fSMadhavan Venkataraman * the boot CPU from the CPU online/offline hooks. 
Note that the 191987a18d3fSMadhavan Venkataraman * CPU lock is taken here because of convention. 192087a18d3fSMadhavan Venkataraman */ 192187a18d3fSMadhavan Venkataraman mutex_enter(&cpu_lock); 192287a18d3fSMadhavan Venkataraman callout_boot_ct = &callout_table[CALLOUT_TABLE(0, CPU->cpu_seqid)]; 192387a18d3fSMadhavan Venkataraman callout_cpu_online(CPU); 192487a18d3fSMadhavan Venkataraman mutex_exit(&cpu_lock); 19257c478bd9Sstevel@tonic-gate } 1926