1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 22 /* All Rights Reserved */ 23 24 25 /* 26 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #ifndef _SYS_CALLO_H 31 #define _SYS_CALLO_H 32 33 #include <sys/t_lock.h> 34 #include <sys/taskq.h> 35 #include <sys/lgrp.h> 36 #include <sys/processor.h> 37 #include <sys/cyclic.h> 38 #include <sys/kstat.h> 39 #include <sys/systm.h> 40 41 #ifdef __cplusplus 42 extern "C" { 43 #endif 44 45 #ifdef _KERNEL 46 47 typedef struct callout_list callout_list_t; 48 49 /* 50 * The callout mechanism provides general-purpose event scheduling: 51 * an arbitrary function is called in a specified amount of time. 52 * The expiration time for a callout is kept in its callout list 53 * structure. 54 */ 55 typedef struct callout { 56 struct callout *c_idnext; /* next in ID hash, or on freelist */ 57 struct callout *c_idprev; /* prev in ID hash */ 58 struct callout *c_clnext; /* next in callout list */ 59 struct callout *c_clprev; /* prev in callout list */ 60 callout_id_t c_xid; /* extended callout ID; see below */ 61 callout_list_t *c_list; /* callout list */ 62 void (*c_func)(void *); /* function to call */ 63 void *c_arg; /* argument to function */ 64 kthread_t *c_executor; /* executing thread */ 65 kcondvar_t c_done; /* signal callout completion */ 66 ushort_t c_waiting; /* untimeout waiting flag */ 67 } callout_t; 68 69 /* 70 * The callout ID (callout_id_t) uniquely identifies a callout. The callout 71 * ID is always 64 bits internally. The lower 32 bits contain an ID value. 72 * The upper 32 bits contain a generation number and flags. When the ID value 73 * wraps the generation number is incremented during ID generation. This 74 * protects callers from ID collisions that can happen as a result of the wrap. 75 * 76 * The kernel internal interface, timeout_generic(), always returns a 77 * callout_id_t. But the legacy interfaces, timeout() and realtime_timeout() 78 * return a timeout_id_t. On a 64-bit system, timeout_id_t is also 64 bits. 79 * So, the full 64-bit ID (sans the flags) can be returned. However, on 32-bit 80 * systems, timeout_id_t is 32 bits. So, only the lower 32 bits can be 81 * returned. In such cases, a default generation number of 0 is assigned to 82 * the legacy IDs. 83 * 84 * The lower 32-bit ID space is partitioned into two spaces - one for 85 * short-term callouts and one for long-term. 86 * 87 * Here is the bit layout for the callout ID: 88 * 89 * 63 62 61 ... 32 31 30 29 .. X+1 X ... 1 0 90 * ----------------------------------------------------------------------- 91 * | Free | Exec | Generation | Long | Counter | ID bits | Table | Type | 92 * | | | number | term | High | | number | | 93 * ----------------------------------------------------------------------- 94 * 95 * Free: 96 * This bit indicates that this callout has been freed. This is for 97 * debugging purposes. 98 * 99 * Exec(uting): 100 * This is the executing bit which is only set in the extended callout 101 * ID. This bit indicates that the callout handler is currently being 102 * executed. 103 * 104 * Generation number: 105 * This is the generation part of the ID. 106 * 107 * Long term: 108 * This bit indicates whether this is a short-term or a long-term callout. 109 * The long-term bit exists to address the problem of callout ID collision 110 * on 32-bit systems. This is an issue because the system typically 111 * generates a large number of timeout() requests, which means that callout 112 * IDs eventually get recycled. Most timeouts are very short-lived, so that 113 * ID recycling isn't a problem; but there are a handful of timeouts which 114 * are sufficiently long-lived to see their own IDs reused. We use the 115 * long-term bit to partition the ID namespace into pieces; the short-term 116 * space gets all the heavy traffic and can wrap frequently (i.e., on the 117 * order of a day) with no ill effects; the long-term space gets very little 118 * traffic and thus never wraps. That said, we need to future proof callouts 119 * in case 32-bit systems grow in size and are able to consume callout IDs 120 * at faster rates. So, we should make all the kernel clients that use 121 * callouts to use the internal interface so that they can use IDs outside 122 * of the legacy space with a proper generation number. 123 * 124 * Counter High + ID counter bits: 125 * These bits represent the actual ID bits in the callout ID. 126 * The highest bit of the running counter is always set; this ensures that 127 * the callout ID is always non-zero, thus eliminating the need for an 128 * explicit wrap-around test during ID generation. 129 * 130 * Table number: 131 * These bits carry the table number for the callout table where the callout 132 * is queued. Each CPU has its own callout table. So, the callout tables are 133 * numbered from 0 - (max_ncpus - 1). Because max_ncpus is different on 134 * different systems, the actual number of table number bits will vary 135 * accordingly. And so will the ID counter bits. 136 * 137 * Type: 138 * This bit represents the callout (table) type. Each CPU has one realtime 139 * and one normal callout table. 140 */ 141 #define CALLOUT_ID_FREE 0x8000000000000000ULL 142 #define CALLOUT_EXECUTING 0x4000000000000000ULL 143 #define CALLOUT_ID_FLAGS (CALLOUT_ID_FREE | CALLOUT_EXECUTING) 144 #define CALLOUT_ID_MASK ~CALLOUT_ID_FLAGS 145 #define CALLOUT_GENERATION_LOW 0x100000000ULL 146 #define CALLOUT_LONGTERM 0x80000000 147 #define CALLOUT_COUNTER_HIGH 0x40000000 148 #define CALLOUT_TYPE_BITS 1 149 #define CALLOUT_NTYPES (1 << CALLOUT_TYPE_BITS) 150 #define CALLOUT_TYPE_MASK (CALLOUT_NTYPES - 1) 151 #define CALLOUT_COUNTER_SHIFT callout_table_bits 152 #define CALLOUT_TABLE(t, f) (((f) << CALLOUT_TYPE_BITS) | (t)) 153 #define CALLOUT_TABLE_NUM(ct) ((ct) - callout_table) 154 #define CALLOUT_TABLE_SEQID(ct) (CALLOUT_TABLE_NUM(ct) >> CALLOUT_TYPE_BITS) 155 156 /* 157 * We assume that during any period of CALLOUT_LONGTERM_TICKS ticks, at most 158 * (CALLOUT_COUNTER_HIGH / callout_counter_low) callouts will be generated. 159 */ 160 #define CALLOUT_LONGTERM_TICKS 0x4000UL 161 #define CALLOUT_BUCKET_SHIFT 9 162 #define CALLOUT_BUCKETS (1 << CALLOUT_BUCKET_SHIFT) 163 #define CALLOUT_BUCKET_MASK (CALLOUT_BUCKETS - 1) 164 #define CALLOUT_HASH(x) ((x) & CALLOUT_BUCKET_MASK) 165 #define CALLOUT_IDHASH(x) CALLOUT_HASH((x) >> CALLOUT_COUNTER_SHIFT) 166 /* 167 * The multiply by 0 and 1 below are cosmetic. Just to align things better 168 * and make it more readable. The multiplications will be done at compile 169 * time. 170 */ 171 #define CALLOUT_CLHASH(x) \ 172 CALLOUT_HASH( \ 173 ((x)>>(CALLOUT_BUCKET_SHIFT*0)) ^ \ 174 ((x)>>(CALLOUT_BUCKET_SHIFT*1)) ^ \ 175 ((x)>>(CALLOUT_BUCKET_SHIFT*2)) ^ \ 176 ((x)>>(CALLOUT_BUCKET_SHIFT*3))) 177 178 #define CALLOUT_ID_TO_TABLE(id) ((id) & callout_table_mask) 179 180 #define CALLOUT_SHORT_ID(table) \ 181 ((callout_id_t)(table) | CALLOUT_COUNTER_HIGH) 182 #define CALLOUT_LONG_ID(table) \ 183 (CALLOUT_SHORT_ID(table) | CALLOUT_LONGTERM) 184 185 #define CALLOUT_THREADS 2 186 187 #define CALLOUT_REALTIME 0 /* realtime callout type */ 188 #define CALLOUT_NORMAL 1 /* normal callout type */ 189 190 /* 191 * callout_t's are cache-aligned structures allocated from kmem caches. One kmem 192 * cache is created per lgrp and is shared by all CPUs in that lgrp. Benefits: 193 * - cache pages are mapped only in the TLBs of the CPUs of the lgrp 194 * - data in cache pages is present only in those CPU caches 195 * - memory access performance improves with locality-awareness in kmem 196 * 197 * The following structure is used to manage per-lgroup kmem caches. 198 * 199 * NOTE: Free callout_t's go to a callout table's freelist. CPUs map to callout 200 * tables via their sequence IDs, not CPU IDs. DR operations can cause a 201 * free list to have callouts from multiple lgrp caches. This takes away some 202 * performance, but is no worse than if we did not use lgrp caches at all. 203 */ 204 typedef struct callout_cache { 205 struct callout_cache *cc_next; /* link in the global list */ 206 lgrp_handle_t cc_hand; /* lgroup handle */ 207 kmem_cache_t *cc_cache; /* kmem cache pointer */ 208 kmem_cache_t *cc_lcache; /* kmem cache pointer */ 209 } callout_cache_t; 210 211 /* 212 * The callout hash structure is used for queueing both callouts and 213 * callout lists. That is why the fields are declared as void *. 214 */ 215 typedef struct callout_hash { 216 void *ch_head; 217 void *ch_tail; 218 } callout_hash_t; 219 220 /* 221 * CALLOUT_LIST_FLAG_FREE 222 * Callout list is free. 223 * CALLOUT_LIST_FLAG_ABSOLUTE 224 * Callout list contains absolute timers. 225 * CALLOUT_LIST_FLAG_HRESTIME 226 * Callout list contains hrestime timers. 227 * CALLOUT_LIST_FLAG_NANO 228 * Callout list contains 1-nanosecond resolution callouts. 229 * CALLOUT_LIST_FLAG_HEAPED 230 * Callout list is present in the callout heap. 231 * CALLOUT_LIST_FLAG_QUEUED 232 * Callout list is present in the callout queue. 233 */ 234 #define CALLOUT_LIST_FLAG_FREE 0x1 235 #define CALLOUT_LIST_FLAG_ABSOLUTE 0x2 236 #define CALLOUT_LIST_FLAG_HRESTIME 0x4 237 #define CALLOUT_LIST_FLAG_NANO 0x8 238 #define CALLOUT_LIST_FLAG_HEAPED 0x10 239 #define CALLOUT_LIST_FLAG_QUEUED 0x20 240 241 struct callout_list { 242 callout_list_t *cl_next; /* next in clhash */ 243 callout_list_t *cl_prev; /* prev in clhash */ 244 hrtime_t cl_expiration; /* expiration for callouts in list */ 245 callout_hash_t cl_callouts; /* list of callouts */ 246 int cl_flags; /* callout flags */ 247 }; 248 249 /* 250 * Callout heap element. Each element in the heap stores the expiration 251 * as well as the corresponding callout list. This is to avoid a lookup 252 * of the callout list when the heap is processed. Because we store the 253 * callout list pointer in the heap element, we have to always remove 254 * a heap element and its callout list together. We cannot remove one 255 * without the other. 256 * 257 * This structure's size must be a power of two because we want an 258 * integral number of these to fit into a page. 259 */ 260 typedef struct callout_heap { 261 hrtime_t ch_expiration; 262 callout_list_t *ch_list; 263 #ifndef _LP64 264 char ch_pad[4]; /* pad to power of 2 */ 265 #endif 266 } callout_heap_t; 267 268 /* 269 * When the heap contains too many empty callout lists, it needs to be 270 * cleaned up. The decision to clean up the heap is a function of the 271 * number of empty entries and the heap size. Also, we don't want to 272 * clean up small heaps. 273 */ 274 #define CALLOUT_MIN_REAP (CALLOUT_BUCKETS >> 3) 275 #define CALLOUT_CLEANUP(ct) ((ct->ct_nreap >= callout_min_reap) && \ 276 (ct->ct_nreap >= (ct->ct_heap_num >> 1))) 277 278 /* 279 * Per-callout table kstats. 280 * 281 * CALLOUT_TIMEOUTS 282 * Callouts created since boot. 283 * CALLOUT_TIMEOUTS_PENDING 284 * Number of outstanding callouts. 285 * CALLOUT_UNTIMEOUTS_UNEXPIRED 286 * Number of cancelled callouts that have not expired. 287 * CALLOUT_UNTIMEOUTS_EXECUTING 288 * Number of cancelled callouts that were executing at the time of 289 * cancellation. 290 * CALLOUT_UNTIMEOUTS_EXPIRED 291 * Number of cancelled callouts that had already expired at the time 292 * of cancellations. 293 * CALLOUT_EXPIRATIONS 294 * Number of callouts that expired. 295 * CALLOUT_ALLOCATIONS 296 * Number of callout structures allocated. 297 * CALLOUT_CLEANUPS 298 * Number of times a callout table is cleaned up. 299 */ 300 typedef enum callout_stat_type { 301 CALLOUT_TIMEOUTS, 302 CALLOUT_TIMEOUTS_PENDING, 303 CALLOUT_UNTIMEOUTS_UNEXPIRED, 304 CALLOUT_UNTIMEOUTS_EXECUTING, 305 CALLOUT_UNTIMEOUTS_EXPIRED, 306 CALLOUT_EXPIRATIONS, 307 CALLOUT_ALLOCATIONS, 308 CALLOUT_CLEANUPS, 309 CALLOUT_NUM_STATS 310 } callout_stat_type_t; 311 312 /* 313 * Callout flags: 314 * 315 * CALLOUT_FLAG_ROUNDUP 316 * Roundup the expiration time to the next resolution boundary. 317 * If this flag is not specified, the expiration time is rounded down. 318 * CALLOUT_FLAG_ABSOLUTE 319 * Normally, the expiration passed to the timeout API functions is an 320 * expiration interval. If this flag is specified, then it is 321 * interpreted as the expiration time itself. 322 * CALLOUT_FLAG_HRESTIME 323 * Normally, callouts are not affected by changes to system time 324 * (hrestime). This flag is used to create a callout that is affected 325 * by system time. If system time changes, these timers must be 326 * handled in a special way (see callout.c). These are used by condition 327 * variables and LWP timers that need this behavior. 328 * CALLOUT_FLAG_32BIT 329 * Legacy interfaces timeout() and realtime_timeout() pass this flag 330 * to timeout_generic() to indicate that a 32-bit ID should be allocated. 331 */ 332 #define CALLOUT_FLAG_ROUNDUP 0x1 333 #define CALLOUT_FLAG_ABSOLUTE 0x2 334 #define CALLOUT_FLAG_HRESTIME 0x4 335 #define CALLOUT_FLAG_32BIT 0x8 336 337 /* 338 * On 32-bit systems, the legacy interfaces, timeout() and realtime_timeout(), 339 * must pass CALLOUT_FLAG_32BIT to timeout_generic() so that a 32-bit ID 340 * can be generated. 341 */ 342 #ifdef _LP64 343 #define CALLOUT_LEGACY 0 344 #else 345 #define CALLOUT_LEGACY CALLOUT_FLAG_32BIT 346 #endif 347 348 /* 349 * All of the state information associated with a callout table. 350 * The fields are ordered with cache performance in mind. 351 */ 352 typedef struct callout_table { 353 kmutex_t ct_mutex; /* protects all callout state */ 354 callout_t *ct_free; /* free callout structures */ 355 callout_list_t *ct_lfree; /* free callout list structures */ 356 callout_id_t ct_short_id; /* most recently issued short-term ID */ 357 callout_id_t ct_long_id; /* most recently issued long-term ID */ 358 callout_hash_t *ct_idhash; /* ID hash chains */ 359 callout_hash_t *ct_clhash; /* callout list hash */ 360 kstat_named_t *ct_kstat_data; /* callout kstat data */ 361 362 uint_t ct_type; /* callout table type */ 363 uint_t ct_suspend; /* suspend count */ 364 cyclic_id_t ct_cyclic; /* cyclic for this table */ 365 callout_heap_t *ct_heap; /* callout expiration heap */ 366 ulong_t ct_heap_num; /* occupied slots in the heap */ 367 ulong_t ct_heap_max; /* end of the heap */ 368 kmem_cache_t *ct_cache; /* callout kmem cache */ 369 kmem_cache_t *ct_lcache; /* callout list kmem cache */ 370 callout_id_t ct_gen_id; /* generation based ID */ 371 372 callout_hash_t ct_expired; /* list of expired callout lists */ 373 taskq_t *ct_taskq; /* taskq to execute normal callouts */ 374 kstat_t *ct_kstats; /* callout kstats */ 375 int ct_nreap; /* # heap entries that need reaping */ 376 cyclic_id_t ct_qcyclic; /* cyclic for the callout queue */ 377 callout_hash_t ct_queue; /* overflow queue of callouts */ 378 #ifndef _LP64 379 char ct_pad[12]; /* cache alignment */ 380 #endif 381 /* 382 * This structure should be aligned to a 64-byte (cache-line) 383 * boundary. Make sure the padding is right for 32-bit as well 384 * as 64-bit kernels. 385 */ 386 } callout_table_t; 387 388 /* 389 * Short hand definitions for the callout kstats. 390 */ 391 #define ct_timeouts \ 392 ct_kstat_data[CALLOUT_TIMEOUTS].value.ui64 393 #define ct_timeouts_pending \ 394 ct_kstat_data[CALLOUT_TIMEOUTS_PENDING].value.ui64 395 #define ct_untimeouts_unexpired \ 396 ct_kstat_data[CALLOUT_UNTIMEOUTS_UNEXPIRED].value.ui64 397 #define ct_untimeouts_executing \ 398 ct_kstat_data[CALLOUT_UNTIMEOUTS_EXECUTING].value.ui64 399 #define ct_untimeouts_expired \ 400 ct_kstat_data[CALLOUT_UNTIMEOUTS_EXPIRED].value.ui64 401 #define ct_expirations \ 402 ct_kstat_data[CALLOUT_EXPIRATIONS].value.ui64 403 #define ct_allocations \ 404 ct_kstat_data[CALLOUT_ALLOCATIONS].value.ui64 405 #define ct_cleanups \ 406 ct_kstat_data[CALLOUT_CLEANUPS].value.ui64 407 408 /* 409 * CALLOUT_CHUNK is the minimum initial size of each heap, and the amount 410 * by which a full heap is expanded to make room for new entries. 411 */ 412 #define CALLOUT_CHUNK (PAGESIZE / sizeof (callout_heap_t)) 413 414 /* 415 * CALLOUT_MIN_HEAP_SIZE defines the minimum size for the callout heap for 416 * the whole system. 417 */ 418 #define CALLOUT_MIN_HEAP_SIZE (64 * 1024 * sizeof (callout_heap_t)) 419 420 /* 421 * CALLOUT_MEM_FRACTION defines the fraction of available physical memory that 422 * can be allocated towards the callout heap for the whole system. 423 */ 424 #define CALLOUT_MEM_FRACTION 4096 425 426 #define CALLOUT_HEAP_PARENT(index) (((index) - 1) >> 1) 427 #define CALLOUT_HEAP_RIGHT(index) (((index) + 1) << 1) 428 #define CALLOUT_HEAP_LEFT(index) ((((index) + 1) << 1) - 1) 429 430 #define CALLOUT_TCP_RESOLUTION 10000000ULL 431 432 #define CALLOUT_ALIGN 64 /* cache line size */ 433 434 #ifdef _LP64 435 #define CALLOUT_MAX_TICKS NSEC_TO_TICK(CY_INFINITY); 436 #else 437 #define CALLOUT_MAX_TICKS LONG_MAX 438 #endif 439 440 #define CALLOUT_TOLERANCE 200000 /* nanoseconds */ 441 442 extern void callout_init(void); 443 extern void membar_sync(void); 444 extern void callout_cpu_online(cpu_t *); 445 extern void callout_cpu_offline(cpu_t *); 446 extern void callout_hrestime(void); 447 448 #endif 449 450 #ifdef __cplusplus 451 } 452 #endif 453 454 #endif /* _SYS_CALLO_H */ 455