1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2003 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright 2016, Joyent Inc. 29 */ 30 31 #include <sys/timer.h> 32 #include <sys/systm.h> 33 #include <sys/param.h> 34 #include <sys/kmem.h> 35 #include <sys/debug.h> 36 #include <sys/cyclic.h> 37 #include <sys/cmn_err.h> 38 #include <sys/pset.h> 39 #include <sys/atomic.h> 40 #include <sys/policy.h> 41 42 static clock_backend_t clock_highres; 43 44 /* minimum non-privileged interval (200us) */ 45 long clock_highres_interval_min = 200000; 46 47 /*ARGSUSED*/ 48 static int 49 clock_highres_settime(timespec_t *ts) 50 { 51 return (EINVAL); 52 } 53 54 static int 55 clock_highres_gettime(timespec_t *ts) 56 { 57 hrt2ts(gethrtime(), (timestruc_t *)ts); 58 59 return (0); 60 } 61 62 static int 63 clock_highres_getres(timespec_t *ts) 64 { 65 hrt2ts(cyclic_getres(), (timestruc_t *)ts); 66 67 return (0); 68 } 69 70 /*ARGSUSED*/ 71 static int 72 clock_highres_timer_create(itimer_t *it, void (*fire)(itimer_t *)) 73 { 74 it->it_arg = kmem_zalloc(sizeof (cyclic_id_t), KM_SLEEP); 75 it->it_fire = fire; 76 77 return (0); 78 } 79 80 static void 81 clock_highres_fire(void *arg) 82 { 83 itimer_t *it = (itimer_t *)arg; 84 hrtime_t *addr = &it->it_hrtime; 85 hrtime_t old = *addr, new = gethrtime(); 86 87 do { 88 old = *addr; 89 } while (atomic_cas_64((uint64_t *)addr, old, new) != old); 90 91 it->it_fire(it); 92 } 93 94 static int 95 clock_highres_timer_settime(itimer_t *it, int flags, 96 const struct itimerspec *when) 97 { 98 cyclic_id_t cyc, *cycp = it->it_arg; 99 proc_t *p = curproc; 100 kthread_t *t = curthread; 101 cyc_time_t cyctime; 102 cyc_handler_t hdlr; 103 cpu_t *cpu; 104 cpupart_t *cpupart; 105 int pset; 106 boolean_t value_need_clamp = B_FALSE; 107 boolean_t intval_need_clamp = B_FALSE; 108 cred_t *cr = CRED(); 109 struct itimerspec clamped; 110 111 /* 112 * CLOCK_HIGHRES timers of sufficiently high resolution can deny 113 * service; only allow privileged users to create such timers. 114 * Non-privileged users (those without the "proc_clock_highres" 115 * privilege) can create timers with lower resolution but if they 116 * attempt to use a very low time value (< 200us) then their 117 * timer will be clamped at 200us. 118 */ 119 if (when->it_value.tv_sec == 0 && 120 when->it_value.tv_nsec > 0 && 121 when->it_value.tv_nsec < clock_highres_interval_min) 122 value_need_clamp = B_TRUE; 123 124 if (when->it_interval.tv_sec == 0 && 125 when->it_interval.tv_nsec > 0 && 126 when->it_interval.tv_nsec < clock_highres_interval_min) 127 intval_need_clamp = B_TRUE; 128 129 if ((value_need_clamp || intval_need_clamp) && 130 secpolicy_clock_highres(cr) != 0) { 131 clamped.it_value.tv_sec = when->it_value.tv_sec; 132 clamped.it_interval.tv_sec = when->it_interval.tv_sec; 133 134 if (value_need_clamp) { 135 clamped.it_value.tv_nsec = clock_highres_interval_min; 136 } else { 137 clamped.it_value.tv_nsec = when->it_value.tv_nsec; 138 } 139 140 if (intval_need_clamp) { 141 clamped.it_interval.tv_nsec = 142 clock_highres_interval_min; 143 } else { 144 clamped.it_interval.tv_nsec = when->it_interval.tv_nsec; 145 } 146 147 when = &clamped; 148 } 149 150 cyctime.cyt_when = ts2hrt(&when->it_value); 151 cyctime.cyt_interval = ts2hrt(&when->it_interval); 152 153 if (cyctime.cyt_when != 0 && cyctime.cyt_interval == 0 && 154 it->it_itime.it_interval.tv_sec == 0 && 155 it->it_itime.it_interval.tv_nsec == 0 && 156 (cyc = *cycp) != CYCLIC_NONE) { 157 /* 158 * If our existing timer is a one-shot and our new timer is a 159 * one-shot, we'll save ourselves a world of grief and just 160 * reprogram the cyclic. 161 */ 162 it->it_itime = *when; 163 164 if (!(flags & TIMER_ABSTIME)) 165 cyctime.cyt_when += gethrtime(); 166 167 hrt2ts(cyctime.cyt_when, &it->it_itime.it_value); 168 (void) cyclic_reprogram(cyc, cyctime.cyt_when); 169 return (0); 170 } 171 172 mutex_enter(&cpu_lock); 173 if ((cyc = *cycp) != CYCLIC_NONE) { 174 cyclic_remove(cyc); 175 *cycp = CYCLIC_NONE; 176 } 177 178 if (cyctime.cyt_when == 0) { 179 mutex_exit(&cpu_lock); 180 return (0); 181 } 182 183 if (!(flags & TIMER_ABSTIME)) 184 cyctime.cyt_when += gethrtime(); 185 186 /* 187 * Now we will check for overflow (that is, we will check to see 188 * that the start time plus the interval time doesn't exceed 189 * INT64_MAX). The astute code reviewer will observe that this 190 * one-time check doesn't guarantee that a future expiration 191 * will not wrap. We wish to prove, then, that if a future 192 * expiration does wrap, the earliest the problem can be encountered 193 * is (INT64_MAX / 2) nanoseconds (191 years) after boot. Formally: 194 * 195 * Given: s + i < m s > 0 i > 0 196 * s + ni > m n > 1 197 * 198 * (where "s" is the start time, "i" is the interval, "n" is the 199 * number of times the cyclic has fired and "m" is INT64_MAX) 200 * 201 * Prove: 202 * (a) s + (n - 1)i > (m / 2) 203 * (b) s + (n - 1)i < m 204 * 205 * That is, prove that we must have fired at least once 191 years 206 * after boot. The proof is very straightforward; since the left 207 * side of (a) is minimized when i is small, it is sufficient to show 208 * that the statement is true for i's smallest possible value 209 * (((m - s) / n) + epsilon). The same goes for (b); showing that the 210 * statement is true for i's largest possible value (m - s + epsilon) 211 * is sufficient to prove the statement. 212 * 213 * The actual arithmetic manipulation is left up to reader. 214 */ 215 if (cyctime.cyt_when > INT64_MAX - cyctime.cyt_interval) { 216 mutex_exit(&cpu_lock); 217 return (EOVERFLOW); 218 } 219 220 if (cyctime.cyt_interval == 0) { 221 /* 222 * If this is a one-shot, then we set the interval to be 223 * inifinite. If this timer is never touched, this cyclic will 224 * simply consume space in the cyclic subsystem. As soon as 225 * timer_settime() or timer_delete() is called, the cyclic is 226 * removed (so it's not possible to run the machine out 227 * of resources by creating one-shots). 228 */ 229 cyctime.cyt_interval = CY_INFINITY; 230 } 231 232 it->it_itime = *when; 233 234 hrt2ts(cyctime.cyt_when, &it->it_itime.it_value); 235 236 hdlr.cyh_func = (cyc_func_t)clock_highres_fire; 237 hdlr.cyh_arg = it; 238 hdlr.cyh_level = CY_LOW_LEVEL; 239 240 if (cyctime.cyt_when != 0) 241 *cycp = cyc = cyclic_add(&hdlr, &cyctime); 242 243 /* 244 * Now that we have the cyclic created, we need to bind it to our 245 * bound CPU and processor set (if any). 246 */ 247 mutex_enter(&p->p_lock); 248 cpu = t->t_bound_cpu; 249 cpupart = t->t_cpupart; 250 pset = t->t_bind_pset; 251 252 mutex_exit(&p->p_lock); 253 254 cyclic_bind(cyc, cpu, pset == PS_NONE ? NULL : cpupart); 255 256 mutex_exit(&cpu_lock); 257 258 return (0); 259 } 260 261 static int 262 clock_highres_timer_gettime(itimer_t *it, struct itimerspec *when) 263 { 264 /* 265 * CLOCK_HIGHRES doesn't update it_itime. 266 */ 267 hrtime_t start = ts2hrt(&it->it_itime.it_value); 268 hrtime_t interval = ts2hrt(&it->it_itime.it_interval); 269 hrtime_t diff, now = gethrtime(); 270 hrtime_t *addr = &it->it_hrtime; 271 hrtime_t last; 272 273 /* 274 * We're using atomic_cas_64() here only to assure that we slurp the 275 * entire timestamp atomically. 276 */ 277 last = atomic_cas_64((uint64_t *)addr, 0, 0); 278 279 *when = it->it_itime; 280 281 if (!timerspecisset(&when->it_value)) 282 return (0); 283 284 if (start > now) { 285 /* 286 * We haven't gone off yet... 287 */ 288 diff = start - now; 289 } else { 290 if (interval == 0) { 291 /* 292 * This is a one-shot which should have already 293 * fired; set it_value to 0. 294 */ 295 timerspecclear(&when->it_value); 296 return (0); 297 } 298 299 /* 300 * Calculate how far we are into this interval. 301 */ 302 diff = (now - start) % interval; 303 304 /* 305 * Now check to see if we've dealt with the last interval 306 * yet. 307 */ 308 if (now - diff > last) { 309 /* 310 * The last interval hasn't fired; set it_value to 0. 311 */ 312 timerspecclear(&when->it_value); 313 return (0); 314 } 315 316 /* 317 * The last interval _has_ fired; we can return the amount 318 * of time left in this interval. 319 */ 320 diff = interval - diff; 321 } 322 323 hrt2ts(diff, &when->it_value); 324 325 return (0); 326 } 327 328 static int 329 clock_highres_timer_delete(itimer_t *it) 330 { 331 cyclic_id_t cyc; 332 333 if (it->it_arg == NULL) { 334 /* 335 * This timer was never fully created; we must have failed 336 * in the clock_highres_timer_create() routine. 337 */ 338 return (0); 339 } 340 341 mutex_enter(&cpu_lock); 342 343 if ((cyc = *((cyclic_id_t *)it->it_arg)) != CYCLIC_NONE) 344 cyclic_remove(cyc); 345 346 mutex_exit(&cpu_lock); 347 348 kmem_free(it->it_arg, sizeof (cyclic_id_t)); 349 350 return (0); 351 } 352 353 static void 354 clock_highres_timer_lwpbind(itimer_t *it) 355 { 356 proc_t *p = curproc; 357 kthread_t *t = curthread; 358 cyclic_id_t cyc = *((cyclic_id_t *)it->it_arg); 359 cpu_t *cpu; 360 cpupart_t *cpupart; 361 int pset; 362 363 if (cyc == CYCLIC_NONE) 364 return; 365 366 mutex_enter(&cpu_lock); 367 mutex_enter(&p->p_lock); 368 369 /* 370 * Okay, now we can safely look at the bindings. 371 */ 372 cpu = t->t_bound_cpu; 373 cpupart = t->t_cpupart; 374 pset = t->t_bind_pset; 375 376 /* 377 * Now we drop p_lock. We haven't dropped cpu_lock; we're guaranteed 378 * that even if the bindings change, the CPU and/or processor set 379 * that this timer was bound to remain valid (and the combination 380 * remains self-consistent). 381 */ 382 mutex_exit(&p->p_lock); 383 384 cyclic_bind(cyc, cpu, pset == PS_NONE ? NULL : cpupart); 385 386 mutex_exit(&cpu_lock); 387 } 388 389 void 390 clock_highres_init() 391 { 392 clock_backend_t *be = &clock_highres; 393 struct sigevent *ev = &be->clk_default; 394 395 ev->sigev_signo = SIGALRM; 396 ev->sigev_notify = SIGEV_SIGNAL; 397 ev->sigev_value.sival_ptr = NULL; 398 399 be->clk_clock_settime = clock_highres_settime; 400 be->clk_clock_gettime = clock_highres_gettime; 401 be->clk_clock_getres = clock_highres_getres; 402 be->clk_timer_create = clock_highres_timer_create; 403 be->clk_timer_gettime = clock_highres_timer_gettime; 404 be->clk_timer_settime = clock_highres_timer_settime; 405 be->clk_timer_delete = clock_highres_timer_delete; 406 be->clk_timer_lwpbind = clock_highres_timer_lwpbind; 407 408 clock_add_backend(CLOCK_HIGHRES, &clock_highres); 409 } 410