xref: /freebsd/sys/kern/sys_timerfd.c (revision 1edb7116)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
5  * Copyright (c) 2023 Jake Freeland <jfree@FreeBSD.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/callout.h>
32 #include <sys/fcntl.h>
33 #include <sys/file.h>
34 #include <sys/filedesc.h>
35 #include <sys/filio.h>
36 #include <sys/kernel.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mount.h>
40 #include <sys/mutex.h>
41 #include <sys/poll.h>
42 #include <sys/proc.h>
43 #include <sys/queue.h>
44 #include <sys/selinfo.h>
45 #include <sys/stat.h>
46 #include <sys/sx.h>
47 #include <sys/syscallsubr.h>
48 #include <sys/sysctl.h>
49 #include <sys/sysent.h>
50 #include <sys/sysproto.h>
51 #include <sys/timerfd.h>
52 #include <sys/timespec.h>
53 #include <sys/uio.h>
54 #include <sys/user.h>
55 
56 #include <security/audit/audit.h>
57 
/* Malloc type used for every struct timerfd allocated and freed in this file. */
static MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures");

/*
 * List of all existing timerfds, linked through their `entry' field.
 * Entries are inserted by kern_timerfd_create(), removed by
 * timerfd_close() and walked by timerfd_jumped(), always with
 * timerfd_list_lock held.
 */
static struct mtx timerfd_list_lock;
static LIST_HEAD(, timerfd) timerfd_list;
MTX_SYSINIT(timerfd, &timerfd_list_lock, "timerfd_list_lock", MTX_DEF);

/* Allocator for the per-timerfd inode numbers (tfd_ino) shown by stat(2). */
static struct unrhdr64 tfdino_unr;
65 
/*
 * Values kept in tfd_jumped.  They record whether a jump of the realtime
 * clock has hit the timer and whether timerfd_read() has reported it yet.
 * TFD_JUMPED is the mask of the two "jump not yet reported" states.
 */
#define	TFD_NOJUMP	0	/* Realtime clock has not jumped. */
#define	TFD_READ	1	/* Jumped, tfd has been read since. */
#define	TFD_ZREAD	2	/* Jumped backwards, CANCEL_ON_SET=false. */
#define	TFD_CANCELED	4	/* Jumped, CANCEL_ON_SET=true. */
#define	TFD_JUMPED	(TFD_ZREAD | TFD_CANCELED)
71 
/*
 * One structure allocated per timerfd descriptor.
 *
 * tfd_time.it_value is kept as an absolute uptime value (it is handed
 * to the callout with C_ABSOLUTE); zero means the timer is disarmed.
 *
 * Locking semantics:
 * (t)	locked by tfd_lock mtx
 * (l)	locked by timerfd_list_lock mtx
 * (c)	const until freeing
 */
struct timerfd {
	/* User specified. */
	struct itimerspec tfd_time;	/* (t) tfd timer */
	clockid_t	tfd_clockid;	/* (c) timing base */
	int		tfd_flags;	/* (c) creation flags */
	int		tfd_timflags;	/* (t) timer flags */

	/* Used internally. */
	timerfd_t	tfd_count;	/* (t) expiration count since read */
	bool		tfd_expired;	/* (t) true upon initial expiration */
	struct mtx	tfd_lock;	/* tfd mtx lock */
	struct callout	tfd_callout;	/* (t) expiration notification */
	struct selinfo	tfd_sel;	/* (t) I/O alerts */
	struct timespec	tfd_boottim;	/* (t) cached boottime */
	int		tfd_jumped;	/* (t) timer jump status */
	LIST_ENTRY(timerfd) entry;	/* (l) entry in list */

	/* For stat(2). */
	ino_t		tfd_ino;	/* (c) inode number */
	struct timespec	tfd_atim;	/* (t) time of last read */
	struct timespec	tfd_mtim;	/* (t) time of last settime */
	struct timespec tfd_birthtim;	/* (c) creation time */
};
103 
/*
 * One-time setup, registered through SYSINIT below: initialize the
 * allocator that kern_timerfd_create() draws timerfd inode numbers from.
 * The `data' argument is unused.
 */
static void
timerfd_init(void *data)
{
	new_unrhdr64(&tfdino_unr, 1);
}

/* Run timerfd_init(NULL) at SI_SUB_VFS / SI_ORDER_ANY. */
SYSINIT(timerfd, SI_SUB_VFS, SI_ORDER_ANY, timerfd_init, NULL);
111 
112 static inline void
113 timerfd_getboottime(struct timespec *ts)
114 {
115 	struct timeval tv;
116 
117 	getboottime(&tv);
118 	TIMEVAL_TO_TIMESPEC(&tv, ts);
119 }
120 
/*
 * Call when a discontinuous jump has occurred in CLOCK_REALTIME and
 * update timerfd's cached boottime. A jump can be triggered using
 * functions like clock_settime(2) or settimeofday(2).
 *
 * Timerfds that are not CLOCK_REALTIME, were not last set with
 * TFD_TIMER_ABSTIME, or whose cached boottime already equals the
 * current boottime are skipped; their cached boottime is left untouched.
 *
 * Timer is marked TFD_CANCELED if TFD_TIMER_CANCEL_ON_SET is set
 * and the realtime clock jumps.
 * Timer is marked TFD_ZREAD if TFD_TIMER_CANCEL_ON_SET is not set,
 * but the realtime clock jumps backwards.
 */
void
timerfd_jumped(void)
{
	struct timerfd *tfd;
	struct timespec boottime, diff;

	/* Unlocked fast path: nothing to do when no timerfd exists. */
	if (LIST_EMPTY(&timerfd_list))
		return;

	timerfd_getboottime(&boottime);
	mtx_lock(&timerfd_list_lock);
	LIST_FOREACH(tfd, &timerfd_list, entry) {
		mtx_lock(&tfd->tfd_lock);
		/* Only absolute realtime timers with a stale boottime matter. */
		if (tfd->tfd_clockid != CLOCK_REALTIME ||
		    (tfd->tfd_timflags & TFD_TIMER_ABSTIME) == 0 ||
		    timespeccmp(&boottime, &tfd->tfd_boottim, ==)) {
			mtx_unlock(&tfd->tfd_lock);
			continue;
		}

		if (callout_active(&tfd->tfd_callout)) {
			/*
			 * A smaller boottime than the cached one is treated
			 * as a backwards jump of the realtime clock.
			 */
			if ((tfd->tfd_timflags & TFD_TIMER_CANCEL_ON_SET) != 0)
				tfd->tfd_jumped = TFD_CANCELED;
			else if (timespeccmp(&boottime, &tfd->tfd_boottim, <))
				tfd->tfd_jumped = TFD_ZREAD;

			/*
			 * Do not reschedule callout when
			 * inside interval time loop.
			 */
			if (!tfd->tfd_expired) {
				/*
				 * Shift the uptime-based expiration by the
				 * change in boottime so that it still matches
				 * the requested realtime instant.
				 */
				timespecsub(&boottime,
				    &tfd->tfd_boottim, &diff);
				timespecsub(&tfd->tfd_time.it_value,
				    &diff, &tfd->tfd_time.it_value);
				/*
				 * Re-arm only when callout_stop() returns 1,
				 * i.e. a pending callout was actually stopped.
				 */
				if (callout_stop(&tfd->tfd_callout) == 1) {
					callout_schedule_sbt(&tfd->tfd_callout,
					    tstosbt(tfd->tfd_time.it_value),
					    0, C_ABSOLUTE);
				}
			}
		}

		/* Remember the boottime this timer is now based on. */
		tfd->tfd_boottim = boottime;
		mtx_unlock(&tfd->tfd_lock);
	}
	mtx_unlock(&timerfd_list_lock);
}
179 
180 static int
181 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
182     int flags, struct thread *td)
183 {
184 	struct timerfd *tfd = fp->f_data;
185 	timerfd_t count;
186 	int error = 0;
187 
188 	if (uio->uio_resid < sizeof(timerfd_t))
189 		return (EINVAL);
190 
191 	mtx_lock(&tfd->tfd_lock);
192 retry:
193 	getnanotime(&tfd->tfd_atim);
194 	if ((tfd->tfd_jumped & TFD_JUMPED) != 0) {
195 		if (tfd->tfd_jumped == TFD_CANCELED)
196 			error = ECANCELED;
197 		tfd->tfd_jumped = TFD_READ;
198 		tfd->tfd_count = 0;
199 		mtx_unlock(&tfd->tfd_lock);
200 		return (error);
201 	} else {
202 		tfd->tfd_jumped = TFD_NOJUMP;
203 	}
204 	if (tfd->tfd_count == 0) {
205 		if ((fp->f_flag & FNONBLOCK) != 0) {
206 			mtx_unlock(&tfd->tfd_lock);
207 			return (EAGAIN);
208 		}
209 		td->td_rtcgen = atomic_load_acq_int(&rtc_generation);
210 		error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock,
211 		    PCATCH, "tfdrd", 0);
212 		if (error == 0) {
213 			goto retry;
214 		} else {
215 			mtx_unlock(&tfd->tfd_lock);
216 			return (error);
217 		}
218 	}
219 
220 	count = tfd->tfd_count;
221 	tfd->tfd_count = 0;
222 	mtx_unlock(&tfd->tfd_lock);
223 	error = uiomove(&count, sizeof(timerfd_t), uio);
224 
225 	return (error);
226 }
227 
228 static int
229 timerfd_ioctl(struct file *fp, u_long cmd, void *data,
230     struct ucred *active_cred, struct thread *td)
231 {
232 	switch (cmd) {
233 	case FIOASYNC:
234 		if (*(int *)data != 0)
235 			atomic_set_int(&fp->f_flag, FASYNC);
236 		else
237 			atomic_clear_int(&fp->f_flag, FASYNC);
238 		return (0);
239 	case FIONBIO:
240 		if (*(int *)data != 0)
241 			atomic_set_int(&fp->f_flag, FNONBLOCK);
242 		else
243 			atomic_clear_int(&fp->f_flag, FNONBLOCK);
244 		return (0);
245 	}
246 	return (ENOTTY);
247 }
248 
249 static int
250 timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
251     struct thread *td)
252 {
253 	struct timerfd *tfd = fp->f_data;
254 	int revents = 0;
255 
256 	mtx_lock(&tfd->tfd_lock);
257 	if ((events & (POLLIN | POLLRDNORM)) != 0 &&
258 	    tfd->tfd_count > 0 && tfd->tfd_jumped != TFD_READ)
259 		revents |= events & (POLLIN | POLLRDNORM);
260 	if (revents == 0)
261 		selrecord(td, &tfd->tfd_sel);
262 	mtx_unlock(&tfd->tfd_lock);
263 
264 	return (revents);
265 }
266 
267 static void
268 filt_timerfddetach(struct knote *kn)
269 {
270 	struct timerfd *tfd = kn->kn_hook;
271 
272 	mtx_lock(&tfd->tfd_lock);
273 	knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
274 	mtx_unlock(&tfd->tfd_lock);
275 }
276 
277 static int
278 filt_timerfdread(struct knote *kn, long hint)
279 {
280 	struct timerfd *tfd = kn->kn_hook;
281 
282 	mtx_assert(&tfd->tfd_lock, MA_OWNED);
283 	kn->kn_data = (int64_t)tfd->tfd_count;
284 	return (tfd->tfd_count > 0);
285 }
286 
/*
 * Filter operations installed by timerfd_kqfilter() for EVFILT_READ
 * knotes attached to a timerfd.
 */
static struct filterops timerfd_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_timerfddetach,
	.f_event = filt_timerfdread,
};
292 
293 static int
294 timerfd_kqfilter(struct file *fp, struct knote *kn)
295 {
296 	struct timerfd *tfd = fp->f_data;
297 
298 	if (kn->kn_filter != EVFILT_READ)
299 		return (EINVAL);
300 
301 	kn->kn_fop = &timerfd_rfiltops;
302 	kn->kn_hook = tfd;
303 	knlist_add(&tfd->tfd_sel.si_note, kn, 0);
304 
305 	return (0);
306 }
307 
308 static int
309 timerfd_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
310 {
311 	struct timerfd *tfd = fp->f_data;
312 
313 	bzero(sb, sizeof(*sb));
314 	sb->st_nlink = fp->f_count - 1;
315 	sb->st_uid = fp->f_cred->cr_uid;
316 	sb->st_gid = fp->f_cred->cr_gid;
317 	sb->st_blksize = PAGE_SIZE;
318 	mtx_lock(&tfd->tfd_lock);
319 	sb->st_atim = tfd->tfd_atim;
320 	sb->st_mtim = tfd->tfd_mtim;
321 	mtx_unlock(&tfd->tfd_lock);
322 	sb->st_ctim = sb->st_mtim;
323 	sb->st_ino = tfd->tfd_ino;
324 	sb->st_birthtim = tfd->tfd_birthtim;
325 
326 	return (0);
327 }
328 
/*
 * fo_close handler: tear down and free the timerfd behind `fp'.
 * The order matters: the timerfd is unlinked from the global list
 * first so timerfd_jumped() can no longer reach it, then the callout
 * and the select/kqueue state are shut down, and only afterwards are
 * the mutex and the structure itself destroyed.  Always returns 0.
 */
static int
timerfd_close(struct file *fp, struct thread *td)
{
	struct timerfd *tfd = fp->f_data;

	/* Make the timerfd invisible to timerfd_jumped(). */
	mtx_lock(&timerfd_list_lock);
	LIST_REMOVE(tfd, entry);
	mtx_unlock(&timerfd_list_lock);

	/* Stop the timer; called without tfd_lock held. */
	callout_drain(&tfd->tfd_callout);
	seldrain(&tfd->tfd_sel);
	knlist_destroy(&tfd->tfd_sel.si_note);
	mtx_destroy(&tfd->tfd_lock);
	free(tfd, M_TIMERFD);
	/* The file no longer has timerfd semantics. */
	fp->f_ops = &badfileops;

	return (0);
}
347 
348 static int
349 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif,
350     struct filedesc *fdp)
351 {
352 	struct timerfd *tfd = fp->f_data;
353 
354 	kif->kf_type = KF_TYPE_TIMERFD;
355 	kif->kf_un.kf_timerfd.kf_timerfd_clockid = tfd->tfd_clockid;
356 	kif->kf_un.kf_timerfd.kf_timerfd_flags = tfd->tfd_flags;
357 	kif->kf_un.kf_timerfd.kf_timerfd_addr = (uintptr_t)tfd;
358 
359 	return (0);
360 }
361 
/*
 * File operations vector installed on every timerfd by
 * kern_timerfd_create().  Read, ioctl, poll, kqfilter, stat, close and
 * fill_kinfo are implemented in this file; write, truncate, chmod,
 * chown and sendfile are routed to the generic invfo_* handlers
 * (presumably rejecting the operation -- see their definitions).
 */
static struct fileops timerfdops = {
	.fo_read = timerfd_read,
	.fo_write = invfo_rdwr,
	.fo_truncate = invfo_truncate,
	.fo_ioctl = timerfd_ioctl,
	.fo_poll = timerfd_poll,
	.fo_kqfilter = timerfd_kqfilter,
	.fo_stat = timerfd_stat,
	.fo_close = timerfd_close,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
	.fo_sendfile = invfo_sendfile,
	.fo_fill_kinfo = timerfd_fill_kinfo,
	.fo_cmp = file_kcmp_generic,
	.fo_flags = DFLAG_PASSABLE,
};
378 
379 static void
380 timerfd_curval(struct timerfd *tfd, struct itimerspec *old_value)
381 {
382 	struct timespec curr_value;
383 
384 	mtx_assert(&tfd->tfd_lock, MA_OWNED);
385 	*old_value = tfd->tfd_time;
386 	if (timespecisset(&tfd->tfd_time.it_value)) {
387 		nanouptime(&curr_value);
388 		timespecsub(&tfd->tfd_time.it_value, &curr_value,
389 		    &old_value->it_value);
390 	}
391 }
392 
393 static void
394 timerfd_expire(void *arg)
395 {
396 	struct timerfd *tfd = (struct timerfd *)arg;
397 	struct timespec uptime;
398 
399 	++tfd->tfd_count;
400 	tfd->tfd_expired = true;
401 	if (timespecisset(&tfd->tfd_time.it_interval)) {
402 		/* Count missed events. */
403 		nanouptime(&uptime);
404 		if (timespeccmp(&uptime, &tfd->tfd_time.it_value, >)) {
405 			timespecsub(&uptime, &tfd->tfd_time.it_value, &uptime);
406 			tfd->tfd_count += tstosbt(uptime) /
407 			    tstosbt(tfd->tfd_time.it_interval);
408 		}
409 		timespecadd(&tfd->tfd_time.it_value,
410 		    &tfd->tfd_time.it_interval, &tfd->tfd_time.it_value);
411 		callout_schedule_sbt(&tfd->tfd_callout,
412 		    tstosbt(tfd->tfd_time.it_value),
413 		    0, C_ABSOLUTE);
414 	} else {
415 		/* Single shot timer. */
416 		callout_deactivate(&tfd->tfd_callout);
417 		timespecclear(&tfd->tfd_time.it_value);
418 	}
419 
420 	wakeup(&tfd->tfd_count);
421 	selwakeup(&tfd->tfd_sel);
422 	KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
423 }
424 
425 int
426 kern_timerfd_create(struct thread *td, int clockid, int flags)
427 {
428 	struct file *fp;
429 	struct timerfd *tfd;
430 	int error, fd, fflags;
431 
432 	AUDIT_ARG_VALUE(clockid);
433 	AUDIT_ARG_FFLAGS(flags);
434 
435 	if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
436 		return (EINVAL);
437 	if ((flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) != 0)
438 		return (EINVAL);
439 
440 	fflags = FREAD;
441 	if ((flags & TFD_CLOEXEC) != 0)
442 		fflags |= O_CLOEXEC;
443 	if ((flags & TFD_NONBLOCK) != 0)
444 		fflags |= FNONBLOCK;
445 
446 	error = falloc(td, &fp, &fd, fflags);
447 	if (error != 0)
448 		return (error);
449 
450 	tfd = malloc(sizeof(*tfd), M_TIMERFD, M_WAITOK | M_ZERO);
451 	tfd->tfd_clockid = (clockid_t)clockid;
452 	tfd->tfd_flags = flags;
453 	tfd->tfd_ino = alloc_unr64(&tfdino_unr);
454 	mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);
455 	callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
456 	knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);
457 	timerfd_getboottime(&tfd->tfd_boottim);
458 	getnanotime(&tfd->tfd_birthtim);
459 	mtx_lock(&timerfd_list_lock);
460 	LIST_INSERT_HEAD(&timerfd_list, tfd, entry);
461 	mtx_unlock(&timerfd_list_lock);
462 
463 	finit(fp, fflags, DTYPE_TIMERFD, tfd, &timerfdops);
464 
465 	fdrop(fp, td);
466 
467 	td->td_retval[0] = fd;
468 	return (0);
469 }
470 
471 int
472 kern_timerfd_gettime(struct thread *td, int fd, struct itimerspec *curr_value)
473 {
474 	struct file *fp;
475 	struct timerfd *tfd;
476 	int error;
477 
478 	error = fget(td, fd, &cap_write_rights, &fp);
479 	if (error != 0)
480 		return (error);
481 	if (fp->f_type != DTYPE_TIMERFD) {
482 		fdrop(fp, td);
483 		return (EINVAL);
484 	}
485 	tfd = fp->f_data;
486 
487 	mtx_lock(&tfd->tfd_lock);
488 	timerfd_curval(tfd, curr_value);
489 	mtx_unlock(&tfd->tfd_lock);
490 
491 	fdrop(fp, td);
492 	return (0);
493 }
494 
/*
 * Arm or disarm the timerfd referred to by `fd'.
 *
 * `flags' may contain TFD_TIMER_ABSTIME and TFD_TIMER_CANCEL_ON_SET.
 * `new_value' is the new setting; a zero it_value disarms the timer.
 * If `old_value' is not NULL it receives the previous setting (as
 * produced by timerfd_curval()) before the new one is installed.
 *
 * Returns 0 on success, EINVAL for unknown flags, invalid timespecs or
 * a descriptor that is not a timerfd, the fget() error on lookup
 * failure, or ECANCELED when an absolute CLOCK_REALTIME timer is set
 * while an unread TFD_CANCELED jump is pending.  In the ECANCELED case
 * the new setting is still installed and the jump state is reset.
 */
int
kern_timerfd_settime(struct thread *td, int fd, int flags,
    const struct itimerspec *new_value, struct itimerspec *old_value)
{
	struct file *fp;
	struct timerfd *tfd;
	struct timespec ts;
	int error = 0;

	/* Validate the arguments before touching the descriptor. */
	if ((flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) != 0)
		return (EINVAL);
	if (!timespecvalid_interval(&new_value->it_value) ||
	    !timespecvalid_interval(&new_value->it_interval))
		return (EINVAL);

	error = fget(td, fd, &cap_write_rights, &fp);
	if (error != 0)
		return (error);
	if (fp->f_type != DTYPE_TIMERFD) {
		fdrop(fp, td);
		return (EINVAL);
	}
	tfd = fp->f_data;

	mtx_lock(&tfd->tfd_lock);
	getnanotime(&tfd->tfd_mtim);
	tfd->tfd_timflags = flags;

	/* Store old itimerspec, if applicable. */
	if (old_value != NULL)
		timerfd_curval(tfd, old_value);

	/*
	 * Set new expiration.  it_value is converted to an absolute
	 * uptime value, which is what the callout is armed with.
	 */
	tfd->tfd_time = *new_value;
	if (timespecisset(&tfd->tfd_time.it_value)) {
		if ((flags & TFD_TIMER_ABSTIME) == 0) {
			/* Relative timer: expiration = now + it_value. */
			nanouptime(&ts);
			timespecadd(&tfd->tfd_time.it_value, &ts,
			    &tfd->tfd_time.it_value);
		} else if (tfd->tfd_clockid == CLOCK_REALTIME) {
			/* ECANCELED if unread jump is pending. */
			if (tfd->tfd_jumped == TFD_CANCELED)
				error = ECANCELED;
			/*
			 * Convert from CLOCK_REALTIME to CLOCK_BOOTTIME.
			 *
			 * NOTE(review): tfd_boottim is only refreshed by
			 * timerfd_jumped() for timers it does not skip, so
			 * it may be stale here if the clock jumped while
			 * this timer was not an absolute realtime timer --
			 * confirm.
			 */
			timespecsub(&tfd->tfd_time.it_value, &tfd->tfd_boottim,
			    &tfd->tfd_time.it_value);
		}
		/* Absolute non-realtime values are used unchanged. */
		callout_reset_sbt(&tfd->tfd_callout,
		    tstosbt(tfd->tfd_time.it_value),
		    0, timerfd_expire, tfd, C_ABSOLUTE);
	} else {
		/* Zero it_value: disarm. */
		callout_stop(&tfd->tfd_callout);
	}
	/* A (re)set timer starts with a clean slate. */
	tfd->tfd_count = 0;
	tfd->tfd_expired = false;
	tfd->tfd_jumped = TFD_NOJUMP;
	mtx_unlock(&tfd->tfd_lock);

	fdrop(fp, td);
	return (error);
}
556 
557 int
558 sys_timerfd_create(struct thread *td, struct timerfd_create_args *uap)
559 {
560 	return (kern_timerfd_create(td, uap->clockid, uap->flags));
561 }
562 
563 int
564 sys_timerfd_gettime(struct thread *td, struct timerfd_gettime_args *uap)
565 {
566 	struct itimerspec curr_value;
567 	int error;
568 
569 	error = kern_timerfd_gettime(td, uap->fd, &curr_value);
570 	if (error == 0)
571 		error = copyout(&curr_value, uap->curr_value,
572 		    sizeof(curr_value));
573 
574 	return (error);
575 }
576 
577 int
578 sys_timerfd_settime(struct thread *td, struct timerfd_settime_args *uap)
579 {
580 	struct itimerspec new_value, old_value;
581 	int error;
582 
583 	error = copyin(uap->new_value, &new_value, sizeof(new_value));
584 	if (error != 0)
585 		return (error);
586 	if (uap->old_value == NULL) {
587 		error = kern_timerfd_settime(td, uap->fd, uap->flags,
588 		    &new_value, NULL);
589 	} else {
590 		error = kern_timerfd_settime(td, uap->fd, uap->flags,
591 		    &new_value, &old_value);
592 		if (error == 0)
593 			error = copyout(&old_value, uap->old_value,
594 			    sizeof(old_value));
595 	}
596 	return (error);
597 }
598