xref: /freebsd/sys/kern/uipc_sem.c (revision f05cddf9)
1 /*-
2  * Copyright (c) 2002 Alfred Perlstein <alfred@FreeBSD.org>
3  * Copyright (c) 2003-2005 SPARTA, Inc.
4  * Copyright (c) 2005 Robert N. M. Watson
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project in part by Network
8  * Associates Laboratories, the Security Research Division of Network
9  * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"),
10  * as part of the DARPA CHATS research program.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include "opt_compat.h"
38 #include "opt_posix.h"
39 
40 #include <sys/param.h>
41 #include <sys/capability.h>
42 #include <sys/condvar.h>
43 #include <sys/fcntl.h>
44 #include <sys/file.h>
45 #include <sys/filedesc.h>
46 #include <sys/fnv_hash.h>
47 #include <sys/kernel.h>
48 #include <sys/ksem.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/module.h>
52 #include <sys/mutex.h>
53 #include <sys/priv.h>
54 #include <sys/proc.h>
55 #include <sys/posix4.h>
56 #include <sys/_semaphore.h>
57 #include <sys/stat.h>
58 #include <sys/syscall.h>
59 #include <sys/syscallsubr.h>
60 #include <sys/sysctl.h>
61 #include <sys/sysent.h>
62 #include <sys/sysproto.h>
63 #include <sys/systm.h>
64 #include <sys/sx.h>
65 #include <sys/vnode.h>
66 
67 #include <security/mac/mac_framework.h>
68 
69 FEATURE(p1003_1b_semaphores, "POSIX P1003.1B semaphores support");
70 /*
71  * TODO
72  *
73  * - Resource limits?
74  * - Replace global sem_lock with mtx_pool locks?
75  * - Add a MAC check_create() hook for creating new named semaphores.
76  */
77 
/*
 * Default ceiling on the number of kernel semaphores.  Any earlier
 * definition of SEM_MAX takes precedence.  Module initialization
 * publishes this value as CTL_P1003_1B_SEM_NSEMS_MAX.
 */
78 #ifndef SEM_MAX
79 #define	SEM_MAX	30
80 #endif
81 
/*
 * Debug tracing.  DP((fmt, ...)) expands to a printf call when SEM_DEBUG
 * is defined and to nothing otherwise; note the doubled parentheses.
 */
82 #ifdef SEM_DEBUG
83 #define	DP(x)	printf x
84 #else
85 #define	DP(x)
86 #endif
87 
/*
 * One entry of the named-semaphore dictionary: associates a path string
 * with the semaphore it names.  Entries live on per-bucket lists chained
 * through km_link (see KSEM_HASH).
 */
88 struct ksem_mapping {
89 	char		*km_path;	/* path string; freed with M_KSEM on removal */
90 	Fnv32_t		km_fnv;		/* FNV hash of km_path, compared before strcmp */
91 	struct ksem	*km_ksem;	/* semaphore; the mapping holds a reference */
92 	LIST_ENTRY(ksem_mapping) km_link;	/* hash bucket linkage */
93 };
94 
/*
 * File-scope state.
 *
 * ksem_dictionary / ksem_hash - hash table of ksem_mapping lists and the
 *	mask returned by hashinit(); accessed with ksem_dict_lock held.
 * ksem_count_lock - guards nsems and ksem_dead.
 * sem_lock - single mutex held around accesses to per-semaphore value,
 *	waiter count, timestamps, mode and ownership.
 * ksem_dead - set when the module is unloading; makes ksem_alloc() fail.
 */
95 static MALLOC_DEFINE(M_KSEM, "ksem", "semaphore file descriptor");
96 static LIST_HEAD(, ksem_mapping) *ksem_dictionary;
97 static struct sx ksem_dict_lock;
98 static struct mtx ksem_count_lock;
99 static struct mtx sem_lock;
100 static u_long ksem_hash;
101 static int ksem_dead;
102 
/* Select the dictionary bucket for a given FNV hash value. */
103 #define	KSEM_HASH(fnv)	(&ksem_dictionary[(fnv) & ksem_hash])
104 
/* Count of live ksem objects, exported read-only as p1003_1b.nsems. */
105 static int nsems = 0;
106 SYSCTL_DECL(_p1003_1b);
107 SYSCTL_INT(_p1003_1b, OID_AUTO, nsems, CTLFLAG_RD, &nsems, 0,
108     "Number of active kernel POSIX semaphores");
109 
/* Forward declarations of the file-local helpers defined below. */
110 static int	kern_sem_wait(struct thread *td, semid_t id, int tryflag,
111 		    struct timespec *abstime);
112 static int	ksem_access(struct ksem *ks, struct ucred *ucred);
113 static struct ksem *ksem_alloc(struct ucred *ucred, mode_t mode,
114 		    unsigned int value);
115 static int	ksem_create(struct thread *td, const char *path,
116 		    semid_t *semidp, mode_t mode, unsigned int value,
117 		    int flags, int compat32);
118 static void	ksem_drop(struct ksem *ks);
119 static int	ksem_get(struct thread *td, semid_t id, cap_rights_t rights,
120     struct file **fpp);
121 static struct ksem *ksem_hold(struct ksem *ks);
122 static void	ksem_insert(char *path, Fnv32_t fnv, struct ksem *ks);
123 static struct ksem *ksem_lookup(char *path, Fnv32_t fnv);
124 static void	ksem_module_destroy(void);
125 static int	ksem_module_init(void);
126 static int	ksem_remove(char *path, Fnv32_t fnv, struct ucred *ucred);
127 static int	sem_modload(struct module *module, int cmd, void *arg);
128 
/*
 * File operation handlers for semaphore descriptors.  Only stat, close,
 * chmod and chown do real work; the remaining handlers return an error.
 */
129 static fo_rdwr_t	ksem_read;
130 static fo_rdwr_t	ksem_write;
131 static fo_truncate_t	ksem_truncate;
132 static fo_ioctl_t	ksem_ioctl;
133 static fo_poll_t	ksem_poll;
134 static fo_kqfilter_t	ksem_kqfilter;
135 static fo_stat_t	ksem_stat;
136 static fo_close_t	ksem_closef;
137 static fo_chmod_t	ksem_chmod;
138 static fo_chown_t	ksem_chown;
139 
140 /* File descriptor operations. */
/* This table is handed to finit() for every semaphore file in ksem_create(). */
141 static struct fileops ksem_ops = {
142 	.fo_read = ksem_read,
143 	.fo_write = ksem_write,
144 	.fo_truncate = ksem_truncate,
145 	.fo_ioctl = ksem_ioctl,
146 	.fo_poll = ksem_poll,
147 	.fo_kqfilter = ksem_kqfilter,
148 	.fo_stat = ksem_stat,
149 	.fo_close = ksem_closef,
150 	.fo_chmod = ksem_chmod,
151 	.fo_chown = ksem_chown,
152 	.fo_flags = DFLAG_PASSABLE
153 };
154 
155 FEATURE(posix_sem, "POSIX semaphores");
156 
157 static int
158 ksem_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
159     int flags, struct thread *td)
160 {
161 
162 	return (EOPNOTSUPP);
163 }
164 
165 static int
166 ksem_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
167     int flags, struct thread *td)
168 {
169 
170 	return (EOPNOTSUPP);
171 }
172 
173 static int
174 ksem_truncate(struct file *fp, off_t length, struct ucred *active_cred,
175     struct thread *td)
176 {
177 
178 	return (EINVAL);
179 }
180 
181 static int
182 ksem_ioctl(struct file *fp, u_long com, void *data,
183     struct ucred *active_cred, struct thread *td)
184 {
185 
186 	return (EOPNOTSUPP);
187 }
188 
189 static int
190 ksem_poll(struct file *fp, int events, struct ucred *active_cred,
191     struct thread *td)
192 {
193 
194 	return (EOPNOTSUPP);
195 }
196 
197 static int
198 ksem_kqfilter(struct file *fp, struct knote *kn)
199 {
200 
201 	return (EOPNOTSUPP);
202 }
203 
204 static int
205 ksem_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
206     struct thread *td)
207 {
208 	struct ksem *ks;
209 #ifdef MAC
210 	int error;
211 #endif
212 
213 	ks = fp->f_data;
214 
215 #ifdef MAC
216 	error = mac_posixsem_check_stat(active_cred, fp->f_cred, ks);
217 	if (error)
218 		return (error);
219 #endif
220 
221 	/*
222 	 * Attempt to return sanish values for fstat() on a semaphore
223 	 * file descriptor.
224 	 */
225 	bzero(sb, sizeof(*sb));
226 
227 	mtx_lock(&sem_lock);
228 	sb->st_atim = ks->ks_atime;
229 	sb->st_ctim = ks->ks_ctime;
230 	sb->st_mtim = ks->ks_mtime;
231 	sb->st_birthtim = ks->ks_birthtime;
232 	sb->st_uid = ks->ks_uid;
233 	sb->st_gid = ks->ks_gid;
234 	sb->st_mode = S_IFREG | ks->ks_mode;		/* XXX */
235 	mtx_unlock(&sem_lock);
236 
237 	return (0);
238 }
239 
240 static int
241 ksem_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
242     struct thread *td)
243 {
244 	struct ksem *ks;
245 	int error;
246 
247 	error = 0;
248 	ks = fp->f_data;
249 	mtx_lock(&sem_lock);
250 #ifdef MAC
251 	error = mac_posixsem_check_setmode(active_cred, ks, mode);
252 	if (error != 0)
253 		goto out;
254 #endif
255 	error = vaccess(VREG, ks->ks_mode, ks->ks_uid, ks->ks_gid, VADMIN,
256 	    active_cred, NULL);
257 	if (error != 0)
258 		goto out;
259 	ks->ks_mode = mode & ACCESSPERMS;
260 out:
261 	mtx_unlock(&sem_lock);
262 	return (error);
263 }
264 
265 static int
266 ksem_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
267     struct thread *td)
268 {
269 	struct ksem *ks;
270 	int error;
271 
272 	error = 0;
273 	ks = fp->f_data;
274 	mtx_lock(&sem_lock);
275 #ifdef MAC
276 	error = mac_posixsem_check_setowner(active_cred, ks, uid, gid);
277 	if (error != 0)
278 		goto out;
279 #endif
280 	if (uid == (uid_t)-1)
281 		uid = ks->ks_uid;
282 	if (gid == (gid_t)-1)
283                  gid = ks->ks_gid;
284 	if (((uid != ks->ks_uid && uid != active_cred->cr_uid) ||
285 	    (gid != ks->ks_gid && !groupmember(gid, active_cred))) &&
286 	    (error = priv_check_cred(active_cred, PRIV_VFS_CHOWN, 0)))
287 		goto out;
288 	ks->ks_uid = uid;
289 	ks->ks_gid = gid;
290 out:
291 	mtx_unlock(&sem_lock);
292 	return (error);
293 }
294 
295 static int
296 ksem_closef(struct file *fp, struct thread *td)
297 {
298 	struct ksem *ks;
299 
300 	ks = fp->f_data;
301 	fp->f_data = NULL;
302 	ksem_drop(ks);
303 
304 	return (0);
305 }
306 
307 /*
308  * ksem object management including creation and reference counting
309  * routines.
310  */
311 static struct ksem *
312 ksem_alloc(struct ucred *ucred, mode_t mode, unsigned int value)
313 {
314 	struct ksem *ks;
315 
316 	mtx_lock(&ksem_count_lock);
317 	if (nsems == p31b_getcfg(CTL_P1003_1B_SEM_NSEMS_MAX) || ksem_dead) {
318 		mtx_unlock(&ksem_count_lock);
319 		return (NULL);
320 	}
321 	nsems++;
322 	mtx_unlock(&ksem_count_lock);
323 	ks = malloc(sizeof(*ks), M_KSEM, M_WAITOK | M_ZERO);
324 	ks->ks_uid = ucred->cr_uid;
325 	ks->ks_gid = ucred->cr_gid;
326 	ks->ks_mode = mode;
327 	ks->ks_value = value;
328 	cv_init(&ks->ks_cv, "ksem");
329 	vfs_timestamp(&ks->ks_birthtime);
330 	ks->ks_atime = ks->ks_mtime = ks->ks_ctime = ks->ks_birthtime;
331 	refcount_init(&ks->ks_ref, 1);
332 #ifdef MAC
333 	mac_posixsem_init(ks);
334 	mac_posixsem_create(ucred, ks);
335 #endif
336 
337 	return (ks);
338 }
339 
340 static struct ksem *
341 ksem_hold(struct ksem *ks)
342 {
343 
344 	refcount_acquire(&ks->ks_ref);
345 	return (ks);
346 }
347 
348 static void
349 ksem_drop(struct ksem *ks)
350 {
351 
352 	if (refcount_release(&ks->ks_ref)) {
353 #ifdef MAC
354 		mac_posixsem_destroy(ks);
355 #endif
356 		cv_destroy(&ks->ks_cv);
357 		free(ks, M_KSEM);
358 		mtx_lock(&ksem_count_lock);
359 		nsems--;
360 		mtx_unlock(&ksem_count_lock);
361 	}
362 }
363 
364 /*
365  * Determine if the credentials have sufficient permissions for read
366  * and write access.
367  */
368 static int
369 ksem_access(struct ksem *ks, struct ucred *ucred)
370 {
371 	int error;
372 
373 	error = vaccess(VREG, ks->ks_mode, ks->ks_uid, ks->ks_gid,
374 	    VREAD | VWRITE, ucred, NULL);
375 	if (error)
376 		error = priv_check_cred(ucred, PRIV_SEM_WRITE, 0);
377 	return (error);
378 }
379 
380 /*
381  * Dictionary management.  We maintain an in-kernel dictionary to map
382  * paths to semaphore objects.  We use the FNV hash on the path to
383  * store the mappings in a hash table.
384  */
385 static struct ksem *
386 ksem_lookup(char *path, Fnv32_t fnv)
387 {
388 	struct ksem_mapping *map;
389 
390 	LIST_FOREACH(map, KSEM_HASH(fnv), km_link) {
391 		if (map->km_fnv != fnv)
392 			continue;
393 		if (strcmp(map->km_path, path) == 0)
394 			return (map->km_ksem);
395 	}
396 
397 	return (NULL);
398 }
399 
400 static void
401 ksem_insert(char *path, Fnv32_t fnv, struct ksem *ks)
402 {
403 	struct ksem_mapping *map;
404 
405 	map = malloc(sizeof(struct ksem_mapping), M_KSEM, M_WAITOK);
406 	map->km_path = path;
407 	map->km_fnv = fnv;
408 	map->km_ksem = ksem_hold(ks);
409 	ks->ks_path = path;
410 	LIST_INSERT_HEAD(KSEM_HASH(fnv), map, km_link);
411 }
412 
413 static int
414 ksem_remove(char *path, Fnv32_t fnv, struct ucred *ucred)
415 {
416 	struct ksem_mapping *map;
417 	int error;
418 
419 	LIST_FOREACH(map, KSEM_HASH(fnv), km_link) {
420 		if (map->km_fnv != fnv)
421 			continue;
422 		if (strcmp(map->km_path, path) == 0) {
423 #ifdef MAC
424 			error = mac_posixsem_check_unlink(ucred, map->km_ksem);
425 			if (error)
426 				return (error);
427 #endif
428 			error = ksem_access(map->km_ksem, ucred);
429 			if (error)
430 				return (error);
431 			map->km_ksem->ks_path = NULL;
432 			LIST_REMOVE(map, km_link);
433 			ksem_drop(map->km_ksem);
434 			free(map->km_path, M_KSEM);
435 			free(map, M_KSEM);
436 			return (0);
437 		}
438 	}
439 
440 	return (ENOENT);
441 }
442 
443 static void
444 ksem_info_impl(struct ksem *ks, char *path, size_t size, uint32_t *value)
445 {
446 
447 	if (ks->ks_path == NULL)
448 		return;
449 	sx_slock(&ksem_dict_lock);
450 	if (ks->ks_path != NULL)
451 		strlcpy(path, ks->ks_path, size);
452 	if (value != NULL)
453 		*value = ks->ks_value;
454 	sx_sunlock(&ksem_dict_lock);
455 }
456 
457 static int
458 ksem_create_copyout_semid(struct thread *td, semid_t *semidp, int fd,
459     int compat32)
460 {
461 	semid_t semid;
462 #ifdef COMPAT_FREEBSD32
463 	int32_t semid32;
464 #endif
465 	void *ptr;
466 	size_t ptrs;
467 
468 #ifdef COMPAT_FREEBSD32
469 	if (compat32) {
470 		semid32 = fd;
471 		ptr = &semid32;
472 		ptrs = sizeof(semid32);
473 	} else {
474 #endif
475 		semid = fd;
476 		ptr = &semid;
477 		ptrs = sizeof(semid);
478 		compat32 = 0; /* silence gcc */
479 #ifdef COMPAT_FREEBSD32
480 	}
481 #endif
482 
483 	return (copyout(ptr, semidp, ptrs));
484 }
485 
486 /* Other helper routines. */
/*
 * Common back end for the init and open system calls.
 *
 * Allocates a file and descriptor, copies the descriptor number out to
 * 'semidp', and then either creates an anonymous semaphore (name ==
 * NULL) or looks up / creates a named one in the dictionary.  On
 * success the file is initialized with ksem_ops and owns one reference
 * on the semaphore.
 *
 * td       - calling thread.
 * name     - user-space path, or NULL for an anonymous semaphore.
 * semidp   - user-space location that receives the descriptor.
 * mode     - requested permissions; masked by the process umask.
 * value    - initial value; must not exceed SEM_VALUE_MAX.
 * flags    - O_CREAT / O_EXCL for named semaphores.
 * compat32 - non-zero to copy out a 32-bit id.
 *
 * Returns 0 or an errno value: EINVAL (value too large, or path does
 * not start with '/'), ENOSPC (anonymous: descriptor or semaphore
 * allocation failed), ENFILE (named: semaphore allocation failed),
 * ENOENT (named semaphore absent and O_CREAT not given), EEXIST
 * (O_CREAT|O_EXCL on an existing name), or an error from falloc(),
 * copyout(), copyinstr(), the MAC open check or ksem_access().  Every
 * failure after falloc() closes the descriptor again.
 */
487 static int
488 ksem_create(struct thread *td, const char *name, semid_t *semidp, mode_t mode,
489     unsigned int value, int flags, int compat32)
490 {
491 	struct filedesc *fdp;
492 	struct ksem *ks;
493 	struct file *fp;
494 	char *path;
495 	Fnv32_t fnv;
496 	int error, fd;
497 
498 	if (value > SEM_VALUE_MAX)
499 		return (EINVAL);
500 
501 	fdp = td->td_proc->p_fd;
	/* Apply the process file-creation mask and keep permission bits only. */
502 	mode = (mode & ~fdp->fd_cmask) & ACCESSPERMS;
503 	error = falloc(td, &fp, &fd, O_CLOEXEC);
504 	if (error) {
		/* The anonymous (init) path reports any falloc failure as ENOSPC. */
505 		if (name == NULL)
506 			error = ENOSPC;
507 		return (error);
508 	}
509 
510 	/*
511 	 * Go ahead and copyout the file descriptor now.  This is a bit
512 	 * premature, but it is a lot easier to handle errors as opposed
513 	 * to later when we've possibly created a new semaphore, etc.
514 	 */
515 	error = ksem_create_copyout_semid(td, semidp, fd, compat32);
516 	if (error) {
517 		fdclose(fdp, fp, fd, td);
518 		fdrop(fp, td);
519 		return (error);
520 	}
521 
522 	if (name == NULL) {
523 		/* Create an anonymous semaphore. */
524 		ks = ksem_alloc(td->td_ucred, mode, value);
525 		if (ks == NULL)
526 			error = ENOSPC;
527 		else
528 			ks->ks_flags |= KS_ANONYMOUS;
529 	} else {
		/* Bring the path into a kernel buffer of MAXPATHLEN bytes. */
530 		path = malloc(MAXPATHLEN, M_KSEM, M_WAITOK);
531 		error = copyinstr(name, path, MAXPATHLEN, NULL);
532 
533 		/* Require paths to start with a '/' character. */
534 		if (error == 0 && path[0] != '/')
535 			error = EINVAL;
536 		if (error) {
537 			fdclose(fdp, fp, fd, td);
538 			fdrop(fp, td);
539 			free(path, M_KSEM);
540 			return (error);
541 		}
542 
543 		fnv = fnv_32_str(path, FNV1_32_INIT);
		/* Lookup and insert happen under one exclusive hold of the lock. */
544 		sx_xlock(&ksem_dict_lock);
545 		ks = ksem_lookup(path, fnv);
546 		if (ks == NULL) {
547 			/* Object does not exist, create it if requested. */
548 			if (flags & O_CREAT) {
549 				ks = ksem_alloc(td->td_ucred, mode, value);
550 				if (ks == NULL)
551 					error = ENFILE;
552 				else {
					/*
					 * The dictionary now owns 'path'; clear the
					 * local pointer so it is not freed below.
					 */
553 					ksem_insert(path, fnv, ks);
554 					path = NULL;
555 				}
556 			} else
557 				error = ENOENT;
558 		} else {
559 			/*
560 			 * Object already exists, obtain a new
561 			 * reference if requested and permitted.
562 			 */
563 			if ((flags & (O_CREAT | O_EXCL)) ==
564 			    (O_CREAT | O_EXCL))
565 				error = EEXIST;
566 			else {
567 #ifdef MAC
568 				error = mac_posixsem_check_open(td->td_ucred,
569 				    ks);
570 				if (error == 0)
571 #endif
572 				error = ksem_access(ks, td->td_ucred);
573 			}
574 			if (error == 0)
575 				ksem_hold(ks);
			/* Only needed so the KASSERT in the error path below holds. */
576 #ifdef INVARIANTS
577 			else
578 				ks = NULL;
579 #endif
580 		}
581 		sx_xunlock(&ksem_dict_lock);
582 		if (path)
583 			free(path, M_KSEM);
584 	}
585 
586 	if (error) {
587 		KASSERT(ks == NULL, ("ksem_create error with a ksem"));
588 		fdclose(fdp, fp, fd, td);
589 		fdrop(fp, td);
590 		return (error);
591 	}
592 	KASSERT(ks != NULL, ("ksem_create w/o a ksem"));
593 
	/* Attach the semaphore to the file; the file now carries the reference. */
594 	finit(fp, FREAD | FWRITE, DTYPE_SEM, ks, &ksem_ops);
595 
596 	fdrop(fp, td);
597 
598 	return (0);
599 }
600 
601 static int
602 ksem_get(struct thread *td, semid_t id, cap_rights_t rights, struct file **fpp)
603 {
604 	struct ksem *ks;
605 	struct file *fp;
606 	int error;
607 
608 	error = fget(td, id, rights, &fp);
609 	if (error)
610 		return (EINVAL);
611 	if (fp->f_type != DTYPE_SEM) {
612 		fdrop(fp, td);
613 		return (EINVAL);
614 	}
615 	ks = fp->f_data;
616 	if (ks->ks_flags & KS_DEAD) {
617 		fdrop(fp, td);
618 		return (EINVAL);
619 	}
620 	*fpp = fp;
621 	return (0);
622 }
623 
624 /* System calls. */
625 #ifndef _SYS_SYSPROTO_H_
626 struct ksem_init_args {
627 	unsigned int	value;
628 	semid_t		*idp;
629 };
630 #endif
631 int
632 sys_ksem_init(struct thread *td, struct ksem_init_args *uap)
633 {
634 
635 	return (ksem_create(td, NULL, uap->idp, S_IRWXU | S_IRWXG, uap->value,
636 	    0, 0));
637 }
638 
639 #ifndef _SYS_SYSPROTO_H_
640 struct ksem_open_args {
641 	char		*name;
642 	int		oflag;
643 	mode_t		mode;
644 	unsigned int	value;
645 	semid_t		*idp;
646 };
647 #endif
648 int
649 sys_ksem_open(struct thread *td, struct ksem_open_args *uap)
650 {
651 
652 	DP((">>> ksem_open start, pid=%d\n", (int)td->td_proc->p_pid));
653 
654 	if ((uap->oflag & ~(O_CREAT | O_EXCL)) != 0)
655 		return (EINVAL);
656 	return (ksem_create(td, uap->name, uap->idp, uap->mode, uap->value,
657 	    uap->oflag, 0));
658 }
659 
660 #ifndef _SYS_SYSPROTO_H_
661 struct ksem_unlink_args {
662 	char		*name;
663 };
664 #endif
665 int
666 sys_ksem_unlink(struct thread *td, struct ksem_unlink_args *uap)
667 {
668 	char *path;
669 	Fnv32_t fnv;
670 	int error;
671 
672 	path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
673 	error = copyinstr(uap->name, path, MAXPATHLEN, NULL);
674 	if (error) {
675 		free(path, M_TEMP);
676 		return (error);
677 	}
678 
679 	fnv = fnv_32_str(path, FNV1_32_INIT);
680 	sx_xlock(&ksem_dict_lock);
681 	error = ksem_remove(path, fnv, td->td_ucred);
682 	sx_xunlock(&ksem_dict_lock);
683 	free(path, M_TEMP);
684 
685 	return (error);
686 }
687 
688 #ifndef _SYS_SYSPROTO_H_
689 struct ksem_close_args {
690 	semid_t		id;
691 };
692 #endif
693 int
694 sys_ksem_close(struct thread *td, struct ksem_close_args *uap)
695 {
696 	struct ksem *ks;
697 	struct file *fp;
698 	int error;
699 
700 	/* No capability rights required to close a semaphore. */
701 	error = ksem_get(td, uap->id, 0, &fp);
702 	if (error)
703 		return (error);
704 	ks = fp->f_data;
705 	if (ks->ks_flags & KS_ANONYMOUS) {
706 		fdrop(fp, td);
707 		return (EINVAL);
708 	}
709 	error = kern_close(td, uap->id);
710 	fdrop(fp, td);
711 	return (error);
712 }
713 
714 #ifndef _SYS_SYSPROTO_H_
715 struct ksem_post_args {
716 	semid_t	id;
717 };
718 #endif
719 int
720 sys_ksem_post(struct thread *td, struct ksem_post_args *uap)
721 {
722 	struct file *fp;
723 	struct ksem *ks;
724 	int error;
725 
726 	error = ksem_get(td, uap->id, CAP_SEM_POST, &fp);
727 	if (error)
728 		return (error);
729 	ks = fp->f_data;
730 
731 	mtx_lock(&sem_lock);
732 #ifdef MAC
733 	error = mac_posixsem_check_post(td->td_ucred, fp->f_cred, ks);
734 	if (error)
735 		goto err;
736 #endif
737 	if (ks->ks_value == SEM_VALUE_MAX) {
738 		error = EOVERFLOW;
739 		goto err;
740 	}
741 	++ks->ks_value;
742 	if (ks->ks_waiters > 0)
743 		cv_signal(&ks->ks_cv);
744 	error = 0;
745 	vfs_timestamp(&ks->ks_ctime);
746 err:
747 	mtx_unlock(&sem_lock);
748 	fdrop(fp, td);
749 	return (error);
750 }
751 
752 #ifndef _SYS_SYSPROTO_H_
753 struct ksem_wait_args {
754 	semid_t		id;
755 };
756 #endif
757 int
758 sys_ksem_wait(struct thread *td, struct ksem_wait_args *uap)
759 {
760 
761 	return (kern_sem_wait(td, uap->id, 0, NULL));
762 }
763 
764 #ifndef _SYS_SYSPROTO_H_
765 struct ksem_timedwait_args {
766 	semid_t		id;
767 	const struct timespec *abstime;
768 };
769 #endif
770 int
771 sys_ksem_timedwait(struct thread *td, struct ksem_timedwait_args *uap)
772 {
773 	struct timespec abstime;
774 	struct timespec *ts;
775 	int error;
776 
777 	/*
778 	 * We allow a null timespec (wait forever).
779 	 */
780 	if (uap->abstime == NULL)
781 		ts = NULL;
782 	else {
783 		error = copyin(uap->abstime, &abstime, sizeof(abstime));
784 		if (error != 0)
785 			return (error);
786 		if (abstime.tv_nsec >= 1000000000 || abstime.tv_nsec < 0)
787 			return (EINVAL);
788 		ts = &abstime;
789 	}
790 	return (kern_sem_wait(td, uap->id, 0, ts));
791 }
792 
793 #ifndef _SYS_SYSPROTO_H_
794 struct ksem_trywait_args {
795 	semid_t		id;
796 };
797 #endif
798 int
799 sys_ksem_trywait(struct thread *td, struct ksem_trywait_args *uap)
800 {
801 
802 	return (kern_sem_wait(td, uap->id, 1, NULL));
803 }
804 
/*
 * Shared implementation of the wait, trywait and timedwait system calls.
 *
 * td      - calling thread.
 * id      - semaphore descriptor.
 * tryflag - non-zero: do not sleep; fail with EAGAIN if the value is 0.
 * abstime - absolute deadline, or NULL to wait without a timeout.
 *
 * While the value is zero the caller is counted in ks_waiters and
 * either fails immediately (tryflag), sleeps on the semaphore's
 * condition variable, or sleeps with a timeout recomputed from
 * 'abstime' on each pass.  Once the value is non-zero it is decremented
 * and 0 is returned.  Other returns: the ksem_get() or MAC check error,
 * EAGAIN, ETIMEDOUT when the deadline has passed, or whatever
 * cv_wait_sig() / cv_timedwait_sig() reported.
 */
805 static int
806 kern_sem_wait(struct thread *td, semid_t id, int tryflag,
807     struct timespec *abstime)
808 {
809 	struct timespec ts1, ts2;
810 	struct timeval tv;
811 	struct file *fp;
812 	struct ksem *ks;
813 	int error;
814 
815 	DP((">>> kern_sem_wait entered! pid=%d\n", (int)td->td_proc->p_pid));
816 	error = ksem_get(td, id, CAP_SEM_WAIT, &fp);
817 	if (error)
818 		return (error);
819 	ks = fp->f_data;
820 	mtx_lock(&sem_lock);
821 	DP((">>> kern_sem_wait critical section entered! pid=%d\n",
822 	    (int)td->td_proc->p_pid));
823 #ifdef MAC
824 	error = mac_posixsem_check_wait(td->td_ucred, fp->f_cred, ks);
825 	if (error) {
826 		DP(("kern_sem_wait mac failed\n"));
827 		goto err;
828 	}
829 #endif
830 	DP(("kern_sem_wait value = %d, tryflag %d\n", ks->ks_value, tryflag));
831 	vfs_timestamp(&ks->ks_atime);
	/* Re-test the value after every wakeup before consuming it. */
832 	while (ks->ks_value == 0) {
833 		ks->ks_waiters++;
834 		if (tryflag != 0)
835 			error = EAGAIN;
836 		else if (abstime == NULL)
837 			error = cv_wait_sig(&ks->ks_cv, &sem_lock);
838 		else {
839 			for (;;) {
				/* Remaining time = deadline - now. */
840 				ts1 = *abstime;
841 				getnanotime(&ts2);
842 				timespecsub(&ts1, &ts2);
843 				TIMESPEC_TO_TIMEVAL(&tv, &ts1);
844 				if (tv.tv_sec < 0) {
845 					error = ETIMEDOUT;
846 					break;
847 				}
848 				error = cv_timedwait_sig(&ks->ks_cv,
849 				    &sem_lock, tvtohz(&tv));
				/*
				 * On EWOULDBLOCK loop again so the deadline
				 * test above decides whether to time out.
				 */
850 				if (error != EWOULDBLOCK)
851 					break;
852 			}
853 		}
854 		ks->ks_waiters--;
855 		if (error)
856 			goto err;
857 	}
858 	ks->ks_value--;
859 	DP(("kern_sem_wait value post-decrement = %d\n", ks->ks_value));
860 	error = 0;
861 err:
862 	mtx_unlock(&sem_lock);
863 	fdrop(fp, td);
864 	DP(("<<< kern_sem_wait leaving, pid=%d, error = %d\n",
865 	    (int)td->td_proc->p_pid, error));
866 	return (error);
867 }
868 
869 #ifndef _SYS_SYSPROTO_H_
870 struct ksem_getvalue_args {
871 	semid_t		id;
872 	int		*val;
873 };
874 #endif
875 int
876 sys_ksem_getvalue(struct thread *td, struct ksem_getvalue_args *uap)
877 {
878 	struct file *fp;
879 	struct ksem *ks;
880 	int error, val;
881 
882 	error = ksem_get(td, uap->id, CAP_SEM_GETVALUE, &fp);
883 	if (error)
884 		return (error);
885 	ks = fp->f_data;
886 
887 	mtx_lock(&sem_lock);
888 #ifdef MAC
889 	error = mac_posixsem_check_getvalue(td->td_ucred, fp->f_cred, ks);
890 	if (error) {
891 		mtx_unlock(&sem_lock);
892 		fdrop(fp, td);
893 		return (error);
894 	}
895 #endif
896 	val = ks->ks_value;
897 	vfs_timestamp(&ks->ks_atime);
898 	mtx_unlock(&sem_lock);
899 	fdrop(fp, td);
900 	error = copyout(&val, uap->val, sizeof(val));
901 	return (error);
902 }
903 
904 #ifndef _SYS_SYSPROTO_H_
905 struct ksem_destroy_args {
906 	semid_t		id;
907 };
908 #endif
909 int
910 sys_ksem_destroy(struct thread *td, struct ksem_destroy_args *uap)
911 {
912 	struct file *fp;
913 	struct ksem *ks;
914 	int error;
915 
916 	/* No capability rights required to close a semaphore. */
917 	error = ksem_get(td, uap->id, 0, &fp);
918 	if (error)
919 		return (error);
920 	ks = fp->f_data;
921 	if (!(ks->ks_flags & KS_ANONYMOUS)) {
922 		fdrop(fp, td);
923 		return (EINVAL);
924 	}
925 	mtx_lock(&sem_lock);
926 	if (ks->ks_waiters != 0) {
927 		mtx_unlock(&sem_lock);
928 		error = EBUSY;
929 		goto err;
930 	}
931 	ks->ks_flags |= KS_DEAD;
932 	mtx_unlock(&sem_lock);
933 
934 	error = kern_close(td, uap->id);
935 err:
936 	fdrop(fp, td);
937 	return (error);
938 }
939 
/*
 * Native system calls provided by this module.  The table is passed to
 * syscall_helper_register() at load and syscall_helper_unregister() at
 * unload.
 */
940 static struct syscall_helper_data ksem_syscalls[] = {
941 	SYSCALL_INIT_HELPER(ksem_init),
942 	SYSCALL_INIT_HELPER(ksem_open),
943 	SYSCALL_INIT_HELPER(ksem_unlink),
944 	SYSCALL_INIT_HELPER(ksem_close),
945 	SYSCALL_INIT_HELPER(ksem_post),
946 	SYSCALL_INIT_HELPER(ksem_wait),
947 	SYSCALL_INIT_HELPER(ksem_timedwait),
948 	SYSCALL_INIT_HELPER(ksem_trywait),
949 	SYSCALL_INIT_HELPER(ksem_getvalue),
950 	SYSCALL_INIT_HELPER(ksem_destroy),
951 	SYSCALL_INIT_LAST
952 };
953 
954 #ifdef COMPAT_FREEBSD32
955 #include <compat/freebsd32/freebsd32.h>
956 #include <compat/freebsd32/freebsd32_proto.h>
957 #include <compat/freebsd32/freebsd32_signal.h>
958 #include <compat/freebsd32/freebsd32_syscall.h>
959 #include <compat/freebsd32/freebsd32_util.h>
960 
961 int
962 freebsd32_ksem_init(struct thread *td, struct freebsd32_ksem_init_args *uap)
963 {
964 
965 	return (ksem_create(td, NULL, uap->idp, S_IRWXU | S_IRWXG, uap->value,
966 	    0, 1));
967 }
968 
969 int
970 freebsd32_ksem_open(struct thread *td, struct freebsd32_ksem_open_args *uap)
971 {
972 
973 	if ((uap->oflag & ~(O_CREAT | O_EXCL)) != 0)
974 		return (EINVAL);
975 	return (ksem_create(td, uap->name, uap->idp, uap->mode, uap->value,
976 	    uap->oflag, 1));
977 }
978 
979 int
980 freebsd32_ksem_timedwait(struct thread *td,
981     struct freebsd32_ksem_timedwait_args *uap)
982 {
983 	struct timespec32 abstime32;
984 	struct timespec *ts, abstime;
985 	int error;
986 
987 	/*
988 	 * We allow a null timespec (wait forever).
989 	 */
990 	if (uap->abstime == NULL)
991 		ts = NULL;
992 	else {
993 		error = copyin(uap->abstime, &abstime32, sizeof(abstime32));
994 		if (error != 0)
995 			return (error);
996 		CP(abstime32, abstime, tv_sec);
997 		CP(abstime32, abstime, tv_nsec);
998 		if (abstime.tv_nsec >= 1000000000 || abstime.tv_nsec < 0)
999 			return (EINVAL);
1000 		ts = &abstime;
1001 	}
1002 	return (kern_sem_wait(td, uap->id, 0, ts));
1003 }
1004 
/*
 * 32-bit compat system call table.  Only init, open and timedwait have
 * dedicated freebsd32_ entry points in this file; the remaining entries
 * use the _COMPAT helper macro and name the native system call.
 */
1005 static struct syscall_helper_data ksem32_syscalls[] = {
1006 	SYSCALL32_INIT_HELPER(freebsd32_ksem_init),
1007 	SYSCALL32_INIT_HELPER(freebsd32_ksem_open),
1008 	SYSCALL32_INIT_HELPER_COMPAT(ksem_unlink),
1009 	SYSCALL32_INIT_HELPER_COMPAT(ksem_close),
1010 	SYSCALL32_INIT_HELPER_COMPAT(ksem_post),
1011 	SYSCALL32_INIT_HELPER_COMPAT(ksem_wait),
1012 	SYSCALL32_INIT_HELPER(freebsd32_ksem_timedwait),
1013 	SYSCALL32_INIT_HELPER_COMPAT(ksem_trywait),
1014 	SYSCALL32_INIT_HELPER_COMPAT(ksem_getvalue),
1015 	SYSCALL32_INIT_HELPER_COMPAT(ksem_destroy),
1016 	SYSCALL_INIT_LAST
1017 };
1018 #endif
1019 
1020 static int
1021 ksem_module_init(void)
1022 {
1023 	int error;
1024 
1025 	mtx_init(&sem_lock, "sem", NULL, MTX_DEF);
1026 	mtx_init(&ksem_count_lock, "ksem count", NULL, MTX_DEF);
1027 	sx_init(&ksem_dict_lock, "ksem dictionary");
1028 	ksem_dictionary = hashinit(1024, M_KSEM, &ksem_hash);
1029 	p31b_setcfg(CTL_P1003_1B_SEMAPHORES, 200112L);
1030 	p31b_setcfg(CTL_P1003_1B_SEM_NSEMS_MAX, SEM_MAX);
1031 	p31b_setcfg(CTL_P1003_1B_SEM_VALUE_MAX, SEM_VALUE_MAX);
1032 	ksem_info = ksem_info_impl;
1033 
1034 	error = syscall_helper_register(ksem_syscalls);
1035 	if (error)
1036 		return (error);
1037 #ifdef COMPAT_FREEBSD32
1038 	error = syscall32_helper_register(ksem32_syscalls);
1039 	if (error)
1040 		return (error);
1041 #endif
1042 	return (0);
1043 }
1044 
/*
 * Undo ksem_module_init(): unregister the system calls, clear the
 * ksem_info hook, then release the dictionary and locks and withdraw
 * the published configuration values.  Called on unload and after a
 * failed load.
 */
1045 static void
1046 ksem_module_destroy(void)
1047 {
1048 
	/* Remove the system call entry points before tearing down state. */
1049 #ifdef COMPAT_FREEBSD32
1050 	syscall32_helper_unregister(ksem32_syscalls);
1051 #endif
1052 	syscall_helper_unregister(ksem_syscalls);
1053 
1054 	ksem_info = NULL;
1055 	p31b_setcfg(CTL_P1003_1B_SEMAPHORES, 0);
1056 	hashdestroy(ksem_dictionary, M_KSEM, ksem_hash);
1057 	sx_destroy(&ksem_dict_lock);
1058 	mtx_destroy(&ksem_count_lock);
1059 	mtx_destroy(&sem_lock);
1060 	p31b_unsetcfg(CTL_P1003_1B_SEM_VALUE_MAX);
1061 	p31b_unsetcfg(CTL_P1003_1B_SEM_NSEMS_MAX);
1062 }
1063 
1064 static int
1065 sem_modload(struct module *module, int cmd, void *arg)
1066 {
1067         int error = 0;
1068 
1069         switch (cmd) {
1070         case MOD_LOAD:
1071 		error = ksem_module_init();
1072 		if (error)
1073 			ksem_module_destroy();
1074                 break;
1075 
1076         case MOD_UNLOAD:
1077 		mtx_lock(&ksem_count_lock);
1078 		if (nsems != 0) {
1079 			error = EOPNOTSUPP;
1080 			mtx_unlock(&ksem_count_lock);
1081 			break;
1082 		}
1083 		ksem_dead = 1;
1084 		mtx_unlock(&ksem_count_lock);
1085 		ksem_module_destroy();
1086                 break;
1087 
1088         case MOD_SHUTDOWN:
1089                 break;
1090         default:
1091                 error = EINVAL;
1092                 break;
1093         }
1094         return (error);
1095 }
1096 
/*
 * Module glue: name, event handler and (unused) private data, followed
 * by the module declaration and its version.
 */
1097 static moduledata_t sem_mod = {
1098         "sem",
1099         &sem_modload,
1100         NULL
1101 };
1102 
1103 DECLARE_MODULE(sem, sem_mod, SI_SUB_SYSV_SEM, SI_ORDER_FIRST);
1104 MODULE_VERSION(sem, 1);
1105