xref: /netbsd/sys/kern/uipc_sem.c (revision a0fbdf29)
1 /*	$NetBSD: uipc_sem.c,v 1.60 2020/12/14 23:12:12 chs Exp $	*/
2 
3 /*-
4  * Copyright (c) 2011, 2019 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Mindaugas Rasiukevicius and Jason R. Thorpe.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 2002 Alfred Perlstein <alfred@FreeBSD.org>
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  *
45  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55  * SUCH DAMAGE.
56  */
57 
58 /*
59  * Implementation of POSIX semaphore.
60  */
61 
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: uipc_sem.c,v 1.60 2020/12/14 23:12:12 chs Exp $");
64 
65 #include <sys/param.h>
66 #include <sys/kernel.h>
67 
68 #include <sys/atomic.h>
69 #include <sys/proc.h>
70 #include <sys/lwp.h>
71 #include <sys/ksem.h>
72 #include <sys/syscall.h>
73 #include <sys/stat.h>
74 #include <sys/kmem.h>
75 #include <sys/fcntl.h>
76 #include <sys/file.h>
77 #include <sys/filedesc.h>
78 #include <sys/kauth.h>
79 #include <sys/module.h>
80 #include <sys/mount.h>
81 #include <sys/mutex.h>
82 #include <sys/rwlock.h>
83 #include <sys/semaphore.h>
84 #include <sys/syscall.h>
85 #include <sys/syscallargs.h>
86 #include <sys/syscallvar.h>
87 #include <sys/sysctl.h>
88 #include <sys/uidinfo.h>
89 #include <sys/cprng.h>
90 
91 MODULE(MODULE_CLASS_MISC, ksem, NULL);
92 
93 #define	SEM_MAX_NAMELEN		NAME_MAX
94 
95 #define	KS_UNLINKED		0x01
96 
97 static kmutex_t		ksem_lock	__cacheline_aligned;
98 static LIST_HEAD(,ksem)	ksem_head	__cacheline_aligned;
99 static u_int		nsems_total	__cacheline_aligned;
100 static u_int		nsems		__cacheline_aligned;
101 
102 static krwlock_t	ksem_pshared_lock __cacheline_aligned;
103 static LIST_HEAD(, ksem) *ksem_pshared_hashtab __cacheline_aligned;
104 static u_long		ksem_pshared_hashmask __read_mostly;
105 
106 #define	KSEM_PSHARED_HASHSIZE	32
107 
108 static kauth_listener_t	ksem_listener;
109 
110 static int		ksem_sysinit(void);
111 static int		ksem_sysfini(bool);
112 static int		ksem_modcmd(modcmd_t, void *);
113 static void		ksem_release(ksem_t *, int);
114 static int		ksem_close_fop(file_t *);
115 static int		ksem_stat_fop(file_t *, struct stat *);
116 static int		ksem_read_fop(file_t *, off_t *, struct uio *,
117     kauth_cred_t, int);
118 
119 static const struct fileops semops = {
120 	.fo_name = "sem",
121 	.fo_read = ksem_read_fop,
122 	.fo_write = fbadop_write,
123 	.fo_ioctl = fbadop_ioctl,
124 	.fo_fcntl = fnullop_fcntl,
125 	.fo_poll = fnullop_poll,
126 	.fo_stat = ksem_stat_fop,
127 	.fo_close = ksem_close_fop,
128 	.fo_kqfilter = fnullop_kqfilter,
129 	.fo_restart = fnullop_restart,
130 };
131 
132 static const struct syscall_package ksem_syscalls[] = {
133 	{ SYS__ksem_init, 0, (sy_call_t *)sys__ksem_init },
134 	{ SYS__ksem_open, 0, (sy_call_t *)sys__ksem_open },
135 	{ SYS__ksem_unlink, 0, (sy_call_t *)sys__ksem_unlink },
136 	{ SYS__ksem_close, 0, (sy_call_t *)sys__ksem_close },
137 	{ SYS__ksem_post, 0, (sy_call_t *)sys__ksem_post },
138 	{ SYS__ksem_wait, 0, (sy_call_t *)sys__ksem_wait },
139 	{ SYS__ksem_trywait, 0, (sy_call_t *)sys__ksem_trywait },
140 	{ SYS__ksem_getvalue, 0, (sy_call_t *)sys__ksem_getvalue },
141 	{ SYS__ksem_destroy, 0, (sy_call_t *)sys__ksem_destroy },
142 	{ SYS__ksem_timedwait, 0, (sy_call_t *)sys__ksem_timedwait },
143 	{ 0, 0, NULL },
144 };
145 
146 struct sysctllog *ksem_clog;
147 int ksem_max = KSEM_MAX;
148 
149 static int
name_copyin(const char * uname,char ** name)150 name_copyin(const char *uname, char **name)
151 {
152 	*name = kmem_alloc(SEM_MAX_NAMELEN, KM_SLEEP);
153 
154 	int error = copyinstr(uname, *name, SEM_MAX_NAMELEN, NULL);
155 	if (error)
156 		kmem_free(*name, SEM_MAX_NAMELEN);
157 
158 	return error;
159 }
160 
161 static void
name_destroy(char ** name)162 name_destroy(char **name)
163 {
164 	if (!*name)
165 		return;
166 
167 	kmem_free(*name, SEM_MAX_NAMELEN);
168 	*name = NULL;
169 }
170 
171 static int
ksem_listener_cb(kauth_cred_t cred,kauth_action_t action,void * cookie,void * arg0,void * arg1,void * arg2,void * arg3)172 ksem_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
173     void *arg0, void *arg1, void *arg2, void *arg3)
174 {
175 	ksem_t *ks;
176 	mode_t mode;
177 
178 	if (action != KAUTH_SYSTEM_SEMAPHORE)
179 		return KAUTH_RESULT_DEFER;
180 
181 	ks = arg1;
182 	mode = ks->ks_mode;
183 
184 	if ((kauth_cred_geteuid(cred) == ks->ks_uid && (mode & S_IWUSR) != 0) ||
185 	    (kauth_cred_getegid(cred) == ks->ks_gid && (mode & S_IWGRP) != 0) ||
186 	    (mode & S_IWOTH) != 0)
187 		return KAUTH_RESULT_ALLOW;
188 
189 	return KAUTH_RESULT_DEFER;
190 }
191 
192 static int
ksem_sysinit(void)193 ksem_sysinit(void)
194 {
195 	int error;
196 	const struct sysctlnode *rnode;
197 
198 	mutex_init(&ksem_lock, MUTEX_DEFAULT, IPL_NONE);
199 	LIST_INIT(&ksem_head);
200 	nsems_total = 0;
201 	nsems = 0;
202 
203 	rw_init(&ksem_pshared_lock);
204 	ksem_pshared_hashtab = hashinit(KSEM_PSHARED_HASHSIZE, HASH_LIST,
205 	    true, &ksem_pshared_hashmask);
206 	KASSERT(ksem_pshared_hashtab != NULL);
207 
208 	ksem_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
209 	    ksem_listener_cb, NULL);
210 
211 	/* Define module-specific sysctl tree */
212 
213 	ksem_clog = NULL;
214 
215 	sysctl_createv(&ksem_clog, 0, NULL, &rnode,
216 			CTLFLAG_PERMANENT,
217 			CTLTYPE_NODE, "posix",
218 			SYSCTL_DESCR("POSIX options"),
219 			NULL, 0, NULL, 0,
220 			CTL_KERN, CTL_CREATE, CTL_EOL);
221 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
222 			CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
223 			CTLTYPE_INT, "semmax",
224 			SYSCTL_DESCR("Maximal number of semaphores"),
225 			NULL, 0, &ksem_max, 0,
226 			CTL_CREATE, CTL_EOL);
227 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
228 			CTLFLAG_PERMANENT | CTLFLAG_READONLY,
229 			CTLTYPE_INT, "semcnt",
230 			SYSCTL_DESCR("Current number of semaphores"),
231 			NULL, 0, &nsems, 0,
232 			CTL_CREATE, CTL_EOL);
233 
234 	error = syscall_establish(NULL, ksem_syscalls);
235 	if (error) {
236 		(void)ksem_sysfini(false);
237 	}
238 
239 	return error;
240 }
241 
242 static int
ksem_sysfini(bool interface)243 ksem_sysfini(bool interface)
244 {
245 	int error;
246 
247 	if (interface) {
248 		error = syscall_disestablish(NULL, ksem_syscalls);
249 		if (error != 0) {
250 			return error;
251 		}
252 		/*
253 		 * Make sure that no semaphores are in use.  Note: semops
254 		 * must be unused at this point.
255 		 */
256 		if (nsems_total) {
257 			error = syscall_establish(NULL, ksem_syscalls);
258 			KASSERT(error == 0);
259 			return EBUSY;
260 		}
261 	}
262 	kauth_unlisten_scope(ksem_listener);
263 	hashdone(ksem_pshared_hashtab, HASH_LIST, ksem_pshared_hashmask);
264 	rw_destroy(&ksem_pshared_lock);
265 	mutex_destroy(&ksem_lock);
266 	sysctl_teardown(&ksem_clog);
267 	return 0;
268 }
269 
270 static int
ksem_modcmd(modcmd_t cmd,void * arg)271 ksem_modcmd(modcmd_t cmd, void *arg)
272 {
273 
274 	switch (cmd) {
275 	case MODULE_CMD_INIT:
276 		return ksem_sysinit();
277 
278 	case MODULE_CMD_FINI:
279 		return ksem_sysfini(true);
280 
281 	default:
282 		return ENOTTY;
283 	}
284 }
285 
286 static ksem_t *
ksem_lookup(const char * name)287 ksem_lookup(const char *name)
288 {
289 	ksem_t *ks;
290 
291 	KASSERT(mutex_owned(&ksem_lock));
292 
293 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
294 		if (strcmp(ks->ks_name, name) == 0) {
295 			mutex_enter(&ks->ks_lock);
296 			return ks;
297 		}
298 	}
299 	return NULL;
300 }
301 
302 static int
ksem_perm(lwp_t * l,ksem_t * ks)303 ksem_perm(lwp_t *l, ksem_t *ks)
304 {
305 	kauth_cred_t uc = l->l_cred;
306 
307 	KASSERT(mutex_owned(&ks->ks_lock));
308 
309 	if (kauth_authorize_system(uc, KAUTH_SYSTEM_SEMAPHORE, 0, ks, NULL, NULL) != 0)
310 		return EACCES;
311 
312 	return 0;
313 }
314 
/*
 * Map a pshared ID to a bucket of ksem_pshared_hashtab.  Bit 0 is
 * dropped; bits 1..23 of an ID are random, so taking the low ones that
 * fit the hash mask should spread entries well enough.
 */
#define	KSEM_PSHARED_HASH(id)	(((id) >> 1) & ksem_pshared_hashmask)
320 
321 static void
ksem_remove_pshared(ksem_t * ksem)322 ksem_remove_pshared(ksem_t *ksem)
323 {
324 	rw_enter(&ksem_pshared_lock, RW_WRITER);
325 	LIST_REMOVE(ksem, ks_entry);
326 	rw_exit(&ksem_pshared_lock);
327 }
328 
329 static ksem_t *
ksem_lookup_pshared_locked(intptr_t id)330 ksem_lookup_pshared_locked(intptr_t id)
331 {
332 	u_long bucket = KSEM_PSHARED_HASH(id);
333 	ksem_t *ksem = NULL;
334 
335 	/* ksem_t is locked and referenced upon return. */
336 
337 	LIST_FOREACH(ksem, &ksem_pshared_hashtab[bucket], ks_entry) {
338 		if (ksem->ks_pshared_id == id) {
339 			mutex_enter(&ksem->ks_lock);
340 			if (ksem->ks_pshared_proc == NULL) {
341 				/*
342 				 * This entry is dead, and in the process
343 				 * of being torn down; skip it.
344 				 */
345 				mutex_exit(&ksem->ks_lock);
346 				continue;
347 			}
348 			ksem->ks_ref++;
349 			KASSERT(ksem->ks_ref != 0);
350 			return ksem;
351 		}
352 	}
353 
354 	return NULL;
355 }
356 
357 static ksem_t *
ksem_lookup_pshared(intptr_t id)358 ksem_lookup_pshared(intptr_t id)
359 {
360 	rw_enter(&ksem_pshared_lock, RW_READER);
361 	ksem_t *ksem = ksem_lookup_pshared_locked(id);
362 	rw_exit(&ksem_pshared_lock);
363 	return ksem;
364 }
365 
366 static void
ksem_alloc_pshared_id(ksem_t * ksem)367 ksem_alloc_pshared_id(ksem_t *ksem)
368 {
369 	ksem_t *ksem0;
370 	uint32_t try;
371 
372 	KASSERT(ksem->ks_pshared_proc != NULL);
373 
374 	rw_enter(&ksem_pshared_lock, RW_WRITER);
375 	for (;;) {
376 		try = (cprng_fast32() & ~KSEM_MARKER_MASK) |
377 		    KSEM_PSHARED_MARKER;
378 
379 		if ((ksem0 = ksem_lookup_pshared_locked(try)) == NULL) {
380 			/* Got it! */
381 			break;
382 		}
383 		ksem_release(ksem0, -1);
384 	}
385 	ksem->ks_pshared_id = try;
386 	u_long bucket = KSEM_PSHARED_HASH(ksem->ks_pshared_id);
387 	LIST_INSERT_HEAD(&ksem_pshared_hashtab[bucket], ksem, ks_entry);
388 	rw_exit(&ksem_pshared_lock);
389 }
390 
391 /*
392  * ksem_get: get the semaphore from the descriptor.
393  *
394  * => locks the semaphore, if found, and holds an extra reference.
395  * => holds a reference on the file descriptor.
396  */
397 static int
ksem_get(intptr_t id,ksem_t ** ksret,int * fdp)398 ksem_get(intptr_t id, ksem_t **ksret, int *fdp)
399 {
400 	ksem_t *ks;
401 	int fd;
402 
403 	if ((id & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER) {
404 		/*
405 		 * ksem_lookup_pshared() returns the ksem_t *
406 		 * locked and referenced.
407 		 */
408 		ks = ksem_lookup_pshared(id);
409 		if (ks == NULL)
410 			return EINVAL;
411 		KASSERT(ks->ks_pshared_id == id);
412 		KASSERT(ks->ks_pshared_proc != NULL);
413 		fd = -1;
414 	} else if (id <= INT_MAX) {
415 		fd = (int)id;
416 		file_t *fp = fd_getfile(fd);
417 
418 		if (__predict_false(fp == NULL))
419 			return EINVAL;
420 		if (__predict_false(fp->f_type != DTYPE_SEM)) {
421 			fd_putfile(fd);
422 			return EINVAL;
423 		}
424 		ks = fp->f_ksem;
425 		mutex_enter(&ks->ks_lock);
426 		ks->ks_ref++;
427 	} else {
428 		return EINVAL;
429 	}
430 
431 	*ksret = ks;
432 	*fdp = fd;
433 	return 0;
434 }
435 
436 /*
437  * ksem_create: allocate and setup a new semaphore structure.
438  */
439 static int
ksem_create(lwp_t * l,const char * name,ksem_t ** ksret,mode_t mode,u_int val)440 ksem_create(lwp_t *l, const char *name, ksem_t **ksret, mode_t mode, u_int val)
441 {
442 	ksem_t *ks;
443 	kauth_cred_t uc;
444 	char *kname;
445 	size_t len;
446 
447 	/* Pre-check for the limit. */
448 	if (nsems >= ksem_max) {
449 		return ENFILE;
450 	}
451 
452 	if (val > SEM_VALUE_MAX) {
453 		return EINVAL;
454 	}
455 
456 	if (name != NULL) {
457 		len = strlen(name);
458 		if (len > SEM_MAX_NAMELEN) {
459 			return ENAMETOOLONG;
460 		}
461 		/* Name must start with a '/' but not contain one. */
462 		if (*name != '/' || len < 2 || strchr(name + 1, '/') != NULL) {
463 			return EINVAL;
464 		}
465 		kname = kmem_alloc(++len, KM_SLEEP);
466 		strlcpy(kname, name, len);
467 	} else {
468 		kname = NULL;
469 		len = 0;
470 	}
471 
472 	ks = kmem_zalloc(sizeof(ksem_t), KM_SLEEP);
473 	mutex_init(&ks->ks_lock, MUTEX_DEFAULT, IPL_NONE);
474 	cv_init(&ks->ks_cv, "psem");
475 	ks->ks_name = kname;
476 	ks->ks_namelen = len;
477 	ks->ks_mode = mode;
478 	ks->ks_value = val;
479 	ks->ks_ref = 1;
480 
481 	uc = l->l_cred;
482 	ks->ks_uid = kauth_cred_geteuid(uc);
483 	ks->ks_gid = kauth_cred_getegid(uc);
484 	chgsemcnt(ks->ks_uid, 1);
485 	atomic_inc_uint(&nsems_total);
486 
487 	*ksret = ks;
488 	return 0;
489 }
490 
491 static void
ksem_free(ksem_t * ks)492 ksem_free(ksem_t *ks)
493 {
494 
495 	KASSERT(!cv_has_waiters(&ks->ks_cv));
496 
497 	chgsemcnt(ks->ks_uid, -1);
498 	atomic_dec_uint(&nsems_total);
499 
500 	if (ks->ks_pshared_id) {
501 		KASSERT(ks->ks_pshared_proc == NULL);
502 		ksem_remove_pshared(ks);
503 	}
504 	if (ks->ks_name) {
505 		KASSERT(ks->ks_namelen > 0);
506 		kmem_free(ks->ks_name, ks->ks_namelen);
507 	}
508 	mutex_destroy(&ks->ks_lock);
509 	cv_destroy(&ks->ks_cv);
510 	kmem_free(ks, sizeof(ksem_t));
511 }
512 
/* True if the semaphore ID carries the process-shared marker. */
#define	KSEM_ID_IS_PSHARED(id)		\
	(((id) & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER)
515 
516 static void
ksem_release(ksem_t * ksem,int fd)517 ksem_release(ksem_t *ksem, int fd)
518 {
519 	bool destroy = false;
520 
521 	KASSERT(mutex_owned(&ksem->ks_lock));
522 
523 	KASSERT(ksem->ks_ref > 0);
524 	if (--ksem->ks_ref == 0) {
525 		/*
526 		 * Destroy if the last reference and semaphore is unnamed,
527 		 * or unlinked (for named semaphore).
528 		 */
529 		destroy = (ksem->ks_flags & KS_UNLINKED) ||
530 		    (ksem->ks_name == NULL);
531 	}
532 	mutex_exit(&ksem->ks_lock);
533 
534 	if (destroy) {
535 		ksem_free(ksem);
536 	}
537 	if (fd != -1) {
538 		fd_putfile(fd);
539 	}
540 }
541 
542 int
sys__ksem_init(struct lwp * l,const struct sys__ksem_init_args * uap,register_t * retval)543 sys__ksem_init(struct lwp *l, const struct sys__ksem_init_args *uap,
544     register_t *retval)
545 {
546 	/* {
547 		unsigned int value;
548 		intptr_t *idp;
549 	} */
550 
551 	return do_ksem_init(l, SCARG(uap, value), SCARG(uap, idp),
552 	    copyin, copyout);
553 }
554 
555 int
do_ksem_init(lwp_t * l,u_int val,intptr_t * idp,copyin_t docopyin,copyout_t docopyout)556 do_ksem_init(lwp_t *l, u_int val, intptr_t *idp, copyin_t docopyin,
557     copyout_t docopyout)
558 {
559 	proc_t *p = l->l_proc;
560 	ksem_t *ks;
561 	file_t *fp;
562 	intptr_t id, arg;
563 	int fd, error;
564 
565 	/*
566 	 * Newer versions of librt / libpthread pass us 'PSRD' in *idp to
567 	 * indicate that a pshared semaphore is wanted.  In that case we
568 	 * allocate globally unique ID and return that, rather than the
569 	 * process-scoped file descriptor ID.
570 	 */
571 	error = (*docopyin)(idp, &arg, sizeof(*idp));
572 	if (error) {
573 		return error;
574 	}
575 
576 	error = fd_allocfile(&fp, &fd);
577 	if (error) {
578 		return error;
579 	}
580 	fp->f_type = DTYPE_SEM;
581 	fp->f_flag = FREAD | FWRITE;
582 	fp->f_ops = &semops;
583 
584 	if (fd >= KSEM_MARKER_MIN) {
585 		/*
586 		 * This is super-unlikely, but we check for it anyway
587 		 * because potential collisions with the pshared marker
588 		 * would be bad.
589 		 */
590 		fd_abort(p, fp, fd);
591 		return EMFILE;
592 	}
593 
594 	/* Note the mode does not matter for anonymous semaphores. */
595 	error = ksem_create(l, NULL, &ks, 0, val);
596 	if (error) {
597 		fd_abort(p, fp, fd);
598 		return error;
599 	}
600 
601 	if (arg == KSEM_PSHARED) {
602 		ks->ks_pshared_proc = curproc;
603 		ks->ks_pshared_fd = fd;
604 		ksem_alloc_pshared_id(ks);
605 		id = ks->ks_pshared_id;
606 	} else {
607 		id = (intptr_t)fd;
608 	}
609 
610 	error = (*docopyout)(&id, idp, sizeof(*idp));
611 	if (error) {
612 		ksem_free(ks);
613 		fd_abort(p, fp, fd);
614 		return error;
615 	}
616 
617 	fp->f_ksem = ks;
618 	fd_affix(p, fp, fd);
619 	return error;
620 }
621 
622 int
sys__ksem_open(struct lwp * l,const struct sys__ksem_open_args * uap,register_t * retval)623 sys__ksem_open(struct lwp *l, const struct sys__ksem_open_args *uap,
624     register_t *retval)
625 {
626 	/* {
627 		const char *name;
628 		int oflag;
629 		mode_t mode;
630 		unsigned int value;
631 		intptr_t *idp;
632 	} */
633 
634 	return do_ksem_open(l, SCARG(uap, name), SCARG(uap, oflag),
635 	    SCARG(uap, mode), SCARG(uap, value), SCARG(uap, idp), copyout);
636 }
637 
638 int
do_ksem_open(struct lwp * l,const char * semname,int oflag,mode_t mode,unsigned int value,intptr_t * idp,copyout_t docopyout)639 do_ksem_open(struct lwp *l, const char *semname, int oflag, mode_t mode,
640      unsigned int value, intptr_t *idp, copyout_t docopyout)
641 {
642 	char *name;
643 	proc_t *p = l->l_proc;
644 	ksem_t *ksnew = NULL, *ks;
645 	file_t *fp;
646 	intptr_t id;
647 	int fd, error;
648 
649 	error = name_copyin(semname, &name);
650 	if (error) {
651 		return error;
652 	}
653 	error = fd_allocfile(&fp, &fd);
654 	if (error) {
655 		name_destroy(&name);
656 		return error;
657 	}
658 	fp->f_type = DTYPE_SEM;
659 	fp->f_flag = FREAD | FWRITE;
660 	fp->f_ops = &semops;
661 
662 	if (fd >= KSEM_MARKER_MIN) {
663 		/*
664 		 * This is super-unlikely, but we check for it anyway
665 		 * because potential collisions with the pshared marker
666 		 * would be bad.
667 		 */
668 		fd_abort(p, fp, fd);
669 		return EMFILE;
670 	}
671 
672 	/*
673 	 * The ID (file descriptor number) can be stored early.
674 	 * Note that zero is a special value for libpthread.
675 	 */
676 	id = (intptr_t)fd;
677 	error = (*docopyout)(&id, idp, sizeof(*idp));
678 	if (error) {
679 		goto err;
680 	}
681 
682 	if (oflag & O_CREAT) {
683 		/* Create a new semaphore. */
684 		error = ksem_create(l, name, &ksnew, mode, value);
685 		if (error) {
686 			goto err;
687 		}
688 		KASSERT(ksnew != NULL);
689 	}
690 
691 	/* Lookup for a semaphore with such name. */
692 	mutex_enter(&ksem_lock);
693 	ks = ksem_lookup(name);
694 	name_destroy(&name);
695 	if (ks) {
696 		KASSERT(mutex_owned(&ks->ks_lock));
697 		mutex_exit(&ksem_lock);
698 
699 		/* Check for exclusive create. */
700 		if (oflag & O_EXCL) {
701 			mutex_exit(&ks->ks_lock);
702 			error = EEXIST;
703 			goto err;
704 		}
705 		/*
706 		 * Verify permissions.  If we can access it,
707 		 * add the reference of this thread.
708 		 */
709 		error = ksem_perm(l, ks);
710 		if (error == 0) {
711 			ks->ks_ref++;
712 		}
713 		mutex_exit(&ks->ks_lock);
714 		if (error) {
715 			goto err;
716 		}
717 	} else {
718 		/* Fail if not found and not creating. */
719 		if ((oflag & O_CREAT) == 0) {
720 			mutex_exit(&ksem_lock);
721 			KASSERT(ksnew == NULL);
722 			error = ENOENT;
723 			goto err;
724 		}
725 
726 		/* Check for the limit locked. */
727 		if (nsems >= ksem_max) {
728 			mutex_exit(&ksem_lock);
729 			error = ENFILE;
730 			goto err;
731 		}
732 
733 		/*
734 		 * Finally, insert semaphore into the list.
735 		 * Note: it already has the initial reference.
736 		 */
737 		ks = ksnew;
738 		LIST_INSERT_HEAD(&ksem_head, ks, ks_entry);
739 		nsems++;
740 		mutex_exit(&ksem_lock);
741 
742 		ksnew = NULL;
743 	}
744 	KASSERT(ks != NULL);
745 	fp->f_ksem = ks;
746 	fd_affix(p, fp, fd);
747 err:
748 	name_destroy(&name);
749 	if (error) {
750 		fd_abort(p, fp, fd);
751 	}
752 	if (ksnew) {
753 		ksem_free(ksnew);
754 	}
755 	return error;
756 }
757 
758 int
sys__ksem_close(struct lwp * l,const struct sys__ksem_close_args * uap,register_t * retval)759 sys__ksem_close(struct lwp *l, const struct sys__ksem_close_args *uap,
760     register_t *retval)
761 {
762 	/* {
763 		intptr_t id;
764 	} */
765 	intptr_t id = SCARG(uap, id);
766 	int fd, error;
767 	ksem_t *ks;
768 
769 	error = ksem_get(id, &ks, &fd);
770 	if (error) {
771 		return error;
772 	}
773 
774 	/* This is only for named semaphores. */
775 	if (ks->ks_name == NULL) {
776 		error = EINVAL;
777 	}
778 	ksem_release(ks, -1);
779 	if (error) {
780 		if (fd != -1)
781 			fd_putfile(fd);
782 		return error;
783 	}
784 	return fd_close(fd);
785 }
786 
787 static int
ksem_read_fop(file_t * fp,off_t * offset,struct uio * uio,kauth_cred_t cred,int flags)788 ksem_read_fop(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
789     int flags)
790 {
791 	size_t len;
792 	char *name;
793 	ksem_t *ks = fp->f_ksem;
794 
795 	mutex_enter(&ks->ks_lock);
796 	len = ks->ks_namelen;
797 	name = ks->ks_name;
798 	mutex_exit(&ks->ks_lock);
799 	if (name == NULL || len == 0)
800 		return 0;
801 	return uiomove(name, len, uio);
802 }
803 
804 static int
ksem_stat_fop(file_t * fp,struct stat * ub)805 ksem_stat_fop(file_t *fp, struct stat *ub)
806 {
807 	ksem_t *ks = fp->f_ksem;
808 
809 	mutex_enter(&ks->ks_lock);
810 
811 	memset(ub, 0, sizeof(*ub));
812 
813 	ub->st_mode = ks->ks_mode | ((ks->ks_name && ks->ks_namelen)
814 	    ? _S_IFLNK : _S_IFREG);
815 	ub->st_uid = ks->ks_uid;
816 	ub->st_gid = ks->ks_gid;
817 	ub->st_size = ks->ks_value;
818 	ub->st_blocks = (ub->st_size) ? 1 : 0;
819 	ub->st_nlink = ks->ks_ref;
820 	ub->st_blksize = 4096;
821 
822 	nanotime(&ub->st_atimespec);
823 	ub->st_mtimespec = ub->st_ctimespec = ub->st_birthtimespec =
824 	    ub->st_atimespec;
825 
826 	/*
827 	 * Left as 0: st_dev, st_ino, st_rdev, st_flags, st_gen.
828 	 * XXX (st_dev, st_ino) should be unique.
829 	 */
830 	mutex_exit(&ks->ks_lock);
831 	return 0;
832 }
833 
834 static int
ksem_close_fop(file_t * fp)835 ksem_close_fop(file_t *fp)
836 {
837 	ksem_t *ks = fp->f_ksem;
838 
839 	mutex_enter(&ks->ks_lock);
840 
841 	if (ks->ks_pshared_id) {
842 		if (ks->ks_pshared_proc != curproc) {
843 			/* Do nothing if this is not the creator. */
844 			mutex_exit(&ks->ks_lock);
845 			return 0;
846 		}
847 		/* Mark this semaphore as dead. */
848 		ks->ks_pshared_proc = NULL;
849 	}
850 
851 	ksem_release(ks, -1);
852 	return 0;
853 }
854 
855 int
sys__ksem_unlink(struct lwp * l,const struct sys__ksem_unlink_args * uap,register_t * retval)856 sys__ksem_unlink(struct lwp *l, const struct sys__ksem_unlink_args *uap,
857     register_t *retval)
858 {
859 	/* {
860 		const char *name;
861 	} */
862 	char *name;
863 	ksem_t *ks;
864 	u_int refcnt;
865 	int error;
866 
867 	error = name_copyin(SCARG(uap, name), &name);
868 	if (error)
869 		return error;
870 
871 	mutex_enter(&ksem_lock);
872 	ks = ksem_lookup(name);
873 	name_destroy(&name);
874 	if (ks == NULL) {
875 		mutex_exit(&ksem_lock);
876 		return ENOENT;
877 	}
878 	KASSERT(mutex_owned(&ks->ks_lock));
879 
880 	/* Verify permissions. */
881 	error = ksem_perm(l, ks);
882 	if (error) {
883 		mutex_exit(&ks->ks_lock);
884 		mutex_exit(&ksem_lock);
885 		return error;
886 	}
887 
888 	/* Remove from the global list. */
889 	LIST_REMOVE(ks, ks_entry);
890 	nsems--;
891 	mutex_exit(&ksem_lock);
892 
893 	refcnt = ks->ks_ref;
894 	if (refcnt) {
895 		/* Mark as unlinked, if there are references. */
896 		ks->ks_flags |= KS_UNLINKED;
897 	}
898 	mutex_exit(&ks->ks_lock);
899 
900 	if (refcnt == 0) {
901 		ksem_free(ks);
902 	}
903 	return 0;
904 }
905 
906 int
sys__ksem_post(struct lwp * l,const struct sys__ksem_post_args * uap,register_t * retval)907 sys__ksem_post(struct lwp *l, const struct sys__ksem_post_args *uap,
908     register_t *retval)
909 {
910 	/* {
911 		intptr_t id;
912 	} */
913 	int fd, error;
914 	ksem_t *ks;
915 
916 	error = ksem_get(SCARG(uap, id), &ks, &fd);
917 	if (error) {
918 		return error;
919 	}
920 	KASSERT(mutex_owned(&ks->ks_lock));
921 	if (ks->ks_value == SEM_VALUE_MAX) {
922 		error = EOVERFLOW;
923 		goto out;
924 	}
925 	ks->ks_value++;
926 	if (ks->ks_waiters) {
927 		cv_broadcast(&ks->ks_cv);
928 	}
929 out:
930 	ksem_release(ks, fd);
931 	return error;
932 }
933 
934 int
do_ksem_wait(lwp_t * l,intptr_t id,bool try_p,struct timespec * abstime)935 do_ksem_wait(lwp_t *l, intptr_t id, bool try_p, struct timespec *abstime)
936 {
937 	int fd, error, timeo;
938 	ksem_t *ks;
939 
940 	error = ksem_get(id, &ks, &fd);
941 	if (error) {
942 		return error;
943 	}
944 	KASSERT(mutex_owned(&ks->ks_lock));
945 	while (ks->ks_value == 0) {
946 		ks->ks_waiters++;
947 		if (!try_p && abstime != NULL) {
948 			error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, abstime,
949 			    &timeo, NULL);
950 			if (error != 0)
951 				goto out;
952 		} else {
953 			timeo = 0;
954 		}
955 		error = try_p ? EAGAIN : cv_timedwait_sig(&ks->ks_cv,
956 		    &ks->ks_lock, timeo);
957 		ks->ks_waiters--;
958 		if (error)
959 			goto out;
960 	}
961 	ks->ks_value--;
962 out:
963 	ksem_release(ks, fd);
964 	return error;
965 }
966 
967 int
sys__ksem_wait(struct lwp * l,const struct sys__ksem_wait_args * uap,register_t * retval)968 sys__ksem_wait(struct lwp *l, const struct sys__ksem_wait_args *uap,
969     register_t *retval)
970 {
971 	/* {
972 		intptr_t id;
973 	} */
974 
975 	return do_ksem_wait(l, SCARG(uap, id), false, NULL);
976 }
977 
978 int
sys__ksem_timedwait(struct lwp * l,const struct sys__ksem_timedwait_args * uap,register_t * retval)979 sys__ksem_timedwait(struct lwp *l, const struct sys__ksem_timedwait_args *uap,
980     register_t *retval)
981 {
982 	/* {
983 		intptr_t id;
984 		const struct timespec *abstime;
985 	} */
986 	struct timespec ts;
987 	int error;
988 
989 	error = copyin(SCARG(uap, abstime), &ts, sizeof(ts));
990 	if (error != 0)
991 		return error;
992 
993 	if (ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
994 		return EINVAL;
995 
996 	error = do_ksem_wait(l, SCARG(uap, id), false, &ts);
997 	if (error == EWOULDBLOCK)
998 		error = ETIMEDOUT;
999 	return error;
1000 }
1001 
1002 int
sys__ksem_trywait(struct lwp * l,const struct sys__ksem_trywait_args * uap,register_t * retval)1003 sys__ksem_trywait(struct lwp *l, const struct sys__ksem_trywait_args *uap,
1004     register_t *retval)
1005 {
1006 	/* {
1007 		intptr_t id;
1008 	} */
1009 
1010 	return do_ksem_wait(l, SCARG(uap, id), true, NULL);
1011 }
1012 
1013 int
sys__ksem_getvalue(struct lwp * l,const struct sys__ksem_getvalue_args * uap,register_t * retval)1014 sys__ksem_getvalue(struct lwp *l, const struct sys__ksem_getvalue_args *uap,
1015     register_t *retval)
1016 {
1017 	/* {
1018 		intptr_t id;
1019 		unsigned int *value;
1020 	} */
1021 	int fd, error;
1022 	ksem_t *ks;
1023 	unsigned int val;
1024 
1025 	error = ksem_get(SCARG(uap, id), &ks, &fd);
1026 	if (error) {
1027 		return error;
1028 	}
1029 	KASSERT(mutex_owned(&ks->ks_lock));
1030 	val = ks->ks_value;
1031 	ksem_release(ks, fd);
1032 
1033 	return copyout(&val, SCARG(uap, value), sizeof(val));
1034 }
1035 
1036 int
sys__ksem_destroy(struct lwp * l,const struct sys__ksem_destroy_args * uap,register_t * retval)1037 sys__ksem_destroy(struct lwp *l, const struct sys__ksem_destroy_args *uap,
1038     register_t *retval)
1039 {
1040 	/* {
1041 		intptr_t id;
1042 	} */
1043 	int fd, error;
1044 	ksem_t *ks;
1045 
1046 	intptr_t id = SCARG(uap, id);
1047 
1048 	error = ksem_get(id, &ks, &fd);
1049 	if (error) {
1050 		return error;
1051 	}
1052 	KASSERT(mutex_owned(&ks->ks_lock));
1053 
1054 	/* Operation is only for unnamed semaphores. */
1055 	if (ks->ks_name != NULL) {
1056 		error = EINVAL;
1057 		goto out;
1058 	}
1059 	/* Cannot destroy if there are waiters. */
1060 	if (ks->ks_waiters) {
1061 		error = EBUSY;
1062 		goto out;
1063 	}
1064 	if (KSEM_ID_IS_PSHARED(id)) {
1065 		/* Cannot destroy if we did't create it. */
1066 		KASSERT(fd == -1);
1067 		KASSERT(ks->ks_pshared_proc != NULL);
1068 		if (ks->ks_pshared_proc != curproc) {
1069 			error = EINVAL;
1070 			goto out;
1071 		}
1072 		fd = ks->ks_pshared_fd;
1073 
1074 		/* Mark it dead so subsequent lookups fail. */
1075 		ks->ks_pshared_proc = NULL;
1076 
1077 		/* Do an fd_getfile() to for the benefit of fd_close(). */
1078 		file_t *fp __diagused = fd_getfile(fd);
1079 		KASSERT(fp != NULL);
1080 		KASSERT(fp->f_ksem == ks);
1081 	}
1082 out:
1083 	ksem_release(ks, -1);
1084 	if (error) {
1085 		if (!KSEM_ID_IS_PSHARED(id))
1086 			fd_putfile(fd);
1087 		return error;
1088 	}
1089 	return fd_close(fd);
1090 }
1091