xref: /dragonfly/lib/libthread_xu/thread/thr_sem.c (revision c9c5aa9e)
1 /*
2  * Copyright (C) 2005 David Xu <davidxu@freebsd.org>.
3  * Copyright (C) 2000 Jason Evans <jasone@freebsd.org>.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice(s), this list of conditions and the following disclaimer as
11  *    the first lines of this file unmodified other than the possible
12  *    addition of one or more copyright notices.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice(s), this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
28  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include "namespace.h"
32 #include <machine/tls.h>
33 #include <sys/mman.h>
34 #include <sys/queue.h>
35 #include <sys/stat.h>
36 #include <sys/types.h>
37 #include <errno.h>
38 #include <fcntl.h>
39 #include <limits.h>
40 #include <pthread.h>
41 #include <semaphore.h>
42 #include <stdarg.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <time.h>
46 #include <unistd.h>
47 #ifdef _PTHREADS_DEBUGGING
48 #include <stdio.h>
49 #endif
50 #include "un-namespace.h"
51 
52 #include "thr_private.h"
53 
54 #define cpu_ccfence()        __asm __volatile("" : : : "memory")
55 
56 #define container_of(ptr, type, member)				\
57 ({								\
58 	__typeof(((type *)0)->member) *_p = (ptr);		\
59 	(type *)((char *)_p - offsetof(type, member));		\
60 })
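
/*
 * For illustration: given a pointer to the 'sem' member of a struct
 * sem_info (defined below), container_of() recovers the enclosing
 * structure, e.g.
 *
 *	struct sem_info *ni = container_of(semp, struct sem_info, sem);
 *
 * which is how sem_close_mapping() finds its bookkeeping record.  The
 * name 'semp' here is only an illustrative sem_t pointer, not a name
 * used elsewhere in this file.
 */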
61 
62 /*
63  * Semaphore definitions.
64  */
65 struct sem {
66 	volatile umtx_t		count;
67 	u_int32_t		magic;
68 	int			semid;
69 	int			unused; /* pad */
70 } __cachealign;
71 
72 #define	SEM_MAGIC	((u_int32_t) 0x09fa4012)
73 
74 static char const *sem_prefix = "/var/run/sem";
75 
76 
77 /*
78  * POSIX requires that two successive calls to sem_open() return
79  * the same address if neither sem_unlink() nor sem_close() has been
80  * called in between.  To honor that, we keep a list of open
81  * semaphores and search it for an existing entry before remapping
82  * a semaphore.  We have to keep the fd open to check for races.
83  *
84  * Example:
85  * sem_open("/test", O_CREAT | O_EXCL...) -> fork() ->
86  * parent:
87  *   sem_unlink("/test") -> sem_open("/test", O_CREAT | O_EXCL ...)
88  * child:
89  *   sem_open("/test", 0).
90  * We need to check that the cached mapping refers to the most
91  * up-to-date file linked at this name, or the child process would
92  * reopen the *old* version of the semaphore, which is wrong.
93  *
94  * An fstat() of the cached fd and its st_nlink count detect this race.
95  */
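
/*
 * A minimal sketch of that scenario (illustrative only, error checking
 * omitted); the child's sem_open() must observe the file currently
 * linked at "/test", not a stale cached mapping:
 *
 *	sem_t *s = sem_open("/test", O_CREAT | O_EXCL, 0600, 0);
 *	if (fork() == 0) {
 *		s = sem_open("/test", 0);			(child)
 *	} else {
 *		sem_unlink("/test");				(parent)
 *		s = sem_open("/test", O_CREAT | O_EXCL, 0600, 0);
 *	}
 */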
96 
97 struct sem_info {
98 	int open_count;
99 	ino_t inode;
100 	dev_t dev;
101 	int fd;
102 	sem_t sem;
103 	LIST_ENTRY(sem_info) next;
104 };
105 
106 static pthread_mutex_t sem_lock;
107 static LIST_HEAD(,sem_info) sem_list = LIST_HEAD_INITIALIZER(sem_list);
108 
109 #ifdef _PTHREADS_DEBUGGING
110 
111 static
112 void
113 sem_log(const char *ctl, ...)
114 {
115 	char buf[256];
116 	va_list va;
117 	size_t len;
118 
119 	va_start(va, ctl);
120 	len = vsnprintf(buf, sizeof(buf), ctl, va);
121 	va_end(va);
122 	_thr_log(buf, len);
123 }
124 
125 #else
126 
127 static __inline
128 void
129 sem_log(const char *ctl __unused, ...)
130 {
131 }
132 
133 #endif
134 
135 #define SEMID_LWP	0	/* process-private (__malloc'd) */
136 #define SEMID_FORK	1	/* pshared anonymous shared mapping */
137 #define SEMID_NAMED	2	/* named, file-backed (sem_open) */
138 
139 static void
140 sem_prefork(void)
141 {
142 	_pthread_mutex_lock(&sem_lock);
143 }
144 
145 static void
146 sem_postfork(void)
147 {
148 	_pthread_mutex_unlock(&sem_lock);
149 }
150 
151 static void
152 sem_child_postfork(void)
153 {
154 	_pthread_mutex_unlock(&sem_lock);
155 }
156 
157 void
158 _thr_sem_init(void)
159 {
160 	pthread_mutexattr_t ma;
161 
162 	_pthread_mutexattr_init(&ma);
163 	_pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_RECURSIVE);
164 	_pthread_mutex_init(&sem_lock, &ma);
165 	_pthread_mutexattr_destroy(&ma);
166 	_thr_atfork_kern(sem_prefork, sem_postfork, sem_child_postfork);
167 }
168 
169 static inline int
170 sem_check_validity(sem_t *sem)
171 {
172 
173 	if ((sem != NULL) && (*sem != NULL) && ((*sem)->magic == SEM_MAGIC)) {
174 		return (0);
175 	} else {
176 		errno = EINVAL;
177 		return (-1);
178 	}
179 }
180 
181 static sem_t
182 sem_alloc(unsigned int value, int pshared)
183 {
184 	sem_t sem;
185 	int semid;
186 
187 	if (value > SEM_VALUE_MAX) {
188 		errno = EINVAL;
189 		return (NULL);
190 	}
191 	if (pshared) {
192 		static __thread sem_t sem_base;
193 		static __thread int sem_count;
194 
195 		if (sem_base == NULL) {
196 			sem_base = mmap(NULL, getpagesize(),
197 					PROT_READ | PROT_WRITE,
198 					MAP_ANON | MAP_SHARED,
199 					-1, 0);
200 			sem_count = getpagesize() / sizeof(*sem);
201 		}
202 		sem = sem_base++;
203 		if (--sem_count == 0)
204 			sem_base = NULL;
205 		semid = SEMID_FORK;
206 	} else {
207 		sem = __malloc(sizeof(struct sem));
208 		semid = SEMID_LWP;
209 	}
210 	if (sem == NULL) {
211 		errno = ENOSPC;
212 		return (NULL);
213 	}
214 	sem->magic = SEM_MAGIC;
215 	sem->count = (u_int32_t)value;
216 	sem->semid = semid;
217 
218 	sem_log("sem_alloc %p (%d)\n", sem, value);
219 
220 	return (sem);
221 }
222 
223 int
224 _sem_init(sem_t *sem, int pshared, unsigned int value)
225 {
226 	if (sem == NULL) {
227 		errno = EINVAL;
228 		return (-1);
229 	}
230 
231 	*sem = sem_alloc(value, pshared);
232 	if (*sem == NULL)
233 		return (-1);
234 	return (0);
235 }
236 
237 int
238 _sem_destroy(sem_t *sem)
239 {
240 	if (sem_check_validity(sem) != 0) {
241 		errno = EINVAL;
242 		return (-1);
243 	}
244 
245 	(*sem)->magic = 0;
246 
247 	switch ((*sem)->semid) {
248 		case SEMID_LWP:
249 			__free(*sem);
250 			break;
251 		case SEMID_FORK:
252 			/* memory is left intact */
253 			break;
254 		default:
255 			errno = EINVAL;
256 			return (-1);
257 	}
258 	return (0);
259 }
260 
261 int
262 _sem_getvalue(sem_t * __restrict sem, int * __restrict sval)
263 {
264 	if (sem_check_validity(sem) != 0) {
265 		errno = EINVAL;
266 		return (-1);
267 	}
268 	*sval = (*sem)->count;
269 
270 	return (0);
271 }
272 
273 int
274 _sem_trywait(sem_t *sem)
275 {
276 	int val;
277 
278 	if (sem_check_validity(sem) != 0) {
279 		errno = EINVAL;
280 		return (-1);
281 	}
282 
283 	sem_log("sem_trywait %p %d\n", *sem, (*sem)->count);
284 	while ((val = (*sem)->count) > 0) {
285 		cpu_ccfence();
286 		if (atomic_cmpset_int(&(*sem)->count, val, val - 1)) {
287 			sem_log("sem_trywait %p %d (success)\n", *sem, val - 1);
288 			return (0);
289 		}
290 	}
291 	errno = EAGAIN;
292 	sem_log("sem_trywait %p %d (failure)\n", *sem, val);
293 	return (-1);
294 }
295 
296 int
297 _sem_wait(sem_t *sem)
298 {
299 	pthread_t curthread;
300 	int val, oldcancel, retval;
301 
302 	if (sem_check_validity(sem) != 0) {
303 		errno = EINVAL;
304 		return (-1);
305 	}
306 
307 	curthread = tls_get_curthread();
308 	_pthread_testcancel();
309 
310 	sem_log("sem_wait %p %d (begin)\n", *sem, (*sem)->count);
311 
312 	do {
313 		cpu_ccfence();
314 		while ((val = (*sem)->count) > 0) {
315 			cpu_ccfence();
316 			if (atomic_cmpset_acq_int(&(*sem)->count, val, val - 1)) {
317 				sem_log("sem_wait %p %d (success)\n",
318 					*sem, val - 1);
319 				return (0);
320 			}
321 		}
322 		oldcancel = _thr_cancel_enter(curthread);
323 		sem_log("sem_wait %p %d (wait)\n", *sem, val);
324 		retval = _thr_umtx_wait_intr(&(*sem)->count, 0);
325 		sem_log("sem_wait %p %d (wait return %d)\n",
326 			*sem, (*sem)->count, retval);
327 		_thr_cancel_leave(curthread, oldcancel);
328 		/* retry unless the wait was interrupted */
329 	} while (retval != EINTR);
330 
331 	sem_log("sem_wait %p %d (error %d)\n", *sem, (*sem)->count, retval);
332 	errno = retval;
333 
334 	return (-1);
335 }
336 
337 int
338 _sem_timedwait(sem_t * __restrict sem, const struct timespec * __restrict abstime)
339 {
340 	struct timespec ts, ts2;
341 	pthread_t curthread;
342 	int val, oldcancel, retval;
343 
344 	if (sem_check_validity(sem) != 0)
345 		return (-1);
346 
347 	curthread = tls_get_curthread();
348 	_pthread_testcancel();
349 	sem_log("sem_timedwait %p %d (begin)\n", *sem, (*sem)->count);
350 
351 	/*
352 	 * The timeout argument is only supposed to
353 	 * be checked if the thread would have blocked.
354 	 */
355 	do {
356 		while ((val = (*sem)->count) > 0) {
357 			cpu_ccfence();
358 			if (atomic_cmpset_acq_int(&(*sem)->count, val, val - 1)) {
359 				sem_log("sem_wait %p %d (success)\n",
360 					*sem, val - 1);
361 				return (0);
362 			}
363 		}
364 		if (abstime == NULL ||
365 		    abstime->tv_nsec >= 1000000000 ||
366 		    abstime->tv_nsec < 0) {
367 			sem_log("sem_wait %p %d (bad abstime)\n", *sem, val);
368 			errno = EINVAL;
369 			return (-1);
370 		}
371 		clock_gettime(CLOCK_REALTIME, &ts);
372 		timespecsub(abstime, &ts, &ts2);
373 		oldcancel = _thr_cancel_enter(curthread);
374 		sem_log("sem_wait %p %d (wait)\n", *sem, val);
375 		retval = _thr_umtx_wait(&(*sem)->count, 0, &ts2,
376 					CLOCK_REALTIME);
377 		sem_log("sem_wait %p %d (wait return %d)\n",
378 			*sem, (*sem)->count, retval);
379 		_thr_cancel_leave(curthread, oldcancel);
380 	} while (retval != ETIMEDOUT && retval != EINTR);
381 
382 	sem_log("sem_wait %p %d (error %d)\n", *sem, (*sem)->count, retval);
383 	errno = retval;
384 
385 	return (-1);
386 }
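
/*
 * Usage sketch (illustrative only): the abstime argument is an absolute
 * CLOCK_REALTIME deadline, not a relative interval.  For example, to
 * wait at most two seconds on an already-initialized unnamed semaphore
 * 'sem':
 *
 *	struct timespec abstime;
 *
 *	clock_gettime(CLOCK_REALTIME, &abstime);
 *	abstime.tv_sec += 2;
 *	if (sem_timedwait(&sem, &abstime) == -1 && errno == ETIMEDOUT)
 *		...				(the two seconds elapsed)
 */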
387 
388 int
389 _sem_post(sem_t *sem)
390 {
391 	int val;
392 
393 	if (sem_check_validity(sem) != 0)
394 		return (-1);
395 
396 	/*
397 	 * sem_post() is required to be async-signal-safe, so this
398 	 * code must remain safe to call from within signal handlers.
399 	 */
400 	val = atomic_fetchadd_int(&(*sem)->count, 1) + 1;
401 	sem_log("sem_post %p %d\n", *sem, val);
402 	_thr_umtx_wake(&(*sem)->count, 0);
403 
404 	return (0);
405 }
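
/*
 * Usage sketch (illustrative only, error checking omitted): an unnamed,
 * process-private semaphore handing work from one thread to another.
 * Assumes <pthread.h> and <semaphore.h>; the names are hypothetical.
 *
 *	static sem_t ready;
 *
 *	static void *
 *	consumer(void *arg)
 *	{
 *		sem_wait(&ready);		(blocks until posted)
 *		return (NULL);
 *	}
 *
 *	...
 *	pthread_t td;
 *
 *	sem_init(&ready, 0, 0);			(pshared == 0, count 0)
 *	pthread_create(&td, NULL, consumer, NULL);
 *	sem_post(&ready);			(wakes the consumer)
 *	pthread_join(td, NULL);
 *	sem_destroy(&ready);
 */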
406 
407 static int
408 get_path(const char *name, char *path, size_t len, char const **prefix)
409 {
410 	size_t path_len;
411 
412 	*prefix = NULL;
413 	path[0] = '\0';		/* strlcat() below needs an initialized buffer */
414 	if (name[0] == '/') {
415 		*prefix = getenv("LIBTHREAD_SEM_PREFIX");
416 
417 		if (*prefix == NULL)
418 			*prefix = sem_prefix;
419 
420 		path_len = strlcpy(path, *prefix, len);
421 
422 		if (path_len >= len) {
423 			return (ENAMETOOLONG);
424 		}
425 	}
426 
427 	path_len = strlcat(path, name, len);
428 
429 	if (path_len >= len)
430 		return (ENAMETOOLONG);
431 
432 	return (0);
433 }
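
/*
 * Example: with the default prefix, get_path() maps the name "/test" to
 * "/var/run/sem/test"; setting LIBTHREAD_SEM_PREFIX in the environment
 * substitutes that prefix instead.
 */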
434 
435 
436 static sem_t *
437 sem_get_mapping(ino_t inode, dev_t dev)
438 {
439 	struct sem_info *ni;
440 	struct stat sbuf;
441 
442 	LIST_FOREACH(ni, &sem_list, next) {
443 		if (ni->inode == inode && ni->dev == dev) {
444 			/* Check for races */
445 			if(_fstat(ni->fd, &sbuf) == 0) {
446 				if (sbuf.st_nlink > 0) {
447 					ni->open_count++;
448 					return (&ni->sem);
449 				} else {
450 					ni->inode = 0;
451 					LIST_REMOVE(ni, next);
452 				}
453 			}
454 			return (SEM_FAILED);
455 
456 		}
457 	}
458 
459 	return (SEM_FAILED);
460 }
461 
462 
463 static sem_t *
464 sem_add_mapping(ino_t inode, dev_t dev, sem_t sem, int fd)
465 {
466 	struct sem_info *ni;
467 
468 	ni = __malloc(sizeof(struct sem_info));
469 	if (ni == NULL) {
470 		errno = ENOSPC;
471 		return (SEM_FAILED);
472 	}
473 
474 	bzero(ni, sizeof(*ni));
475 	ni->open_count = 1;
476 	ni->sem = sem;
477 	ni->fd = fd;
478 	ni->inode = inode;
479 	ni->dev = dev;
480 
481 	LIST_INSERT_HEAD(&sem_list, ni, next);
482 
483 	return (&ni->sem);
484 }
485 
486 static int
487 sem_close_mapping(sem_t *sem)
488 {
489 	struct sem_info *ni;
490 
491 	if ((*sem)->semid != SEMID_NAMED)
492 		return (EINVAL);
493 
494 	ni = container_of(sem, struct sem_info, sem);
495 
496 	if (--ni->open_count > 0) {
497 		return (0);
498 	} else {
499 		if (ni->inode != 0) {
500 			LIST_REMOVE(ni, next);
501 		}
502 		munmap(ni->sem, getpagesize());
503 		__sys_close(ni->fd);
504 		__free(ni);
505 		return (0);
506 	}
507 }
508 
509 sem_t *
510 _sem_open(const char *name, int oflag, ...)
511 {
512 	char path[PATH_MAX];
513 	char tmppath[PATH_MAX];
514 	char const *prefix = NULL;
515 	size_t path_len;
516 	int error, fd, create;
517 	sem_t *sem;
518 	sem_t semtmp;
519 	va_list ap;
520 	mode_t mode;
521 	struct stat sbuf;
522 	unsigned int value = 0;
523 
524 	create = 0;
525 	error = 0;
526 	fd = -1;
527 	sem = SEM_FAILED;
528 
529 	/*
530 	 * Bail out if invalid flags specified.
531 	 */
532 	if (oflag & ~(O_CREAT|O_EXCL)) {
533 		errno = EINVAL;
534 		return (SEM_FAILED);
535 	}
536 
537 	oflag |= O_RDWR;
538 	oflag |= O_CLOEXEC;
539 
540 	if (name == NULL) {
541 		errno = EINVAL;
542 		return (SEM_FAILED);
543 	}
544 
545 	_pthread_mutex_lock(&sem_lock);
546 
547 	error = get_path(name, path, PATH_MAX, &prefix);
548 	if (error) {
549 		errno = error;
550 		goto error;
551 	}
552 
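	/*
	 * Open an existing semaphore file or create a new one.  Creation
	 * builds a fully initialized semaphore in a private mkstemp() file
	 * and then publishes it atomically with link(); if link() fails
	 * with EEXIST another process created the name first and the open
	 * is retried from here.
	 */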
553 retry:
554 	fd = __sys_open(path, O_RDWR | O_CLOEXEC);
555 
556 	if (fd >= 0) {
557 
558 		if ((oflag & O_EXCL) == O_EXCL) {
559 			__sys_close(fd);
560 			errno = EEXIST;
561 			goto error;
562 		}
563 
564 		if (_fstat(fd, &sbuf) != 0) {
565 			/* Bad things happened, like another thread closing our descriptor */
566 			__sys_close(fd);
567 			errno = EINVAL;
568 			goto error;
569 		}
570 
571 		sem = sem_get_mapping(sbuf.st_ino, sbuf.st_dev);
572 
573 		if (sem != SEM_FAILED) {
574 			__sys_close(fd);
575 			goto done;
576 		}
577 
578 		if ((sbuf.st_mode & S_IFREG) == 0) {
579 			/* We only want regular files here */
580 			__sys_close(fd);
581 			errno = EINVAL;
582 			goto error;
583 		}
584 	} else if ((oflag & O_CREAT) && errno == ENOENT) {
585 
586 		va_start(ap, oflag);
587 
588 		mode = (mode_t) va_arg(ap, int);
589 		value = (unsigned int) va_arg(ap, int);
590 
591 		va_end(ap);
592 
593 		if (value > SEM_VALUE_MAX) {
594 			errno = EINVAL;
595 			goto error;
596 		}
597 
598 		strlcpy(tmppath, prefix, sizeof(tmppath));
599 		path_len = strlcat(tmppath, "/sem.XXXXXX", sizeof(tmppath));
600 
601 		if (path_len >= sizeof(tmppath)) {
602 			errno = ENAMETOOLONG;
603 			goto error;
604 		}
605 
606 
607 		fd = mkstemp(tmppath);
608 
609 		if (fd == -1) {
610 			errno = EINVAL;
611 			goto error;
612 		}
613 
614 		error = fchmod(fd, mode);
615 		if (error == -1) {
616 			__sys_close(fd);
617 			errno = EINVAL;
618 			goto error;
619 		}
620 
621 		error = __sys_fcntl(fd, F_SETFD, FD_CLOEXEC);
622 		if (error == -1) {
623 			__sys_close(fd);
624 			errno = EINVAL;
625 			goto error;
626 		}
627 
628 		create = 1;
629 	}
630 
631 	if (fd == -1) {
632 		switch (errno) {
633 			case ENOTDIR:
634 			case EISDIR:
635 			case EMLINK:
636 			case ELOOP:
637 				errno = EINVAL;
638 				break;
639 			case EDQUOT:
640 			case EIO:
641 				errno = ENOSPC;
642 				break;
643 			case EROFS:
644 				errno = EACCES;
645 		}
646 		goto error;
647 	}
648 
649 	semtmp = (sem_t) mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
650 			MAP_NOSYNC | MAP_SHARED, fd, 0);
651 
652 	if (semtmp == MAP_FAILED) {
653 		if (errno != EACCES && errno != EMFILE)
654 			errno = ENOMEM;
655 
656 		if (create)
657 			_unlink(tmppath);
658 
659 		__sys_close(fd);
660 		goto error;
661 	}
662 
663 	if (create) {
664 		ftruncate(fd, sizeof(struct sem));
665 		semtmp->magic = SEM_MAGIC;
666 		semtmp->count = (u_int32_t)value;
667 		semtmp->semid = SEMID_NAMED;
668 
669 		if (link(tmppath, path) != 0) {
670 			munmap(semtmp, getpagesize());
671 			__sys_close(fd);
672 			_unlink(tmppath);
673 
674 			if (errno == EEXIST && (oflag & O_EXCL) == 0) {
675 				goto retry;
676 			}
677 
678 			goto error;
679 		}
680 		_unlink(tmppath);
681 
682 		if (_fstat(fd, &sbuf) != 0) {
683 			/* Bad things happened, like another thread closing our descriptor */
684 			munmap(semtmp, getpagesize());
685 			__sys_close(fd);
686 			errno = EINVAL;
687 			goto error;
688 		}
689 
690 	}
691 	sem = sem_add_mapping(sbuf.st_ino, sbuf.st_dev, semtmp, fd);
692 
693 done:
694 	_pthread_mutex_unlock(&sem_lock);
695 	return (sem);
696 
697 error:
698 	_pthread_mutex_unlock(&sem_lock);
699 	return (SEM_FAILED);
700 
701 }
702 
703 int
704 _sem_close(sem_t *sem)
705 {
706 	_pthread_mutex_lock(&sem_lock);
707 
708 	if (sem_check_validity(sem)) {
709 		_pthread_mutex_unlock(&sem_lock);
710 		errno = EINVAL;
711 		return (-1);
712 	}
713 
714 	if (sem_close_mapping(sem)) {
715 		_pthread_mutex_unlock(&sem_lock);
716 		errno = EINVAL;
717 		return (-1);
718 	}
719 	_pthread_mutex_unlock(&sem_lock);
720 
721 	return (0);
722 }
723 
724 int
725 _sem_unlink(const char *name)
726 {
727 	char path[PATH_MAX];
728 	const char *prefix;
729 	int error;
730 
731 	error = get_path(name, path, PATH_MAX, &prefix);
732 	if (error) {
733 		errno = error;
734 		return (-1);
735 	}
736 
737 	error = _unlink(path);
738 
739 	if(error) {
740 		if (errno != ENAMETOOLONG && errno != ENOENT)
741 			errno = EACCES;
742 
743 		return (-1);
744 	}
745 
746 	return (0);
747 }
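
/*
 * Usage sketch (illustrative only, error checking omitted): a named
 * semaphore shared between otherwise unrelated processes; the name
 * "/mysem" is hypothetical.
 *
 *	sem_t *sp;
 *
 *	sp = sem_open("/mysem", O_CREAT, 0600, 1);	(create or attach)
 *	sem_wait(sp);
 *	...					(exclusive section)
 *	sem_post(sp);
 *	sem_close(sp);
 *	sem_unlink("/mysem");			(remove the name)
 */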
748 
749 __strong_reference(_sem_destroy, sem_destroy);
750 __strong_reference(_sem_getvalue, sem_getvalue);
751 __strong_reference(_sem_init, sem_init);
752 __strong_reference(_sem_trywait, sem_trywait);
753 __strong_reference(_sem_wait, sem_wait);
754 __strong_reference(_sem_timedwait, sem_timedwait);
755 __strong_reference(_sem_post, sem_post);
756 __strong_reference(_sem_open, sem_open);
757 __strong_reference(_sem_close, sem_close);
758 __strong_reference(_sem_unlink, sem_unlink);
759