1 /* 2 * Copyright (C) 2005 David Xu <davidxu@freebsd.org>. 3 * Copyright (C) 2000 Jason Evans <jasone@freebsd.org>. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice(s), this list of conditions and the following disclaimer as 11 * the first lines of this file unmodified other than the possible 12 * addition of one or more copyright notices. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice(s), this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY 19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 25 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 27 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 28 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include "namespace.h" 32 #include <machine/tls.h> 33 #include <sys/semaphore.h> 34 #include <sys/mman.h> 35 #include <sys/queue.h> 36 #include <sys/stat.h> 37 #include <sys/types.h> 38 39 #include <errno.h> 40 #include <fcntl.h> 41 #include <limits.h> 42 #include <pthread.h> 43 #include <stdarg.h> 44 #include <stdlib.h> 45 #include <string.h> 46 #include <time.h> 47 #include <unistd.h> 48 #include "un-namespace.h" 49 #include "thr_private.h" 50 51 #define container_of(ptr, type, member) \ 52 ({ \ 53 __typeof(((type *)0)->member) *_p = (ptr); \ 54 (type *)((char *)_p - offsetof(type, member)); \ 55 }) 56 57 /* 58 * Semaphore definitions. 59 */ 60 struct sem { 61 u_int32_t magic; 62 volatile umtx_t count; 63 int semid; 64 int unused; /* pad */ 65 }; 66 67 #define SEM_MAGIC ((u_int32_t) 0x09fa4012) 68 69 static char const *sem_prefix = "/var/run/sem"; 70 71 72 /* 73 * POSIX requires that two successive calls to sem_open return 74 * the same address if no call to unlink nor close have been 75 * done in the middle. For that, we keep a list of open semaphore 76 * and search for an existing one before remapping a semaphore. 77 * We have to keep the fd open to check for races. 78 * 79 * Example : 80 * sem_open("/test", O_CREAT | O_EXCL...) -> fork() -> 81 * parent : 82 * sem_unlink("/test") -> sem_open("/test", O_CREAT | O_EXCl ...) 83 * child : 84 * sem_open("/test", 0). 85 * We need to check that the cached mapping is the one of the most up 86 * to date file linked at this name, or child process will reopen the 87 * *old* version of the semaphore, which is wrong. 88 * 89 * fstat and nlink check is used to test for this race. 90 */ 91 92 struct sem_info { 93 int open_count; 94 ino_t inode; 95 dev_t dev; 96 int fd; 97 sem_t sem; 98 LIST_ENTRY(sem_info) next; 99 }; 100 101 102 103 static pthread_once_t once = PTHREAD_ONCE_INIT; 104 static pthread_mutex_t sem_lock; 105 static LIST_HEAD(,sem_info) sem_list = LIST_HEAD_INITIALIZER(sem_list); 106 107 108 #define SEMID_LWP 0 109 #define SEMID_FORK 1 110 #define SEMID_NAMED 2 111 112 static void 113 sem_prefork(void) 114 { 115 _pthread_mutex_lock(&sem_lock); 116 } 117 118 static void 119 sem_postfork(void) 120 { 121 _pthread_mutex_unlock(&sem_lock); 122 } 123 124 static void 125 sem_child_postfork(void) 126 { 127 _pthread_mutex_unlock(&sem_lock); 128 } 129 130 static void 131 sem_module_init(void) 132 { 133 pthread_mutexattr_t ma; 134 135 _pthread_mutexattr_init(&ma); 136 _pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_RECURSIVE); 137 _pthread_mutex_init(&sem_lock, &ma); 138 _pthread_mutexattr_destroy(&ma); 139 _pthread_atfork(sem_prefork, sem_postfork, sem_child_postfork); 140 } 141 142 static inline int 143 sem_check_validity(sem_t *sem) 144 { 145 146 if ((sem != NULL) && (*sem != NULL) && ((*sem)->magic == SEM_MAGIC)) { 147 return (0); 148 } else { 149 errno = EINVAL; 150 return (-1); 151 } 152 } 153 154 static sem_t 155 sem_alloc(unsigned int value, int pshared) 156 { 157 sem_t sem; 158 int semid; 159 160 if (value > SEM_VALUE_MAX) { 161 errno = EINVAL; 162 return (NULL); 163 } 164 if (pshared) { 165 static __thread sem_t sem_base; 166 static __thread int sem_count; 167 168 if (sem_base == NULL) { 169 sem_base = mmap(NULL, getpagesize(), 170 PROT_READ | PROT_WRITE, 171 MAP_ANON | MAP_SHARED, 172 -1, 0); 173 sem_count = getpagesize() / sizeof(*sem); 174 } 175 sem = sem_base++; 176 if (--sem_count == 0) 177 sem_base = NULL; 178 semid = SEMID_FORK; 179 } else { 180 sem = malloc(sizeof(struct sem)); 181 semid = SEMID_LWP; 182 } 183 if (sem == NULL) { 184 errno = ENOSPC; 185 return (NULL); 186 } 187 sem->magic = SEM_MAGIC; 188 sem->count = (u_int32_t)value; 189 sem->semid = semid; 190 return (sem); 191 } 192 193 int 194 _sem_init(sem_t *sem, int pshared, unsigned int value) 195 { 196 if (sem == NULL) { 197 errno = EINVAL; 198 return (-1); 199 } 200 201 *sem = sem_alloc(value, pshared); 202 if (*sem == NULL) 203 return (-1); 204 return (0); 205 } 206 207 int 208 _sem_destroy(sem_t *sem) 209 { 210 if (sem_check_validity(sem) != 0) { 211 errno = EINVAL; 212 return (-1); 213 } 214 215 (*sem)->magic = 0; 216 217 switch ((*sem)->semid) { 218 case SEMID_LWP: 219 free(*sem); 220 break; 221 case SEMID_FORK: 222 /* memory is left intact */ 223 break; 224 default: 225 errno = EINVAL; 226 return (-1); 227 } 228 return (0); 229 } 230 231 int 232 _sem_getvalue(sem_t * __restrict sem, int * __restrict sval) 233 { 234 if (sem_check_validity(sem) != 0) 235 return (-1); 236 237 *sval = (*sem)->count; 238 return (0); 239 } 240 241 int 242 _sem_trywait(sem_t *sem) 243 { 244 int val; 245 246 if (sem_check_validity(sem) != 0) 247 return (-1); 248 249 while ((val = (*sem)->count) > 0) { 250 if (atomic_cmpset_int(&(*sem)->count, val, val - 1)) 251 return (0); 252 } 253 errno = EAGAIN; 254 return (-1); 255 } 256 257 int 258 _sem_wait(sem_t *sem) 259 { 260 struct pthread *curthread; 261 int val, oldcancel, retval; 262 263 if (sem_check_validity(sem) != 0) 264 return (-1); 265 266 curthread = tls_get_curthread(); 267 _pthread_testcancel(); 268 do { 269 while ((val = (*sem)->count) > 0) { 270 if (atomic_cmpset_acq_int(&(*sem)->count, val, val - 1)) 271 return (0); 272 } 273 oldcancel = _thr_cancel_enter(curthread); 274 retval = _thr_umtx_wait(&(*sem)->count, 0, NULL, 0); 275 _thr_cancel_leave(curthread, oldcancel); 276 } while (retval == 0); 277 errno = retval; 278 return (-1); 279 } 280 281 int 282 _sem_timedwait(sem_t * __restrict sem, const struct timespec * __restrict abstime) 283 { 284 struct timespec ts, ts2; 285 struct pthread *curthread; 286 int val, oldcancel, retval; 287 288 if (sem_check_validity(sem) != 0) 289 return (-1); 290 291 curthread = tls_get_curthread(); 292 293 /* 294 * The timeout argument is only supposed to 295 * be checked if the thread would have blocked. 296 */ 297 _pthread_testcancel(); 298 do { 299 while ((val = (*sem)->count) > 0) { 300 if (atomic_cmpset_acq_int(&(*sem)->count, val, val - 1)) 301 return (0); 302 } 303 if (abstime == NULL || 304 abstime->tv_nsec >= 1000000000 || abstime->tv_nsec < 0) { 305 errno = EINVAL; 306 return (-1); 307 } 308 clock_gettime(CLOCK_REALTIME, &ts); 309 TIMESPEC_SUB(&ts2, abstime, &ts); 310 oldcancel = _thr_cancel_enter(curthread); 311 retval = _thr_umtx_wait(&(*sem)->count, 0, &ts2, 312 CLOCK_REALTIME); 313 _thr_cancel_leave(curthread, oldcancel); 314 } while (retval == 0); 315 errno = retval; 316 return (-1); 317 } 318 319 int 320 _sem_post(sem_t *sem) 321 { 322 int val; 323 324 if (sem_check_validity(sem) != 0) 325 return (-1); 326 327 /* 328 * sem_post() is required to be safe to call from within 329 * signal handlers, these code should work as that. 330 */ 331 do { 332 val = (*sem)->count; 333 } while (!atomic_cmpset_acq_int(&(*sem)->count, val, val + 1)); 334 _thr_umtx_wake(&(*sem)->count, val + 1); 335 return (0); 336 } 337 338 static int 339 get_path(const char *name, char *path, size_t len, char const **prefix) 340 { 341 size_t path_len; 342 343 *prefix = NULL; 344 345 if (name[0] == '/') { 346 *prefix = getenv("LIBTHREAD_SEM_PREFIX"); 347 348 if (*prefix == NULL) 349 *prefix = sem_prefix; 350 351 path_len = strlcpy(path, *prefix, len); 352 353 if (path_len > len) { 354 return (ENAMETOOLONG); 355 } 356 } 357 358 path_len = strlcat(path, name, len); 359 360 if (path_len > len) 361 return (ENAMETOOLONG); 362 363 return (0); 364 } 365 366 367 static sem_t * 368 sem_get_mapping(ino_t inode, dev_t dev) 369 { 370 struct sem_info *ni; 371 struct stat sbuf; 372 373 LIST_FOREACH(ni, &sem_list, next) { 374 if (ni->inode == inode && ni->dev == dev) { 375 /* Check for races */ 376 if(_fstat(ni->fd, &sbuf) == 0) { 377 if (sbuf.st_nlink > 0) { 378 ni->open_count++; 379 return (&ni->sem); 380 } else { 381 ni->inode = 0; 382 LIST_REMOVE(ni, next); 383 } 384 } 385 return (SEM_FAILED); 386 387 } 388 } 389 390 return (SEM_FAILED); 391 } 392 393 394 static sem_t * 395 sem_add_mapping(ino_t inode, dev_t dev, sem_t sem, int fd) 396 { 397 struct sem_info *ni; 398 399 ni = malloc(sizeof(struct sem_info)); 400 if (ni == NULL) { 401 errno = ENOSPC; 402 return (SEM_FAILED); 403 } 404 405 bzero(ni, sizeof(*ni)); 406 ni->open_count = 1; 407 ni->sem = sem; 408 ni->fd = fd; 409 ni->inode = inode; 410 ni->dev = dev; 411 412 LIST_INSERT_HEAD(&sem_list, ni, next); 413 414 return (&ni->sem); 415 } 416 417 static int 418 sem_close_mapping(sem_t *sem) 419 { 420 struct sem_info *ni; 421 422 if ((*sem)->semid != SEMID_NAMED) 423 return (EINVAL); 424 425 ni = container_of(sem, struct sem_info, sem); 426 427 if ( --ni->open_count > 0) { 428 return (0); 429 } else { 430 if (ni->inode != 0) { 431 LIST_REMOVE(ni, next); 432 } 433 munmap(ni->sem, getpagesize()); 434 __sys_close(ni->fd); 435 free(ni); 436 return (0); 437 } 438 } 439 440 sem_t * 441 _sem_open(const char *name, int oflag, ...) 442 { 443 char path[PATH_MAX]; 444 char tmppath[PATH_MAX]; 445 char const *prefix = NULL; 446 char *tmpname = NULL; 447 size_t path_len; 448 int error, fd, create; 449 sem_t *sem; 450 sem_t semtmp; 451 va_list ap; 452 mode_t mode; 453 struct stat sbuf; 454 unsigned int value = 0; 455 unsigned int retry_count; 456 457 create = 0; 458 error = 0; 459 fd = -1; 460 sem = SEM_FAILED; 461 462 oflag = oflag & (O_CREAT | O_EXCL); 463 oflag |= O_RDWR; 464 oflag |= O_CLOEXEC; 465 466 467 if (name == NULL) { 468 errno = EINVAL; 469 return (SEM_FAILED); 470 } 471 472 _pthread_once(&once, sem_module_init); 473 474 _pthread_mutex_lock(&sem_lock); 475 476 error = get_path(name, path, PATH_MAX, &prefix); 477 if (error) { 478 errno = error; 479 goto error; 480 } 481 482 retry: 483 tmpname = NULL; 484 retry_count = 10; 485 486 fd = __sys_open(path, O_RDWR | O_CLOEXEC); 487 488 if (fd > 0) { 489 490 if ((oflag & O_EXCL) == O_EXCL) { 491 __sys_close(fd); 492 errno = EEXIST; 493 goto error; 494 } 495 496 if (_fstat(fd, &sbuf) != 0) { 497 /* Bad things happened, like another thread closing our descriptor */ 498 __sys_close(fd); 499 errno = EINVAL; 500 goto error; 501 } 502 503 sem = sem_get_mapping(sbuf.st_ino, sbuf.st_dev); 504 505 if (sem != SEM_FAILED) { 506 __sys_close(fd); 507 goto done; 508 } 509 510 if ((sbuf.st_mode & S_IFREG) == 0) { 511 /* We only want regular files here */ 512 __sys_close(fd); 513 errno = EINVAL; 514 goto error; 515 } 516 } else if ((oflag & O_CREAT) && errno == ENOENT) { 517 518 va_start(ap, oflag); 519 520 mode = (mode_t) va_arg(ap, int); 521 value = (unsigned int) va_arg(ap, int); 522 523 va_end(ap); 524 525 if (value > SEM_VALUE_MAX) { 526 errno = EINVAL; 527 goto error; 528 } 529 530 strlcpy(tmppath, prefix, sizeof(tmppath)); 531 path_len = strlcat(tmppath, "/sem.XXXXXX", sizeof(tmppath)); 532 533 if (path_len > sizeof(tmppath)) { 534 errno = ENAMETOOLONG; 535 goto error; 536 } 537 538 while (retry_count-- > 0) { 539 tmpname = mktemp(tmppath); 540 541 if ( tmpname == NULL) { 542 errno = EINVAL; 543 goto error; 544 } 545 546 fd = __sys_open(tmpname, O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, mode); 547 548 if (fd > 0 || errno != EEXIST) { 549 break; 550 } 551 552 } 553 554 if (retry_count == 0) { 555 __sys_close(fd); 556 errno = ENOSPC; /* XXX POSIX does not allow for EAGAIN */ 557 goto error; 558 } 559 560 create = 1; 561 } 562 563 if (fd == -1) { 564 switch (errno) { 565 case ENOTDIR: 566 case EISDIR: 567 case EMLINK: 568 case ELOOP: 569 errno = EINVAL; 570 break; 571 case EDQUOT: 572 case EIO: 573 errno = ENOSPC; 574 break; 575 case EROFS: 576 errno = EACCES; 577 } 578 goto error; 579 } 580 581 semtmp = (sem_t) mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, 582 MAP_NOSYNC | MAP_SHARED, fd, 0); 583 584 if (semtmp == MAP_FAILED) { 585 if (errno != EACCES && errno != EMFILE) 586 errno = ENOMEM; 587 588 if (create) 589 unlink(tmpname); 590 591 __sys_close(fd); 592 goto error; 593 } 594 595 if (create) { 596 ftruncate(fd, sizeof(struct sem)); 597 semtmp->magic = SEM_MAGIC; 598 semtmp->count = (u_int32_t)value; 599 semtmp->semid = SEMID_NAMED; 600 601 if (link(tmpname, path) != 0) { 602 munmap(semtmp, getpagesize()); 603 __sys_close(fd); 604 unlink(tmpname); 605 606 if (errno == EEXIST && (oflag & O_EXCL) == 0) { 607 goto retry; 608 } 609 610 goto error; 611 } 612 unlink(tmpname); 613 614 if (_fstat(fd, &sbuf) != 0) { 615 /* Bad things happened, like another thread closing our descriptor */ 616 munmap(semtmp, getpagesize()); 617 __sys_close(fd); 618 errno = EINVAL; 619 goto error; 620 } 621 622 } 623 sem = sem_add_mapping(sbuf.st_ino, sbuf.st_dev, semtmp, fd); 624 625 done: 626 _pthread_mutex_unlock(&sem_lock); 627 return (sem); 628 629 error: 630 _pthread_mutex_unlock(&sem_lock); 631 return (SEM_FAILED); 632 633 } 634 635 int 636 _sem_close(sem_t *sem) 637 { 638 _pthread_once(&once, sem_module_init); 639 640 _pthread_mutex_lock(&sem_lock); 641 642 if (sem_check_validity(sem)) { 643 _pthread_mutex_unlock(&sem_lock); 644 errno = EINVAL; 645 return (-1); 646 } 647 648 if (sem_close_mapping(sem)) { 649 _pthread_mutex_unlock(&sem_lock); 650 errno = EINVAL; 651 return (-1); 652 } 653 _pthread_mutex_unlock(&sem_lock); 654 655 return (0); 656 } 657 658 int 659 _sem_unlink(const char *name) 660 { 661 char path[PATH_MAX]; 662 const char *prefix; 663 int error; 664 665 error = get_path(name, path, PATH_MAX, &prefix); 666 if (error) { 667 errno = error; 668 return (-1); 669 } 670 671 error = unlink(path); 672 673 if(error) { 674 if (errno != ENAMETOOLONG && errno != ENOENT) 675 errno = EACCES; 676 677 return (-1); 678 } 679 680 return (0); 681 } 682 683 __strong_reference(_sem_destroy, sem_destroy); 684 __strong_reference(_sem_getvalue, sem_getvalue); 685 __strong_reference(_sem_init, sem_init); 686 __strong_reference(_sem_trywait, sem_trywait); 687 __strong_reference(_sem_wait, sem_wait); 688 __strong_reference(_sem_timedwait, sem_timedwait); 689 __strong_reference(_sem_post, sem_post); 690 __strong_reference(_sem_open, sem_open); 691 __strong_reference(_sem_close, sem_close); 692 __strong_reference(_sem_unlink, sem_unlink); 693 694