1 /* $FreeBSD: src/sys/kern/sysv_sem.c,v 1.69 2004/03/17 09:37:13 cperciva Exp $ */ 2 3 /* 4 * Implementation of SVID semaphores 5 * 6 * Author: Daniel Boulet 7 * 8 * This software is provided ``AS IS'' without any warranties of any kind. 9 */ 10 11 #include "opt_sysvipc.h" 12 13 #include <sys/param.h> 14 #include <sys/systm.h> 15 #include <sys/sysproto.h> 16 #include <sys/kernel.h> 17 #include <sys/proc.h> 18 #include <sys/sem.h> 19 #include <sys/sysent.h> 20 #include <sys/sysctl.h> 21 #include <sys/malloc.h> 22 #include <sys/jail.h> 23 #include <sys/thread.h> 24 25 #include <sys/thread2.h> 26 27 static MALLOC_DEFINE(M_SEM, "sem", "SVID compatible semaphores"); 28 29 static void seminit (void *); 30 31 static struct sem_undo *semu_alloc (struct proc *p); 32 static int semundo_adjust (struct proc *p, int semid, int semnum, int adjval); 33 static void semundo_clear (int semid, int semnum); 34 35 /* XXX casting to (sy_call_t *) is bogus, as usual. */ 36 static sy_call_t *semcalls[] = { 37 (sy_call_t *)sys___semctl, (sy_call_t *)sys_semget, 38 (sy_call_t *)sys_semop 39 }; 40 41 static struct lwkt_token semu_token = LWKT_TOKEN_INITIALIZER(semu_token); 42 static int semtot = 0; 43 static struct semid_pool *sema; /* semaphore id pool */ 44 static TAILQ_HEAD(, sem_undo) semu_list = TAILQ_HEAD_INITIALIZER(semu_list); 45 static struct lock sema_lk; 46 47 struct sem { 48 u_short semval; /* semaphore value */ 49 pid_t sempid; /* pid of last operation */ 50 u_short semncnt; /* # awaiting semval > cval */ 51 u_short semzcnt; /* # awaiting semval = 0 */ 52 }; 53 54 /* 55 * Undo structure (one per process) 56 */ 57 struct sem_undo { 58 TAILQ_ENTRY(sem_undo) un_entry; /* linked list for semundo_clear() */ 59 struct proc *un_proc; /* owner of this structure */ 60 int un_refs; /* prevent unlink/kfree */ 61 short un_cnt; /* # of active entries */ 62 short un_unused; 63 struct undo { 64 short un_adjval; /* adjust on exit values */ 65 short un_num; /* semaphore # */ 66 int un_id; /* semid */ 67 } un_ent[1]; /* undo entries */ 68 }; 69 70 /* 71 * Configuration parameters 72 */ 73 #ifndef SEMMNI 74 #define SEMMNI 1024 /* # of semaphore identifiers */ 75 #endif 76 #ifndef SEMMNS 77 #define SEMMNS 32767 /* # of semaphores in system */ 78 #endif 79 #ifndef SEMUME 80 #define SEMUME 25 /* max # of undo entries per process */ 81 #endif 82 #ifndef SEMMNU 83 #define SEMMNU 1024 /* # of undo structures in system */ 84 /* NO LONGER USED */ 85 #endif 86 87 /* shouldn't need tuning */ 88 #ifndef SEMMAP 89 #define SEMMAP 128 /* # of entries in semaphore map */ 90 #endif 91 #ifndef SEMMSL 92 #define SEMMSL SEMMNS /* max # of semaphores per id */ 93 #endif 94 #ifndef SEMOPM 95 #define SEMOPM 100 /* max # of operations per semop call */ 96 #endif 97 98 #define SEMVMX 32767 /* semaphore maximum value */ 99 #define SEMAEM 16384 /* adjust on exit max value */ 100 101 /* 102 * Due to the way semaphore memory is allocated, we have to ensure that 103 * SEMUSZ is properly aligned. 104 */ 105 106 #define SEM_ALIGN(bytes) (((bytes) + (sizeof(long) - 1)) & ~(sizeof(long) - 1)) 107 108 /* actual size of an undo structure */ 109 #define SEMUSZ(nent) SEM_ALIGN(offsetof(struct sem_undo, un_ent[nent])) 110 111 /* 112 * semaphore info struct 113 */ 114 struct seminfo seminfo = { 115 SEMMAP, /* # of entries in semaphore map */ 116 SEMMNI, /* # of semaphore identifiers */ 117 SEMMNS, /* # of semaphores in system */ 118 SEMMNU, /* # of undo structures in system */ 119 SEMMSL, /* max # of semaphores per id */ 120 SEMOPM, /* max # of operations per semop call */ 121 SEMUME, /* max # of undo entries per process */ 122 SEMUSZ(SEMUME), /* size in bytes of undo structure */ 123 SEMVMX, /* semaphore maximum value */ 124 SEMAEM /* adjust on exit max value */ 125 }; 126 127 TUNABLE_INT("kern.ipc.semmap", &seminfo.semmap); 128 TUNABLE_INT("kern.ipc.semmni", &seminfo.semmni); 129 TUNABLE_INT("kern.ipc.semmns", &seminfo.semmns); 130 TUNABLE_INT("kern.ipc.semmnu", &seminfo.semmnu); 131 TUNABLE_INT("kern.ipc.semmsl", &seminfo.semmsl); 132 TUNABLE_INT("kern.ipc.semopm", &seminfo.semopm); 133 TUNABLE_INT("kern.ipc.semume", &seminfo.semume); 134 TUNABLE_INT("kern.ipc.semusz", &seminfo.semusz); 135 TUNABLE_INT("kern.ipc.semvmx", &seminfo.semvmx); 136 TUNABLE_INT("kern.ipc.semaem", &seminfo.semaem); 137 138 SYSCTL_INT(_kern_ipc, OID_AUTO, semmap, CTLFLAG_RW, &seminfo.semmap, 0, 139 "Number of entries in semaphore map"); 140 SYSCTL_INT(_kern_ipc, OID_AUTO, semmni, CTLFLAG_RD, &seminfo.semmni, 0, 141 "Number of semaphore identifiers"); 142 SYSCTL_INT(_kern_ipc, OID_AUTO, semmns, CTLFLAG_RD, &seminfo.semmns, 0, 143 "Total number of semaphores"); 144 SYSCTL_INT(_kern_ipc, OID_AUTO, semmnu, CTLFLAG_RD, &seminfo.semmnu, 0, 145 "Total number of undo structures"); 146 SYSCTL_INT(_kern_ipc, OID_AUTO, semmsl, CTLFLAG_RW, &seminfo.semmsl, 0, 147 "Max number of semaphores per id"); 148 SYSCTL_INT(_kern_ipc, OID_AUTO, semopm, CTLFLAG_RD, &seminfo.semopm, 0, 149 "Max number of operations per semop call"); 150 SYSCTL_INT(_kern_ipc, OID_AUTO, semume, CTLFLAG_RD, &seminfo.semume, 0, 151 "Max number of undo entries per process"); 152 SYSCTL_INT(_kern_ipc, OID_AUTO, semusz, CTLFLAG_RD, &seminfo.semusz, 0, 153 "Size in bytes of undo structure"); 154 SYSCTL_INT(_kern_ipc, OID_AUTO, semvmx, CTLFLAG_RW, &seminfo.semvmx, 0, 155 "Semaphore maximum value"); 156 SYSCTL_INT(_kern_ipc, OID_AUTO, semaem, CTLFLAG_RW, &seminfo.semaem, 0, 157 "Adjust on exit max value"); 158 159 #if 0 160 RO seminfo.semmap /* SEMMAP unused */ 161 RO seminfo.semmni 162 RO seminfo.semmns 163 RO seminfo.semmnu /* undo entries per system */ 164 RW seminfo.semmsl 165 RO seminfo.semopm /* SEMOPM unused */ 166 RO seminfo.semume 167 RO seminfo.semusz /* param - derived from SEMUME for per-proc sizeof */ 168 RO seminfo.semvmx /* SEMVMX unused - user param */ 169 RO seminfo.semaem /* SEMAEM unused - user param */ 170 #endif 171 172 static void 173 seminit(void *dummy) 174 { 175 int i; 176 177 sema = kmalloc(sizeof(struct semid_pool) * seminfo.semmni, 178 M_SEM, M_WAITOK | M_ZERO); 179 180 lockinit(&sema_lk, "semglb", 0, 0); 181 for (i = 0; i < seminfo.semmni; i++) { 182 struct semid_pool *semaptr = &sema[i]; 183 184 lockinit(&semaptr->lk, "semary", 0, 0); 185 semaptr->ds.sem_base = NULL; 186 semaptr->ds.sem_perm.mode = 0; 187 } 188 } 189 SYSINIT(sysv_sem, SI_SUB_SYSV_SEM, SI_ORDER_FIRST, seminit, NULL) 190 191 /* 192 * Entry point for all SEM calls 193 * 194 * semsys_args(int which, a2, a3, ...) (VARARGS) 195 * 196 * MPALMOSTSAFE 197 */ 198 int 199 sys_semsys(struct semsys_args *uap) 200 { 201 struct thread *td = curthread; 202 unsigned int which = (unsigned int)uap->which; 203 int error; 204 205 if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL) 206 return (ENOSYS); 207 208 if (which >= NELEM(semcalls)) 209 return (EINVAL); 210 bcopy(&uap->a2, &uap->which, 211 sizeof(struct semsys_args) - offsetof(struct semsys_args, a2)); 212 error = (*semcalls[which])(uap); 213 return (error); 214 } 215 216 /* 217 * Allocate a new sem_undo structure for a process 218 * (returns ptr to structure or NULL if no more room) 219 */ 220 static struct sem_undo * 221 semu_alloc(struct proc *p) 222 { 223 struct sem_undo *semu; 224 225 /* 226 * Allocate the semu structure and associate it with the process, 227 * as necessary. 228 */ 229 while ((semu = p->p_sem_undo) == NULL) { 230 semu = kmalloc(SEMUSZ(seminfo.semume), M_SEM, 231 M_WAITOK | M_ZERO); 232 lwkt_gettoken(&semu_token); 233 lwkt_gettoken(&p->p_token); 234 if (p->p_sem_undo == NULL) { 235 p->p_sem_undo = semu; 236 p->p_flags |= P_SYSVSEM; 237 semu->un_proc = p; 238 TAILQ_INSERT_TAIL(&semu_list, semu, un_entry); 239 } else { 240 kfree(semu, M_SEM); 241 } 242 lwkt_reltoken(&p->p_token); 243 lwkt_reltoken(&semu_token); 244 } 245 return(semu); 246 } 247 248 /* 249 * Adjust a particular entry for a particular proc 250 */ 251 static int 252 semundo_adjust(struct proc *p, int semid, int semnum, int adjval) 253 { 254 struct sem_undo *suptr; 255 struct undo *sunptr; 256 int i; 257 int error = 0; 258 259 /* 260 * Look for and remember the sem_undo if the caller doesn't 261 * provide it. 262 */ 263 suptr = semu_alloc(p); 264 lwkt_gettoken(&p->p_token); 265 266 /* 267 * Look for the requested entry and adjust it (delete if adjval becomes 268 * 0). 269 */ 270 sunptr = &suptr->un_ent[0]; 271 for (i = 0; i < suptr->un_cnt; i++, sunptr++) { 272 if (sunptr->un_id != semid || sunptr->un_num != semnum) 273 continue; 274 if (adjval == 0) 275 sunptr->un_adjval = 0; 276 else 277 sunptr->un_adjval += adjval; 278 if (sunptr->un_adjval == 0) { 279 suptr->un_cnt--; 280 if (i < suptr->un_cnt) 281 suptr->un_ent[i] = suptr->un_ent[suptr->un_cnt]; 282 } 283 goto done; 284 } 285 286 /* Didn't find the right entry - create it */ 287 if (adjval == 0) 288 goto done; 289 if (suptr->un_cnt != seminfo.semume) { 290 sunptr = &suptr->un_ent[suptr->un_cnt]; 291 suptr->un_cnt++; 292 sunptr->un_adjval = adjval; 293 sunptr->un_id = semid; 294 sunptr->un_num = semnum; 295 } else { 296 error = EINVAL; 297 } 298 done: 299 lwkt_reltoken(&p->p_token); 300 301 return (error); 302 } 303 304 /* 305 * This is rather expensive 306 */ 307 static void 308 semundo_clear(int semid, int semnum) 309 { 310 struct proc *p; 311 struct sem_undo *suptr; 312 struct sem_undo *sunext; 313 struct undo *sunptr; 314 int i; 315 316 lwkt_gettoken(&semu_token); 317 sunext = TAILQ_FIRST(&semu_list); 318 while ((suptr = sunext) != NULL) { 319 if ((p = suptr->un_proc) == NULL) { 320 suptr = TAILQ_NEXT(suptr, un_entry); 321 continue; 322 } 323 ++suptr->un_refs; 324 PHOLD(p); 325 lwkt_gettoken(&p->p_token); 326 327 sunptr = &suptr->un_ent[0]; 328 i = 0; 329 330 while (i < suptr->un_cnt) { 331 if (sunptr->un_id == semid) { 332 if (semnum == -1 || sunptr->un_num == semnum) { 333 suptr->un_cnt--; 334 if (i < suptr->un_cnt) { 335 suptr->un_ent[i] = 336 suptr->un_ent[suptr->un_cnt]; 337 /* 338 * do not increment i 339 * or sunptr after copydown. 340 */ 341 continue; 342 } 343 } 344 if (semnum != -1) 345 break; 346 } 347 ++i; 348 ++sunptr; 349 } 350 351 lwkt_reltoken(&p->p_token); 352 PRELE(p); 353 354 /* 355 * Handle deletion races 356 */ 357 sunext = TAILQ_NEXT(suptr, un_entry); 358 if (--suptr->un_refs == 0 && suptr->un_proc == NULL) { 359 KKASSERT(suptr->un_cnt == 0); 360 TAILQ_REMOVE(&semu_list, suptr, un_entry); 361 kfree(suptr, M_SEM); 362 } 363 } 364 lwkt_reltoken(&semu_token); 365 } 366 367 /* 368 * Note that the user-mode half of this passes a union, not a pointer 369 * 370 * MPALMOSTSAFE 371 */ 372 int 373 sys___semctl(struct __semctl_args *uap) 374 { 375 struct thread *td = curthread; 376 int semid = uap->semid; 377 int semnum = uap->semnum; 378 int cmd = uap->cmd; 379 union semun *arg = uap->arg; 380 union semun real_arg; 381 struct ucred *cred = td->td_ucred; 382 int i, rval, eval; 383 struct semid_ds sbuf; 384 struct semid_pool *semaptr; 385 struct semid_pool *semakptr; 386 struct sem *semptr; 387 388 #ifdef SEM_DEBUG 389 kprintf("call to semctl(%d, %d, %d, 0x%x)\n", semid, semnum, cmd, arg); 390 #endif 391 392 if (!jail_sysvipc_allowed && cred->cr_prison != NULL) 393 return (ENOSYS); 394 395 switch (cmd) { 396 case SEM_STAT: 397 /* 398 * For this command we assume semid is an array index 399 * rather than an IPC id. 400 */ 401 if (semid < 0 || semid >= seminfo.semmni) { 402 eval = EINVAL; 403 break; 404 } 405 semakptr = &sema[semid]; 406 lockmgr(&semakptr->lk, LK_EXCLUSIVE); 407 if ((semakptr->ds.sem_perm.mode & SEM_ALLOC) == 0) { 408 eval = EINVAL; 409 lockmgr(&semakptr->lk, LK_RELEASE); 410 break; 411 } 412 if ((eval = ipcperm(td->td_proc, &semakptr->ds.sem_perm, IPC_R))) { 413 lockmgr(&semakptr->lk, LK_RELEASE); 414 break; 415 } 416 bcopy(&semakptr->ds, arg->buf, sizeof(struct semid_ds)); 417 rval = IXSEQ_TO_IPCID(semid, semakptr->ds.sem_perm); 418 lockmgr(&semakptr->lk, LK_RELEASE); 419 break; 420 } 421 422 semid = IPCID_TO_IX(semid); 423 if (semid < 0 || semid >= seminfo.semmni) { 424 return(EINVAL); 425 } 426 semaptr = &sema[semid]; 427 lockmgr(&semaptr->lk, LK_EXCLUSIVE); 428 429 if ((semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0 || 430 semaptr->ds.sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { 431 lockmgr(&semaptr->lk, LK_RELEASE); 432 return(EINVAL); 433 } 434 435 eval = 0; 436 rval = 0; 437 438 switch (cmd) { 439 case IPC_RMID: 440 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_M); 441 if (eval != 0) 442 break; 443 semaptr->ds.sem_perm.cuid = cred->cr_uid; 444 semaptr->ds.sem_perm.uid = cred->cr_uid; 445 446 /* 447 * NOTE: Nobody will be waiting on the semaphores since 448 * we have an exclusive lock on semaptr->lk). 449 */ 450 lockmgr(&sema_lk, LK_EXCLUSIVE); 451 semtot -= semaptr->ds.sem_nsems; 452 kfree(semaptr->ds.sem_base, M_SEM); 453 semaptr->ds.sem_base = NULL; 454 semaptr->ds.sem_perm.mode = 0; /* clears SEM_ALLOC */ 455 lockmgr(&sema_lk, LK_RELEASE); 456 457 semundo_clear(semid, -1); 458 break; 459 460 case IPC_SET: 461 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_M); 462 if (eval) 463 break; 464 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 465 break; 466 if ((eval = copyin(real_arg.buf, (caddr_t)&sbuf, 467 sizeof(sbuf))) != 0) { 468 break; 469 } 470 semaptr->ds.sem_perm.uid = sbuf.sem_perm.uid; 471 semaptr->ds.sem_perm.gid = sbuf.sem_perm.gid; 472 semaptr->ds.sem_perm.mode = 473 (semaptr->ds.sem_perm.mode & ~0777) | 474 (sbuf.sem_perm.mode & 0777); 475 semaptr->ds.sem_ctime = time_second; 476 break; 477 478 case IPC_STAT: 479 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 480 if (eval) 481 break; 482 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 483 break; 484 eval = copyout(&semaptr->ds, real_arg.buf, 485 sizeof(struct semid_ds)); 486 break; 487 488 case GETNCNT: 489 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 490 if (eval) 491 break; 492 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 493 eval = EINVAL; 494 break; 495 } 496 rval = semaptr->ds.sem_base[semnum].semncnt; 497 break; 498 499 case GETPID: 500 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 501 if (eval) 502 break; 503 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 504 eval = EINVAL; 505 break; 506 } 507 rval = semaptr->ds.sem_base[semnum].sempid; 508 break; 509 510 case GETVAL: 511 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 512 if (eval) 513 break; 514 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 515 eval = EINVAL; 516 break; 517 } 518 rval = semaptr->ds.sem_base[semnum].semval; 519 break; 520 521 case GETALL: 522 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 523 if (eval) 524 break; 525 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 526 break; 527 for (i = 0; i < semaptr->ds.sem_nsems; i++) { 528 eval = copyout(&semaptr->ds.sem_base[i].semval, 529 &real_arg.array[i], 530 sizeof(real_arg.array[0])); 531 if (eval) 532 break; 533 } 534 break; 535 536 case GETZCNT: 537 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 538 if (eval) 539 break; 540 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 541 eval = EINVAL; 542 break; 543 } 544 rval = semaptr->ds.sem_base[semnum].semzcnt; 545 break; 546 547 case SETVAL: 548 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_W); 549 if (eval) 550 break; 551 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 552 eval = EINVAL; 553 break; 554 } 555 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 556 break; 557 558 /* 559 * Because we hold semaptr->lk exclusively we can safely 560 * modify any semptr content without acquiring its token. 561 */ 562 semptr = &semaptr->ds.sem_base[semnum]; 563 semptr->semval = real_arg.val; 564 semundo_clear(semid, semnum); 565 if (semptr->semzcnt || semptr->semncnt) 566 wakeup(semptr); 567 break; 568 569 case SETALL: 570 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_W); 571 if (eval) 572 break; 573 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 574 break; 575 /* 576 * Because we hold semaptr->lk exclusively we can safely 577 * modify any semptr content without acquiring its token. 578 */ 579 for (i = 0; i < semaptr->ds.sem_nsems; i++) { 580 semptr = &semaptr->ds.sem_base[i]; 581 eval = copyin(&real_arg.array[i], 582 (caddr_t)&semptr->semval, 583 sizeof(real_arg.array[0])); 584 if (semptr->semzcnt || semptr->semncnt) 585 wakeup(semptr); 586 if (eval != 0) 587 break; 588 } 589 semundo_clear(semid, -1); 590 break; 591 592 default: 593 eval = EINVAL; 594 break; 595 } 596 lockmgr(&semaptr->lk, LK_RELEASE); 597 598 if (eval == 0) 599 uap->sysmsg_result = rval; 600 return(eval); 601 } 602 603 /* 604 * MPALMOSTSAFE 605 */ 606 int 607 sys_semget(struct semget_args *uap) 608 { 609 struct thread *td = curthread; 610 int semid, eval; 611 int key = uap->key; 612 int nsems = uap->nsems; 613 int semflg = uap->semflg; 614 struct ucred *cred = td->td_ucred; 615 616 #ifdef SEM_DEBUG 617 kprintf("semget(0x%x, %d, 0%o)\n", key, nsems, semflg); 618 #endif 619 620 if (!jail_sysvipc_allowed && cred->cr_prison != NULL) 621 return (ENOSYS); 622 623 eval = 0; 624 625 if (key != IPC_PRIVATE) { 626 for (semid = 0; semid < seminfo.semmni; semid++) { 627 if ((sema[semid].ds.sem_perm.mode & SEM_ALLOC) == 0 || 628 sema[semid].ds.sem_perm.key != key) { 629 continue; 630 } 631 lockmgr(&sema[semid].lk, LK_EXCLUSIVE); 632 if ((sema[semid].ds.sem_perm.mode & SEM_ALLOC) == 0 || 633 sema[semid].ds.sem_perm.key != key) { 634 lockmgr(&sema[semid].lk, LK_RELEASE); 635 continue; 636 } 637 break; 638 } 639 if (semid < seminfo.semmni) { 640 /* sema[semid].lk still locked from above */ 641 #ifdef SEM_DEBUG 642 kprintf("found public key\n"); 643 #endif 644 if ((eval = ipcperm(td->td_proc, 645 &sema[semid].ds.sem_perm, 646 semflg & 0700))) { 647 lockmgr(&sema[semid].lk, LK_RELEASE); 648 goto done; 649 } 650 if (nsems > 0 && sema[semid].ds.sem_nsems < nsems) { 651 #ifdef SEM_DEBUG 652 kprintf("too small\n"); 653 #endif 654 eval = EINVAL; 655 lockmgr(&sema[semid].lk, LK_RELEASE); 656 goto done; 657 } 658 if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) { 659 #ifdef SEM_DEBUG 660 kprintf("not exclusive\n"); 661 #endif 662 eval = EEXIST; 663 lockmgr(&sema[semid].lk, LK_RELEASE); 664 goto done; 665 } 666 667 /* 668 * Return this one. 669 */ 670 lockmgr(&sema[semid].lk, LK_RELEASE); 671 goto done; 672 } 673 } 674 675 #ifdef SEM_DEBUG 676 kprintf("need to allocate the semid_ds\n"); 677 #endif 678 if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) { 679 if (nsems <= 0 || nsems > seminfo.semmsl) { 680 #ifdef SEM_DEBUG 681 kprintf("nsems out of range (0<%d<=%d)\n", 682 nsems, seminfo.semmsl); 683 #endif 684 eval = EINVAL; 685 goto done; 686 } 687 688 /* 689 * SEM_ALLOC flag cannot be set unless sema_lk is locked. 690 * semtot field also protected by sema_lk. 691 */ 692 lockmgr(&sema_lk, LK_EXCLUSIVE); 693 if (nsems > seminfo.semmns - semtot) { 694 #ifdef SEM_DEBUG 695 kprintf("not enough semaphores left " 696 "(need %d, got %d)\n", 697 nsems, seminfo.semmns - semtot); 698 #endif 699 eval = ENOSPC; 700 lockmgr(&sema_lk, LK_RELEASE); 701 goto done; 702 } 703 for (semid = 0; semid < seminfo.semmni; semid++) { 704 if ((sema[semid].ds.sem_perm.mode & SEM_ALLOC) == 0) 705 break; 706 } 707 if (semid == seminfo.semmni) { 708 #ifdef SEM_DEBUG 709 kprintf("no more semid_ds's available\n"); 710 #endif 711 eval = ENOSPC; 712 lockmgr(&sema_lk, LK_RELEASE); 713 goto done; 714 } 715 #ifdef SEM_DEBUG 716 kprintf("semid %d is available\n", semid); 717 #endif 718 lockmgr(&sema[semid].lk, LK_EXCLUSIVE); 719 sema[semid].ds.sem_perm.key = key; 720 sema[semid].ds.sem_perm.cuid = cred->cr_uid; 721 sema[semid].ds.sem_perm.uid = cred->cr_uid; 722 sema[semid].ds.sem_perm.cgid = cred->cr_gid; 723 sema[semid].ds.sem_perm.gid = cred->cr_gid; 724 sema[semid].ds.sem_perm.mode = (semflg & 0777) | SEM_ALLOC; 725 sema[semid].ds.sem_perm.seq = 726 (sema[semid].ds.sem_perm.seq + 1) & 0x7fff; 727 sema[semid].ds.sem_nsems = nsems; 728 sema[semid].ds.sem_otime = 0; 729 sema[semid].ds.sem_ctime = time_second; 730 sema[semid].ds.sem_base = kmalloc(sizeof(struct sem) * nsems, 731 M_SEM, M_WAITOK|M_ZERO); 732 semtot += nsems; 733 ++sema[semid].gen; 734 lockmgr(&sema[semid].lk, LK_RELEASE); 735 lockmgr(&sema_lk, LK_RELEASE); 736 #ifdef SEM_DEBUG 737 kprintf("sembase = 0x%x, next = 0x%x\n", 738 sema[semid].ds.sem_base, &sem[semtot]); 739 #endif 740 /* eval == 0 */ 741 } else { 742 #ifdef SEM_DEBUG 743 kprintf("didn't find it and wasn't asked to create it\n"); 744 #endif 745 eval = ENOENT; 746 } 747 748 done: 749 if (eval == 0) { 750 uap->sysmsg_result = 751 IXSEQ_TO_IPCID(semid, sema[semid].ds.sem_perm); 752 } 753 return(eval); 754 } 755 756 /* 757 * MPSAFE 758 */ 759 int 760 sys_semop(struct semop_args *uap) 761 { 762 struct thread *td = curthread; 763 int semid = uap->semid; 764 u_int nsops = uap->nsops; 765 struct sembuf sops[MAX_SOPS]; 766 struct semid_pool *semaptr; 767 struct sembuf *sopptr; 768 struct sem *semptr; 769 struct sem *xsemptr; 770 int i, j, eval; 771 int do_undos; 772 773 #ifdef SEM_DEBUG 774 kprintf("call to semop(%d, 0x%x, %u)\n", semid, sops, nsops); 775 #endif 776 if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL) 777 return (ENOSYS); 778 779 semid = IPCID_TO_IX(semid); /* Convert back to zero origin */ 780 781 if (semid < 0 || semid >= seminfo.semmni) { 782 eval = EINVAL; 783 goto done2; 784 } 785 786 wakeup_start_delayed(); 787 semaptr = &sema[semid]; 788 lockmgr(&semaptr->lk, LK_SHARED); 789 790 if ((semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0) { 791 eval = EINVAL; 792 goto done; 793 } 794 if (semaptr->ds.sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { 795 eval = EINVAL; 796 goto done; 797 } 798 799 if ((eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_W))) { 800 #ifdef SEM_DEBUG 801 kprintf("eval = %d from ipaccess\n", eval); 802 #endif 803 goto done; 804 } 805 806 if (nsops > MAX_SOPS) { 807 #ifdef SEM_DEBUG 808 kprintf("too many sops (max=%d, nsops=%u)\n", MAX_SOPS, nsops); 809 #endif 810 eval = E2BIG; 811 goto done; 812 } 813 814 if ((eval = copyin(uap->sops, &sops, nsops * sizeof(sops[0]))) != 0) { 815 #ifdef SEM_DEBUG 816 kprintf("eval = %d from copyin(%08x, %08x, %u)\n", eval, 817 uap->sops, &sops, nsops * sizeof(sops[0])); 818 #endif 819 goto done; 820 } 821 822 /* 823 * Loop trying to satisfy the vector of requests. 824 * If we reach a point where we must wait, any requests already 825 * performed are rolled back and we go to sleep until some other 826 * process wakes us up. At this point, we start all over again. 827 * 828 * This ensures that from the perspective of other tasks, a set 829 * of requests is atomic (never partially satisfied). 830 */ 831 do_undos = 0; 832 833 for (;;) { 834 long gen; 835 836 semptr = NULL; 837 838 for (i = 0; i < nsops; i++) { 839 sopptr = &sops[i]; 840 841 if (sopptr->sem_num >= semaptr->ds.sem_nsems) { 842 eval = EFBIG; 843 goto done; 844 } 845 846 semptr = &semaptr->ds.sem_base[sopptr->sem_num]; 847 lwkt_getpooltoken(semptr); 848 849 #ifdef SEM_DEBUG 850 kprintf("semop: semaptr=%x, sem_base=%x, semptr=%x, " 851 "sem[%d]=%d : op=%d, flag=%s\n", 852 semaptr, semaptr->ds.sem_base, semptr, 853 sopptr->sem_num, semptr->semval, sopptr->sem_op, 854 (sopptr->sem_flg & IPC_NOWAIT) ? "nowait" : "wait"); 855 #endif 856 857 if (sopptr->sem_op < 0) { 858 if (semptr->semval + sopptr->sem_op < 0) { 859 #ifdef SEM_DEBUG 860 kprintf("semop: can't do it now\n"); 861 #endif 862 break; 863 } else { 864 semptr->semval += sopptr->sem_op; 865 if (semptr->semval == 0 && 866 semptr->semzcnt > 0) { 867 wakeup(semptr); 868 } 869 } 870 if (sopptr->sem_flg & SEM_UNDO) 871 do_undos = 1; 872 } else if (sopptr->sem_op == 0) { 873 if (semptr->semval > 0) { 874 #ifdef SEM_DEBUG 875 kprintf("semop: not zero now\n"); 876 #endif 877 break; 878 } 879 } else { 880 semptr->semval += sopptr->sem_op; 881 if (sopptr->sem_flg & SEM_UNDO) 882 do_undos = 1; 883 if (semptr->semncnt > 0) 884 wakeup(semptr); 885 } 886 lwkt_relpooltoken(semptr); 887 } 888 889 /* 890 * Did we get through the entire vector? 891 */ 892 if (i >= nsops) 893 goto donex; 894 895 /* 896 * No, protect the semaphore request which also flags that 897 * a wakeup is needed, then release semptr since we know 898 * another process is likely going to need to access it 899 * soon. 900 */ 901 if (sopptr->sem_op == 0) 902 semptr->semzcnt++; 903 else 904 semptr->semncnt++; 905 tsleep_interlock(semptr, PCATCH); 906 lwkt_relpooltoken(semptr); 907 908 /* 909 * Rollback the semaphores we had acquired. 910 */ 911 #ifdef SEM_DEBUG 912 kprintf("semop: rollback 0 through %d\n", i-1); 913 #endif 914 for (j = 0; j < i; j++) { 915 xsemptr = &semaptr->ds.sem_base[sops[j].sem_num]; 916 lwkt_getpooltoken(xsemptr); 917 xsemptr->semval -= sops[j].sem_op; 918 if (xsemptr->semval == 0 && xsemptr->semzcnt > 0) 919 wakeup(xsemptr); 920 if (xsemptr->semval <= 0 && xsemptr->semncnt > 0) 921 wakeup(xsemptr); 922 lwkt_relpooltoken(xsemptr); 923 } 924 925 /* 926 * If the request that we couldn't satisfy has the 927 * NOWAIT flag set then return with EAGAIN. 928 */ 929 if (sopptr->sem_flg & IPC_NOWAIT) { 930 eval = EAGAIN; 931 goto done; 932 } 933 934 /* 935 * Release semaptr->lk while sleeping, allowing other 936 * semops (like SETVAL, SETALL, etc), which require an 937 * exclusive lock and might wake us up. 938 * 939 * Reload and recheck the validity of semaptr on return. 940 * Note that semptr itself might have changed too, but 941 * we've already interlocked for semptr and that is what 942 * will be woken up if it wakes up the tsleep on a MP 943 * race. 944 * 945 * gen protects against destroy/re-create races where the 946 * creds match. 947 */ 948 #ifdef SEM_DEBUG 949 kprintf("semop: good night!\n"); 950 #endif 951 gen = semaptr->gen; 952 lockmgr(&semaptr->lk, LK_RELEASE); 953 eval = tsleep(semptr, PCATCH | PINTERLOCKED, "semwait", hz); 954 lockmgr(&semaptr->lk, LK_SHARED); 955 #ifdef SEM_DEBUG 956 kprintf("semop: good morning (eval=%d)!\n", eval); 957 #endif 958 959 /* return code is checked below, after sem[nz]cnt-- */ 960 961 /* 962 * Make sure that the semaphore still exists 963 */ 964 if (semaptr->gen != gen || 965 (semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0 || 966 semaptr->ds.sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { 967 eval = EIDRM; 968 goto done; 969 } 970 971 /* 972 * The semaphore is still alive. Readjust the count of 973 * waiting processes. 974 */ 975 semptr = &semaptr->ds.sem_base[sopptr->sem_num]; 976 lwkt_getpooltoken(semptr); 977 if (sopptr->sem_op == 0) 978 semptr->semzcnt--; 979 else 980 semptr->semncnt--; 981 lwkt_relpooltoken(semptr); 982 983 /* 984 * Is it really morning, or was our sleep interrupted? 985 * (Delayed check of tsleep() return code because we 986 * need to decrement sem[nz]cnt either way.) 987 */ 988 if (eval) { 989 eval = EINTR; 990 goto done; 991 } 992 #ifdef SEM_DEBUG 993 kprintf("semop: good morning!\n"); 994 #endif 995 /* RETRY LOOP */ 996 } 997 998 donex: 999 /* 1000 * Process any SEM_UNDO requests. 1001 */ 1002 if (do_undos) { 1003 for (i = 0; i < nsops; i++) { 1004 /* 1005 * We only need to deal with SEM_UNDO's for non-zero 1006 * op's. 1007 */ 1008 int adjval; 1009 1010 if ((sops[i].sem_flg & SEM_UNDO) == 0) 1011 continue; 1012 adjval = sops[i].sem_op; 1013 if (adjval == 0) 1014 continue; 1015 eval = semundo_adjust(td->td_proc, semid, 1016 sops[i].sem_num, -adjval); 1017 if (eval == 0) 1018 continue; 1019 1020 /* 1021 * Oh-Oh! We ran out of either sem_undo's or undo's. 1022 * Rollback the adjustments to this point and then 1023 * rollback the semaphore ups and down so we can return 1024 * with an error with all structures restored. We 1025 * rollback the undo's in the exact reverse order that 1026 * we applied them. This guarantees that we won't run 1027 * out of space as we roll things back out. 1028 */ 1029 for (j = i - 1; j >= 0; j--) { 1030 if ((sops[j].sem_flg & SEM_UNDO) == 0) 1031 continue; 1032 adjval = sops[j].sem_op; 1033 if (adjval == 0) 1034 continue; 1035 if (semundo_adjust(td->td_proc, semid, 1036 sops[j].sem_num, adjval) != 0) 1037 panic("semop - can't undo undos"); 1038 } 1039 1040 for (j = 0; j < nsops; j++) { 1041 xsemptr = &semaptr->ds.sem_base[ 1042 sops[j].sem_num]; 1043 lwkt_getpooltoken(xsemptr); 1044 xsemptr->semval -= sops[j].sem_op; 1045 if (xsemptr->semval == 0 && 1046 xsemptr->semzcnt > 0) 1047 wakeup(xsemptr); 1048 if (xsemptr->semval <= 0 && 1049 xsemptr->semncnt > 0) 1050 wakeup(xsemptr); 1051 lwkt_relpooltoken(xsemptr); 1052 } 1053 1054 #ifdef SEM_DEBUG 1055 kprintf("eval = %d from semundo_adjust\n", eval); 1056 #endif 1057 goto done; 1058 } /* loop through the sops */ 1059 } /* if (do_undos) */ 1060 1061 /* We're definitely done - set the sempid's */ 1062 for (i = 0; i < nsops; i++) { 1063 sopptr = &sops[i]; 1064 semptr = &semaptr->ds.sem_base[sopptr->sem_num]; 1065 lwkt_getpooltoken(semptr); 1066 semptr->sempid = td->td_proc->p_pid; 1067 lwkt_relpooltoken(semptr); 1068 } 1069 1070 /* Do a wakeup if any semaphore was up'd. */ 1071 #ifdef SEM_DEBUG 1072 kprintf("semop: done\n"); 1073 #endif 1074 uap->sysmsg_result = 0; 1075 eval = 0; 1076 done: 1077 lockmgr(&semaptr->lk, LK_RELEASE); 1078 wakeup_end_delayed(); 1079 done2: 1080 return(eval); 1081 } 1082 1083 /* 1084 * Go through the undo structures for this process and apply the adjustments to 1085 * semaphores. 1086 * 1087 * (p->p_token is held by the caller) 1088 */ 1089 void 1090 semexit(struct proc *p) 1091 { 1092 struct sem_undo *suptr; 1093 struct sem *semptr; 1094 1095 /* 1096 * We're getting a global token, don't do it if we couldn't 1097 * possibly have any semaphores. 1098 */ 1099 if ((p->p_flags & P_SYSVSEM) == 0) 1100 return; 1101 suptr = p->p_sem_undo; 1102 KKASSERT(suptr != NULL); 1103 1104 /* 1105 * Disconnect suptr from the process and increment un_refs to 1106 * prevent anyone else from being able to destroy the structure. 1107 * Do not remove it from the linked list until after we are through 1108 * scanning it as other semaphore calls might still effect it. 1109 */ 1110 lwkt_gettoken(&semu_token); 1111 p->p_sem_undo = NULL; 1112 p->p_flags &= ~P_SYSVSEM; 1113 suptr->un_proc = NULL; 1114 ++suptr->un_refs; 1115 lwkt_reltoken(&semu_token); 1116 1117 while (suptr->un_cnt) { 1118 struct semid_pool *semaptr; 1119 int semid; 1120 int semnum; 1121 int adjval; 1122 int ix; 1123 1124 /* 1125 * These values are stable because we hold p->p_token. 1126 * However, they can get ripped out from under us when 1127 * we block or obtain other tokens so we have to re-check. 1128 */ 1129 ix = suptr->un_cnt - 1; 1130 semid = suptr->un_ent[ix].un_id; 1131 semnum = suptr->un_ent[ix].un_num; 1132 adjval = suptr->un_ent[ix].un_adjval; 1133 1134 semaptr = &sema[semid]; 1135 if ((semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0) 1136 panic("semexit - semid not allocated"); 1137 if (semnum >= semaptr->ds.sem_nsems) 1138 panic("semexit - semnum out of range"); 1139 1140 /* 1141 * Recheck after locking, then execute the undo 1142 * operation. semptr remains valid due to the 1143 * semaptr->lk. 1144 */ 1145 lockmgr(&semaptr->lk, LK_SHARED); 1146 semptr = &semaptr->ds.sem_base[semnum]; 1147 lwkt_getpooltoken(semptr); 1148 1149 if (ix == suptr->un_cnt - 1 && 1150 semid == suptr->un_ent[ix].un_id && 1151 semnum == suptr->un_ent[ix].un_num && 1152 adjval == suptr->un_ent[ix].un_adjval) { 1153 --suptr->un_cnt; 1154 1155 if (adjval < 0) { 1156 if (semptr->semval < -adjval) 1157 semptr->semval = 0; 1158 else 1159 semptr->semval += adjval; 1160 } else { 1161 semptr->semval += adjval; 1162 } 1163 wakeup(semptr); 1164 } 1165 lwkt_relpooltoken(semptr); 1166 lockmgr(&semaptr->lk, LK_RELEASE); 1167 } 1168 1169 /* 1170 * Final cleanup, remove from the list and deallocate on the 1171 * last ref only. 1172 */ 1173 lwkt_gettoken(&semu_token); 1174 if (--suptr->un_refs == 0) { 1175 TAILQ_REMOVE(&semu_list, suptr, un_entry); 1176 KKASSERT(suptr->un_cnt == 0); 1177 kfree(suptr, M_SEM); 1178 } 1179 lwkt_reltoken(&semu_token); 1180 } 1181