1 /* $FreeBSD: src/sys/kern/sysv_sem.c,v 1.69 2004/03/17 09:37:13 cperciva Exp $ */ 2 3 /* 4 * Implementation of SVID semaphores 5 * 6 * Author: Daniel Boulet 7 * 8 * This software is provided ``AS IS'' without any warranties of any kind. 9 */ 10 11 #include "opt_sysvipc.h" 12 13 #include <sys/param.h> 14 #include <sys/systm.h> 15 #include <sys/sysmsg.h> 16 #include <sys/kernel.h> 17 #include <sys/proc.h> 18 #include <sys/sem.h> 19 #include <sys/sysent.h> 20 #include <sys/sysctl.h> 21 #include <sys/malloc.h> 22 #include <sys/jail.h> 23 #include <sys/thread.h> 24 25 static MALLOC_DEFINE(M_SEM, "sem", "SVID compatible semaphores"); 26 27 static void seminit (void *); 28 29 static struct sem_undo *semu_alloc (struct proc *p); 30 static int semundo_adjust (struct proc *p, int semid, int semnum, int adjval); 31 static void semundo_clear (int semid, int semnum); 32 33 static struct lwkt_token semu_token = LWKT_TOKEN_INITIALIZER(semu_token); 34 static int semtot = 0; 35 static struct semid_pool *sema; /* semaphore id pool */ 36 static TAILQ_HEAD(, sem_undo) semu_list = TAILQ_HEAD_INITIALIZER(semu_list); 37 static struct lock sema_lk; 38 39 struct sem { 40 u_short semval; /* semaphore value */ 41 pid_t sempid; /* pid of last operation */ 42 u_short semncnt; /* # awaiting semval > cval */ 43 u_short semzcnt; /* # awaiting semval = 0 */ 44 }; 45 46 /* 47 * Undo structure (one per process) 48 */ 49 struct sem_undo { 50 TAILQ_ENTRY(sem_undo) un_entry; /* linked list for semundo_clear() */ 51 struct proc *un_proc; /* owner of this structure */ 52 int un_refs; /* prevent unlink/kfree */ 53 short un_cnt; /* # of active entries */ 54 short un_unused; 55 struct undo { 56 short un_adjval; /* adjust on exit values */ 57 short un_num; /* semaphore # */ 58 int un_id; /* semid */ 59 } un_ent[1]; /* undo entries */ 60 }; 61 62 /* 63 * Configuration parameters 64 */ 65 #ifndef SEMMNI 66 #define SEMMNI 1024 /* # of semaphore identifiers */ 67 #endif 68 #ifndef SEMMNS 69 #define SEMMNS 32767 /* # of semaphores in system */ 70 #endif 71 #ifndef SEMUME 72 #define SEMUME 25 /* max # of undo entries per process */ 73 #endif 74 #ifndef SEMMNU 75 #define SEMMNU 1024 /* # of undo structures in system */ 76 /* NO LONGER USED */ 77 #endif 78 79 /* shouldn't need tuning */ 80 #ifndef SEMMAP 81 #define SEMMAP 128 /* # of entries in semaphore map */ 82 #endif 83 #ifndef SEMMSL 84 #define SEMMSL SEMMNS /* max # of semaphores per id */ 85 #endif 86 #ifndef SEMOPM 87 #define SEMOPM 100 /* max # of operations per semop call */ 88 #endif 89 90 #define SEMVMX 32767 /* semaphore maximum value */ 91 #define SEMAEM 16384 /* adjust on exit max value */ 92 93 /* 94 * Due to the way semaphore memory is allocated, we have to ensure that 95 * SEMUSZ is properly aligned. 96 */ 97 98 #define SEM_ALIGN(bytes) roundup2(bytes, sizeof(long)) 99 100 /* actual size of an undo structure */ 101 #define SEMUSZ(nent) SEM_ALIGN(offsetof(struct sem_undo, un_ent[nent])) 102 103 /* 104 * semaphore info struct 105 */ 106 struct seminfo seminfo = { 107 SEMMAP, /* # of entries in semaphore map */ 108 SEMMNI, /* # of semaphore identifiers */ 109 SEMMNS, /* # of semaphores in system */ 110 SEMMNU, /* # of undo structures in system */ 111 SEMMSL, /* max # of semaphores per id */ 112 SEMOPM, /* max # of operations per semop call */ 113 SEMUME, /* max # of undo entries per process */ 114 SEMUSZ(SEMUME), /* size in bytes of undo structure */ 115 SEMVMX, /* semaphore maximum value */ 116 SEMAEM /* adjust on exit max value */ 117 }; 118 119 TUNABLE_INT("kern.ipc.semmap", &seminfo.semmap); 120 TUNABLE_INT("kern.ipc.semmni", &seminfo.semmni); 121 TUNABLE_INT("kern.ipc.semmns", &seminfo.semmns); 122 TUNABLE_INT("kern.ipc.semmnu", &seminfo.semmnu); 123 TUNABLE_INT("kern.ipc.semmsl", &seminfo.semmsl); 124 TUNABLE_INT("kern.ipc.semopm", &seminfo.semopm); 125 TUNABLE_INT("kern.ipc.semume", &seminfo.semume); 126 TUNABLE_INT("kern.ipc.semusz", &seminfo.semusz); 127 TUNABLE_INT("kern.ipc.semvmx", &seminfo.semvmx); 128 TUNABLE_INT("kern.ipc.semaem", &seminfo.semaem); 129 130 SYSCTL_INT(_kern_ipc, OID_AUTO, semmap, CTLFLAG_RW, &seminfo.semmap, 0, 131 "Number of entries in semaphore map"); 132 SYSCTL_INT(_kern_ipc, OID_AUTO, semmni, CTLFLAG_RD, &seminfo.semmni, 0, 133 "Number of semaphore identifiers"); 134 SYSCTL_INT(_kern_ipc, OID_AUTO, semmns, CTLFLAG_RD, &seminfo.semmns, 0, 135 "Total number of semaphores"); 136 SYSCTL_INT(_kern_ipc, OID_AUTO, semmnu, CTLFLAG_RD, &seminfo.semmnu, 0, 137 "Total number of undo structures"); 138 SYSCTL_INT(_kern_ipc, OID_AUTO, semmsl, CTLFLAG_RW, &seminfo.semmsl, 0, 139 "Max number of semaphores per id"); 140 SYSCTL_INT(_kern_ipc, OID_AUTO, semopm, CTLFLAG_RD, &seminfo.semopm, 0, 141 "Max number of operations per semop call"); 142 SYSCTL_INT(_kern_ipc, OID_AUTO, semume, CTLFLAG_RD, &seminfo.semume, 0, 143 "Max number of undo entries per process"); 144 SYSCTL_INT(_kern_ipc, OID_AUTO, semusz, CTLFLAG_RD, &seminfo.semusz, 0, 145 "Size in bytes of undo structure"); 146 SYSCTL_INT(_kern_ipc, OID_AUTO, semvmx, CTLFLAG_RW, &seminfo.semvmx, 0, 147 "Semaphore maximum value"); 148 SYSCTL_INT(_kern_ipc, OID_AUTO, semaem, CTLFLAG_RW, &seminfo.semaem, 0, 149 "Adjust on exit max value"); 150 151 #if 0 152 RO seminfo.semmap /* SEMMAP unused */ 153 RO seminfo.semmni 154 RO seminfo.semmns 155 RO seminfo.semmnu /* undo entries per system */ 156 RW seminfo.semmsl 157 RO seminfo.semopm /* SEMOPM unused */ 158 RO seminfo.semume 159 RO seminfo.semusz /* param - derived from SEMUME for per-proc sizeof */ 160 RO seminfo.semvmx /* SEMVMX unused - user param */ 161 RO seminfo.semaem /* SEMAEM unused - user param */ 162 #endif 163 164 static void 165 seminit(void *dummy) 166 { 167 int i; 168 169 sema = kmalloc(sizeof(struct semid_pool) * seminfo.semmni, 170 M_SEM, M_WAITOK | M_ZERO); 171 172 lockinit(&sema_lk, "semglb", 0, 0); 173 for (i = 0; i < seminfo.semmni; i++) { 174 struct semid_pool *semaptr = &sema[i]; 175 176 lockinit(&semaptr->lk, "semary", 0, 0); 177 semaptr->ds.sem_base = NULL; 178 semaptr->ds.sem_perm.mode = 0; 179 } 180 } 181 SYSINIT(sysv_sem, SI_SUB_SYSV_SEM, SI_ORDER_FIRST, seminit, NULL); 182 183 /* 184 * Allocate a new sem_undo structure for a process 185 * (returns ptr to structure or NULL if no more room) 186 */ 187 static struct sem_undo * 188 semu_alloc(struct proc *p) 189 { 190 struct sem_undo *semu; 191 192 /* 193 * Allocate the semu structure and associate it with the process, 194 * as necessary. 195 */ 196 while ((semu = p->p_sem_undo) == NULL) { 197 semu = kmalloc(SEMUSZ(seminfo.semume), M_SEM, 198 M_WAITOK | M_ZERO); 199 lwkt_gettoken(&semu_token); 200 lwkt_gettoken(&p->p_token); 201 if (p->p_sem_undo == NULL) { 202 p->p_sem_undo = semu; 203 p->p_flags |= P_SYSVSEM; 204 semu->un_proc = p; 205 TAILQ_INSERT_TAIL(&semu_list, semu, un_entry); 206 } else { 207 kfree(semu, M_SEM); 208 } 209 lwkt_reltoken(&p->p_token); 210 lwkt_reltoken(&semu_token); 211 } 212 return(semu); 213 } 214 215 /* 216 * Adjust a particular entry for a particular proc 217 */ 218 static int 219 semundo_adjust(struct proc *p, int semid, int semnum, int adjval) 220 { 221 struct sem_undo *suptr; 222 struct undo *sunptr; 223 int i; 224 int error = 0; 225 226 /* 227 * Look for and remember the sem_undo if the caller doesn't 228 * provide it. 229 */ 230 suptr = semu_alloc(p); 231 lwkt_gettoken(&p->p_token); 232 233 /* 234 * Look for the requested entry and adjust it (delete if adjval becomes 235 * 0). 236 */ 237 sunptr = &suptr->un_ent[0]; 238 for (i = 0; i < suptr->un_cnt; i++, sunptr++) { 239 if (sunptr->un_id != semid || sunptr->un_num != semnum) 240 continue; 241 if (adjval == 0) 242 sunptr->un_adjval = 0; 243 else 244 sunptr->un_adjval += adjval; 245 if (sunptr->un_adjval == 0) { 246 suptr->un_cnt--; 247 if (i < suptr->un_cnt) 248 suptr->un_ent[i] = suptr->un_ent[suptr->un_cnt]; 249 } 250 goto done; 251 } 252 253 /* Didn't find the right entry - create it */ 254 if (adjval == 0) 255 goto done; 256 if (suptr->un_cnt != seminfo.semume) { 257 sunptr = &suptr->un_ent[suptr->un_cnt]; 258 suptr->un_cnt++; 259 sunptr->un_adjval = adjval; 260 sunptr->un_id = semid; 261 sunptr->un_num = semnum; 262 } else { 263 error = EINVAL; 264 } 265 done: 266 lwkt_reltoken(&p->p_token); 267 268 return (error); 269 } 270 271 /* 272 * This is rather expensive 273 */ 274 static void 275 semundo_clear(int semid, int semnum) 276 { 277 struct proc *p; 278 struct sem_undo *suptr; 279 struct sem_undo *sunext; 280 struct undo *sunptr; 281 int i; 282 283 lwkt_gettoken(&semu_token); 284 sunext = TAILQ_FIRST(&semu_list); 285 while ((suptr = sunext) != NULL) { 286 if ((p = suptr->un_proc) == NULL) { 287 sunext = TAILQ_NEXT(suptr, un_entry); 288 continue; 289 } 290 ++suptr->un_refs; 291 PHOLD(p); 292 lwkt_gettoken(&p->p_token); 293 294 /* 295 * Check for semexit() race 296 */ 297 if (p->p_sem_undo != suptr) 298 goto skip; 299 300 sunptr = &suptr->un_ent[0]; 301 i = 0; 302 303 while (i < suptr->un_cnt) { 304 if (sunptr->un_id == semid) { 305 if (semnum == -1 || sunptr->un_num == semnum) { 306 suptr->un_cnt--; 307 if (i < suptr->un_cnt) { 308 suptr->un_ent[i] = 309 suptr->un_ent[suptr->un_cnt]; 310 /* 311 * do not increment i 312 * or sunptr after copydown. 313 */ 314 continue; 315 } 316 } 317 if (semnum != -1) 318 break; 319 } 320 ++i; 321 ++sunptr; 322 } 323 324 skip: 325 lwkt_reltoken(&p->p_token); 326 PRELE(p); 327 328 /* 329 * Handle deletion and semexit races 330 */ 331 sunext = TAILQ_NEXT(suptr, un_entry); 332 if (--suptr->un_refs == 0 && suptr->un_proc == NULL) { 333 KKASSERT(suptr->un_cnt == 0); 334 TAILQ_REMOVE(&semu_list, suptr, un_entry); 335 kfree(suptr, M_SEM); 336 } 337 } 338 lwkt_reltoken(&semu_token); 339 } 340 341 /* 342 * Note that the user-mode half of this passes a union, not a pointer 343 * 344 * MPALMOSTSAFE 345 */ 346 int 347 sys___semctl(struct sysmsg *sysmsg, const struct __semctl_args *uap) 348 { 349 struct thread *td = curthread; 350 struct prison *pr = td->td_proc->p_ucred->cr_prison; 351 int semid = uap->semid; 352 int semnum = uap->semnum; 353 int cmd = uap->cmd; 354 union semun *arg = uap->arg; 355 union semun real_arg; 356 struct ucred *cred = td->td_ucred; 357 int i, rval, eval; 358 struct semid_ds sbuf; 359 struct semid_pool *semaptr; 360 struct sem *semptr; 361 362 #ifdef SEM_DEBUG 363 kprintf("call to semctl(%d, %d, %d, 0x%x)\n", semid, semnum, cmd, arg); 364 #endif 365 366 if (pr && !PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_SYS_SYSVIPC)) 367 return (ENOSYS); 368 369 semid = IPCID_TO_IX(semid); 370 if (semid < 0 || semid >= seminfo.semmni) { 371 return(EINVAL); 372 } 373 semaptr = &sema[semid]; 374 lockmgr(&semaptr->lk, LK_EXCLUSIVE); 375 376 if ((semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0 || 377 semaptr->ds.sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { 378 lockmgr(&semaptr->lk, LK_RELEASE); 379 return(EINVAL); 380 } 381 382 eval = 0; 383 rval = 0; 384 385 switch (cmd) { 386 case IPC_RMID: 387 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_M); 388 if (eval != 0) 389 break; 390 semaptr->ds.sem_perm.cuid = cred->cr_uid; 391 semaptr->ds.sem_perm.uid = cred->cr_uid; 392 393 /* 394 * NOTE: Nobody will be waiting on the semaphores since 395 * we have an exclusive lock on semaptr->lk). 396 */ 397 lockmgr(&sema_lk, LK_EXCLUSIVE); 398 semtot -= semaptr->ds.sem_nsems; 399 kfree(semaptr->ds.sem_base, M_SEM); 400 semaptr->ds.sem_base = NULL; 401 semaptr->ds.sem_perm.mode = 0; /* clears SEM_ALLOC */ 402 lockmgr(&sema_lk, LK_RELEASE); 403 404 semundo_clear(semid, -1); 405 break; 406 407 case IPC_SET: 408 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_M); 409 if (eval) 410 break; 411 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 412 break; 413 if ((eval = copyin(real_arg.buf, (caddr_t)&sbuf, 414 sizeof(sbuf))) != 0) { 415 break; 416 } 417 semaptr->ds.sem_perm.uid = sbuf.sem_perm.uid; 418 semaptr->ds.sem_perm.gid = sbuf.sem_perm.gid; 419 semaptr->ds.sem_perm.mode = 420 (semaptr->ds.sem_perm.mode & ~0777) | 421 (sbuf.sem_perm.mode & 0777); 422 semaptr->ds.sem_ctime = time_second; 423 break; 424 425 case IPC_STAT: 426 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 427 if (eval) 428 break; 429 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 430 break; 431 eval = copyout(&semaptr->ds, real_arg.buf, 432 sizeof(struct semid_ds)); 433 break; 434 case SEM_STAT: 435 /* 436 * For this command we assume semid is an array index 437 * rather than an IPC id. However, the conversion is 438 * just a mask so just validate that the passed-in semid 439 * matches the masked semid. 440 */ 441 if (uap->semid != semid) { 442 eval = EINVAL; 443 break; 444 } 445 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 446 if (eval) 447 break; 448 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 449 break; 450 eval = copyout(&semaptr->ds, real_arg.buf, 451 sizeof(struct semid_ds)); 452 rval = IXSEQ_TO_IPCID(semid, semaptr->ds.sem_perm); 453 break; 454 455 case GETNCNT: 456 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 457 if (eval) 458 break; 459 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 460 eval = EINVAL; 461 break; 462 } 463 rval = semaptr->ds.sem_base[semnum].semncnt; 464 break; 465 466 case GETPID: 467 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 468 if (eval) 469 break; 470 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 471 eval = EINVAL; 472 break; 473 } 474 rval = semaptr->ds.sem_base[semnum].sempid; 475 break; 476 477 case GETVAL: 478 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 479 if (eval) 480 break; 481 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 482 eval = EINVAL; 483 break; 484 } 485 rval = semaptr->ds.sem_base[semnum].semval; 486 break; 487 488 case GETALL: 489 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 490 if (eval) 491 break; 492 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 493 break; 494 for (i = 0; i < semaptr->ds.sem_nsems; i++) { 495 eval = copyout(&semaptr->ds.sem_base[i].semval, 496 &real_arg.array[i], 497 sizeof(real_arg.array[0])); 498 if (eval) 499 break; 500 } 501 break; 502 503 case GETZCNT: 504 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 505 if (eval) 506 break; 507 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 508 eval = EINVAL; 509 break; 510 } 511 rval = semaptr->ds.sem_base[semnum].semzcnt; 512 break; 513 514 case SETVAL: 515 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_W); 516 if (eval) 517 break; 518 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 519 eval = EINVAL; 520 break; 521 } 522 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 523 break; 524 525 /* 526 * Because we hold semaptr->lk exclusively we can safely 527 * modify any semptr content without acquiring its token. 528 */ 529 semptr = &semaptr->ds.sem_base[semnum]; 530 semptr->semval = real_arg.val; 531 semundo_clear(semid, semnum); 532 if (semptr->semzcnt || semptr->semncnt) 533 wakeup(semptr); 534 break; 535 536 case SETALL: 537 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_W); 538 if (eval) 539 break; 540 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 541 break; 542 /* 543 * Because we hold semaptr->lk exclusively we can safely 544 * modify any semptr content without acquiring its token. 545 */ 546 for (i = 0; i < semaptr->ds.sem_nsems; i++) { 547 semptr = &semaptr->ds.sem_base[i]; 548 eval = copyin(&real_arg.array[i], 549 (caddr_t)&semptr->semval, 550 sizeof(real_arg.array[0])); 551 if (semptr->semzcnt || semptr->semncnt) 552 wakeup(semptr); 553 if (eval != 0) 554 break; 555 } 556 semundo_clear(semid, -1); 557 break; 558 559 default: 560 eval = EINVAL; 561 break; 562 } 563 lockmgr(&semaptr->lk, LK_RELEASE); 564 565 if (eval == 0) 566 sysmsg->sysmsg_result = rval; 567 return(eval); 568 } 569 570 /* 571 * MPALMOSTSAFE 572 */ 573 int 574 sys_semget(struct sysmsg *sysmsg, const struct semget_args *uap) 575 { 576 struct thread *td = curthread; 577 struct prison *pr = td->td_proc->p_ucred->cr_prison; 578 int semid, eval; 579 int key = uap->key; 580 int nsems = uap->nsems; 581 int semflg = uap->semflg; 582 struct ucred *cred = td->td_ucred; 583 584 #ifdef SEM_DEBUG 585 kprintf("semget(0x%x, %d, 0%o)\n", key, nsems, semflg); 586 #endif 587 588 if (pr && !PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_SYS_SYSVIPC)) 589 return (ENOSYS); 590 591 eval = 0; 592 593 if (key != IPC_PRIVATE) { 594 for (semid = 0; semid < seminfo.semmni; semid++) { 595 if ((sema[semid].ds.sem_perm.mode & SEM_ALLOC) == 0 || 596 sema[semid].ds.sem_perm.key != key) { 597 continue; 598 } 599 lockmgr(&sema[semid].lk, LK_EXCLUSIVE); 600 if ((sema[semid].ds.sem_perm.mode & SEM_ALLOC) == 0 || 601 sema[semid].ds.sem_perm.key != key) { 602 lockmgr(&sema[semid].lk, LK_RELEASE); 603 continue; 604 } 605 break; 606 } 607 if (semid < seminfo.semmni) { 608 /* sema[semid].lk still locked from above */ 609 #ifdef SEM_DEBUG 610 kprintf("found public key\n"); 611 #endif 612 if ((eval = ipcperm(td->td_proc, 613 &sema[semid].ds.sem_perm, 614 semflg & 0700))) { 615 lockmgr(&sema[semid].lk, LK_RELEASE); 616 goto done; 617 } 618 if (nsems > 0 && sema[semid].ds.sem_nsems < nsems) { 619 #ifdef SEM_DEBUG 620 kprintf("too small\n"); 621 #endif 622 eval = EINVAL; 623 lockmgr(&sema[semid].lk, LK_RELEASE); 624 goto done; 625 } 626 if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) { 627 #ifdef SEM_DEBUG 628 kprintf("not exclusive\n"); 629 #endif 630 eval = EEXIST; 631 lockmgr(&sema[semid].lk, LK_RELEASE); 632 goto done; 633 } 634 635 /* 636 * Return this one. 637 */ 638 lockmgr(&sema[semid].lk, LK_RELEASE); 639 goto done; 640 } 641 } 642 643 #ifdef SEM_DEBUG 644 kprintf("need to allocate the semid_ds\n"); 645 #endif 646 if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) { 647 if (nsems <= 0 || nsems > seminfo.semmsl) { 648 #ifdef SEM_DEBUG 649 kprintf("nsems out of range (0<%d<=%d)\n", 650 nsems, seminfo.semmsl); 651 #endif 652 eval = EINVAL; 653 goto done; 654 } 655 656 /* 657 * SEM_ALLOC flag cannot be set unless sema_lk is locked. 658 * semtot field also protected by sema_lk. 659 */ 660 lockmgr(&sema_lk, LK_EXCLUSIVE); 661 if (nsems > seminfo.semmns - semtot) { 662 #ifdef SEM_DEBUG 663 kprintf("not enough semaphores left " 664 "(need %d, got %d)\n", 665 nsems, seminfo.semmns - semtot); 666 #endif 667 eval = ENOSPC; 668 lockmgr(&sema_lk, LK_RELEASE); 669 goto done; 670 } 671 for (semid = 0; semid < seminfo.semmni; semid++) { 672 if ((sema[semid].ds.sem_perm.mode & SEM_ALLOC) == 0) 673 break; 674 } 675 if (semid == seminfo.semmni) { 676 #ifdef SEM_DEBUG 677 kprintf("no more semid_ds's available\n"); 678 #endif 679 eval = ENOSPC; 680 lockmgr(&sema_lk, LK_RELEASE); 681 goto done; 682 } 683 #ifdef SEM_DEBUG 684 kprintf("semid %d is available\n", semid); 685 #endif 686 lockmgr(&sema[semid].lk, LK_EXCLUSIVE); 687 sema[semid].ds.sem_perm.key = key; 688 sema[semid].ds.sem_perm.cuid = cred->cr_uid; 689 sema[semid].ds.sem_perm.uid = cred->cr_uid; 690 sema[semid].ds.sem_perm.cgid = cred->cr_gid; 691 sema[semid].ds.sem_perm.gid = cred->cr_gid; 692 sema[semid].ds.sem_perm.mode = (semflg & 0777) | SEM_ALLOC; 693 sema[semid].ds.sem_perm.seq = 694 (sema[semid].ds.sem_perm.seq + 1) & 0x7fff; 695 sema[semid].ds.sem_nsems = nsems; 696 sema[semid].ds.sem_otime = 0; 697 sema[semid].ds.sem_ctime = time_second; 698 sema[semid].ds.sem_base = kmalloc(sizeof(struct sem) * nsems, 699 M_SEM, M_WAITOK|M_ZERO); 700 semtot += nsems; 701 ++sema[semid].gen; 702 lockmgr(&sema[semid].lk, LK_RELEASE); 703 lockmgr(&sema_lk, LK_RELEASE); 704 #ifdef SEM_DEBUG 705 kprintf("sembase = 0x%x, next = 0x%x\n", 706 sema[semid].ds.sem_base, &sem[semtot]); 707 #endif 708 /* eval == 0 */ 709 } else { 710 #ifdef SEM_DEBUG 711 kprintf("didn't find it and wasn't asked to create it\n"); 712 #endif 713 eval = ENOENT; 714 } 715 716 done: 717 if (eval == 0) { 718 sysmsg->sysmsg_result = 719 IXSEQ_TO_IPCID(semid, sema[semid].ds.sem_perm); 720 } 721 return(eval); 722 } 723 724 /* 725 * MPSAFE 726 */ 727 int 728 sys_semop(struct sysmsg *sysmsg, const struct semop_args *uap) 729 { 730 struct thread *td = curthread; 731 struct prison *pr = td->td_proc->p_ucred->cr_prison; 732 int semid = uap->semid; 733 u_int nsops = uap->nsops; 734 struct sembuf sops[MAX_SOPS]; 735 struct semid_pool *semaptr; 736 struct sembuf *sopptr; 737 struct sem *semptr; 738 struct sem *xsemptr; 739 int i, j, eval; 740 int do_undos; 741 742 #ifdef SEM_DEBUG 743 kprintf("call to semop(%d, 0x%x, %u)\n", semid, sops, nsops); 744 #endif 745 if (pr && !PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_SYS_SYSVIPC)) 746 return (ENOSYS); 747 748 semid = IPCID_TO_IX(semid); /* Convert back to zero origin */ 749 750 if (semid < 0 || semid >= seminfo.semmni) { 751 eval = EINVAL; 752 goto done2; 753 } 754 755 wakeup_start_delayed(); 756 semaptr = &sema[semid]; 757 lockmgr(&semaptr->lk, LK_SHARED); 758 759 if ((semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0) { 760 eval = EINVAL; 761 goto done; 762 } 763 if (semaptr->ds.sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { 764 eval = EINVAL; 765 goto done; 766 } 767 768 if ((eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_W))) { 769 #ifdef SEM_DEBUG 770 kprintf("eval = %d from ipaccess\n", eval); 771 #endif 772 goto done; 773 } 774 775 if (nsops > MAX_SOPS) { 776 #ifdef SEM_DEBUG 777 kprintf("too many sops (max=%d, nsops=%u)\n", MAX_SOPS, nsops); 778 #endif 779 eval = E2BIG; 780 goto done; 781 } 782 783 if ((eval = copyin(uap->sops, &sops, nsops * sizeof(sops[0]))) != 0) { 784 #ifdef SEM_DEBUG 785 kprintf("eval = %d from copyin(%08x, %08x, %u)\n", eval, 786 uap->sops, &sops, nsops * sizeof(sops[0])); 787 #endif 788 goto done; 789 } 790 791 /* 792 * Loop trying to satisfy the vector of requests. 793 * If we reach a point where we must wait, any requests already 794 * performed are rolled back and we go to sleep until some other 795 * process wakes us up. At this point, we start all over again. 796 * 797 * This ensures that from the perspective of other tasks, a set 798 * of requests is atomic (never partially satisfied). 799 */ 800 do_undos = 0; 801 802 for (;;) { 803 long gen; 804 805 semptr = NULL; 806 807 for (i = 0; i < nsops; i++) { 808 sopptr = &sops[i]; 809 810 if (sopptr->sem_num >= semaptr->ds.sem_nsems) { 811 eval = EFBIG; 812 goto done; 813 } 814 815 semptr = &semaptr->ds.sem_base[sopptr->sem_num]; 816 lwkt_getpooltoken(semptr); 817 818 #ifdef SEM_DEBUG 819 kprintf("semop: semaptr=%x, sem_base=%x, semptr=%x, " 820 "sem[%d]=%d : op=%d, flag=%s\n", 821 semaptr, semaptr->ds.sem_base, semptr, 822 sopptr->sem_num, semptr->semval, sopptr->sem_op, 823 (sopptr->sem_flg & IPC_NOWAIT) ? "nowait" : "wait"); 824 #endif 825 826 if (sopptr->sem_op < 0) { 827 if (semptr->semval + sopptr->sem_op < 0) { 828 #ifdef SEM_DEBUG 829 kprintf("semop: can't do it now\n"); 830 #endif 831 break; 832 } else { 833 semptr->semval += sopptr->sem_op; 834 if (semptr->semval == 0 && 835 semptr->semzcnt > 0) { 836 wakeup(semptr); 837 } 838 } 839 if (sopptr->sem_flg & SEM_UNDO) 840 do_undos = 1; 841 } else if (sopptr->sem_op == 0) { 842 if (semptr->semval > 0) { 843 #ifdef SEM_DEBUG 844 kprintf("semop: not zero now\n"); 845 #endif 846 break; 847 } 848 } else { 849 semptr->semval += sopptr->sem_op; 850 if (sopptr->sem_flg & SEM_UNDO) 851 do_undos = 1; 852 if (semptr->semncnt > 0) 853 wakeup(semptr); 854 } 855 lwkt_relpooltoken(semptr); 856 } 857 858 /* 859 * Did we get through the entire vector? 860 */ 861 if (i >= nsops) 862 goto donex; 863 864 /* 865 * No, protect the semaphore request which also flags that 866 * a wakeup is needed, then release semptr since we know 867 * another process is likely going to need to access it 868 * soon. 869 */ 870 if (sopptr->sem_op == 0) 871 semptr->semzcnt++; 872 else 873 semptr->semncnt++; 874 tsleep_interlock(semptr, PCATCH); 875 lwkt_relpooltoken(semptr); 876 877 /* 878 * Rollback the semaphores we had acquired. 879 */ 880 #ifdef SEM_DEBUG 881 kprintf("semop: rollback 0 through %d\n", i-1); 882 #endif 883 for (j = 0; j < i; j++) { 884 xsemptr = &semaptr->ds.sem_base[sops[j].sem_num]; 885 lwkt_getpooltoken(xsemptr); 886 xsemptr->semval -= sops[j].sem_op; 887 if (xsemptr->semval == 0 && xsemptr->semzcnt > 0) 888 wakeup(xsemptr); 889 if (xsemptr->semval <= 0 && xsemptr->semncnt > 0) 890 wakeup(xsemptr); 891 lwkt_relpooltoken(xsemptr); 892 } 893 894 /* 895 * If the request that we couldn't satisfy has the 896 * NOWAIT flag set then return with EAGAIN. 897 */ 898 if (sopptr->sem_flg & IPC_NOWAIT) { 899 eval = EAGAIN; 900 goto done; 901 } 902 903 /* 904 * Release semaptr->lk while sleeping, allowing other 905 * semops (like SETVAL, SETALL, etc), which require an 906 * exclusive lock and might wake us up. 907 * 908 * Reload and recheck the validity of semaptr on return. 909 * Note that semptr itself might have changed too, but 910 * we've already interlocked for semptr and that is what 911 * will be woken up if it wakes up the tsleep on a MP 912 * race. 913 * 914 * gen protects against destroy/re-create races where the 915 * creds match. 916 */ 917 #ifdef SEM_DEBUG 918 kprintf("semop: good night!\n"); 919 #endif 920 gen = semaptr->gen; 921 lockmgr(&semaptr->lk, LK_RELEASE); 922 eval = tsleep(semptr, PCATCH | PINTERLOCKED, "semwait", hz); 923 lockmgr(&semaptr->lk, LK_SHARED); 924 #ifdef SEM_DEBUG 925 kprintf("semop: good morning (eval=%d)!\n", eval); 926 #endif 927 928 /* return code is checked below, after sem[nz]cnt-- */ 929 930 /* 931 * Make sure that the semaphore still exists 932 */ 933 if (semaptr->gen != gen || 934 (semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0 || 935 semaptr->ds.sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { 936 eval = EIDRM; 937 goto done; 938 } 939 940 /* 941 * The semaphore is still alive. Readjust the count of 942 * waiting processes. 943 */ 944 semptr = &semaptr->ds.sem_base[sopptr->sem_num]; 945 lwkt_getpooltoken(semptr); 946 if (sopptr->sem_op == 0) 947 semptr->semzcnt--; 948 else 949 semptr->semncnt--; 950 lwkt_relpooltoken(semptr); 951 952 /* 953 * Is it really morning, or was our sleep interrupted? 954 * (Delayed check of tsleep() return code because we 955 * need to decrement sem[nz]cnt either way.) 956 */ 957 if (eval) { 958 eval = EINTR; 959 goto done; 960 } 961 #ifdef SEM_DEBUG 962 kprintf("semop: good morning!\n"); 963 #endif 964 /* RETRY LOOP */ 965 } 966 967 donex: 968 /* 969 * Process any SEM_UNDO requests. 970 */ 971 if (do_undos) { 972 for (i = 0; i < nsops; i++) { 973 /* 974 * We only need to deal with SEM_UNDO's for non-zero 975 * op's. 976 */ 977 int adjval; 978 979 if ((sops[i].sem_flg & SEM_UNDO) == 0) 980 continue; 981 adjval = sops[i].sem_op; 982 if (adjval == 0) 983 continue; 984 eval = semundo_adjust(td->td_proc, semid, 985 sops[i].sem_num, -adjval); 986 if (eval == 0) 987 continue; 988 989 /* 990 * Oh-Oh! We ran out of either sem_undo's or undo's. 991 * Rollback the adjustments to this point and then 992 * rollback the semaphore ups and down so we can return 993 * with an error with all structures restored. We 994 * rollback the undo's in the exact reverse order that 995 * we applied them. This guarantees that we won't run 996 * out of space as we roll things back out. 997 */ 998 for (j = i - 1; j >= 0; j--) { 999 if ((sops[j].sem_flg & SEM_UNDO) == 0) 1000 continue; 1001 adjval = sops[j].sem_op; 1002 if (adjval == 0) 1003 continue; 1004 if (semundo_adjust(td->td_proc, semid, 1005 sops[j].sem_num, adjval) != 0) 1006 panic("semop - can't undo undos"); 1007 } 1008 1009 for (j = 0; j < nsops; j++) { 1010 xsemptr = &semaptr->ds.sem_base[ 1011 sops[j].sem_num]; 1012 lwkt_getpooltoken(xsemptr); 1013 xsemptr->semval -= sops[j].sem_op; 1014 if (xsemptr->semval == 0 && 1015 xsemptr->semzcnt > 0) 1016 wakeup(xsemptr); 1017 if (xsemptr->semval <= 0 && 1018 xsemptr->semncnt > 0) 1019 wakeup(xsemptr); 1020 lwkt_relpooltoken(xsemptr); 1021 } 1022 1023 #ifdef SEM_DEBUG 1024 kprintf("eval = %d from semundo_adjust\n", eval); 1025 #endif 1026 goto done; 1027 } /* loop through the sops */ 1028 } /* if (do_undos) */ 1029 1030 /* We're definitely done - set the sempid's */ 1031 for (i = 0; i < nsops; i++) { 1032 sopptr = &sops[i]; 1033 semptr = &semaptr->ds.sem_base[sopptr->sem_num]; 1034 lwkt_getpooltoken(semptr); 1035 semptr->sempid = td->td_proc->p_pid; 1036 lwkt_relpooltoken(semptr); 1037 } 1038 1039 /* Do a wakeup if any semaphore was up'd. */ 1040 #ifdef SEM_DEBUG 1041 kprintf("semop: done\n"); 1042 #endif 1043 sysmsg->sysmsg_result = 0; 1044 eval = 0; 1045 done: 1046 lockmgr(&semaptr->lk, LK_RELEASE); 1047 wakeup_end_delayed(); 1048 done2: 1049 return(eval); 1050 } 1051 1052 /* 1053 * Go through the undo structures for this process and apply the adjustments to 1054 * semaphores. 1055 * 1056 * (p->p_token is held by the caller) 1057 */ 1058 void 1059 semexit(struct proc *p) 1060 { 1061 struct sem_undo *suptr; 1062 struct sem *semptr; 1063 1064 /* 1065 * We're getting a global token, don't do it if we couldn't 1066 * possibly have any semaphores. 1067 */ 1068 if ((p->p_flags & P_SYSVSEM) == 0) 1069 return; 1070 suptr = p->p_sem_undo; 1071 KKASSERT(suptr != NULL); 1072 1073 /* 1074 * Disconnect suptr from the process and increment un_refs to 1075 * prevent anyone else from being able to destroy the structure. 1076 * Do not remove it from the linked list until after we are through 1077 * scanning it as other semaphore calls might still effect it. 1078 */ 1079 lwkt_gettoken(&semu_token); 1080 #if 0 1081 /* 1082 * do not disconnect proc yet, doing so prevents RMID 1083 * from cleaning up the structure atomically with SEM_ALLOC 1084 */ 1085 p->p_sem_undo = NULL; 1086 p->p_flags &= ~P_SYSVSEM; 1087 suptr->un_proc = NULL; 1088 #endif 1089 ++suptr->un_refs; 1090 lwkt_reltoken(&semu_token); 1091 1092 while (suptr->un_cnt) { 1093 struct semid_pool *semaptr; 1094 int semid; 1095 int semnum; 1096 int adjval; 1097 int ix; 1098 1099 /* 1100 * These values are stable because we hold p->p_token. 1101 * However, they can get ripped out from under us when 1102 * we block or obtain other tokens so we have to re-check. 1103 */ 1104 ix = suptr->un_cnt - 1; 1105 semid = suptr->un_ent[ix].un_id; 1106 semnum = suptr->un_ent[ix].un_num; 1107 adjval = suptr->un_ent[ix].un_adjval; 1108 1109 semaptr = &sema[semid]; 1110 1111 /* 1112 * Recheck after locking, then execute the undo 1113 * operation. semptr remains valid due to the 1114 * semaptr->lk. 1115 */ 1116 lockmgr(&semaptr->lk, LK_EXCLUSIVE); 1117 semptr = &semaptr->ds.sem_base[semnum]; 1118 lwkt_getpooltoken(semptr); 1119 1120 if (ix == suptr->un_cnt - 1 && 1121 semid == suptr->un_ent[ix].un_id && 1122 semnum == suptr->un_ent[ix].un_num && 1123 adjval == suptr->un_ent[ix].un_adjval) { 1124 /* 1125 * Only do assertions when we aren't in a SMP race. 1126 */ 1127 if ((semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0) 1128 panic("semexit - semid not allocated"); 1129 if (semnum >= semaptr->ds.sem_nsems) 1130 panic("semexit - semnum out of range"); 1131 --suptr->un_cnt; 1132 1133 if (adjval < 0) { 1134 if (semptr->semval < -adjval) 1135 semptr->semval = 0; 1136 else 1137 semptr->semval += adjval; 1138 } else { 1139 semptr->semval += adjval; 1140 } 1141 wakeup(semptr); 1142 } 1143 lwkt_relpooltoken(semptr); 1144 lockmgr(&semaptr->lk, LK_RELEASE); 1145 } 1146 1147 /* 1148 * Final cleanup, remove from the list, remove the process association, 1149 * then deallocate on last ref. 1150 */ 1151 lwkt_gettoken(&semu_token); 1152 1153 p->p_sem_undo = NULL; 1154 p->p_flags &= ~P_SYSVSEM; 1155 suptr->un_proc = NULL; 1156 1157 if (--suptr->un_refs == 0) { 1158 TAILQ_REMOVE(&semu_list, suptr, un_entry); 1159 KKASSERT(suptr->un_cnt == 0); 1160 kfree(suptr, M_SEM); 1161 } 1162 lwkt_reltoken(&semu_token); 1163 } 1164