1 /* $FreeBSD: src/sys/kern/sysv_sem.c,v 1.69 2004/03/17 09:37:13 cperciva Exp $ */ 2 3 /* 4 * Implementation of SVID semaphores 5 * 6 * Author: Daniel Boulet 7 * 8 * This software is provided ``AS IS'' without any warranties of any kind. 9 */ 10 11 #include "opt_sysvipc.h" 12 13 #include <sys/param.h> 14 #include <sys/systm.h> 15 #include <sys/sysproto.h> 16 #include <sys/kernel.h> 17 #include <sys/proc.h> 18 #include <sys/sem.h> 19 #include <sys/sysent.h> 20 #include <sys/sysctl.h> 21 #include <sys/malloc.h> 22 #include <sys/jail.h> 23 #include <sys/thread.h> 24 25 static MALLOC_DEFINE(M_SEM, "sem", "SVID compatible semaphores"); 26 27 static void seminit (void *); 28 29 static struct sem_undo *semu_alloc (struct proc *p); 30 static int semundo_adjust (struct proc *p, int semid, int semnum, int adjval); 31 static void semundo_clear (int semid, int semnum); 32 33 static struct lwkt_token semu_token = LWKT_TOKEN_INITIALIZER(semu_token); 34 static int semtot = 0; 35 static struct semid_pool *sema; /* semaphore id pool */ 36 static TAILQ_HEAD(, sem_undo) semu_list = TAILQ_HEAD_INITIALIZER(semu_list); 37 static struct lock sema_lk; 38 39 struct sem { 40 u_short semval; /* semaphore value */ 41 pid_t sempid; /* pid of last operation */ 42 u_short semncnt; /* # awaiting semval > cval */ 43 u_short semzcnt; /* # awaiting semval = 0 */ 44 }; 45 46 /* 47 * Undo structure (one per process) 48 */ 49 struct sem_undo { 50 TAILQ_ENTRY(sem_undo) un_entry; /* linked list for semundo_clear() */ 51 struct proc *un_proc; /* owner of this structure */ 52 int un_refs; /* prevent unlink/kfree */ 53 short un_cnt; /* # of active entries */ 54 short un_unused; 55 struct undo { 56 short un_adjval; /* adjust on exit values */ 57 short un_num; /* semaphore # */ 58 int un_id; /* semid */ 59 } un_ent[1]; /* undo entries */ 60 }; 61 62 /* 63 * Configuration parameters 64 */ 65 #ifndef SEMMNI 66 #define SEMMNI 1024 /* # of semaphore identifiers */ 67 #endif 68 #ifndef SEMMNS 69 #define SEMMNS 32767 /* # of semaphores in system */ 70 #endif 71 #ifndef SEMUME 72 #define SEMUME 25 /* max # of undo entries per process */ 73 #endif 74 #ifndef SEMMNU 75 #define SEMMNU 1024 /* # of undo structures in system */ 76 /* NO LONGER USED */ 77 #endif 78 79 /* shouldn't need tuning */ 80 #ifndef SEMMAP 81 #define SEMMAP 128 /* # of entries in semaphore map */ 82 #endif 83 #ifndef SEMMSL 84 #define SEMMSL SEMMNS /* max # of semaphores per id */ 85 #endif 86 #ifndef SEMOPM 87 #define SEMOPM 100 /* max # of operations per semop call */ 88 #endif 89 90 #define SEMVMX 32767 /* semaphore maximum value */ 91 #define SEMAEM 16384 /* adjust on exit max value */ 92 93 /* 94 * Due to the way semaphore memory is allocated, we have to ensure that 95 * SEMUSZ is properly aligned. 96 */ 97 98 #define SEM_ALIGN(bytes) roundup2(bytes, sizeof(long)) 99 100 /* actual size of an undo structure */ 101 #define SEMUSZ(nent) SEM_ALIGN(offsetof(struct sem_undo, un_ent[nent])) 102 103 /* 104 * semaphore info struct 105 */ 106 struct seminfo seminfo = { 107 SEMMAP, /* # of entries in semaphore map */ 108 SEMMNI, /* # of semaphore identifiers */ 109 SEMMNS, /* # of semaphores in system */ 110 SEMMNU, /* # of undo structures in system */ 111 SEMMSL, /* max # of semaphores per id */ 112 SEMOPM, /* max # of operations per semop call */ 113 SEMUME, /* max # of undo entries per process */ 114 SEMUSZ(SEMUME), /* size in bytes of undo structure */ 115 SEMVMX, /* semaphore maximum value */ 116 SEMAEM /* adjust on exit max value */ 117 }; 118 119 TUNABLE_INT("kern.ipc.semmap", &seminfo.semmap); 120 TUNABLE_INT("kern.ipc.semmni", &seminfo.semmni); 121 TUNABLE_INT("kern.ipc.semmns", &seminfo.semmns); 122 TUNABLE_INT("kern.ipc.semmnu", &seminfo.semmnu); 123 TUNABLE_INT("kern.ipc.semmsl", &seminfo.semmsl); 124 TUNABLE_INT("kern.ipc.semopm", &seminfo.semopm); 125 TUNABLE_INT("kern.ipc.semume", &seminfo.semume); 126 TUNABLE_INT("kern.ipc.semusz", &seminfo.semusz); 127 TUNABLE_INT("kern.ipc.semvmx", &seminfo.semvmx); 128 TUNABLE_INT("kern.ipc.semaem", &seminfo.semaem); 129 130 SYSCTL_INT(_kern_ipc, OID_AUTO, semmap, CTLFLAG_RW, &seminfo.semmap, 0, 131 "Number of entries in semaphore map"); 132 SYSCTL_INT(_kern_ipc, OID_AUTO, semmni, CTLFLAG_RD, &seminfo.semmni, 0, 133 "Number of semaphore identifiers"); 134 SYSCTL_INT(_kern_ipc, OID_AUTO, semmns, CTLFLAG_RD, &seminfo.semmns, 0, 135 "Total number of semaphores"); 136 SYSCTL_INT(_kern_ipc, OID_AUTO, semmnu, CTLFLAG_RD, &seminfo.semmnu, 0, 137 "Total number of undo structures"); 138 SYSCTL_INT(_kern_ipc, OID_AUTO, semmsl, CTLFLAG_RW, &seminfo.semmsl, 0, 139 "Max number of semaphores per id"); 140 SYSCTL_INT(_kern_ipc, OID_AUTO, semopm, CTLFLAG_RD, &seminfo.semopm, 0, 141 "Max number of operations per semop call"); 142 SYSCTL_INT(_kern_ipc, OID_AUTO, semume, CTLFLAG_RD, &seminfo.semume, 0, 143 "Max number of undo entries per process"); 144 SYSCTL_INT(_kern_ipc, OID_AUTO, semusz, CTLFLAG_RD, &seminfo.semusz, 0, 145 "Size in bytes of undo structure"); 146 SYSCTL_INT(_kern_ipc, OID_AUTO, semvmx, CTLFLAG_RW, &seminfo.semvmx, 0, 147 "Semaphore maximum value"); 148 SYSCTL_INT(_kern_ipc, OID_AUTO, semaem, CTLFLAG_RW, &seminfo.semaem, 0, 149 "Adjust on exit max value"); 150 151 #if 0 152 RO seminfo.semmap /* SEMMAP unused */ 153 RO seminfo.semmni 154 RO seminfo.semmns 155 RO seminfo.semmnu /* undo entries per system */ 156 RW seminfo.semmsl 157 RO seminfo.semopm /* SEMOPM unused */ 158 RO seminfo.semume 159 RO seminfo.semusz /* param - derived from SEMUME for per-proc sizeof */ 160 RO seminfo.semvmx /* SEMVMX unused - user param */ 161 RO seminfo.semaem /* SEMAEM unused - user param */ 162 #endif 163 164 static void 165 seminit(void *dummy) 166 { 167 int i; 168 169 sema = kmalloc(sizeof(struct semid_pool) * seminfo.semmni, 170 M_SEM, M_WAITOK | M_ZERO); 171 172 lockinit(&sema_lk, "semglb", 0, 0); 173 for (i = 0; i < seminfo.semmni; i++) { 174 struct semid_pool *semaptr = &sema[i]; 175 176 lockinit(&semaptr->lk, "semary", 0, 0); 177 semaptr->ds.sem_base = NULL; 178 semaptr->ds.sem_perm.mode = 0; 179 } 180 } 181 SYSINIT(sysv_sem, SI_SUB_SYSV_SEM, SI_ORDER_FIRST, seminit, NULL); 182 183 /* 184 * Allocate a new sem_undo structure for a process 185 * (returns ptr to structure or NULL if no more room) 186 */ 187 static struct sem_undo * 188 semu_alloc(struct proc *p) 189 { 190 struct sem_undo *semu; 191 192 /* 193 * Allocate the semu structure and associate it with the process, 194 * as necessary. 195 */ 196 while ((semu = p->p_sem_undo) == NULL) { 197 semu = kmalloc(SEMUSZ(seminfo.semume), M_SEM, 198 M_WAITOK | M_ZERO); 199 lwkt_gettoken(&semu_token); 200 lwkt_gettoken(&p->p_token); 201 if (p->p_sem_undo == NULL) { 202 p->p_sem_undo = semu; 203 p->p_flags |= P_SYSVSEM; 204 semu->un_proc = p; 205 TAILQ_INSERT_TAIL(&semu_list, semu, un_entry); 206 } else { 207 kfree(semu, M_SEM); 208 } 209 lwkt_reltoken(&p->p_token); 210 lwkt_reltoken(&semu_token); 211 } 212 return(semu); 213 } 214 215 /* 216 * Adjust a particular entry for a particular proc 217 */ 218 static int 219 semundo_adjust(struct proc *p, int semid, int semnum, int adjval) 220 { 221 struct sem_undo *suptr; 222 struct undo *sunptr; 223 int i; 224 int error = 0; 225 226 /* 227 * Look for and remember the sem_undo if the caller doesn't 228 * provide it. 229 */ 230 suptr = semu_alloc(p); 231 lwkt_gettoken(&p->p_token); 232 233 /* 234 * Look for the requested entry and adjust it (delete if adjval becomes 235 * 0). 236 */ 237 sunptr = &suptr->un_ent[0]; 238 for (i = 0; i < suptr->un_cnt; i++, sunptr++) { 239 if (sunptr->un_id != semid || sunptr->un_num != semnum) 240 continue; 241 if (adjval == 0) 242 sunptr->un_adjval = 0; 243 else 244 sunptr->un_adjval += adjval; 245 if (sunptr->un_adjval == 0) { 246 suptr->un_cnt--; 247 if (i < suptr->un_cnt) 248 suptr->un_ent[i] = suptr->un_ent[suptr->un_cnt]; 249 } 250 goto done; 251 } 252 253 /* Didn't find the right entry - create it */ 254 if (adjval == 0) 255 goto done; 256 if (suptr->un_cnt != seminfo.semume) { 257 sunptr = &suptr->un_ent[suptr->un_cnt]; 258 suptr->un_cnt++; 259 sunptr->un_adjval = adjval; 260 sunptr->un_id = semid; 261 sunptr->un_num = semnum; 262 } else { 263 error = EINVAL; 264 } 265 done: 266 lwkt_reltoken(&p->p_token); 267 268 return (error); 269 } 270 271 /* 272 * This is rather expensive 273 */ 274 static void 275 semundo_clear(int semid, int semnum) 276 { 277 struct proc *p; 278 struct sem_undo *suptr; 279 struct sem_undo *sunext; 280 struct undo *sunptr; 281 int i; 282 283 lwkt_gettoken(&semu_token); 284 sunext = TAILQ_FIRST(&semu_list); 285 while ((suptr = sunext) != NULL) { 286 if ((p = suptr->un_proc) == NULL) { 287 sunext = TAILQ_NEXT(suptr, un_entry); 288 continue; 289 } 290 ++suptr->un_refs; 291 PHOLD(p); 292 lwkt_gettoken(&p->p_token); 293 294 sunptr = &suptr->un_ent[0]; 295 i = 0; 296 297 while (i < suptr->un_cnt) { 298 if (sunptr->un_id == semid) { 299 if (semnum == -1 || sunptr->un_num == semnum) { 300 suptr->un_cnt--; 301 if (i < suptr->un_cnt) { 302 suptr->un_ent[i] = 303 suptr->un_ent[suptr->un_cnt]; 304 /* 305 * do not increment i 306 * or sunptr after copydown. 307 */ 308 continue; 309 } 310 } 311 if (semnum != -1) 312 break; 313 } 314 ++i; 315 ++sunptr; 316 } 317 318 lwkt_reltoken(&p->p_token); 319 PRELE(p); 320 321 /* 322 * Handle deletion races 323 */ 324 sunext = TAILQ_NEXT(suptr, un_entry); 325 if (--suptr->un_refs == 0 && suptr->un_proc == NULL) { 326 KKASSERT(suptr->un_cnt == 0); 327 TAILQ_REMOVE(&semu_list, suptr, un_entry); 328 kfree(suptr, M_SEM); 329 } 330 } 331 lwkt_reltoken(&semu_token); 332 } 333 334 /* 335 * Note that the user-mode half of this passes a union, not a pointer 336 * 337 * MPALMOSTSAFE 338 */ 339 int 340 sys___semctl(struct __semctl_args *uap) 341 { 342 struct thread *td = curthread; 343 int semid = uap->semid; 344 int semnum = uap->semnum; 345 int cmd = uap->cmd; 346 union semun *arg = uap->arg; 347 union semun real_arg; 348 struct ucred *cred = td->td_ucred; 349 int i, rval, eval; 350 struct semid_ds sbuf; 351 struct semid_pool *semaptr; 352 struct sem *semptr; 353 354 #ifdef SEM_DEBUG 355 kprintf("call to semctl(%d, %d, %d, 0x%x)\n", semid, semnum, cmd, arg); 356 #endif 357 358 if (!jail_sysvipc_allowed && cred->cr_prison != NULL) 359 return (ENOSYS); 360 361 semid = IPCID_TO_IX(semid); 362 if (semid < 0 || semid >= seminfo.semmni) { 363 return(EINVAL); 364 } 365 semaptr = &sema[semid]; 366 lockmgr(&semaptr->lk, LK_EXCLUSIVE); 367 368 if ((semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0 || 369 semaptr->ds.sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { 370 lockmgr(&semaptr->lk, LK_RELEASE); 371 return(EINVAL); 372 } 373 374 eval = 0; 375 rval = 0; 376 377 switch (cmd) { 378 case IPC_RMID: 379 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_M); 380 if (eval != 0) 381 break; 382 semaptr->ds.sem_perm.cuid = cred->cr_uid; 383 semaptr->ds.sem_perm.uid = cred->cr_uid; 384 385 /* 386 * NOTE: Nobody will be waiting on the semaphores since 387 * we have an exclusive lock on semaptr->lk). 388 */ 389 lockmgr(&sema_lk, LK_EXCLUSIVE); 390 semtot -= semaptr->ds.sem_nsems; 391 kfree(semaptr->ds.sem_base, M_SEM); 392 semaptr->ds.sem_base = NULL; 393 semaptr->ds.sem_perm.mode = 0; /* clears SEM_ALLOC */ 394 lockmgr(&sema_lk, LK_RELEASE); 395 396 semundo_clear(semid, -1); 397 break; 398 399 case IPC_SET: 400 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_M); 401 if (eval) 402 break; 403 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 404 break; 405 if ((eval = copyin(real_arg.buf, (caddr_t)&sbuf, 406 sizeof(sbuf))) != 0) { 407 break; 408 } 409 semaptr->ds.sem_perm.uid = sbuf.sem_perm.uid; 410 semaptr->ds.sem_perm.gid = sbuf.sem_perm.gid; 411 semaptr->ds.sem_perm.mode = 412 (semaptr->ds.sem_perm.mode & ~0777) | 413 (sbuf.sem_perm.mode & 0777); 414 semaptr->ds.sem_ctime = time_second; 415 break; 416 417 case IPC_STAT: 418 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 419 if (eval) 420 break; 421 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 422 break; 423 eval = copyout(&semaptr->ds, real_arg.buf, 424 sizeof(struct semid_ds)); 425 break; 426 case SEM_STAT: 427 /* 428 * For this command we assume semid is an array index 429 * rather than an IPC id. However, the conversion is 430 * just a mask so just validate that the passed-in semid 431 * matches the masked semid. 432 */ 433 if (uap->semid != semid) { 434 eval = EINVAL; 435 break; 436 } 437 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 438 if (eval) 439 break; 440 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 441 break; 442 eval = copyout(&semaptr->ds, real_arg.buf, 443 sizeof(struct semid_ds)); 444 rval = IXSEQ_TO_IPCID(semid, semaptr->ds.sem_perm); 445 break; 446 447 case GETNCNT: 448 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 449 if (eval) 450 break; 451 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 452 eval = EINVAL; 453 break; 454 } 455 rval = semaptr->ds.sem_base[semnum].semncnt; 456 break; 457 458 case GETPID: 459 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 460 if (eval) 461 break; 462 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 463 eval = EINVAL; 464 break; 465 } 466 rval = semaptr->ds.sem_base[semnum].sempid; 467 break; 468 469 case GETVAL: 470 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 471 if (eval) 472 break; 473 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 474 eval = EINVAL; 475 break; 476 } 477 rval = semaptr->ds.sem_base[semnum].semval; 478 break; 479 480 case GETALL: 481 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 482 if (eval) 483 break; 484 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 485 break; 486 for (i = 0; i < semaptr->ds.sem_nsems; i++) { 487 eval = copyout(&semaptr->ds.sem_base[i].semval, 488 &real_arg.array[i], 489 sizeof(real_arg.array[0])); 490 if (eval) 491 break; 492 } 493 break; 494 495 case GETZCNT: 496 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 497 if (eval) 498 break; 499 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 500 eval = EINVAL; 501 break; 502 } 503 rval = semaptr->ds.sem_base[semnum].semzcnt; 504 break; 505 506 case SETVAL: 507 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_W); 508 if (eval) 509 break; 510 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 511 eval = EINVAL; 512 break; 513 } 514 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 515 break; 516 517 /* 518 * Because we hold semaptr->lk exclusively we can safely 519 * modify any semptr content without acquiring its token. 520 */ 521 semptr = &semaptr->ds.sem_base[semnum]; 522 semptr->semval = real_arg.val; 523 semundo_clear(semid, semnum); 524 if (semptr->semzcnt || semptr->semncnt) 525 wakeup(semptr); 526 break; 527 528 case SETALL: 529 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_W); 530 if (eval) 531 break; 532 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 533 break; 534 /* 535 * Because we hold semaptr->lk exclusively we can safely 536 * modify any semptr content without acquiring its token. 537 */ 538 for (i = 0; i < semaptr->ds.sem_nsems; i++) { 539 semptr = &semaptr->ds.sem_base[i]; 540 eval = copyin(&real_arg.array[i], 541 (caddr_t)&semptr->semval, 542 sizeof(real_arg.array[0])); 543 if (semptr->semzcnt || semptr->semncnt) 544 wakeup(semptr); 545 if (eval != 0) 546 break; 547 } 548 semundo_clear(semid, -1); 549 break; 550 551 default: 552 eval = EINVAL; 553 break; 554 } 555 lockmgr(&semaptr->lk, LK_RELEASE); 556 557 if (eval == 0) 558 uap->sysmsg_result = rval; 559 return(eval); 560 } 561 562 /* 563 * MPALMOSTSAFE 564 */ 565 int 566 sys_semget(struct semget_args *uap) 567 { 568 struct thread *td = curthread; 569 int semid, eval; 570 int key = uap->key; 571 int nsems = uap->nsems; 572 int semflg = uap->semflg; 573 struct ucred *cred = td->td_ucred; 574 575 #ifdef SEM_DEBUG 576 kprintf("semget(0x%x, %d, 0%o)\n", key, nsems, semflg); 577 #endif 578 579 if (!jail_sysvipc_allowed && cred->cr_prison != NULL) 580 return (ENOSYS); 581 582 eval = 0; 583 584 if (key != IPC_PRIVATE) { 585 for (semid = 0; semid < seminfo.semmni; semid++) { 586 if ((sema[semid].ds.sem_perm.mode & SEM_ALLOC) == 0 || 587 sema[semid].ds.sem_perm.key != key) { 588 continue; 589 } 590 lockmgr(&sema[semid].lk, LK_EXCLUSIVE); 591 if ((sema[semid].ds.sem_perm.mode & SEM_ALLOC) == 0 || 592 sema[semid].ds.sem_perm.key != key) { 593 lockmgr(&sema[semid].lk, LK_RELEASE); 594 continue; 595 } 596 break; 597 } 598 if (semid < seminfo.semmni) { 599 /* sema[semid].lk still locked from above */ 600 #ifdef SEM_DEBUG 601 kprintf("found public key\n"); 602 #endif 603 if ((eval = ipcperm(td->td_proc, 604 &sema[semid].ds.sem_perm, 605 semflg & 0700))) { 606 lockmgr(&sema[semid].lk, LK_RELEASE); 607 goto done; 608 } 609 if (nsems > 0 && sema[semid].ds.sem_nsems < nsems) { 610 #ifdef SEM_DEBUG 611 kprintf("too small\n"); 612 #endif 613 eval = EINVAL; 614 lockmgr(&sema[semid].lk, LK_RELEASE); 615 goto done; 616 } 617 if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) { 618 #ifdef SEM_DEBUG 619 kprintf("not exclusive\n"); 620 #endif 621 eval = EEXIST; 622 lockmgr(&sema[semid].lk, LK_RELEASE); 623 goto done; 624 } 625 626 /* 627 * Return this one. 628 */ 629 lockmgr(&sema[semid].lk, LK_RELEASE); 630 goto done; 631 } 632 } 633 634 #ifdef SEM_DEBUG 635 kprintf("need to allocate the semid_ds\n"); 636 #endif 637 if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) { 638 if (nsems <= 0 || nsems > seminfo.semmsl) { 639 #ifdef SEM_DEBUG 640 kprintf("nsems out of range (0<%d<=%d)\n", 641 nsems, seminfo.semmsl); 642 #endif 643 eval = EINVAL; 644 goto done; 645 } 646 647 /* 648 * SEM_ALLOC flag cannot be set unless sema_lk is locked. 649 * semtot field also protected by sema_lk. 650 */ 651 lockmgr(&sema_lk, LK_EXCLUSIVE); 652 if (nsems > seminfo.semmns - semtot) { 653 #ifdef SEM_DEBUG 654 kprintf("not enough semaphores left " 655 "(need %d, got %d)\n", 656 nsems, seminfo.semmns - semtot); 657 #endif 658 eval = ENOSPC; 659 lockmgr(&sema_lk, LK_RELEASE); 660 goto done; 661 } 662 for (semid = 0; semid < seminfo.semmni; semid++) { 663 if ((sema[semid].ds.sem_perm.mode & SEM_ALLOC) == 0) 664 break; 665 } 666 if (semid == seminfo.semmni) { 667 #ifdef SEM_DEBUG 668 kprintf("no more semid_ds's available\n"); 669 #endif 670 eval = ENOSPC; 671 lockmgr(&sema_lk, LK_RELEASE); 672 goto done; 673 } 674 #ifdef SEM_DEBUG 675 kprintf("semid %d is available\n", semid); 676 #endif 677 lockmgr(&sema[semid].lk, LK_EXCLUSIVE); 678 sema[semid].ds.sem_perm.key = key; 679 sema[semid].ds.sem_perm.cuid = cred->cr_uid; 680 sema[semid].ds.sem_perm.uid = cred->cr_uid; 681 sema[semid].ds.sem_perm.cgid = cred->cr_gid; 682 sema[semid].ds.sem_perm.gid = cred->cr_gid; 683 sema[semid].ds.sem_perm.mode = (semflg & 0777) | SEM_ALLOC; 684 sema[semid].ds.sem_perm.seq = 685 (sema[semid].ds.sem_perm.seq + 1) & 0x7fff; 686 sema[semid].ds.sem_nsems = nsems; 687 sema[semid].ds.sem_otime = 0; 688 sema[semid].ds.sem_ctime = time_second; 689 sema[semid].ds.sem_base = kmalloc(sizeof(struct sem) * nsems, 690 M_SEM, M_WAITOK|M_ZERO); 691 semtot += nsems; 692 ++sema[semid].gen; 693 lockmgr(&sema[semid].lk, LK_RELEASE); 694 lockmgr(&sema_lk, LK_RELEASE); 695 #ifdef SEM_DEBUG 696 kprintf("sembase = 0x%x, next = 0x%x\n", 697 sema[semid].ds.sem_base, &sem[semtot]); 698 #endif 699 /* eval == 0 */ 700 } else { 701 #ifdef SEM_DEBUG 702 kprintf("didn't find it and wasn't asked to create it\n"); 703 #endif 704 eval = ENOENT; 705 } 706 707 done: 708 if (eval == 0) { 709 uap->sysmsg_result = 710 IXSEQ_TO_IPCID(semid, sema[semid].ds.sem_perm); 711 } 712 return(eval); 713 } 714 715 /* 716 * MPSAFE 717 */ 718 int 719 sys_semop(struct semop_args *uap) 720 { 721 struct thread *td = curthread; 722 int semid = uap->semid; 723 u_int nsops = uap->nsops; 724 struct sembuf sops[MAX_SOPS]; 725 struct semid_pool *semaptr; 726 struct sembuf *sopptr; 727 struct sem *semptr; 728 struct sem *xsemptr; 729 int i, j, eval; 730 int do_undos; 731 732 #ifdef SEM_DEBUG 733 kprintf("call to semop(%d, 0x%x, %u)\n", semid, sops, nsops); 734 #endif 735 if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL) 736 return (ENOSYS); 737 738 semid = IPCID_TO_IX(semid); /* Convert back to zero origin */ 739 740 if (semid < 0 || semid >= seminfo.semmni) { 741 eval = EINVAL; 742 goto done2; 743 } 744 745 wakeup_start_delayed(); 746 semaptr = &sema[semid]; 747 lockmgr(&semaptr->lk, LK_SHARED); 748 749 if ((semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0) { 750 eval = EINVAL; 751 goto done; 752 } 753 if (semaptr->ds.sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { 754 eval = EINVAL; 755 goto done; 756 } 757 758 if ((eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_W))) { 759 #ifdef SEM_DEBUG 760 kprintf("eval = %d from ipaccess\n", eval); 761 #endif 762 goto done; 763 } 764 765 if (nsops > MAX_SOPS) { 766 #ifdef SEM_DEBUG 767 kprintf("too many sops (max=%d, nsops=%u)\n", MAX_SOPS, nsops); 768 #endif 769 eval = E2BIG; 770 goto done; 771 } 772 773 if ((eval = copyin(uap->sops, &sops, nsops * sizeof(sops[0]))) != 0) { 774 #ifdef SEM_DEBUG 775 kprintf("eval = %d from copyin(%08x, %08x, %u)\n", eval, 776 uap->sops, &sops, nsops * sizeof(sops[0])); 777 #endif 778 goto done; 779 } 780 781 /* 782 * Loop trying to satisfy the vector of requests. 783 * If we reach a point where we must wait, any requests already 784 * performed are rolled back and we go to sleep until some other 785 * process wakes us up. At this point, we start all over again. 786 * 787 * This ensures that from the perspective of other tasks, a set 788 * of requests is atomic (never partially satisfied). 789 */ 790 do_undos = 0; 791 792 for (;;) { 793 long gen; 794 795 semptr = NULL; 796 797 for (i = 0; i < nsops; i++) { 798 sopptr = &sops[i]; 799 800 if (sopptr->sem_num >= semaptr->ds.sem_nsems) { 801 eval = EFBIG; 802 goto done; 803 } 804 805 semptr = &semaptr->ds.sem_base[sopptr->sem_num]; 806 lwkt_getpooltoken(semptr); 807 808 #ifdef SEM_DEBUG 809 kprintf("semop: semaptr=%x, sem_base=%x, semptr=%x, " 810 "sem[%d]=%d : op=%d, flag=%s\n", 811 semaptr, semaptr->ds.sem_base, semptr, 812 sopptr->sem_num, semptr->semval, sopptr->sem_op, 813 (sopptr->sem_flg & IPC_NOWAIT) ? "nowait" : "wait"); 814 #endif 815 816 if (sopptr->sem_op < 0) { 817 if (semptr->semval + sopptr->sem_op < 0) { 818 #ifdef SEM_DEBUG 819 kprintf("semop: can't do it now\n"); 820 #endif 821 break; 822 } else { 823 semptr->semval += sopptr->sem_op; 824 if (semptr->semval == 0 && 825 semptr->semzcnt > 0) { 826 wakeup(semptr); 827 } 828 } 829 if (sopptr->sem_flg & SEM_UNDO) 830 do_undos = 1; 831 } else if (sopptr->sem_op == 0) { 832 if (semptr->semval > 0) { 833 #ifdef SEM_DEBUG 834 kprintf("semop: not zero now\n"); 835 #endif 836 break; 837 } 838 } else { 839 semptr->semval += sopptr->sem_op; 840 if (sopptr->sem_flg & SEM_UNDO) 841 do_undos = 1; 842 if (semptr->semncnt > 0) 843 wakeup(semptr); 844 } 845 lwkt_relpooltoken(semptr); 846 } 847 848 /* 849 * Did we get through the entire vector? 850 */ 851 if (i >= nsops) 852 goto donex; 853 854 /* 855 * No, protect the semaphore request which also flags that 856 * a wakeup is needed, then release semptr since we know 857 * another process is likely going to need to access it 858 * soon. 859 */ 860 if (sopptr->sem_op == 0) 861 semptr->semzcnt++; 862 else 863 semptr->semncnt++; 864 tsleep_interlock(semptr, PCATCH); 865 lwkt_relpooltoken(semptr); 866 867 /* 868 * Rollback the semaphores we had acquired. 869 */ 870 #ifdef SEM_DEBUG 871 kprintf("semop: rollback 0 through %d\n", i-1); 872 #endif 873 for (j = 0; j < i; j++) { 874 xsemptr = &semaptr->ds.sem_base[sops[j].sem_num]; 875 lwkt_getpooltoken(xsemptr); 876 xsemptr->semval -= sops[j].sem_op; 877 if (xsemptr->semval == 0 && xsemptr->semzcnt > 0) 878 wakeup(xsemptr); 879 if (xsemptr->semval <= 0 && xsemptr->semncnt > 0) 880 wakeup(xsemptr); 881 lwkt_relpooltoken(xsemptr); 882 } 883 884 /* 885 * If the request that we couldn't satisfy has the 886 * NOWAIT flag set then return with EAGAIN. 887 */ 888 if (sopptr->sem_flg & IPC_NOWAIT) { 889 eval = EAGAIN; 890 goto done; 891 } 892 893 /* 894 * Release semaptr->lk while sleeping, allowing other 895 * semops (like SETVAL, SETALL, etc), which require an 896 * exclusive lock and might wake us up. 897 * 898 * Reload and recheck the validity of semaptr on return. 899 * Note that semptr itself might have changed too, but 900 * we've already interlocked for semptr and that is what 901 * will be woken up if it wakes up the tsleep on a MP 902 * race. 903 * 904 * gen protects against destroy/re-create races where the 905 * creds match. 906 */ 907 #ifdef SEM_DEBUG 908 kprintf("semop: good night!\n"); 909 #endif 910 gen = semaptr->gen; 911 lockmgr(&semaptr->lk, LK_RELEASE); 912 eval = tsleep(semptr, PCATCH | PINTERLOCKED, "semwait", hz); 913 lockmgr(&semaptr->lk, LK_SHARED); 914 #ifdef SEM_DEBUG 915 kprintf("semop: good morning (eval=%d)!\n", eval); 916 #endif 917 918 /* return code is checked below, after sem[nz]cnt-- */ 919 920 /* 921 * Make sure that the semaphore still exists 922 */ 923 if (semaptr->gen != gen || 924 (semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0 || 925 semaptr->ds.sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { 926 eval = EIDRM; 927 goto done; 928 } 929 930 /* 931 * The semaphore is still alive. Readjust the count of 932 * waiting processes. 933 */ 934 semptr = &semaptr->ds.sem_base[sopptr->sem_num]; 935 lwkt_getpooltoken(semptr); 936 if (sopptr->sem_op == 0) 937 semptr->semzcnt--; 938 else 939 semptr->semncnt--; 940 lwkt_relpooltoken(semptr); 941 942 /* 943 * Is it really morning, or was our sleep interrupted? 944 * (Delayed check of tsleep() return code because we 945 * need to decrement sem[nz]cnt either way.) 946 */ 947 if (eval) { 948 eval = EINTR; 949 goto done; 950 } 951 #ifdef SEM_DEBUG 952 kprintf("semop: good morning!\n"); 953 #endif 954 /* RETRY LOOP */ 955 } 956 957 donex: 958 /* 959 * Process any SEM_UNDO requests. 960 */ 961 if (do_undos) { 962 for (i = 0; i < nsops; i++) { 963 /* 964 * We only need to deal with SEM_UNDO's for non-zero 965 * op's. 966 */ 967 int adjval; 968 969 if ((sops[i].sem_flg & SEM_UNDO) == 0) 970 continue; 971 adjval = sops[i].sem_op; 972 if (adjval == 0) 973 continue; 974 eval = semundo_adjust(td->td_proc, semid, 975 sops[i].sem_num, -adjval); 976 if (eval == 0) 977 continue; 978 979 /* 980 * Oh-Oh! We ran out of either sem_undo's or undo's. 981 * Rollback the adjustments to this point and then 982 * rollback the semaphore ups and down so we can return 983 * with an error with all structures restored. We 984 * rollback the undo's in the exact reverse order that 985 * we applied them. This guarantees that we won't run 986 * out of space as we roll things back out. 987 */ 988 for (j = i - 1; j >= 0; j--) { 989 if ((sops[j].sem_flg & SEM_UNDO) == 0) 990 continue; 991 adjval = sops[j].sem_op; 992 if (adjval == 0) 993 continue; 994 if (semundo_adjust(td->td_proc, semid, 995 sops[j].sem_num, adjval) != 0) 996 panic("semop - can't undo undos"); 997 } 998 999 for (j = 0; j < nsops; j++) { 1000 xsemptr = &semaptr->ds.sem_base[ 1001 sops[j].sem_num]; 1002 lwkt_getpooltoken(xsemptr); 1003 xsemptr->semval -= sops[j].sem_op; 1004 if (xsemptr->semval == 0 && 1005 xsemptr->semzcnt > 0) 1006 wakeup(xsemptr); 1007 if (xsemptr->semval <= 0 && 1008 xsemptr->semncnt > 0) 1009 wakeup(xsemptr); 1010 lwkt_relpooltoken(xsemptr); 1011 } 1012 1013 #ifdef SEM_DEBUG 1014 kprintf("eval = %d from semundo_adjust\n", eval); 1015 #endif 1016 goto done; 1017 } /* loop through the sops */ 1018 } /* if (do_undos) */ 1019 1020 /* We're definitely done - set the sempid's */ 1021 for (i = 0; i < nsops; i++) { 1022 sopptr = &sops[i]; 1023 semptr = &semaptr->ds.sem_base[sopptr->sem_num]; 1024 lwkt_getpooltoken(semptr); 1025 semptr->sempid = td->td_proc->p_pid; 1026 lwkt_relpooltoken(semptr); 1027 } 1028 1029 /* Do a wakeup if any semaphore was up'd. */ 1030 #ifdef SEM_DEBUG 1031 kprintf("semop: done\n"); 1032 #endif 1033 uap->sysmsg_result = 0; 1034 eval = 0; 1035 done: 1036 lockmgr(&semaptr->lk, LK_RELEASE); 1037 wakeup_end_delayed(); 1038 done2: 1039 return(eval); 1040 } 1041 1042 /* 1043 * Go through the undo structures for this process and apply the adjustments to 1044 * semaphores. 1045 * 1046 * (p->p_token is held by the caller) 1047 */ 1048 void 1049 semexit(struct proc *p) 1050 { 1051 struct sem_undo *suptr; 1052 struct sem *semptr; 1053 1054 /* 1055 * We're getting a global token, don't do it if we couldn't 1056 * possibly have any semaphores. 1057 */ 1058 if ((p->p_flags & P_SYSVSEM) == 0) 1059 return; 1060 suptr = p->p_sem_undo; 1061 KKASSERT(suptr != NULL); 1062 1063 /* 1064 * Disconnect suptr from the process and increment un_refs to 1065 * prevent anyone else from being able to destroy the structure. 1066 * Do not remove it from the linked list until after we are through 1067 * scanning it as other semaphore calls might still effect it. 1068 */ 1069 lwkt_gettoken(&semu_token); 1070 p->p_sem_undo = NULL; 1071 p->p_flags &= ~P_SYSVSEM; 1072 suptr->un_proc = NULL; 1073 ++suptr->un_refs; 1074 lwkt_reltoken(&semu_token); 1075 1076 while (suptr->un_cnt) { 1077 struct semid_pool *semaptr; 1078 int semid; 1079 int semnum; 1080 int adjval; 1081 int ix; 1082 1083 /* 1084 * These values are stable because we hold p->p_token. 1085 * However, they can get ripped out from under us when 1086 * we block or obtain other tokens so we have to re-check. 1087 */ 1088 ix = suptr->un_cnt - 1; 1089 semid = suptr->un_ent[ix].un_id; 1090 semnum = suptr->un_ent[ix].un_num; 1091 adjval = suptr->un_ent[ix].un_adjval; 1092 1093 semaptr = &sema[semid]; 1094 1095 /* 1096 * Recheck after locking, then execute the undo 1097 * operation. semptr remains valid due to the 1098 * semaptr->lk. 1099 */ 1100 lockmgr(&semaptr->lk, LK_SHARED); 1101 semptr = &semaptr->ds.sem_base[semnum]; 1102 lwkt_getpooltoken(semptr); 1103 1104 if (ix == suptr->un_cnt - 1 && 1105 semid == suptr->un_ent[ix].un_id && 1106 semnum == suptr->un_ent[ix].un_num && 1107 adjval == suptr->un_ent[ix].un_adjval) { 1108 /* 1109 * Only do assertions when we aren't in a SMP race. 1110 */ 1111 if ((semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0) 1112 panic("semexit - semid not allocated"); 1113 if (semnum >= semaptr->ds.sem_nsems) 1114 panic("semexit - semnum out of range"); 1115 --suptr->un_cnt; 1116 1117 if (adjval < 0) { 1118 if (semptr->semval < -adjval) 1119 semptr->semval = 0; 1120 else 1121 semptr->semval += adjval; 1122 } else { 1123 semptr->semval += adjval; 1124 } 1125 wakeup(semptr); 1126 } 1127 lwkt_relpooltoken(semptr); 1128 lockmgr(&semaptr->lk, LK_RELEASE); 1129 } 1130 1131 /* 1132 * Final cleanup, remove from the list and deallocate on the 1133 * last ref only. 1134 */ 1135 lwkt_gettoken(&semu_token); 1136 if (--suptr->un_refs == 0) { 1137 TAILQ_REMOVE(&semu_list, suptr, un_entry); 1138 KKASSERT(suptr->un_cnt == 0); 1139 kfree(suptr, M_SEM); 1140 } 1141 lwkt_reltoken(&semu_token); 1142 } 1143