1 /* $FreeBSD: src/sys/kern/sysv_sem.c,v 1.69 2004/03/17 09:37:13 cperciva Exp $ */ 2 3 /* 4 * Implementation of SVID semaphores 5 * 6 * Author: Daniel Boulet 7 * 8 * This software is provided ``AS IS'' without any warranties of any kind. 9 */ 10 11 #include "opt_sysvipc.h" 12 13 #include <sys/param.h> 14 #include <sys/systm.h> 15 #include <sys/sysproto.h> 16 #include <sys/kernel.h> 17 #include <sys/proc.h> 18 #include <sys/sem.h> 19 #include <sys/sysent.h> 20 #include <sys/sysctl.h> 21 #include <sys/malloc.h> 22 #include <sys/jail.h> 23 #include <sys/thread.h> 24 25 #include <sys/thread2.h> 26 27 static MALLOC_DEFINE(M_SEM, "sem", "SVID compatible semaphores"); 28 29 static void seminit (void *); 30 31 static struct sem_undo *semu_alloc (struct proc *p); 32 static int semundo_adjust (struct proc *p, int semid, int semnum, int adjval); 33 static void semundo_clear (int semid, int semnum); 34 35 static struct lwkt_token semu_token = LWKT_TOKEN_INITIALIZER(semu_token); 36 static int semtot = 0; 37 static struct semid_pool *sema; /* semaphore id pool */ 38 static TAILQ_HEAD(, sem_undo) semu_list = TAILQ_HEAD_INITIALIZER(semu_list); 39 static struct lock sema_lk; 40 41 struct sem { 42 u_short semval; /* semaphore value */ 43 pid_t sempid; /* pid of last operation */ 44 u_short semncnt; /* # awaiting semval > cval */ 45 u_short semzcnt; /* # awaiting semval = 0 */ 46 }; 47 48 /* 49 * Undo structure (one per process) 50 */ 51 struct sem_undo { 52 TAILQ_ENTRY(sem_undo) un_entry; /* linked list for semundo_clear() */ 53 struct proc *un_proc; /* owner of this structure */ 54 int un_refs; /* prevent unlink/kfree */ 55 short un_cnt; /* # of active entries */ 56 short un_unused; 57 struct undo { 58 short un_adjval; /* adjust on exit values */ 59 short un_num; /* semaphore # */ 60 int un_id; /* semid */ 61 } un_ent[1]; /* undo entries */ 62 }; 63 64 /* 65 * Configuration parameters 66 */ 67 #ifndef SEMMNI 68 #define SEMMNI 1024 /* # of semaphore identifiers */ 69 #endif 70 #ifndef SEMMNS 71 #define SEMMNS 32767 /* # of semaphores in system */ 72 #endif 73 #ifndef SEMUME 74 #define SEMUME 25 /* max # of undo entries per process */ 75 #endif 76 #ifndef SEMMNU 77 #define SEMMNU 1024 /* # of undo structures in system */ 78 /* NO LONGER USED */ 79 #endif 80 81 /* shouldn't need tuning */ 82 #ifndef SEMMAP 83 #define SEMMAP 128 /* # of entries in semaphore map */ 84 #endif 85 #ifndef SEMMSL 86 #define SEMMSL SEMMNS /* max # of semaphores per id */ 87 #endif 88 #ifndef SEMOPM 89 #define SEMOPM 100 /* max # of operations per semop call */ 90 #endif 91 92 #define SEMVMX 32767 /* semaphore maximum value */ 93 #define SEMAEM 16384 /* adjust on exit max value */ 94 95 /* 96 * Due to the way semaphore memory is allocated, we have to ensure that 97 * SEMUSZ is properly aligned. 98 */ 99 100 #define SEM_ALIGN(bytes) roundup2(bytes, sizeof(long)) 101 102 /* actual size of an undo structure */ 103 #define SEMUSZ(nent) SEM_ALIGN(offsetof(struct sem_undo, un_ent[nent])) 104 105 /* 106 * semaphore info struct 107 */ 108 struct seminfo seminfo = { 109 SEMMAP, /* # of entries in semaphore map */ 110 SEMMNI, /* # of semaphore identifiers */ 111 SEMMNS, /* # of semaphores in system */ 112 SEMMNU, /* # of undo structures in system */ 113 SEMMSL, /* max # of semaphores per id */ 114 SEMOPM, /* max # of operations per semop call */ 115 SEMUME, /* max # of undo entries per process */ 116 SEMUSZ(SEMUME), /* size in bytes of undo structure */ 117 SEMVMX, /* semaphore maximum value */ 118 SEMAEM /* adjust on exit max value */ 119 }; 120 121 TUNABLE_INT("kern.ipc.semmap", &seminfo.semmap); 122 TUNABLE_INT("kern.ipc.semmni", &seminfo.semmni); 123 TUNABLE_INT("kern.ipc.semmns", &seminfo.semmns); 124 TUNABLE_INT("kern.ipc.semmnu", &seminfo.semmnu); 125 TUNABLE_INT("kern.ipc.semmsl", &seminfo.semmsl); 126 TUNABLE_INT("kern.ipc.semopm", &seminfo.semopm); 127 TUNABLE_INT("kern.ipc.semume", &seminfo.semume); 128 TUNABLE_INT("kern.ipc.semusz", &seminfo.semusz); 129 TUNABLE_INT("kern.ipc.semvmx", &seminfo.semvmx); 130 TUNABLE_INT("kern.ipc.semaem", &seminfo.semaem); 131 132 SYSCTL_INT(_kern_ipc, OID_AUTO, semmap, CTLFLAG_RW, &seminfo.semmap, 0, 133 "Number of entries in semaphore map"); 134 SYSCTL_INT(_kern_ipc, OID_AUTO, semmni, CTLFLAG_RD, &seminfo.semmni, 0, 135 "Number of semaphore identifiers"); 136 SYSCTL_INT(_kern_ipc, OID_AUTO, semmns, CTLFLAG_RD, &seminfo.semmns, 0, 137 "Total number of semaphores"); 138 SYSCTL_INT(_kern_ipc, OID_AUTO, semmnu, CTLFLAG_RD, &seminfo.semmnu, 0, 139 "Total number of undo structures"); 140 SYSCTL_INT(_kern_ipc, OID_AUTO, semmsl, CTLFLAG_RW, &seminfo.semmsl, 0, 141 "Max number of semaphores per id"); 142 SYSCTL_INT(_kern_ipc, OID_AUTO, semopm, CTLFLAG_RD, &seminfo.semopm, 0, 143 "Max number of operations per semop call"); 144 SYSCTL_INT(_kern_ipc, OID_AUTO, semume, CTLFLAG_RD, &seminfo.semume, 0, 145 "Max number of undo entries per process"); 146 SYSCTL_INT(_kern_ipc, OID_AUTO, semusz, CTLFLAG_RD, &seminfo.semusz, 0, 147 "Size in bytes of undo structure"); 148 SYSCTL_INT(_kern_ipc, OID_AUTO, semvmx, CTLFLAG_RW, &seminfo.semvmx, 0, 149 "Semaphore maximum value"); 150 SYSCTL_INT(_kern_ipc, OID_AUTO, semaem, CTLFLAG_RW, &seminfo.semaem, 0, 151 "Adjust on exit max value"); 152 153 #if 0 154 RO seminfo.semmap /* SEMMAP unused */ 155 RO seminfo.semmni 156 RO seminfo.semmns 157 RO seminfo.semmnu /* undo entries per system */ 158 RW seminfo.semmsl 159 RO seminfo.semopm /* SEMOPM unused */ 160 RO seminfo.semume 161 RO seminfo.semusz /* param - derived from SEMUME for per-proc sizeof */ 162 RO seminfo.semvmx /* SEMVMX unused - user param */ 163 RO seminfo.semaem /* SEMAEM unused - user param */ 164 #endif 165 166 static void 167 seminit(void *dummy) 168 { 169 int i; 170 171 sema = kmalloc(sizeof(struct semid_pool) * seminfo.semmni, 172 M_SEM, M_WAITOK | M_ZERO); 173 174 lockinit(&sema_lk, "semglb", 0, 0); 175 for (i = 0; i < seminfo.semmni; i++) { 176 struct semid_pool *semaptr = &sema[i]; 177 178 lockinit(&semaptr->lk, "semary", 0, 0); 179 semaptr->ds.sem_base = NULL; 180 semaptr->ds.sem_perm.mode = 0; 181 } 182 } 183 SYSINIT(sysv_sem, SI_SUB_SYSV_SEM, SI_ORDER_FIRST, seminit, NULL); 184 185 /* 186 * Allocate a new sem_undo structure for a process 187 * (returns ptr to structure or NULL if no more room) 188 */ 189 static struct sem_undo * 190 semu_alloc(struct proc *p) 191 { 192 struct sem_undo *semu; 193 194 /* 195 * Allocate the semu structure and associate it with the process, 196 * as necessary. 197 */ 198 while ((semu = p->p_sem_undo) == NULL) { 199 semu = kmalloc(SEMUSZ(seminfo.semume), M_SEM, 200 M_WAITOK | M_ZERO); 201 lwkt_gettoken(&semu_token); 202 lwkt_gettoken(&p->p_token); 203 if (p->p_sem_undo == NULL) { 204 p->p_sem_undo = semu; 205 p->p_flags |= P_SYSVSEM; 206 semu->un_proc = p; 207 TAILQ_INSERT_TAIL(&semu_list, semu, un_entry); 208 } else { 209 kfree(semu, M_SEM); 210 } 211 lwkt_reltoken(&p->p_token); 212 lwkt_reltoken(&semu_token); 213 } 214 return(semu); 215 } 216 217 /* 218 * Adjust a particular entry for a particular proc 219 */ 220 static int 221 semundo_adjust(struct proc *p, int semid, int semnum, int adjval) 222 { 223 struct sem_undo *suptr; 224 struct undo *sunptr; 225 int i; 226 int error = 0; 227 228 /* 229 * Look for and remember the sem_undo if the caller doesn't 230 * provide it. 231 */ 232 suptr = semu_alloc(p); 233 lwkt_gettoken(&p->p_token); 234 235 /* 236 * Look for the requested entry and adjust it (delete if adjval becomes 237 * 0). 238 */ 239 sunptr = &suptr->un_ent[0]; 240 for (i = 0; i < suptr->un_cnt; i++, sunptr++) { 241 if (sunptr->un_id != semid || sunptr->un_num != semnum) 242 continue; 243 if (adjval == 0) 244 sunptr->un_adjval = 0; 245 else 246 sunptr->un_adjval += adjval; 247 if (sunptr->un_adjval == 0) { 248 suptr->un_cnt--; 249 if (i < suptr->un_cnt) 250 suptr->un_ent[i] = suptr->un_ent[suptr->un_cnt]; 251 } 252 goto done; 253 } 254 255 /* Didn't find the right entry - create it */ 256 if (adjval == 0) 257 goto done; 258 if (suptr->un_cnt != seminfo.semume) { 259 sunptr = &suptr->un_ent[suptr->un_cnt]; 260 suptr->un_cnt++; 261 sunptr->un_adjval = adjval; 262 sunptr->un_id = semid; 263 sunptr->un_num = semnum; 264 } else { 265 error = EINVAL; 266 } 267 done: 268 lwkt_reltoken(&p->p_token); 269 270 return (error); 271 } 272 273 /* 274 * This is rather expensive 275 */ 276 static void 277 semundo_clear(int semid, int semnum) 278 { 279 struct proc *p; 280 struct sem_undo *suptr; 281 struct sem_undo *sunext; 282 struct undo *sunptr; 283 int i; 284 285 lwkt_gettoken(&semu_token); 286 sunext = TAILQ_FIRST(&semu_list); 287 while ((suptr = sunext) != NULL) { 288 if ((p = suptr->un_proc) == NULL) { 289 suptr = TAILQ_NEXT(suptr, un_entry); 290 continue; 291 } 292 ++suptr->un_refs; 293 PHOLD(p); 294 lwkt_gettoken(&p->p_token); 295 296 sunptr = &suptr->un_ent[0]; 297 i = 0; 298 299 while (i < suptr->un_cnt) { 300 if (sunptr->un_id == semid) { 301 if (semnum == -1 || sunptr->un_num == semnum) { 302 suptr->un_cnt--; 303 if (i < suptr->un_cnt) { 304 suptr->un_ent[i] = 305 suptr->un_ent[suptr->un_cnt]; 306 /* 307 * do not increment i 308 * or sunptr after copydown. 309 */ 310 continue; 311 } 312 } 313 if (semnum != -1) 314 break; 315 } 316 ++i; 317 ++sunptr; 318 } 319 320 lwkt_reltoken(&p->p_token); 321 PRELE(p); 322 323 /* 324 * Handle deletion races 325 */ 326 sunext = TAILQ_NEXT(suptr, un_entry); 327 if (--suptr->un_refs == 0 && suptr->un_proc == NULL) { 328 KKASSERT(suptr->un_cnt == 0); 329 TAILQ_REMOVE(&semu_list, suptr, un_entry); 330 kfree(suptr, M_SEM); 331 } 332 } 333 lwkt_reltoken(&semu_token); 334 } 335 336 /* 337 * Note that the user-mode half of this passes a union, not a pointer 338 * 339 * MPALMOSTSAFE 340 */ 341 int 342 sys___semctl(struct __semctl_args *uap) 343 { 344 struct thread *td = curthread; 345 int semid = uap->semid; 346 int semnum = uap->semnum; 347 int cmd = uap->cmd; 348 union semun *arg = uap->arg; 349 union semun real_arg; 350 struct ucred *cred = td->td_ucred; 351 int i, rval, eval; 352 struct semid_ds sbuf; 353 struct semid_pool *semaptr; 354 struct sem *semptr; 355 356 #ifdef SEM_DEBUG 357 kprintf("call to semctl(%d, %d, %d, 0x%x)\n", semid, semnum, cmd, arg); 358 #endif 359 360 if (!jail_sysvipc_allowed && cred->cr_prison != NULL) 361 return (ENOSYS); 362 363 semid = IPCID_TO_IX(semid); 364 if (semid < 0 || semid >= seminfo.semmni) { 365 return(EINVAL); 366 } 367 semaptr = &sema[semid]; 368 lockmgr(&semaptr->lk, LK_EXCLUSIVE); 369 370 if ((semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0 || 371 semaptr->ds.sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { 372 lockmgr(&semaptr->lk, LK_RELEASE); 373 return(EINVAL); 374 } 375 376 eval = 0; 377 rval = 0; 378 379 switch (cmd) { 380 case IPC_RMID: 381 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_M); 382 if (eval != 0) 383 break; 384 semaptr->ds.sem_perm.cuid = cred->cr_uid; 385 semaptr->ds.sem_perm.uid = cred->cr_uid; 386 387 /* 388 * NOTE: Nobody will be waiting on the semaphores since 389 * we have an exclusive lock on semaptr->lk). 390 */ 391 lockmgr(&sema_lk, LK_EXCLUSIVE); 392 semtot -= semaptr->ds.sem_nsems; 393 kfree(semaptr->ds.sem_base, M_SEM); 394 semaptr->ds.sem_base = NULL; 395 semaptr->ds.sem_perm.mode = 0; /* clears SEM_ALLOC */ 396 lockmgr(&sema_lk, LK_RELEASE); 397 398 semundo_clear(semid, -1); 399 break; 400 401 case IPC_SET: 402 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_M); 403 if (eval) 404 break; 405 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 406 break; 407 if ((eval = copyin(real_arg.buf, (caddr_t)&sbuf, 408 sizeof(sbuf))) != 0) { 409 break; 410 } 411 semaptr->ds.sem_perm.uid = sbuf.sem_perm.uid; 412 semaptr->ds.sem_perm.gid = sbuf.sem_perm.gid; 413 semaptr->ds.sem_perm.mode = 414 (semaptr->ds.sem_perm.mode & ~0777) | 415 (sbuf.sem_perm.mode & 0777); 416 semaptr->ds.sem_ctime = time_second; 417 break; 418 419 case IPC_STAT: 420 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 421 if (eval) 422 break; 423 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 424 break; 425 eval = copyout(&semaptr->ds, real_arg.buf, 426 sizeof(struct semid_ds)); 427 break; 428 case SEM_STAT: 429 /* 430 * For this command we assume semid is an array index 431 * rather than an IPC id. However, the conversion is 432 * just a mask so just validate that the passed-in semid 433 * matches the masked semid. 434 */ 435 if (uap->semid != semid) { 436 eval = EINVAL; 437 break; 438 } 439 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 440 if (eval) 441 break; 442 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 443 break; 444 eval = copyout(&semaptr->ds, real_arg.buf, 445 sizeof(struct semid_ds)); 446 rval = IXSEQ_TO_IPCID(semid, semaptr->ds.sem_perm); 447 break; 448 449 case GETNCNT: 450 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 451 if (eval) 452 break; 453 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 454 eval = EINVAL; 455 break; 456 } 457 rval = semaptr->ds.sem_base[semnum].semncnt; 458 break; 459 460 case GETPID: 461 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 462 if (eval) 463 break; 464 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 465 eval = EINVAL; 466 break; 467 } 468 rval = semaptr->ds.sem_base[semnum].sempid; 469 break; 470 471 case GETVAL: 472 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 473 if (eval) 474 break; 475 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 476 eval = EINVAL; 477 break; 478 } 479 rval = semaptr->ds.sem_base[semnum].semval; 480 break; 481 482 case GETALL: 483 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 484 if (eval) 485 break; 486 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 487 break; 488 for (i = 0; i < semaptr->ds.sem_nsems; i++) { 489 eval = copyout(&semaptr->ds.sem_base[i].semval, 490 &real_arg.array[i], 491 sizeof(real_arg.array[0])); 492 if (eval) 493 break; 494 } 495 break; 496 497 case GETZCNT: 498 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_R); 499 if (eval) 500 break; 501 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 502 eval = EINVAL; 503 break; 504 } 505 rval = semaptr->ds.sem_base[semnum].semzcnt; 506 break; 507 508 case SETVAL: 509 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_W); 510 if (eval) 511 break; 512 if (semnum < 0 || semnum >= semaptr->ds.sem_nsems) { 513 eval = EINVAL; 514 break; 515 } 516 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 517 break; 518 519 /* 520 * Because we hold semaptr->lk exclusively we can safely 521 * modify any semptr content without acquiring its token. 522 */ 523 semptr = &semaptr->ds.sem_base[semnum]; 524 semptr->semval = real_arg.val; 525 semundo_clear(semid, semnum); 526 if (semptr->semzcnt || semptr->semncnt) 527 wakeup(semptr); 528 break; 529 530 case SETALL: 531 eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_W); 532 if (eval) 533 break; 534 if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) 535 break; 536 /* 537 * Because we hold semaptr->lk exclusively we can safely 538 * modify any semptr content without acquiring its token. 539 */ 540 for (i = 0; i < semaptr->ds.sem_nsems; i++) { 541 semptr = &semaptr->ds.sem_base[i]; 542 eval = copyin(&real_arg.array[i], 543 (caddr_t)&semptr->semval, 544 sizeof(real_arg.array[0])); 545 if (semptr->semzcnt || semptr->semncnt) 546 wakeup(semptr); 547 if (eval != 0) 548 break; 549 } 550 semundo_clear(semid, -1); 551 break; 552 553 default: 554 eval = EINVAL; 555 break; 556 } 557 lockmgr(&semaptr->lk, LK_RELEASE); 558 559 if (eval == 0) 560 uap->sysmsg_result = rval; 561 return(eval); 562 } 563 564 /* 565 * MPALMOSTSAFE 566 */ 567 int 568 sys_semget(struct semget_args *uap) 569 { 570 struct thread *td = curthread; 571 int semid, eval; 572 int key = uap->key; 573 int nsems = uap->nsems; 574 int semflg = uap->semflg; 575 struct ucred *cred = td->td_ucred; 576 577 #ifdef SEM_DEBUG 578 kprintf("semget(0x%x, %d, 0%o)\n", key, nsems, semflg); 579 #endif 580 581 if (!jail_sysvipc_allowed && cred->cr_prison != NULL) 582 return (ENOSYS); 583 584 eval = 0; 585 586 if (key != IPC_PRIVATE) { 587 for (semid = 0; semid < seminfo.semmni; semid++) { 588 if ((sema[semid].ds.sem_perm.mode & SEM_ALLOC) == 0 || 589 sema[semid].ds.sem_perm.key != key) { 590 continue; 591 } 592 lockmgr(&sema[semid].lk, LK_EXCLUSIVE); 593 if ((sema[semid].ds.sem_perm.mode & SEM_ALLOC) == 0 || 594 sema[semid].ds.sem_perm.key != key) { 595 lockmgr(&sema[semid].lk, LK_RELEASE); 596 continue; 597 } 598 break; 599 } 600 if (semid < seminfo.semmni) { 601 /* sema[semid].lk still locked from above */ 602 #ifdef SEM_DEBUG 603 kprintf("found public key\n"); 604 #endif 605 if ((eval = ipcperm(td->td_proc, 606 &sema[semid].ds.sem_perm, 607 semflg & 0700))) { 608 lockmgr(&sema[semid].lk, LK_RELEASE); 609 goto done; 610 } 611 if (nsems > 0 && sema[semid].ds.sem_nsems < nsems) { 612 #ifdef SEM_DEBUG 613 kprintf("too small\n"); 614 #endif 615 eval = EINVAL; 616 lockmgr(&sema[semid].lk, LK_RELEASE); 617 goto done; 618 } 619 if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) { 620 #ifdef SEM_DEBUG 621 kprintf("not exclusive\n"); 622 #endif 623 eval = EEXIST; 624 lockmgr(&sema[semid].lk, LK_RELEASE); 625 goto done; 626 } 627 628 /* 629 * Return this one. 630 */ 631 lockmgr(&sema[semid].lk, LK_RELEASE); 632 goto done; 633 } 634 } 635 636 #ifdef SEM_DEBUG 637 kprintf("need to allocate the semid_ds\n"); 638 #endif 639 if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) { 640 if (nsems <= 0 || nsems > seminfo.semmsl) { 641 #ifdef SEM_DEBUG 642 kprintf("nsems out of range (0<%d<=%d)\n", 643 nsems, seminfo.semmsl); 644 #endif 645 eval = EINVAL; 646 goto done; 647 } 648 649 /* 650 * SEM_ALLOC flag cannot be set unless sema_lk is locked. 651 * semtot field also protected by sema_lk. 652 */ 653 lockmgr(&sema_lk, LK_EXCLUSIVE); 654 if (nsems > seminfo.semmns - semtot) { 655 #ifdef SEM_DEBUG 656 kprintf("not enough semaphores left " 657 "(need %d, got %d)\n", 658 nsems, seminfo.semmns - semtot); 659 #endif 660 eval = ENOSPC; 661 lockmgr(&sema_lk, LK_RELEASE); 662 goto done; 663 } 664 for (semid = 0; semid < seminfo.semmni; semid++) { 665 if ((sema[semid].ds.sem_perm.mode & SEM_ALLOC) == 0) 666 break; 667 } 668 if (semid == seminfo.semmni) { 669 #ifdef SEM_DEBUG 670 kprintf("no more semid_ds's available\n"); 671 #endif 672 eval = ENOSPC; 673 lockmgr(&sema_lk, LK_RELEASE); 674 goto done; 675 } 676 #ifdef SEM_DEBUG 677 kprintf("semid %d is available\n", semid); 678 #endif 679 lockmgr(&sema[semid].lk, LK_EXCLUSIVE); 680 sema[semid].ds.sem_perm.key = key; 681 sema[semid].ds.sem_perm.cuid = cred->cr_uid; 682 sema[semid].ds.sem_perm.uid = cred->cr_uid; 683 sema[semid].ds.sem_perm.cgid = cred->cr_gid; 684 sema[semid].ds.sem_perm.gid = cred->cr_gid; 685 sema[semid].ds.sem_perm.mode = (semflg & 0777) | SEM_ALLOC; 686 sema[semid].ds.sem_perm.seq = 687 (sema[semid].ds.sem_perm.seq + 1) & 0x7fff; 688 sema[semid].ds.sem_nsems = nsems; 689 sema[semid].ds.sem_otime = 0; 690 sema[semid].ds.sem_ctime = time_second; 691 sema[semid].ds.sem_base = kmalloc(sizeof(struct sem) * nsems, 692 M_SEM, M_WAITOK|M_ZERO); 693 semtot += nsems; 694 ++sema[semid].gen; 695 lockmgr(&sema[semid].lk, LK_RELEASE); 696 lockmgr(&sema_lk, LK_RELEASE); 697 #ifdef SEM_DEBUG 698 kprintf("sembase = 0x%x, next = 0x%x\n", 699 sema[semid].ds.sem_base, &sem[semtot]); 700 #endif 701 /* eval == 0 */ 702 } else { 703 #ifdef SEM_DEBUG 704 kprintf("didn't find it and wasn't asked to create it\n"); 705 #endif 706 eval = ENOENT; 707 } 708 709 done: 710 if (eval == 0) { 711 uap->sysmsg_result = 712 IXSEQ_TO_IPCID(semid, sema[semid].ds.sem_perm); 713 } 714 return(eval); 715 } 716 717 /* 718 * MPSAFE 719 */ 720 int 721 sys_semop(struct semop_args *uap) 722 { 723 struct thread *td = curthread; 724 int semid = uap->semid; 725 u_int nsops = uap->nsops; 726 struct sembuf sops[MAX_SOPS]; 727 struct semid_pool *semaptr; 728 struct sembuf *sopptr; 729 struct sem *semptr; 730 struct sem *xsemptr; 731 int i, j, eval; 732 int do_undos; 733 734 #ifdef SEM_DEBUG 735 kprintf("call to semop(%d, 0x%x, %u)\n", semid, sops, nsops); 736 #endif 737 if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL) 738 return (ENOSYS); 739 740 semid = IPCID_TO_IX(semid); /* Convert back to zero origin */ 741 742 if (semid < 0 || semid >= seminfo.semmni) { 743 eval = EINVAL; 744 goto done2; 745 } 746 747 wakeup_start_delayed(); 748 semaptr = &sema[semid]; 749 lockmgr(&semaptr->lk, LK_SHARED); 750 751 if ((semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0) { 752 eval = EINVAL; 753 goto done; 754 } 755 if (semaptr->ds.sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { 756 eval = EINVAL; 757 goto done; 758 } 759 760 if ((eval = ipcperm(td->td_proc, &semaptr->ds.sem_perm, IPC_W))) { 761 #ifdef SEM_DEBUG 762 kprintf("eval = %d from ipaccess\n", eval); 763 #endif 764 goto done; 765 } 766 767 if (nsops > MAX_SOPS) { 768 #ifdef SEM_DEBUG 769 kprintf("too many sops (max=%d, nsops=%u)\n", MAX_SOPS, nsops); 770 #endif 771 eval = E2BIG; 772 goto done; 773 } 774 775 if ((eval = copyin(uap->sops, &sops, nsops * sizeof(sops[0]))) != 0) { 776 #ifdef SEM_DEBUG 777 kprintf("eval = %d from copyin(%08x, %08x, %u)\n", eval, 778 uap->sops, &sops, nsops * sizeof(sops[0])); 779 #endif 780 goto done; 781 } 782 783 /* 784 * Loop trying to satisfy the vector of requests. 785 * If we reach a point where we must wait, any requests already 786 * performed are rolled back and we go to sleep until some other 787 * process wakes us up. At this point, we start all over again. 788 * 789 * This ensures that from the perspective of other tasks, a set 790 * of requests is atomic (never partially satisfied). 791 */ 792 do_undos = 0; 793 794 for (;;) { 795 long gen; 796 797 semptr = NULL; 798 799 for (i = 0; i < nsops; i++) { 800 sopptr = &sops[i]; 801 802 if (sopptr->sem_num >= semaptr->ds.sem_nsems) { 803 eval = EFBIG; 804 goto done; 805 } 806 807 semptr = &semaptr->ds.sem_base[sopptr->sem_num]; 808 lwkt_getpooltoken(semptr); 809 810 #ifdef SEM_DEBUG 811 kprintf("semop: semaptr=%x, sem_base=%x, semptr=%x, " 812 "sem[%d]=%d : op=%d, flag=%s\n", 813 semaptr, semaptr->ds.sem_base, semptr, 814 sopptr->sem_num, semptr->semval, sopptr->sem_op, 815 (sopptr->sem_flg & IPC_NOWAIT) ? "nowait" : "wait"); 816 #endif 817 818 if (sopptr->sem_op < 0) { 819 if (semptr->semval + sopptr->sem_op < 0) { 820 #ifdef SEM_DEBUG 821 kprintf("semop: can't do it now\n"); 822 #endif 823 break; 824 } else { 825 semptr->semval += sopptr->sem_op; 826 if (semptr->semval == 0 && 827 semptr->semzcnt > 0) { 828 wakeup(semptr); 829 } 830 } 831 if (sopptr->sem_flg & SEM_UNDO) 832 do_undos = 1; 833 } else if (sopptr->sem_op == 0) { 834 if (semptr->semval > 0) { 835 #ifdef SEM_DEBUG 836 kprintf("semop: not zero now\n"); 837 #endif 838 break; 839 } 840 } else { 841 semptr->semval += sopptr->sem_op; 842 if (sopptr->sem_flg & SEM_UNDO) 843 do_undos = 1; 844 if (semptr->semncnt > 0) 845 wakeup(semptr); 846 } 847 lwkt_relpooltoken(semptr); 848 } 849 850 /* 851 * Did we get through the entire vector? 852 */ 853 if (i >= nsops) 854 goto donex; 855 856 /* 857 * No, protect the semaphore request which also flags that 858 * a wakeup is needed, then release semptr since we know 859 * another process is likely going to need to access it 860 * soon. 861 */ 862 if (sopptr->sem_op == 0) 863 semptr->semzcnt++; 864 else 865 semptr->semncnt++; 866 tsleep_interlock(semptr, PCATCH); 867 lwkt_relpooltoken(semptr); 868 869 /* 870 * Rollback the semaphores we had acquired. 871 */ 872 #ifdef SEM_DEBUG 873 kprintf("semop: rollback 0 through %d\n", i-1); 874 #endif 875 for (j = 0; j < i; j++) { 876 xsemptr = &semaptr->ds.sem_base[sops[j].sem_num]; 877 lwkt_getpooltoken(xsemptr); 878 xsemptr->semval -= sops[j].sem_op; 879 if (xsemptr->semval == 0 && xsemptr->semzcnt > 0) 880 wakeup(xsemptr); 881 if (xsemptr->semval <= 0 && xsemptr->semncnt > 0) 882 wakeup(xsemptr); 883 lwkt_relpooltoken(xsemptr); 884 } 885 886 /* 887 * If the request that we couldn't satisfy has the 888 * NOWAIT flag set then return with EAGAIN. 889 */ 890 if (sopptr->sem_flg & IPC_NOWAIT) { 891 eval = EAGAIN; 892 goto done; 893 } 894 895 /* 896 * Release semaptr->lk while sleeping, allowing other 897 * semops (like SETVAL, SETALL, etc), which require an 898 * exclusive lock and might wake us up. 899 * 900 * Reload and recheck the validity of semaptr on return. 901 * Note that semptr itself might have changed too, but 902 * we've already interlocked for semptr and that is what 903 * will be woken up if it wakes up the tsleep on a MP 904 * race. 905 * 906 * gen protects against destroy/re-create races where the 907 * creds match. 908 */ 909 #ifdef SEM_DEBUG 910 kprintf("semop: good night!\n"); 911 #endif 912 gen = semaptr->gen; 913 lockmgr(&semaptr->lk, LK_RELEASE); 914 eval = tsleep(semptr, PCATCH | PINTERLOCKED, "semwait", hz); 915 lockmgr(&semaptr->lk, LK_SHARED); 916 #ifdef SEM_DEBUG 917 kprintf("semop: good morning (eval=%d)!\n", eval); 918 #endif 919 920 /* return code is checked below, after sem[nz]cnt-- */ 921 922 /* 923 * Make sure that the semaphore still exists 924 */ 925 if (semaptr->gen != gen || 926 (semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0 || 927 semaptr->ds.sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { 928 eval = EIDRM; 929 goto done; 930 } 931 932 /* 933 * The semaphore is still alive. Readjust the count of 934 * waiting processes. 935 */ 936 semptr = &semaptr->ds.sem_base[sopptr->sem_num]; 937 lwkt_getpooltoken(semptr); 938 if (sopptr->sem_op == 0) 939 semptr->semzcnt--; 940 else 941 semptr->semncnt--; 942 lwkt_relpooltoken(semptr); 943 944 /* 945 * Is it really morning, or was our sleep interrupted? 946 * (Delayed check of tsleep() return code because we 947 * need to decrement sem[nz]cnt either way.) 948 */ 949 if (eval) { 950 eval = EINTR; 951 goto done; 952 } 953 #ifdef SEM_DEBUG 954 kprintf("semop: good morning!\n"); 955 #endif 956 /* RETRY LOOP */ 957 } 958 959 donex: 960 /* 961 * Process any SEM_UNDO requests. 962 */ 963 if (do_undos) { 964 for (i = 0; i < nsops; i++) { 965 /* 966 * We only need to deal with SEM_UNDO's for non-zero 967 * op's. 968 */ 969 int adjval; 970 971 if ((sops[i].sem_flg & SEM_UNDO) == 0) 972 continue; 973 adjval = sops[i].sem_op; 974 if (adjval == 0) 975 continue; 976 eval = semundo_adjust(td->td_proc, semid, 977 sops[i].sem_num, -adjval); 978 if (eval == 0) 979 continue; 980 981 /* 982 * Oh-Oh! We ran out of either sem_undo's or undo's. 983 * Rollback the adjustments to this point and then 984 * rollback the semaphore ups and down so we can return 985 * with an error with all structures restored. We 986 * rollback the undo's in the exact reverse order that 987 * we applied them. This guarantees that we won't run 988 * out of space as we roll things back out. 989 */ 990 for (j = i - 1; j >= 0; j--) { 991 if ((sops[j].sem_flg & SEM_UNDO) == 0) 992 continue; 993 adjval = sops[j].sem_op; 994 if (adjval == 0) 995 continue; 996 if (semundo_adjust(td->td_proc, semid, 997 sops[j].sem_num, adjval) != 0) 998 panic("semop - can't undo undos"); 999 } 1000 1001 for (j = 0; j < nsops; j++) { 1002 xsemptr = &semaptr->ds.sem_base[ 1003 sops[j].sem_num]; 1004 lwkt_getpooltoken(xsemptr); 1005 xsemptr->semval -= sops[j].sem_op; 1006 if (xsemptr->semval == 0 && 1007 xsemptr->semzcnt > 0) 1008 wakeup(xsemptr); 1009 if (xsemptr->semval <= 0 && 1010 xsemptr->semncnt > 0) 1011 wakeup(xsemptr); 1012 lwkt_relpooltoken(xsemptr); 1013 } 1014 1015 #ifdef SEM_DEBUG 1016 kprintf("eval = %d from semundo_adjust\n", eval); 1017 #endif 1018 goto done; 1019 } /* loop through the sops */ 1020 } /* if (do_undos) */ 1021 1022 /* We're definitely done - set the sempid's */ 1023 for (i = 0; i < nsops; i++) { 1024 sopptr = &sops[i]; 1025 semptr = &semaptr->ds.sem_base[sopptr->sem_num]; 1026 lwkt_getpooltoken(semptr); 1027 semptr->sempid = td->td_proc->p_pid; 1028 lwkt_relpooltoken(semptr); 1029 } 1030 1031 /* Do a wakeup if any semaphore was up'd. */ 1032 #ifdef SEM_DEBUG 1033 kprintf("semop: done\n"); 1034 #endif 1035 uap->sysmsg_result = 0; 1036 eval = 0; 1037 done: 1038 lockmgr(&semaptr->lk, LK_RELEASE); 1039 wakeup_end_delayed(); 1040 done2: 1041 return(eval); 1042 } 1043 1044 /* 1045 * Go through the undo structures for this process and apply the adjustments to 1046 * semaphores. 1047 * 1048 * (p->p_token is held by the caller) 1049 */ 1050 void 1051 semexit(struct proc *p) 1052 { 1053 struct sem_undo *suptr; 1054 struct sem *semptr; 1055 1056 /* 1057 * We're getting a global token, don't do it if we couldn't 1058 * possibly have any semaphores. 1059 */ 1060 if ((p->p_flags & P_SYSVSEM) == 0) 1061 return; 1062 suptr = p->p_sem_undo; 1063 KKASSERT(suptr != NULL); 1064 1065 /* 1066 * Disconnect suptr from the process and increment un_refs to 1067 * prevent anyone else from being able to destroy the structure. 1068 * Do not remove it from the linked list until after we are through 1069 * scanning it as other semaphore calls might still effect it. 1070 */ 1071 lwkt_gettoken(&semu_token); 1072 p->p_sem_undo = NULL; 1073 p->p_flags &= ~P_SYSVSEM; 1074 suptr->un_proc = NULL; 1075 ++suptr->un_refs; 1076 lwkt_reltoken(&semu_token); 1077 1078 while (suptr->un_cnt) { 1079 struct semid_pool *semaptr; 1080 int semid; 1081 int semnum; 1082 int adjval; 1083 int ix; 1084 1085 /* 1086 * These values are stable because we hold p->p_token. 1087 * However, they can get ripped out from under us when 1088 * we block or obtain other tokens so we have to re-check. 1089 */ 1090 ix = suptr->un_cnt - 1; 1091 semid = suptr->un_ent[ix].un_id; 1092 semnum = suptr->un_ent[ix].un_num; 1093 adjval = suptr->un_ent[ix].un_adjval; 1094 1095 semaptr = &sema[semid]; 1096 1097 /* 1098 * Recheck after locking, then execute the undo 1099 * operation. semptr remains valid due to the 1100 * semaptr->lk. 1101 */ 1102 lockmgr(&semaptr->lk, LK_SHARED); 1103 semptr = &semaptr->ds.sem_base[semnum]; 1104 lwkt_getpooltoken(semptr); 1105 1106 if (ix == suptr->un_cnt - 1 && 1107 semid == suptr->un_ent[ix].un_id && 1108 semnum == suptr->un_ent[ix].un_num && 1109 adjval == suptr->un_ent[ix].un_adjval) { 1110 /* 1111 * Only do assertions when we aren't in a SMP race. 1112 */ 1113 if ((semaptr->ds.sem_perm.mode & SEM_ALLOC) == 0) 1114 panic("semexit - semid not allocated"); 1115 if (semnum >= semaptr->ds.sem_nsems) 1116 panic("semexit - semnum out of range"); 1117 --suptr->un_cnt; 1118 1119 if (adjval < 0) { 1120 if (semptr->semval < -adjval) 1121 semptr->semval = 0; 1122 else 1123 semptr->semval += adjval; 1124 } else { 1125 semptr->semval += adjval; 1126 } 1127 wakeup(semptr); 1128 } 1129 lwkt_relpooltoken(semptr); 1130 lockmgr(&semaptr->lk, LK_RELEASE); 1131 } 1132 1133 /* 1134 * Final cleanup, remove from the list and deallocate on the 1135 * last ref only. 1136 */ 1137 lwkt_gettoken(&semu_token); 1138 if (--suptr->un_refs == 0) { 1139 TAILQ_REMOVE(&semu_list, suptr, un_entry); 1140 KKASSERT(suptr->un_cnt == 0); 1141 kfree(suptr, M_SEM); 1142 } 1143 lwkt_reltoken(&semu_token); 1144 } 1145