/* $NetBSD: kern_proc.c,v 1.54 2002/09/27 15:37:45 provos Exp $ */

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 64 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 65 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 66 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 68 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 69 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 70 * SUCH DAMAGE. 71 * 72 * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95 73 */ 74 75 #include <sys/cdefs.h> 76 __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.54 2002/09/27 15:37:45 provos Exp $"); 77 78 #include "opt_kstack.h" 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/kernel.h> 83 #include <sys/proc.h> 84 #include <sys/resourcevar.h> 85 #include <sys/buf.h> 86 #include <sys/acct.h> 87 #include <sys/wait.h> 88 #include <sys/file.h> 89 #include <ufs/ufs/quota.h> 90 #include <sys/uio.h> 91 #include <sys/malloc.h> 92 #include <sys/pool.h> 93 #include <sys/mbuf.h> 94 #include <sys/ioctl.h> 95 #include <sys/tty.h> 96 #include <sys/signalvar.h> 97 #include <sys/ras.h> 98 99 /* 100 * Structure associated with user cacheing. 101 */ 102 struct uidinfo { 103 LIST_ENTRY(uidinfo) ui_hash; 104 uid_t ui_uid; 105 long ui_proccnt; 106 }; 107 #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) 108 LIST_HEAD(uihashhead, uidinfo) *uihashtbl; 109 u_long uihash; /* size of hash table - 1 */ 110 111 /* 112 * Other process lists 113 */ 114 struct pidhashhead *pidhashtbl; 115 u_long pidhash; 116 struct pgrphashhead *pgrphashtbl; 117 u_long pgrphash; 118 119 struct proclist allproc; 120 struct proclist zombproc; /* resources have been freed */ 121 122 /* 123 * Process list locking: 124 * 125 * We have two types of locks on the proclists: read locks and write 126 * locks. 
Read locks can be used in interrupt context, so while we 127 * hold the write lock, we must also block clock interrupts to 128 * lock out any scheduling changes that may happen in interrupt 129 * context. 130 * 131 * The proclist lock locks the following structures: 132 * 133 * allproc 134 * zombproc 135 * pidhashtbl 136 */ 137 struct lock proclist_lock; 138 139 /* 140 * Locking of this proclist is special; it's accessed in a 141 * critical section of process exit, and thus locking it can't 142 * modify interrupt state. We use a simple spin lock for this 143 * proclist. Processes on this proclist are also on zombproc; 144 * we use the p_hash member to linkup to deadproc. 145 */ 146 struct simplelock deadproc_slock; 147 struct proclist deadproc; /* dead, but not yet undead */ 148 149 struct pool proc_pool; 150 struct pool pcred_pool; 151 struct pool plimit_pool; 152 struct pool pgrp_pool; 153 struct pool rusage_pool; 154 struct pool ras_pool; 155 156 /* 157 * The process list descriptors, used during pid allocation and 158 * by sysctl. No locking on this data structure is needed since 159 * it is completely static. 160 */ 161 const struct proclist_desc proclists[] = { 162 { &allproc }, 163 { &zombproc }, 164 { NULL }, 165 }; 166 167 static void orphanpg __P((struct pgrp *)); 168 #ifdef DEBUG 169 void pgrpdump __P((void)); 170 #endif 171 172 /* 173 * Initialize global process hashing structures. 
174 */ 175 void 176 procinit() 177 { 178 const struct proclist_desc *pd; 179 180 for (pd = proclists; pd->pd_list != NULL; pd++) 181 LIST_INIT(pd->pd_list); 182 183 spinlockinit(&proclist_lock, "proclk", 0); 184 185 LIST_INIT(&deadproc); 186 simple_lock_init(&deadproc_slock); 187 188 pidhashtbl = 189 hashinit(maxproc / 4, HASH_LIST, M_PROC, M_WAITOK, &pidhash); 190 pgrphashtbl = 191 hashinit(maxproc / 4, HASH_LIST, M_PROC, M_WAITOK, &pgrphash); 192 uihashtbl = 193 hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash); 194 195 pool_init(&proc_pool, sizeof(struct proc), 0, 0, 0, "procpl", 196 &pool_allocator_nointr); 197 pool_init(&pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl", 198 &pool_allocator_nointr); 199 pool_init(&pcred_pool, sizeof(struct pcred), 0, 0, 0, "pcredpl", 200 &pool_allocator_nointr); 201 pool_init(&plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl", 202 &pool_allocator_nointr); 203 pool_init(&rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl", 204 &pool_allocator_nointr); 205 pool_init(&ras_pool, sizeof(struct ras), 0, 0, 0, "raspl", 206 &pool_allocator_nointr); 207 } 208 209 /* 210 * Acquire a read lock on the proclist. 211 */ 212 void 213 proclist_lock_read() 214 { 215 int error; 216 217 error = spinlockmgr(&proclist_lock, LK_SHARED, NULL); 218 #ifdef DIAGNOSTIC 219 if (__predict_false(error != 0)) 220 panic("proclist_lock_read: failed to acquire lock"); 221 #endif 222 } 223 224 /* 225 * Release a read lock on the proclist. 226 */ 227 void 228 proclist_unlock_read() 229 { 230 231 (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL); 232 } 233 234 /* 235 * Acquire a write lock on the proclist. 
236 */ 237 int 238 proclist_lock_write() 239 { 240 int s, error; 241 242 s = splclock(); 243 error = spinlockmgr(&proclist_lock, LK_EXCLUSIVE, NULL); 244 #ifdef DIAGNOSTIC 245 if (__predict_false(error != 0)) 246 panic("proclist_lock: failed to acquire lock"); 247 #endif 248 return (s); 249 } 250 251 /* 252 * Release a write lock on the proclist. 253 */ 254 void 255 proclist_unlock_write(s) 256 int s; 257 { 258 259 (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL); 260 splx(s); 261 } 262 263 /* 264 * Change the count associated with number of processes 265 * a given user is using. 266 */ 267 int 268 chgproccnt(uid, diff) 269 uid_t uid; 270 int diff; 271 { 272 struct uidinfo *uip; 273 struct uihashhead *uipp; 274 275 uipp = UIHASH(uid); 276 277 LIST_FOREACH(uip, uipp, ui_hash) 278 if (uip->ui_uid == uid) 279 break; 280 281 if (uip) { 282 uip->ui_proccnt += diff; 283 if (uip->ui_proccnt > 0) 284 return (uip->ui_proccnt); 285 if (uip->ui_proccnt < 0) 286 panic("chgproccnt: procs < 0"); 287 LIST_REMOVE(uip, ui_hash); 288 FREE(uip, M_PROC); 289 return (0); 290 } 291 if (diff <= 0) { 292 if (diff == 0) 293 return(0); 294 panic("chgproccnt: lost user"); 295 } 296 MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK); 297 LIST_INSERT_HEAD(uipp, uip, ui_hash); 298 uip->ui_uid = uid; 299 uip->ui_proccnt = diff; 300 return (diff); 301 } 302 303 /* 304 * Is p an inferior of q? 
305 */ 306 int 307 inferior(p, q) 308 struct proc *p; 309 struct proc *q; 310 { 311 312 for (; p != q; p = p->p_pptr) 313 if (p->p_pid == 0) 314 return (0); 315 return (1); 316 } 317 318 /* 319 * Locate a process by number 320 */ 321 struct proc * 322 pfind(pid) 323 pid_t pid; 324 { 325 struct proc *p; 326 327 proclist_lock_read(); 328 LIST_FOREACH(p, PIDHASH(pid), p_hash) 329 if (p->p_pid == pid) 330 goto out; 331 out: 332 proclist_unlock_read(); 333 return (p); 334 } 335 336 /* 337 * Locate a process group by number 338 */ 339 struct pgrp * 340 pgfind(pgid) 341 pid_t pgid; 342 { 343 struct pgrp *pgrp; 344 345 LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) 346 if (pgrp->pg_id == pgid) 347 return (pgrp); 348 return (NULL); 349 } 350 351 /* 352 * Move p to a new or existing process group (and session) 353 */ 354 int 355 enterpgrp(p, pgid, mksess) 356 struct proc *p; 357 pid_t pgid; 358 int mksess; 359 { 360 struct pgrp *pgrp = pgfind(pgid); 361 362 #ifdef DIAGNOSTIC 363 if (__predict_false(pgrp != NULL && mksess)) /* firewalls */ 364 panic("enterpgrp: setsid into non-empty pgrp"); 365 if (__predict_false(SESS_LEADER(p))) 366 panic("enterpgrp: session leader attempted setpgrp"); 367 #endif 368 if (pgrp == NULL) { 369 pid_t savepid = p->p_pid; 370 struct proc *np; 371 /* 372 * new process group 373 */ 374 #ifdef DIAGNOSTIC 375 if (__predict_false(p->p_pid != pgid)) 376 panic("enterpgrp: new pgrp and pid != pgid"); 377 #endif 378 pgrp = pool_get(&pgrp_pool, PR_WAITOK); 379 if ((np = pfind(savepid)) == NULL || np != p) { 380 pool_put(&pgrp_pool, pgrp); 381 return (ESRCH); 382 } 383 if (mksess) { 384 struct session *sess; 385 386 /* 387 * new session 388 */ 389 MALLOC(sess, struct session *, sizeof(struct session), 390 M_SESSION, M_WAITOK); 391 if ((np = pfind(savepid)) == NULL || np != p) { 392 FREE(sess, M_SESSION); 393 pool_put(&pgrp_pool, pgrp); 394 return (ESRCH); 395 } 396 sess->s_sid = p->p_pid; 397 sess->s_leader = p; 398 sess->s_count = 1; 399 sess->s_ttyvp = NULL; 
400 sess->s_ttyp = NULL; 401 memcpy(sess->s_login, p->p_session->s_login, 402 sizeof(sess->s_login)); 403 p->p_flag &= ~P_CONTROLT; 404 pgrp->pg_session = sess; 405 #ifdef DIAGNOSTIC 406 if (__predict_false(p != curproc)) 407 panic("enterpgrp: mksession and p != curproc"); 408 #endif 409 } else { 410 SESSHOLD(p->p_session); 411 pgrp->pg_session = p->p_session; 412 } 413 pgrp->pg_id = pgid; 414 LIST_INIT(&pgrp->pg_members); 415 LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash); 416 pgrp->pg_jobc = 0; 417 } else if (pgrp == p->p_pgrp) 418 return (0); 419 420 /* 421 * Adjust eligibility of affected pgrps to participate in job control. 422 * Increment eligibility counts before decrementing, otherwise we 423 * could reach 0 spuriously during the first call. 424 */ 425 fixjobc(p, pgrp, 1); 426 fixjobc(p, p->p_pgrp, 0); 427 428 LIST_REMOVE(p, p_pglist); 429 if (LIST_EMPTY(&p->p_pgrp->pg_members)) 430 pgdelete(p->p_pgrp); 431 p->p_pgrp = pgrp; 432 LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); 433 return (0); 434 } 435 436 /* 437 * remove process from process group 438 */ 439 int 440 leavepgrp(p) 441 struct proc *p; 442 { 443 444 LIST_REMOVE(p, p_pglist); 445 if (LIST_EMPTY(&p->p_pgrp->pg_members)) 446 pgdelete(p->p_pgrp); 447 p->p_pgrp = 0; 448 return (0); 449 } 450 451 /* 452 * delete a process group 453 */ 454 void 455 pgdelete(pgrp) 456 struct pgrp *pgrp; 457 { 458 459 /* Remove reference (if any) from tty to this process group */ 460 if (pgrp->pg_session->s_ttyp != NULL && 461 pgrp->pg_session->s_ttyp->t_pgrp == pgrp) 462 pgrp->pg_session->s_ttyp->t_pgrp = NULL; 463 LIST_REMOVE(pgrp, pg_hash); 464 SESSRELE(pgrp->pg_session); 465 pool_put(&pgrp_pool, pgrp); 466 } 467 468 /* 469 * Adjust pgrp jobc counters when specified process changes process group. 470 * We count the number of processes in each process group that "qualify" 471 * the group for terminal job control (those with a parent in a different 472 * process group of the same session). 
If that count reaches zero, the 473 * process group becomes orphaned. Check both the specified process' 474 * process group and that of its children. 475 * entering == 0 => p is leaving specified group. 476 * entering == 1 => p is entering specified group. 477 */ 478 void 479 fixjobc(p, pgrp, entering) 480 struct proc *p; 481 struct pgrp *pgrp; 482 int entering; 483 { 484 struct pgrp *hispgrp; 485 struct session *mysession = pgrp->pg_session; 486 487 /* 488 * Check p's parent to see whether p qualifies its own process 489 * group; if so, adjust count for p's process group. 490 */ 491 if ((hispgrp = p->p_pptr->p_pgrp) != pgrp && 492 hispgrp->pg_session == mysession) { 493 if (entering) 494 pgrp->pg_jobc++; 495 else if (--pgrp->pg_jobc == 0) 496 orphanpg(pgrp); 497 } 498 499 /* 500 * Check this process' children to see whether they qualify 501 * their process groups; if so, adjust counts for children's 502 * process groups. 503 */ 504 LIST_FOREACH(p, &p->p_children, p_sibling) { 505 if ((hispgrp = p->p_pgrp) != pgrp && 506 hispgrp->pg_session == mysession && 507 P_ZOMBIE(p) == 0) { 508 if (entering) 509 hispgrp->pg_jobc++; 510 else if (--hispgrp->pg_jobc == 0) 511 orphanpg(hispgrp); 512 } 513 } 514 } 515 516 /* 517 * A process group has become orphaned; 518 * if there are any stopped processes in the group, 519 * hang-up all process in that group. 
520 */ 521 static void 522 orphanpg(pg) 523 struct pgrp *pg; 524 { 525 struct proc *p; 526 527 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 528 if (p->p_stat == SSTOP) { 529 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 530 psignal(p, SIGHUP); 531 psignal(p, SIGCONT); 532 } 533 return; 534 } 535 } 536 } 537 538 /* mark process as suid/sgid, reset some values do defaults */ 539 void 540 p_sugid(p) 541 struct proc *p; 542 { 543 struct plimit *newlim; 544 545 p->p_flag |= P_SUGID; 546 /* reset what needs to be reset in plimit */ 547 if (p->p_limit->pl_corename != defcorename) { 548 if (p->p_limit->p_refcnt > 1 && 549 (p->p_limit->p_lflags & PL_SHAREMOD) == 0) { 550 newlim = limcopy(p->p_limit); 551 limfree(p->p_limit); 552 p->p_limit = newlim; 553 } 554 free(p->p_limit->pl_corename, M_TEMP); 555 p->p_limit->pl_corename = defcorename; 556 } 557 } 558 559 #ifdef DEBUG 560 void 561 pgrpdump() 562 { 563 struct pgrp *pgrp; 564 struct proc *p; 565 int i; 566 567 for (i = 0; i <= pgrphash; i++) { 568 if ((pgrp = LIST_FIRST(&pgrphashtbl[i])) != NULL) { 569 printf("\tindx %d\n", i); 570 for (; pgrp != 0; pgrp = pgrp->pg_hash.le_next) { 571 printf("\tpgrp %p, pgid %d, sess %p, " 572 "sesscnt %d, mem %p\n", 573 pgrp, pgrp->pg_id, pgrp->pg_session, 574 pgrp->pg_session->s_count, 575 LIST_FIRST(&pgrp->pg_members)); 576 LIST_FOREACH(p, &pgrp->pg_members, p_pglist) { 577 printf("\t\tpid %d addr %p pgrp %p\n", 578 p->p_pid, p, p->p_pgrp); 579 } 580 } 581 } 582 } 583 } 584 #endif /* DEBUG */ 585 586 #ifdef KSTACK_CHECK_MAGIC 587 #include <sys/user.h> 588 589 #define KSTACK_MAGIC 0xdeadbeaf 590 591 /* XXX should be per process basis? 
*/ 592 int kstackleftmin = KSTACK_SIZE; 593 int kstackleftthres = KSTACK_SIZE / 8; /* warn if remaining stack is 594 less than this */ 595 596 void 597 kstack_setup_magic(const struct proc *p) 598 { 599 u_int32_t *ip; 600 u_int32_t const *end; 601 602 KASSERT(p != 0); 603 KASSERT(p != &proc0); 604 605 /* 606 * fill all the stack with magic number 607 * so that later modification on it can be detected. 608 */ 609 ip = (u_int32_t *)KSTACK_LOWEST_ADDR(p); 610 end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(p) + KSTACK_SIZE); 611 for (; ip < end; ip++) { 612 *ip = KSTACK_MAGIC; 613 } 614 } 615 616 void 617 kstack_check_magic(const struct proc *p) 618 { 619 u_int32_t const *ip, *end; 620 int stackleft; 621 622 KASSERT(p != 0); 623 624 /* don't check proc0 */ /*XXX*/ 625 if (p == &proc0) 626 return; 627 628 #ifdef __MACHINE_STACK_GROWS_UP 629 /* stack grows upwards (eg. hppa) */ 630 ip = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(p) + KSTACK_SIZE); 631 end = (u_int32_t *)KSTACK_LOWEST_ADDR(p); 632 for (ip--; ip >= end; ip--) 633 if (*ip != KSTACK_MAGIC) 634 break; 635 636 stackleft = (caddr_t)KSTACK_LOWEST_ADDR(p) + KSTACK_SIZE - (caddr_t)ip; 637 #else /* __MACHINE_STACK_GROWS_UP */ 638 /* stack grows downwards (eg. i386) */ 639 ip = (u_int32_t *)KSTACK_LOWEST_ADDR(p); 640 end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(p) + KSTACK_SIZE); 641 for (; ip < end; ip++) 642 if (*ip != KSTACK_MAGIC) 643 break; 644 645 stackleft = (caddr_t)ip - KSTACK_LOWEST_ADDR(p); 646 #endif /* __MACHINE_STACK_GROWS_UP */ 647 648 if (kstackleftmin > stackleft) { 649 kstackleftmin = stackleft; 650 if (stackleft < kstackleftthres) 651 printf("warning: kernel stack left %d bytes(pid %u)\n", 652 stackleft, p->p_pid); 653 } 654 655 if (stackleft <= 0) { 656 panic("magic on the top of kernel stack changed for pid %u: " 657 "maybe kernel stack overflow", p->p_pid); 658 } 659 } 660 #endif /* KSTACK_CHECK_MAGIC */ 661