/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/jail.h>
#include <sys/filedesc.h>
#include <sys/tty.h>
#include <sys/dsched.h>
#include <sys/signalvar.h>
#include <sys/spinlock.h>
#include <sys/random.h>
#include <sys/vnode.h>
#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/user.h>
#include <machine/smp.h>

#include <sys/refcount.h>
#include <sys/spinlock2.h>

/*
 * Hash table size must be a power of two and is not currently dynamically
 * sized.  There is a trade-off between the linear scans which must iterate
 * all HSIZE elements and the number of elements which might accumulate
 * within each hash chain.
 */
#define ALLPROC_HSIZE		256
#define ALLPROC_HMASK		(ALLPROC_HSIZE - 1)
#define ALLPROC_HASH(pid)	(pid & ALLPROC_HMASK)
#define PGRP_HASH(pid)		(pid & ALLPROC_HMASK)
#define SESS_HASH(pid)		(pid & ALLPROC_HMASK)

/*
 * pid_doms[] management, used to control how quickly a PID can be recycled.
 * Must be a multiple of ALLPROC_HSIZE for the proc_makepid() inner loops.
 *
 * WARNING! PIDDOM_DELAY should not be defined > 20 or so unless you change
 *	    the array from int8_t's to int16_t's.
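 *
 * Sizing note (derived from the constants below; PID_MAX is assumed to be
 * on the order of one million): PIDSEL_DOMAINS evaluates to roughly one
 * million entries, rounded down to a multiple of ALLPROC_HSIZE, so the
 * pid_doms[] array of uint8_t time stamps consumes about a megabyte.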
 */
#define PIDDOM_COUNT	10	/* 10 pids per domain - reduce array size */
#define PIDDOM_DELAY	10	/* min 10 seconds after exit before reuse */
#define PIDDOM_SCALE	10	/* (10,000*SCALE)/sec performance guarantee */
#define PIDSEL_DOMAINS	(PID_MAX * PIDDOM_SCALE / PIDDOM_COUNT / \
			 ALLPROC_HSIZE * ALLPROC_HSIZE)

/* Used by libkvm */
int allproc_hsize = ALLPROC_HSIZE;

LIST_HEAD(pidhashhead, proc);

static MALLOC_DEFINE(M_PGRP, "pgrp", "process group header");
MALLOC_DEFINE(M_SESSION, "session", "session header");
MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
MALLOC_DEFINE(M_LWP, "lwp", "lwp structures");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");

int ps_showallprocs = 1;
static int ps_showallthreads = 1;
SYSCTL_INT(_security, OID_AUTO, ps_showallprocs, CTLFLAG_RW,
    &ps_showallprocs, 0,
    "Unprivileged processes can see processes with different UID/GID");
SYSCTL_INT(_security, OID_AUTO, ps_showallthreads, CTLFLAG_RW,
    &ps_showallthreads, 0,
    "Unprivileged processes can see kernel threads");
static u_int pid_domain_skips;
SYSCTL_UINT(_kern, OID_AUTO, pid_domain_skips, CTLFLAG_RW,
    &pid_domain_skips, 0,
    "Number of pid_doms[] skipped");
static u_int pid_inner_skips;
SYSCTL_UINT(_kern, OID_AUTO, pid_inner_skips, CTLFLAG_RW,
    &pid_inner_skips, 0,
    "Number of pid_doms[] inner-loop skips");

static void orphanpg(struct pgrp *pg);
static void proc_makepid(struct proc *p, int random_offset);

/*
 * Process related lists (for proc_token, allproc, allpgrp, and allsess)
 */
typedef struct procglob procglob_t;

static procglob_t	procglob[ALLPROC_HSIZE];

/*
 * We try our best to avoid recycling a PID too quickly.  We do this by
 * storing (uint8_t)time_second in the related pid domain on-reap and then
 * using that to skip-over the domain on-allocate.
 *
 * This array has to be fairly large to support a high fork/exec rate.
 * A ~100,000 entry array will support a 10-second reuse latency at
 * 10,000 execs/second, worst case.  Best-case multiply by PIDDOM_COUNT
 * (approximately 100,000 execs/second).
 *
 * Currently we allocate around a megabyte, making the worst-case fork
 * rate around 100,000/second.
 */
static uint8_t *pid_doms;

/*
 * Random component to nextpid generation.  We mix in a random factor to make
 * it a little harder to predict.  We sanity check the modulus value to avoid
 * doing it in critical paths.  Don't let it be too small or we pointlessly
 * waste randomness entropy, and don't let it be impossibly large.  Using a
 * modulus that is too big causes a LOT more process table scans and slows
 * down fork processing as the pidchecked caching is defeated.
 */
static int randompid = 0;

/*
 * No requirements.
 */
static int
sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
{
        int error, pid;

        pid = randompid;
        error = sysctl_handle_int(oidp, &pid, 0, req);
        if (error || !req->newptr)
                return (error);
        if (pid < 0 || pid > PID_MAX - 100)     /* out of range */
                pid = PID_MAX - 100;
        else if (pid < 2)                       /* NOP */
                pid = 0;
        else if (pid < 100)                     /* Make it reasonable */
                pid = 100;
        randompid = pid;
        return (error);
}

SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
            0, 0, sysctl_kern_randompid, "I", "Random PID modulus");

/*
 * Initialize global process hashing structures.
 *
 * These functions are ONLY called from the low level boot code and do
 * not lock their operations.
 */
void
procinit(void)
{
        u_long i;

        /*
         * Allocate dynamically.  This array can be large (~1MB) so don't
         * waste boot loader space.
         */
        pid_doms = kmalloc(sizeof(pid_doms[0]) * PIDSEL_DOMAINS,
                           M_PROC, M_WAITOK | M_ZERO);

        /*
         * Avoid unnecessary stalls due to pid_doms[] values all being
         * the same.  Make sure that the allocation of pid 1 and pid 2
         * succeeds.
         */
        for (i = 0; i < PIDSEL_DOMAINS; ++i)
                pid_doms[i] = (int8_t)i - (int8_t)(PIDDOM_DELAY + 1);

        /*
         * Other misc init.
         */
        for (i = 0; i < ALLPROC_HSIZE; ++i) {
                procglob_t *prg = &procglob[i];
                LIST_INIT(&prg->allproc);
                LIST_INIT(&prg->allsess);
                LIST_INIT(&prg->allpgrp);
                lwkt_token_init(&prg->proc_token, "allproc");
        }
        uihashinit();
}

void
procinsertinit(struct proc *p)
{
        LIST_INSERT_HEAD(&procglob[ALLPROC_HASH(p->p_pid)].allproc,
                         p, p_list);
}

void
pgrpinsertinit(struct pgrp *pg)
{
        LIST_INSERT_HEAD(&procglob[ALLPROC_HASH(pg->pg_id)].allpgrp,
                         pg, pg_list);
}

void
sessinsertinit(struct session *sess)
{
        LIST_INSERT_HEAD(&procglob[ALLPROC_HASH(sess->s_sid)].allsess,
                         sess, s_list);
}

/*
 * Process hold/release support functions.  Called via the PHOLD(),
 * PRELE(), and PSTALL() macros.
 *
 * p->p_lock is a simple hold count with a waiting interlock.  No wakeup()
 * is issued unless someone is actually waiting for the process.
 *
 * Most holds are short-term, allowing a process scan or other similar
 * operation to access a proc structure without it getting ripped out from
 * under us.  procfs and process-list sysctl ops also use the hold function
 * interlocked with various p_flags to keep the vmspace intact when reading
 * or writing a user process's address space.
 *
 * There are two situations where a hold count can be longer.  Exiting lwps
 * hold the process until the lwp is reaped, and the parent will hold the
 * child during vfork()/exec() sequences while the child is marked P_PPWAIT.
 *
 * The kernel waits for the hold count to drop to 0 (or 1 in some cases) at
 * various critical points in the fork/exec and exit paths before proceeding.
 */
#define PLOCK_ZOMB	0x20000000
#define PLOCK_WAITING	0x40000000
#define PLOCK_MASK	0x1FFFFFFF

void
pstall(struct proc *p, const char *wmesg, int count)
{
        int o;
        int n;

        for (;;) {
                o = p->p_lock;
                cpu_ccfence();
                if ((o & PLOCK_MASK) <= count)
                        break;
                n = o | PLOCK_WAITING;
                tsleep_interlock(&p->p_lock, 0);

                /*
                 * If someone is trying to single-step the process during
                 * an exec or an exit they can deadlock us because procfs
                 * sleeps with the process held.
                 */
                if (p->p_stops) {
                        if (p->p_flags & P_INEXEC) {
                                wakeup(&p->p_stype);
                        } else if (p->p_flags & P_POSTEXIT) {
                                spin_lock(&p->p_spin);
                                p->p_stops = 0;
                                p->p_step = 0;
                                spin_unlock(&p->p_spin);
                                wakeup(&p->p_stype);
                        }
                }

                if (atomic_cmpset_int(&p->p_lock, o, n)) {
                        tsleep(&p->p_lock, PINTERLOCKED, wmesg, 0);
                }
        }
}

void
phold(struct proc *p)
{
        atomic_add_int(&p->p_lock, 1);
}

/*
 * WARNING!  On last release (p) can become instantly invalid due to
 *	     MP races.
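 *
 * A minimal usage sketch (illustrative only; some_pid is a hypothetical
 * variable, not part of this file).  The held process cannot be ripped
 * out from under the caller until the matching PRELE():
 *
 *	struct proc *p;
 *
 *	if ((p = pfind(some_pid)) != NULL) {
 *		...
 *		PRELE(p);
 *	}
 *
 * Do not dereference p after the release; it may be freed at any time.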
 */
void
prele(struct proc *p)
{
        int o;
        int n;

        /*
         * Fast path
         */
        if (atomic_cmpset_int(&p->p_lock, 1, 0))
                return;

        /*
         * Slow path
         */
        for (;;) {
                o = p->p_lock;
                KKASSERT((o & PLOCK_MASK) > 0);
                cpu_ccfence();
                n = (o - 1) & ~PLOCK_WAITING;
                if (atomic_cmpset_int(&p->p_lock, o, n)) {
                        if (o & PLOCK_WAITING)
                                wakeup(&p->p_lock);
                        break;
                }
        }
}

/*
 * Hold and flag serialized for zombie reaping purposes.
 *
 * This function will fail if it has to block, returning non-zero with
 * neither the flag set nor the hold count bumped.  Note that we must block
 * without holding a ref, meaning that the caller must ensure that (p)
 * remains valid through some other interlock (typically on its parent
 * process's p_token).
 *
 * Zero is returned on success.  The hold count will be incremented and
 * the serialization flag acquired.  Note that serialization is only against
 * other pholdzomb() calls, not against phold() calls.
 */
int
pholdzomb(struct proc *p)
{
        int o;
        int n;

        /*
         * Fast path
         */
        if (atomic_cmpset_int(&p->p_lock, 0, PLOCK_ZOMB | 1))
                return(0);

        /*
         * Slow path
         */
        for (;;) {
                o = p->p_lock;
                cpu_ccfence();
                if ((o & PLOCK_ZOMB) == 0) {
                        n = (o + 1) | PLOCK_ZOMB;
                        if (atomic_cmpset_int(&p->p_lock, o, n))
                                return(0);
                } else {
                        KKASSERT((o & PLOCK_MASK) > 0);
                        n = o | PLOCK_WAITING;
                        tsleep_interlock(&p->p_lock, 0);
                        if (atomic_cmpset_int(&p->p_lock, o, n)) {
                                tsleep(&p->p_lock, PINTERLOCKED, "phldz", 0);
                                /* (p) can be ripped out at this point */
                                return(1);
                        }
                }
        }
}

/*
 * Release PLOCK_ZOMB and the hold count, waking up any waiters.
 *
 * WARNING!  On last release (p) can become instantly invalid due to
 *	     MP races.
 */
void
prelezomb(struct proc *p)
{
        int o;
        int n;

        /*
         * Fast path
         */
        if (atomic_cmpset_int(&p->p_lock, PLOCK_ZOMB | 1, 0))
                return;

        /*
         * Slow path
         */
        KKASSERT(p->p_lock & PLOCK_ZOMB);
        for (;;) {
                o = p->p_lock;
                KKASSERT((o & PLOCK_MASK) > 0);
                cpu_ccfence();
                n = (o - 1) & ~(PLOCK_ZOMB | PLOCK_WAITING);
                if (atomic_cmpset_int(&p->p_lock, o, n)) {
                        if (o & PLOCK_WAITING)
                                wakeup(&p->p_lock);
                        break;
                }
        }
}

/*
 * Is p an inferior of the current process?
 *
 * No requirements.
 */
int
inferior(struct proc *p)
{
        struct proc *p2;

        PHOLD(p);
        lwkt_gettoken_shared(&p->p_token);
        while (p != curproc) {
                if (p->p_pid == 0) {
                        lwkt_reltoken(&p->p_token);
                        PRELE(p);
                        return (0);
                }
                p2 = p->p_pptr;
                PHOLD(p2);
                lwkt_reltoken(&p->p_token);
                PRELE(p);
                lwkt_gettoken_shared(&p2->p_token);
                p = p2;
        }
        lwkt_reltoken(&p->p_token);
        PRELE(p);

        return (1);
}

/*
 * Locate a process by number.  The returned process will be referenced and
 * must be released with PRELE().
 *
 * No requirements.
 */
struct proc *
pfind(pid_t pid)
{
        struct proc *p = curproc;
        procglob_t *prg;
        int n;

        /*
         * Shortcut the current process
         */
        if (p && p->p_pid == pid) {
                PHOLD(p);
                return (p);
        }

        /*
         * Otherwise find it in the hash table.
         */
        n = ALLPROC_HASH(pid);
        prg = &procglob[n];

        lwkt_gettoken_shared(&prg->proc_token);
        LIST_FOREACH(p, &prg->allproc, p_list) {
                if (p->p_stat == SZOMB)
                        continue;
                if (p->p_pid == pid) {
                        PHOLD(p);
                        lwkt_reltoken(&prg->proc_token);
                        return (p);
                }
        }
        lwkt_reltoken(&prg->proc_token);

        return (NULL);
}

/*
 * Locate a process by number.  The returned process is NOT referenced.
 * The result will not be stable and is typically only used to validate
 * against a process that the caller has in-hand.
 *
 * No requirements.
 */
struct proc *
pfindn(pid_t pid)
{
        struct proc *p = curproc;
        procglob_t *prg;
        int n;

        /*
         * Shortcut the current process
         */
        if (p && p->p_pid == pid)
                return (p);

        /*
         * Otherwise find it in the hash table.
         */
        n = ALLPROC_HASH(pid);
        prg = &procglob[n];

        lwkt_gettoken_shared(&prg->proc_token);
        LIST_FOREACH(p, &prg->allproc, p_list) {
                if (p->p_stat == SZOMB)
                        continue;
                if (p->p_pid == pid) {
                        lwkt_reltoken(&prg->proc_token);
                        return (p);
                }
        }
        lwkt_reltoken(&prg->proc_token);

        return (NULL);
}

/*
 * Locate a process on the zombie list.  Return a process or NULL.
 * The returned process will be referenced and the caller must release
 * it with PRELE().
 *
 * No other requirements.
 */
struct proc *
zpfind(pid_t pid)
{
        struct proc *p = curproc;
        procglob_t *prg;
        int n;

        /*
         * Shortcut the current process
         */
        if (p && p->p_pid == pid) {
                PHOLD(p);
                return (p);
        }

        /*
         * Otherwise find it in the hash table.
         */
        n = ALLPROC_HASH(pid);
        prg = &procglob[n];

        lwkt_gettoken_shared(&prg->proc_token);
        LIST_FOREACH(p, &prg->allproc, p_list) {
                if (p->p_stat != SZOMB)
                        continue;
                if (p->p_pid == pid) {
                        PHOLD(p);
                        lwkt_reltoken(&prg->proc_token);
                        return (p);
                }
        }
        lwkt_reltoken(&prg->proc_token);

        return (NULL);
}

void
pgref(struct pgrp *pgrp)
{
        refcount_acquire(&pgrp->pg_refs);
}

void
pgrel(struct pgrp *pgrp)
{
        procglob_t *prg;
        int count;
        int n;

        n = PGRP_HASH(pgrp->pg_id);
        prg = &procglob[n];

        for (;;) {
                count = pgrp->pg_refs;
                cpu_ccfence();
                KKASSERT(count > 0);
                if (count == 1) {
                        lwkt_gettoken(&prg->proc_token);
                        if (atomic_cmpset_int(&pgrp->pg_refs, 1, 0))
                                break;
                        lwkt_reltoken(&prg->proc_token);
                        /* retry */
                } else {
                        if (atomic_cmpset_int(&pgrp->pg_refs, count, count - 1))
                                return;
                        /* retry */
                }
        }

        /*
         * Successful 1->0 transition, prg->proc_token is held.
         */
        LIST_REMOVE(pgrp, pg_list);
        if (pid_doms[pgrp->pg_id % PIDSEL_DOMAINS] != (uint8_t)time_second)
                pid_doms[pgrp->pg_id % PIDSEL_DOMAINS] = (uint8_t)time_second;

        /*
         * Reset any sigio structures pointing to us as a result of
         * F_SETOWN with our pgid.
         */
        funsetownlst(&pgrp->pg_sigiolst);

        if (pgrp->pg_session->s_ttyp != NULL &&
            pgrp->pg_session->s_ttyp->t_pgrp == pgrp) {
                pgrp->pg_session->s_ttyp->t_pgrp = NULL;
        }
        lwkt_reltoken(&prg->proc_token);

        sess_rele(pgrp->pg_session);
        kfree(pgrp, M_PGRP);
}

/*
 * Locate a process group by number.
 * The returned process group will be referenced w/pgref() and must be
 * released with pgrel() (or assigned somewhere if you wish to keep the
 * reference).
 *
 * No requirements.
 */
struct pgrp *
pgfind(pid_t pgid)
{
        struct pgrp *pgrp;
        procglob_t *prg;
        int n;

        n = PGRP_HASH(pgid);
        prg = &procglob[n];
        lwkt_gettoken_shared(&prg->proc_token);

        LIST_FOREACH(pgrp, &prg->allpgrp, pg_list) {
                if (pgrp->pg_id == pgid) {
                        refcount_acquire(&pgrp->pg_refs);
                        lwkt_reltoken(&prg->proc_token);
                        return (pgrp);
                }
        }
        lwkt_reltoken(&prg->proc_token);
        return (NULL);
}

/*
 * Move p to a new or existing process group (and session)
 *
 * No requirements.
 */
int
enterpgrp(struct proc *p, pid_t pgid, int mksess)
{
        struct pgrp *pgrp;
        struct pgrp *opgrp;
        int error;

        pgrp = pgfind(pgid);

        KASSERT(pgrp == NULL || !mksess,
                ("enterpgrp: setsid into non-empty pgrp"));
        KASSERT(!SESS_LEADER(p),
                ("enterpgrp: session leader attempted setpgrp"));

        if (pgrp == NULL) {
                pid_t savepid = p->p_pid;
                struct proc *np;
                procglob_t *prg;
                int n;

                /*
                 * new process group
                 */
                KASSERT(p->p_pid == pgid,
                        ("enterpgrp: new pgrp and pid != pgid"));
                pgrp = kmalloc(sizeof(struct pgrp), M_PGRP, M_WAITOK | M_ZERO);
                pgrp->pg_id = pgid;
                LIST_INIT(&pgrp->pg_members);
                pgrp->pg_jobc = 0;
                SLIST_INIT(&pgrp->pg_sigiolst);
                lwkt_token_init(&pgrp->pg_token, "pgrp_token");
                refcount_init(&pgrp->pg_refs, 1);
                lockinit(&pgrp->pg_lock, "pgwt", 0, 0);

                n = PGRP_HASH(pgid);
                prg = &procglob[n];
                lwkt_gettoken(&prg->proc_token);

                if ((np = pfindn(savepid)) == NULL || np != p) {
                        lwkt_reltoken(&prg->proc_token);
                        error = ESRCH;
                        kfree(pgrp, M_PGRP);
                        goto fatal;
                }

                if (mksess) {
                        struct session *sess;

                        /*
                         * new session
                         */
                        sess = kmalloc(sizeof(struct session), M_SESSION,
                                       M_WAITOK | M_ZERO);
                        lwkt_gettoken(&p->p_token);
                        sess->s_leader = p;
                        sess->s_sid = p->p_pid;
                        sess->s_count = 1;
                        sess->s_ttyvp = NULL;
                        sess->s_ttyp = NULL;
                        bcopy(p->p_session->s_login, sess->s_login,
                              sizeof(sess->s_login));
                        pgrp->pg_session = sess;
                        KASSERT(p == curproc,
                                ("enterpgrp: mksession and p != curproc"));
                        p->p_flags &= ~P_CONTROLT;
                        LIST_INSERT_HEAD(&prg->allsess, sess, s_list);
                        lwkt_reltoken(&p->p_token);
                } else {
                        lwkt_gettoken(&p->p_token);
                        pgrp->pg_session = p->p_session;
                        sess_hold(pgrp->pg_session);
                        lwkt_reltoken(&p->p_token);
                }
                LIST_INSERT_HEAD(&prg->allpgrp, pgrp, pg_list);

                lwkt_reltoken(&prg->proc_token);
        } else if (pgrp == p->p_pgrp) {
                pgrel(pgrp);
                goto done;
        } /* else pgfind() referenced the pgrp */

        lwkt_gettoken(&pgrp->pg_token);
        lwkt_gettoken(&p->p_token);

        /*
         * Replace p->p_pgrp, handling any races that occur.
         */
        while ((opgrp = p->p_pgrp) != NULL) {
                pgref(opgrp);
                lwkt_gettoken(&opgrp->pg_token);
                if (opgrp != p->p_pgrp) {
                        lwkt_reltoken(&opgrp->pg_token);
                        pgrel(opgrp);
                        continue;
                }
                LIST_REMOVE(p, p_pglist);
                break;
        }
        p->p_pgrp = pgrp;
        LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

        /*
         * Adjust eligibility of affected pgrps to participate in job control.
         * Increment eligibility counts before decrementing, otherwise we
         * could reach 0 spuriously during the first call.
         */
        fixjobc(p, pgrp, 1);
        if (opgrp) {
                fixjobc(p, opgrp, 0);
                lwkt_reltoken(&opgrp->pg_token);
                pgrel(opgrp);		/* manual pgref */
                pgrel(opgrp);		/* p->p_pgrp ref */
        }
        lwkt_reltoken(&p->p_token);
        lwkt_reltoken(&pgrp->pg_token);
done:
        error = 0;
fatal:
        return (error);
}

/*
 * Remove process from process group
 *
 * No requirements.
 */
int
leavepgrp(struct proc *p)
{
        struct pgrp *pg = p->p_pgrp;

        lwkt_gettoken(&p->p_token);
        while ((pg = p->p_pgrp) != NULL) {
                pgref(pg);
                lwkt_gettoken(&pg->pg_token);
                if (p->p_pgrp != pg) {
                        lwkt_reltoken(&pg->pg_token);
                        pgrel(pg);
                        continue;
                }
                p->p_pgrp = NULL;
                LIST_REMOVE(p, p_pglist);
                lwkt_reltoken(&pg->pg_token);
                pgrel(pg);	/* manual pgref */
                pgrel(pg);	/* p->p_pgrp ref */
                break;
        }
        lwkt_reltoken(&p->p_token);

        return (0);
}

/*
 * Adjust the ref count on a session structure.  When the ref count falls to
 * zero the tty is disassociated from the session and the session structure
 * is freed.  Note that tty association is not itself ref-counted.
 *
 * No requirements.
 */
void
sess_hold(struct session *sp)
{
        atomic_add_int(&sp->s_count, 1);
}

/*
 * No requirements.
 */
void
sess_rele(struct session *sess)
{
        procglob_t *prg;
        struct tty *tp;
        int count;
        int n;

        n = SESS_HASH(sess->s_sid);
        prg = &procglob[n];

        for (;;) {
                count = sess->s_count;
                cpu_ccfence();
                KKASSERT(count > 0);
                if (count == 1) {
                        lwkt_gettoken(&tty_token);
                        lwkt_gettoken(&prg->proc_token);
                        if (atomic_cmpset_int(&sess->s_count, 1, 0))
                                break;
                        lwkt_reltoken(&prg->proc_token);
                        lwkt_reltoken(&tty_token);
                        /* retry */
                } else {
                        if (atomic_cmpset_int(&sess->s_count, count, count - 1))
                                return;
                        /* retry */
                }
        }

        /*
         * Successful 1->0 transition, tty_token and prg->proc_token are held.
         */
        LIST_REMOVE(sess, s_list);
        if (pid_doms[sess->s_sid % PIDSEL_DOMAINS] != (uint8_t)time_second)
                pid_doms[sess->s_sid % PIDSEL_DOMAINS] = (uint8_t)time_second;

        if (sess->s_ttyp && sess->s_ttyp->t_session) {
#ifdef TTY_DO_FULL_CLOSE
                /* FULL CLOSE, see ttyclearsession() */
                KKASSERT(sess->s_ttyp->t_session == sess);
                sess->s_ttyp->t_session = NULL;
#else
                /* HALF CLOSE, see ttyclearsession() */
                if (sess->s_ttyp->t_session == sess)
                        sess->s_ttyp->t_session = NULL;
#endif
        }
        if ((tp = sess->s_ttyp) != NULL) {
                sess->s_ttyp = NULL;
                ttyunhold(tp);
        }
        lwkt_reltoken(&prg->proc_token);
        lwkt_reltoken(&tty_token);

        kfree(sess, M_SESSION);
}

/*
 * Adjust pgrp jobc counters when specified process changes process group.
 * We count the number of processes in each process group that "qualify"
 * the group for terminal job control (those with a parent in a different
 * process group of the same session).  If that count reaches zero, the
 * process group becomes orphaned.  Check both the specified process'
 * process group and that of its children.
 *
 * entering == 0 => p is leaving specified group.
 * entering == 1 => p is entering specified group.
 *
 * No requirements.
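 *
 * Example of the rule (a sketch, not code): a shell in pgrp A forks a
 * child and moves it into pgrp B of the same session.  The child's parent
 * now lives in a different pgrp of the same session, so the child
 * "qualifies" B and B's pg_jobc becomes non-zero.  When the child later
 * leaves B or exits, the count is dropped and B is checked for orphaning.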
 */
void
fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
{
        struct pgrp *hispgrp;
        struct session *mysession;
        struct proc *np;

        /*
         * Check p's parent to see whether p qualifies its own process
         * group; if so, adjust count for p's process group.
         */
        lwkt_gettoken(&p->p_token);	/* p_children scan */
        lwkt_gettoken(&pgrp->pg_token);

        mysession = pgrp->pg_session;
        if ((hispgrp = p->p_pptr->p_pgrp) != pgrp &&
            hispgrp->pg_session == mysession) {
                if (entering)
                        pgrp->pg_jobc++;
                else if (--pgrp->pg_jobc == 0)
                        orphanpg(pgrp);
        }

        /*
         * Check this process' children to see whether they qualify
         * their process groups; if so, adjust counts for children's
         * process groups.
         */
        LIST_FOREACH(np, &p->p_children, p_sibling) {
                PHOLD(np);
                lwkt_gettoken(&np->p_token);
                if ((hispgrp = np->p_pgrp) != pgrp &&
                    hispgrp->pg_session == mysession &&
                    np->p_stat != SZOMB) {
                        pgref(hispgrp);
                        lwkt_gettoken(&hispgrp->pg_token);
                        if (entering)
                                hispgrp->pg_jobc++;
                        else if (--hispgrp->pg_jobc == 0)
                                orphanpg(hispgrp);
                        lwkt_reltoken(&hispgrp->pg_token);
                        pgrel(hispgrp);
                }
                lwkt_reltoken(&np->p_token);
                PRELE(np);
        }
        KKASSERT(pgrp->pg_refs > 0);
        lwkt_reltoken(&pgrp->pg_token);
        lwkt_reltoken(&p->p_token);
}

/*
 * A process group has become orphaned; if there are any stopped processes
 * in the group, hang up all processes in that group.
 *
 * The caller must hold pg_token.
 */
static void
orphanpg(struct pgrp *pg)
{
        struct proc *p;

        LIST_FOREACH(p, &pg->pg_members, p_pglist) {
                if (p->p_stat == SSTOP) {
                        LIST_FOREACH(p, &pg->pg_members, p_pglist) {
                                ksignal(p, SIGHUP);
                                ksignal(p, SIGCONT);
                        }
                        return;
                }
        }
}

/*
 * Add a new process to the allproc list and the PID hash.  This
 * also assigns a pid to the new process.
 *
 * No requirements.
 */
void
proc_add_allproc(struct proc *p)
{
        int random_offset;

        if ((random_offset = randompid) != 0) {
                read_random(&random_offset, sizeof(random_offset));
                random_offset = (random_offset & 0x7FFFFFFF) % randompid;
        }
        proc_makepid(p, random_offset);
}

/*
 * Calculate a new process pid.  This function is integrated into
 * proc_add_allproc() to guarantee that the new pid is not reused before
 * the new process can be added to the allproc list.
 *
 * p_pid is assigned and the process is added to the allproc hash table
 *
 * WARNING! We need to allocate PIDs sequentially during early boot.
 *	    In particular, init needs to have a pid of 1.
 */
static
void
proc_makepid(struct proc *p, int random_offset)
{
        static pid_t nextpid = 1;	/* heuristic, allowed to race */
        procglob_t *prg;
        struct pgrp *pg;
        struct proc *ps;
        struct session *sess;
        pid_t base;
        int8_t delta8;
        int retries;
        int n;

        /*
         * Select the next pid base candidate.
         *
         * Check for wrap, and do not allow a pid < 100.
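         *
         * nextpid is only a heuristic; an SMP race merely causes two cpus
         * to probe the same base, and the hash-table scan below resolves
         * any collision.  The pid_doms[] check further down uses 8-bit
         * time arithmetic: the stamp wraps every 256 seconds, so a delta
         * in [0, PIDDOM_DELAY] means the domain saw a reap within roughly
         * the last PIDDOM_DELAY seconds and must be skipped.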
         */
        retries = 0;
retry:
        base = atomic_fetchadd_int(&nextpid, 1) + random_offset;
        if (base <= 0 || base >= PID_MAX) {
                base = base % PID_MAX;
                if (base < 0)
                        base = 100;
                if (base < 100)
                        base += 100;
                nextpid = base;		/* reset (SMP race ok) */
        }

        /*
         * Do not allow a base pid to be selected from a domain that has
         * recently seen a pid/pgid/sessid reap.  Sleep a little if we looped
         * through all available domains.
         *
         * WARNING: We want the early pids to be allocated linearly,
         *	    particularly pid 1 and pid 2.
         */
        if (++retries >= PIDSEL_DOMAINS)
                tsleep(&nextpid, 0, "makepid", 1);
        if (base >= 100) {
                delta8 = (int8_t)time_second -
                         (int8_t)pid_doms[base % PIDSEL_DOMAINS];
                if (delta8 >= 0 && delta8 <= PIDDOM_DELAY) {
                        ++pid_domain_skips;
                        goto retry;
                }
        }

        /*
         * Calculate a hash index and find an unused process id within
         * the table, looping if we cannot find one.
         *
         * The inner loop increments by ALLPROC_HSIZE which keeps the
         * PID at the same pid_doms[] index as well as the same hash index.
         */
        n = ALLPROC_HASH(base);
        prg = &procglob[n];
        lwkt_gettoken(&prg->proc_token);

restart1:
        LIST_FOREACH(ps, &prg->allproc, p_list) {
                if (ps->p_pid == base) {
                        base += ALLPROC_HSIZE;
                        if (base >= PID_MAX) {
                                lwkt_reltoken(&prg->proc_token);
                                goto retry;
                        }
                        ++pid_inner_skips;
                        goto restart1;
                }
        }
        LIST_FOREACH(pg, &prg->allpgrp, pg_list) {
                if (pg->pg_id == base) {
                        base += ALLPROC_HSIZE;
                        if (base >= PID_MAX) {
                                lwkt_reltoken(&prg->proc_token);
                                goto retry;
                        }
                        ++pid_inner_skips;
                        goto restart1;
                }
        }
        LIST_FOREACH(sess, &prg->allsess, s_list) {
                if (sess->s_sid == base) {
                        base += ALLPROC_HSIZE;
                        if (base >= PID_MAX) {
                                lwkt_reltoken(&prg->proc_token);
                                goto retry;
                        }
                        ++pid_inner_skips;
                        goto restart1;
                }
        }

        /*
         * Assign the pid and insert the process.
         */
        p->p_pid = base;
        LIST_INSERT_HEAD(&prg->allproc, p, p_list);
        lwkt_reltoken(&prg->proc_token);
}

/*
 * Called from exit1 to place the process into a zombie state.
 * The process remains on the pid hash but p_stat is set to SZOMB,
 * so normal pfind[n]() calls will not find it any more.
 *
 * Caller must hold p->p_token.  We are required to wait until p_lock
 * becomes zero before we can manipulate the list, allowing allproc
 * scans to guarantee consistency during a list scan.
 */
void
proc_move_allproc_zombie(struct proc *p)
{
        procglob_t *prg;
        int n;

        n = ALLPROC_HASH(p->p_pid);
        prg = &procglob[n];
        PSTALL(p, "reap1", 0);
        lwkt_gettoken(&prg->proc_token);

        PSTALL(p, "reap1a", 0);
        p->p_stat = SZOMB;

        lwkt_reltoken(&prg->proc_token);
        dsched_exit_proc(p);
}

/*
 * This routine is called from kern_wait() and will remove the process
 * from the zombie list and the sibling list.  This routine will block
 * if someone has a lock on the process (p_lock).
 *
 * Caller must hold p->p_token.  We are required to wait until p_lock
 * becomes zero before we can manipulate the list, allowing allproc
 * scans to guarantee consistency during a list scan.
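 *
 * Note that stamping pid_doms[] with (uint8_t)time_second below is what
 * defers reuse of this pid by proc_makepid() for PIDDOM_DELAY seconds.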
 */
void
proc_remove_zombie(struct proc *p)
{
        procglob_t *prg;
        int n;

        n = ALLPROC_HASH(p->p_pid);
        prg = &procglob[n];

        PSTALL(p, "reap2", 0);
        lwkt_gettoken(&prg->proc_token);
        PSTALL(p, "reap2a", 0);
        LIST_REMOVE(p, p_list);		/* remove from master list */
        LIST_REMOVE(p, p_sibling);	/* and from sibling list */
        p->p_pptr = NULL;
        if (pid_doms[p->p_pid % PIDSEL_DOMAINS] != (uint8_t)time_second)
                pid_doms[p->p_pid % PIDSEL_DOMAINS] = (uint8_t)time_second;
        lwkt_reltoken(&prg->proc_token);
}

/*
 * Handle various requirements prior to returning to usermode.  Called from
 * platform trap and system call code.
 */
void
lwpuserret(struct lwp *lp)
{
        struct proc *p = lp->lwp_proc;

        if (lp->lwp_mpflags & LWP_MP_VNLRU) {
                atomic_clear_int(&lp->lwp_mpflags, LWP_MP_VNLRU);
                allocvnode_gc();
        }
        if (lp->lwp_mpflags & LWP_MP_WEXIT) {
                lwkt_gettoken(&p->p_token);
                lwp_exit(0, NULL);
                lwkt_reltoken(&p->p_token);	/* NOT REACHED */
        }
}

/*
 * Kernel threads run from user processes can also accumulate deferred
 * actions which need to be acted upon.  Callers include:
 *
 * nfsd - Can allocate lots of vnodes
 */
void
lwpkthreaddeferred(void)
{
        struct lwp *lp = curthread->td_lwp;

        if (lp) {
                if (lp->lwp_mpflags & LWP_MP_VNLRU) {
                        atomic_clear_int(&lp->lwp_mpflags, LWP_MP_VNLRU);
                        allocvnode_gc();
                }
        }
}

void
proc_usermap(struct proc *p, int invfork)
{
        struct sys_upmap *upmap;

        lwkt_gettoken(&p->p_token);
        upmap = kmalloc(roundup2(sizeof(*upmap), PAGE_SIZE), M_PROC,
                        M_WAITOK | M_ZERO);
        if (p->p_upmap == NULL) {
                upmap->header[0].type = UKPTYPE_VERSION;
                upmap->header[0].offset = offsetof(struct sys_upmap, version);
                upmap->header[1].type = UPTYPE_RUNTICKS;
                upmap->header[1].offset = offsetof(struct sys_upmap, runticks);
                upmap->header[2].type = UPTYPE_FORKID;
                upmap->header[2].offset = offsetof(struct sys_upmap, forkid);
                upmap->header[3].type = UPTYPE_PID;
                upmap->header[3].offset = offsetof(struct sys_upmap, pid);
                upmap->header[4].type = UPTYPE_PROC_TITLE;
                upmap->header[4].offset = offsetof(struct sys_upmap, proc_title);
                upmap->header[5].type = UPTYPE_INVFORK;
                upmap->header[5].offset = offsetof(struct sys_upmap, invfork);

                upmap->version = UPMAP_VERSION;
                upmap->pid = p->p_pid;
                upmap->forkid = p->p_forkid;
                upmap->invfork = invfork;
                p->p_upmap = upmap;
        } else {
                kfree(upmap, M_PROC);
        }
        lwkt_reltoken(&p->p_token);
}

void
proc_userunmap(struct proc *p)
{
        struct sys_upmap *upmap;

        lwkt_gettoken(&p->p_token);
        if ((upmap = p->p_upmap) != NULL) {
                p->p_upmap = NULL;
                kfree(upmap, M_PROC);
        }
        lwkt_reltoken(&p->p_token);
}

/*
 * Scan all processes on the allproc list.  The process is automatically
 * held for the callback.  A return value of -1 terminates the loop.
 * Zombie procs are skipped.
 *
 * The callback is made with the process held and proc_token held.
 *
 * We limit the scan to the number of processes as-of the start of
 * the scan so as not to get caught up in an endless loop if new processes
 * are created more quickly than we can scan the old ones.
 * Add a little slop to try to catch edge cases since nprocs can race.
 *
 * No requirements.
 */
void
allproc_scan(int (*callback)(struct proc *, void *), void *data)
{
        int limit = nprocs + ncpus;
        struct proc *p;
        int r;
        int n;

        /*
         * prg->proc_token protects the allproc list and PHOLD() prevents the
         * process from being removed from the allproc list or the zombproc
         * list.
         */
        for (n = 0; n < ALLPROC_HSIZE; ++n) {
                procglob_t *prg = &procglob[n];

                if (LIST_FIRST(&prg->allproc) == NULL)
                        continue;
                lwkt_gettoken(&prg->proc_token);
                LIST_FOREACH(p, &prg->allproc, p_list) {
                        if (p->p_stat == SZOMB)
                                continue;
                        PHOLD(p);
                        r = callback(p, data);
                        PRELE(p);
                        if (r < 0)
                                break;
                        if (--limit < 0)
                                break;
                }
                lwkt_reltoken(&prg->proc_token);

                /*
                 * Check if asked to stop early
                 */
                if (p)
                        break;
        }
}

/*
 * Scan all lwps of processes on the allproc list.  The lwp is automatically
 * held for the callback.  A return value of -1 terminates the loop.
 *
 * The callback is made with the process and lwp both held, and proc_token
 * held.
 *
 * No requirements.
 */
void
alllwp_scan(int (*callback)(struct lwp *, void *), void *data)
{
        struct proc *p;
        struct lwp *lp;
        int r = 0;
        int n;

        for (n = 0; n < ALLPROC_HSIZE; ++n) {
                procglob_t *prg = &procglob[n];

                if (LIST_FIRST(&prg->allproc) == NULL)
                        continue;
                lwkt_gettoken(&prg->proc_token);
                LIST_FOREACH(p, &prg->allproc, p_list) {
                        if (p->p_stat == SZOMB)
                                continue;
                        PHOLD(p);
                        lwkt_gettoken(&p->p_token);
                        FOREACH_LWP_IN_PROC(lp, p) {
                                LWPHOLD(lp);
                                r = callback(lp, data);
                                LWPRELE(lp);
                        }
                        lwkt_reltoken(&p->p_token);
                        PRELE(p);
                        if (r < 0)
                                break;
                }
                lwkt_reltoken(&prg->proc_token);

                /*
                 * Asked to exit early
                 */
                if (p)
                        break;
        }
}

/*
 * Scan all processes on the zombproc list.  The process is automatically
 * held for the callback.  A return value of -1 terminates the loop.
 *
 * The callback is made with the process held and proc_token held.
 *
 * No requirements.
 */
void
zombproc_scan(int (*callback)(struct proc *, void *), void *data)
{
        struct proc *p;
        int r;
        int n;

        /*
         * prg->proc_token protects the allproc list and PHOLD() prevents the
         * process from being removed from the allproc list or the zombproc
         * list.
         */
        for (n = 0; n < ALLPROC_HSIZE; ++n) {
                procglob_t *prg = &procglob[n];

                if (LIST_FIRST(&prg->allproc) == NULL)
                        continue;
                lwkt_gettoken(&prg->proc_token);
                LIST_FOREACH(p, &prg->allproc, p_list) {
                        if (p->p_stat != SZOMB)
                                continue;
                        PHOLD(p);
                        r = callback(p, data);
                        PRELE(p);
                        if (r < 0)
                                break;
                }
                lwkt_reltoken(&prg->proc_token);

                /*
                 * Check if asked to stop early
                 */
                if (p)
                        break;
        }
}

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

/*
 * Debugging only
 */
DB_SHOW_COMMAND(pgrpdump, pgrpdump)
{
        struct pgrp *pgrp;
        struct proc *p;
        procglob_t *prg;
        int i;

        for (i = 0; i < ALLPROC_HSIZE; ++i) {
                prg = &procglob[i];

                if (LIST_EMPTY(&prg->allpgrp))
                        continue;
                kprintf("\tindx %d\n", i);
                LIST_FOREACH(pgrp, &prg->allpgrp, pg_list) {
                        kprintf("\tpgrp %p, pgid %ld, sess %p, "
                                "sesscnt %d, mem %p\n",
                                (void *)pgrp, (long)pgrp->pg_id,
                                (void *)pgrp->pg_session,
                                pgrp->pg_session->s_count,
                                (void *)LIST_FIRST(&pgrp->pg_members));
                        LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
                                kprintf("\t\tpid %ld addr %p pgrp %p\n",
                                        (long)p->p_pid, (void *)p,
                                        (void *)p->p_pgrp);
                        }
                }
        }
}
#endif /* DDB */

/*
 * The caller must hold proc_token.
 */
static int
sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags)
{
        struct kinfo_proc ki;
        struct lwp *lp;
        int skp = 0, had_output = 0;
        int error;

        bzero(&ki, sizeof(ki));
        lwkt_gettoken_shared(&p->p_token);
        fill_kinfo_proc(p, &ki);
        if ((flags & KERN_PROC_FLAG_LWP) == 0)
                skp = 1;
        error = 0;
        FOREACH_LWP_IN_PROC(lp, p) {
                LWPHOLD(lp);
                fill_kinfo_lwp(lp, &ki.kp_lwp);
                had_output = 1;
                error = SYSCTL_OUT(req, &ki, sizeof(ki));
                LWPRELE(lp);
                if (error)
                        break;
                if (skp)
                        break;
        }
        lwkt_reltoken(&p->p_token);

        /* We need to output at least the proc, even if there is no lwp. */
        if (had_output == 0) {
                error = SYSCTL_OUT(req, &ki, sizeof(ki));
        }
        return (error);
}

/*
 * The caller must hold proc_token.
 */
static int
sysctl_out_proc_kthread(struct thread *td, struct sysctl_req *req)
{
        struct kinfo_proc ki;
        int error;

        fill_kinfo_proc_kthread(td, &ki);
        error = SYSCTL_OUT(req, &ki, sizeof(ki));
        if (error)
                return error;
        return(0);
}

/*
 * No requirements.
 */
static int
sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
{
        int *name = (int *)arg1;
        int oid = oidp->oid_number;
        u_int namelen = arg2;
        struct proc *p;
        struct thread *td;
        struct thread *marker;
        int flags = 0;
        int error = 0;
        int n;
        int origcpu;
        struct ucred *cr1 = curproc->p_ucred;

        flags = oid & KERN_PROC_FLAGMASK;
        oid &= ~KERN_PROC_FLAGMASK;

        if ((oid == KERN_PROC_ALL && namelen != 0) ||
            (oid != KERN_PROC_ALL && namelen != 1)) {
                return (EINVAL);
        }

        /*
         * proc_token protects the allproc list and PHOLD() prevents the
         * process from being removed from the allproc list or the zombproc
         * list.
         */
        if (oid == KERN_PROC_PID) {
                p = pfind((pid_t)name[0]);
                if (p) {
                        if (PRISON_CHECK(cr1, p->p_ucred))
                                error = sysctl_out_proc(p, req, flags);
                        PRELE(p);
                }
                goto post_threads;
        }
        p = NULL;

        if (!req->oldptr) {
                /* overestimate by 5 procs */
                error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5);
                if (error)
                        goto post_threads;
        }

        for (n = 0; n < ALLPROC_HSIZE; ++n) {
                procglob_t *prg = &procglob[n];

                if (LIST_EMPTY(&prg->allproc))
                        continue;
                lwkt_gettoken_shared(&prg->proc_token);
                LIST_FOREACH(p, &prg->allproc, p_list) {
                        /*
                         * Show a user only their processes.
                         */
                        if ((!ps_showallprocs) &&
                            (p->p_ucred == NULL || p_trespass(cr1, p->p_ucred))) {
                                continue;
                        }
                        /*
                         * Skip embryonic processes.
                         */
                        if (p->p_stat == SIDL)
                                continue;
                        /*
                         * TODO - make more efficient (see notes below).
                         * do by session.
                         */
                        switch (oid) {
                        case KERN_PROC_PGRP:
                                /* could do this by traversing pgrp */
                                if (p->p_pgrp == NULL ||
                                    p->p_pgrp->pg_id != (pid_t)name[0])
                                        continue;
                                break;

                        case KERN_PROC_TTY:
                                if ((p->p_flags & P_CONTROLT) == 0 ||
                                    p->p_session == NULL ||
                                    p->p_session->s_ttyp == NULL ||
                                    dev2udev(p->p_session->s_ttyp->t_dev) !=
                                    (udev_t)name[0])
                                        continue;
                                break;

                        case KERN_PROC_UID:
                                if (p->p_ucred == NULL ||
                                    p->p_ucred->cr_uid != (uid_t)name[0])
                                        continue;
                                break;

                        case KERN_PROC_RUID:
                                if (p->p_ucred == NULL ||
                                    p->p_ucred->cr_ruid != (uid_t)name[0])
                                        continue;
                                break;
                        }

                        if (!PRISON_CHECK(cr1, p->p_ucred))
                                continue;
                        PHOLD(p);
                        error = sysctl_out_proc(p, req, flags);
                        PRELE(p);
                        if (error) {
                                lwkt_reltoken(&prg->proc_token);
                                goto post_threads;
                        }
                }
                lwkt_reltoken(&prg->proc_token);
        }

        /*
         * Iterate over all active cpus and scan their thread list.  Start
         * with the next logical cpu and end with our original cpu.  We
         * migrate our own thread to each target cpu in order to safely scan
         * its thread list.  In the last loop we migrate back to our original
         * cpu.
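         *
         * The TDF_MARKER placeholder thread inserted into each cpu's
         * tdallq lets the scan drop the critical section around the
         * copyout and then resume from where it left off without losing
         * its place in the list.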
         */
        origcpu = mycpu->gd_cpuid;
        if (!ps_showallthreads || jailed(cr1))
                goto post_threads;

        marker = kmalloc(sizeof(struct thread), M_TEMP, M_WAITOK|M_ZERO);
        marker->td_flags = TDF_MARKER;
        error = 0;

        for (n = 1; n <= ncpus; ++n) {
                globaldata_t rgd;
                int nid;

                nid = (origcpu + n) % ncpus;
                if (CPUMASK_TESTBIT(smp_active_mask, nid) == 0)
                        continue;
                rgd = globaldata_find(nid);
                lwkt_setcpu_self(rgd);

                crit_enter();
                TAILQ_INSERT_TAIL(&rgd->gd_tdallq, marker, td_allq);

                while ((td = TAILQ_PREV(marker, lwkt_queue, td_allq)) != NULL) {
                        TAILQ_REMOVE(&rgd->gd_tdallq, marker, td_allq);
                        TAILQ_INSERT_BEFORE(td, marker, td_allq);
                        if (td->td_flags & TDF_MARKER)
                                continue;
                        if (td->td_proc)
                                continue;

                        lwkt_hold(td);
                        crit_exit();

                        switch (oid) {
                        case KERN_PROC_PGRP:
                        case KERN_PROC_TTY:
                        case KERN_PROC_UID:
                        case KERN_PROC_RUID:
                                break;
                        default:
                                error = sysctl_out_proc_kthread(td, req);
                                break;
                        }
                        lwkt_rele(td);
                        crit_enter();
                        if (error)
                                break;
                }
                TAILQ_REMOVE(&rgd->gd_tdallq, marker, td_allq);
                crit_exit();

                if (error)
                        break;
        }

        /*
         * Userland scheduler expects us to return on the same cpu we
         * started on.
         */
        if (mycpu->gd_cpuid != origcpu)
                lwkt_setcpu_self(globaldata_find(origcpu));

        kfree(marker, M_TEMP);

post_threads:
        return (error);
}

/*
 * This sysctl allows a process to retrieve the argument list or process
 * title for another process without groping around in the address space
 * of the other process.  It also allows a process to set its own process
 * title to a string of its own choice.
 *
 * No requirements.
 */
static int
sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS)
{
        int *name = (int*) arg1;
        u_int namelen = arg2;
        struct proc *p;
        struct pargs *opa;
        struct pargs *pa;
        int error = 0;
        struct ucred *cr1 = curproc->p_ucred;

        if (namelen != 1)
                return (EINVAL);

        p = pfind((pid_t)name[0]);
        if (p == NULL)
                goto done;
        lwkt_gettoken(&p->p_token);

        if ((!ps_argsopen) && p_trespass(cr1, p->p_ucred))
                goto done;

        if (req->newptr && curproc != p) {
                error = EPERM;
                goto done;
        }
        if (req->oldptr) {
                if (p->p_upmap != NULL && p->p_upmap->proc_title[0]) {
                        /*
                         * Args set via writable user process mmap.
                         * We must calculate the string length manually
                         * because the user data can change at any time.
                         */
                        size_t n;
                        char *base;

                        base = p->p_upmap->proc_title;
                        for (n = 0; n < UPMAP_MAXPROCTITLE - 1; ++n) {
                                if (base[n] == 0)
                                        break;
                        }
                        error = SYSCTL_OUT(req, base, n);
                        if (error == 0)
                                error = SYSCTL_OUT(req, "", 1);
                } else if ((pa = p->p_args) != NULL) {
                        /*
                         * Args set by setproctitle() sysctl.
                         */
                        refcount_acquire(&pa->ar_ref);
                        error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length);
                        if (refcount_release(&pa->ar_ref))
                                kfree(pa, M_PARGS);
                }
        }
        if (req->newptr == NULL)
                goto done;

        if (req->newlen + sizeof(struct pargs) > ps_arg_cache_limit) {
                goto done;
        }

        pa = kmalloc(sizeof(struct pargs) + req->newlen, M_PARGS, M_WAITOK);
        refcount_init(&pa->ar_ref, 1);
        pa->ar_length = req->newlen;
        error = SYSCTL_IN(req, pa->ar_args, req->newlen);
        if (error) {
                kfree(pa, M_PARGS);
                goto done;
        }

        /*
         * Replace p_args with the new pa.  p_args may have previously
         * been NULL.
         */
        opa = p->p_args;
        p->p_args = pa;

        if (opa) {
                KKASSERT(opa->ar_ref > 0);
                if (refcount_release(&opa->ar_ref)) {
                        kfree(opa, M_PARGS);
                        /* opa = NULL; */
                }
        }
done:
        if (p) {
                lwkt_reltoken(&p->p_token);
                PRELE(p);
        }
        return (error);
}

static int
sysctl_kern_proc_cwd(SYSCTL_HANDLER_ARGS)
{
        int *name = (int*) arg1;
        u_int namelen = arg2;
        struct proc *p;
        int error = 0;
        char *fullpath, *freepath;
        struct ucred *cr1 = curproc->p_ucred;

        if (namelen != 1)
                return (EINVAL);

        p = pfind((pid_t)name[0]);
        if (p == NULL)
                goto done;
        lwkt_gettoken_shared(&p->p_token);

        /*
         * If we are not allowed to see other args, we certainly shouldn't
         * get the cwd either.  Also check the usual trespassing.
         */
        if ((!ps_argsopen) && p_trespass(cr1, p->p_ucred))
                goto done;

        if (req->oldptr && p->p_fd != NULL && p->p_fd->fd_ncdir.ncp) {
                struct nchandle nch;

                cache_copy(&p->p_fd->fd_ncdir, &nch);
                error = cache_fullpath(p, &nch, NULL,
                                       &fullpath, &freepath, 0);
                cache_drop(&nch);
                if (error)
                        goto done;
                error = SYSCTL_OUT(req, fullpath, strlen(fullpath) + 1);
                kfree(freepath, M_TEMP);
        }

done:
        if (p) {
                lwkt_reltoken(&p->p_token);
                PRELE(p);
        }
        return (error);
}

/*
 * This sysctl allows a process to retrieve the path of the executable for
 * itself or another process.
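 *
 * Illustrative userland usage (a sketch under the usual sysctl(3)
 * conventions, not part of this file); -1 selects the calling process:
 *
 *	int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
 *	char buf[MAXPATHLEN];
 *	size_t len = sizeof(buf);
 *
 *	if (sysctl(mib, 4, buf, &len, NULL, 0) == 0)
 *		printf("%s\n", buf);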
 */
static int
sysctl_kern_proc_pathname(SYSCTL_HANDLER_ARGS)
{
        pid_t *pidp = (pid_t *)arg1;
        unsigned int arglen = arg2;
        struct proc *p;
        char *retbuf, *freebuf;
        int error = 0;
        struct nchandle nch;

        if (arglen != 1)
                return (EINVAL);
        if (*pidp == -1) {	/* -1 means this process */
                p = curproc;
        } else {
                p = pfind(*pidp);
                if (p == NULL)
                        return (ESRCH);
        }

        cache_copy(&p->p_textnch, &nch);
        error = cache_fullpath(p, &nch, NULL, &retbuf, &freebuf, 0);
        cache_drop(&nch);
        if (error)
                goto done;
        error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1);
        kfree(freebuf, M_TEMP);
done:
        if (*pidp != -1)
                PRELE(p);

        return (error);
}

SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD, 0, "Process table");

SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT,
        0, 0, sysctl_kern_proc, "S,proc", "Return entire process table");

SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD,
        sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD,
        sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD,
        sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD,
        sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD,
        sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, (KERN_PROC_ALL | KERN_PROC_FLAG_LWP), all_lwp,
        CTLFLAG_RD, sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_FLAG_LWP), pgrp_lwp,
        CTLFLAG_RD, sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_FLAG_LWP), tty_lwp,
        CTLFLAG_RD, sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_FLAG_LWP), uid_lwp,
        CTLFLAG_RD, sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_FLAG_LWP), ruid_lwp,
        CTLFLAG_RD, sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_FLAG_LWP), pid_lwp,
        CTLFLAG_RD, sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args, CTLFLAG_RW | CTLFLAG_ANYBODY,
        sysctl_kern_proc_args, "Process argument list");

SYSCTL_NODE(_kern_proc, KERN_PROC_CWD, cwd, CTLFLAG_RD | CTLFLAG_ANYBODY,
        sysctl_kern_proc_cwd, "Process current working directory");

static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD,
        sysctl_kern_proc_pathname, "Process executable path");