/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $
 */

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kern_syscall.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/time.h>
#include <sys/lockf.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

#include <sys/thread2.h>
#include <sys/spinlock2.h>

static int donice (struct proc *chgp, int n);
static int doionice (struct proc *chgp, int n);

static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct spinlock uihash_lock;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

static struct uidinfo *uicreate (uid_t uid);
static struct uidinfo *uilookup (uid_t uid);

/*
 * Resource controls and accounting.
 */
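/*
 * Note: a nice value may legitimately be negative, so sys_getpriority()
 * cannot signal "no match" via the result itself; it uses PRIO_MAX + 1
 * as an internal sentinel while scanning and reports ESRCH instead.
 */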
struct getpriority_info {
	int low;
	int who;
};

static int getpriority_callback(struct proc *p, void *data);

/*
 * MPALMOSTSAFE
 */
int
sys_getpriority(struct getpriority_args *uap)
{
	struct getpriority_info info;
	thread_t curtd = curthread;
	struct proc *curp = curproc;
	struct proc *p;
	struct pgrp *pg;
	int low = PRIO_MAX + 1;
	int error;

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			low = curp->p_nice;
		} else {
			p = pfind(uap->who);
			if (p) {
				lwkt_gettoken_shared(&p->p_token);
				if (PRISON_CHECK(curtd->td_ucred, p->p_ucred))
					low = p->p_nice;
				lwkt_reltoken(&p->p_token);
				PRELE(p);
			}
		}
		break;
	case PRIO_PGRP:
		if (uap->who == 0) {
			lwkt_gettoken_shared(&curp->p_token);
			pg = curp->p_pgrp;
			pgref(pg);
			lwkt_reltoken(&curp->p_token);
		} else if ((pg = pgfind(uap->who)) == NULL) {
			break;
		} /* else ref held from pgfind */

		lwkt_gettoken_shared(&pg->pg_token);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (PRISON_CHECK(curtd->td_ucred, p->p_ucred) &&
			    p->p_nice < low) {
				low = p->p_nice;
			}
		}
		lwkt_reltoken(&pg->pg_token);
		pgrel(pg);
		break;
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curtd->td_ucred->cr_uid;
		info.low = low;
		info.who = uap->who;
		allproc_scan(getpriority_callback, &info);
		low = info.low;
		break;

	default:
		error = EINVAL;
		goto done;
	}
	if (low == PRIO_MAX + 1) {
		error = ESRCH;
		goto done;
	}
	uap->sysmsg_result = low;
	error = 0;
done:
	return (error);
}

/*
 * Figure out the current lowest nice priority for processes owned
 * by the specified user.
 */
static
int
getpriority_callback(struct proc *p, void *data)
{
	struct getpriority_info *info = data;

	lwkt_gettoken_shared(&p->p_token);
	if (PRISON_CHECK(curthread->td_ucred, p->p_ucred) &&
	    p->p_ucred->cr_uid == info->who &&
	    p->p_nice < info->low) {
		info->low = p->p_nice;
	}
	lwkt_reltoken(&p->p_token);
	return(0);
}

struct setpriority_info {
	int prio;
	int who;
	int error;
	int found;
};

static int setpriority_callback(struct proc *p, void *data);

/*
 * MPALMOSTSAFE
 */
int
sys_setpriority(struct setpriority_args *uap)
{
	struct setpriority_info info;
	thread_t curtd = curthread;
	struct proc *curp = curproc;
	struct proc *p;
	struct pgrp *pg;
	int found = 0, error = 0;

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			lwkt_gettoken(&curp->p_token);
			error = donice(curp, uap->prio);
			found++;
			lwkt_reltoken(&curp->p_token);
		} else {
			p = pfind(uap->who);
			if (p) {
				lwkt_gettoken(&p->p_token);
				if (PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
					error = donice(p, uap->prio);
					found++;
				}
				lwkt_reltoken(&p->p_token);
				PRELE(p);
			}
		}
		break;
	case PRIO_PGRP:
		if (uap->who == 0) {
			lwkt_gettoken_shared(&curp->p_token);
			pg = curp->p_pgrp;
			pgref(pg);
			lwkt_reltoken(&curp->p_token);
		} else if ((pg = pgfind(uap->who)) == NULL) {
			break;
		} /* else ref held from pgfind */

		lwkt_gettoken(&pg->pg_token);
restart:
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PHOLD(p);
			lwkt_gettoken(&p->p_token);
			if (p->p_pgrp == pg &&
			    PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
				error = donice(p, uap->prio);
				found++;
			}
			lwkt_reltoken(&p->p_token);
			if (p->p_pgrp != pg) {
				PRELE(p);
				goto restart;
			}
			PRELE(p);
		}
		lwkt_reltoken(&pg->pg_token);
		pgrel(pg);
		break;
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curtd->td_ucred->cr_uid;
		info.prio = uap->prio;
		info.who = uap->who;
		info.error = 0;
		info.found = 0;
		allproc_scan(setpriority_callback, &info);
		error = info.error;
		found = info.found;
		break;
	default:
		error = EINVAL;
		found = 1;
		break;
	}

	if (found == 0)
		error = ESRCH;
	return (error);
}

static
int
setpriority_callback(struct proc *p, void *data)
{
	struct setpriority_info *info = data;
	int error;

	lwkt_gettoken(&p->p_token);
	if (p->p_ucred->cr_uid == info->who &&
	    PRISON_CHECK(curthread->td_ucred, p->p_ucred)) {
		error = donice(p, info->prio);
		if (error)
			info->error = error;
		++info->found;
	}
	lwkt_reltoken(&p->p_token);
	return(0);
}

/*
 * Caller must hold chgp->p_token
 */
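/*
 * An unprivileged caller may only raise the nice value (lower the
 * scheduling priority), and only for processes whose effective uid
 * matches the caller's effective or real uid.
 */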
static int
donice(struct proc *chgp, int n)
{
	struct ucred *cr = curthread->td_ucred;
	struct lwp *lp;

	if (cr->cr_uid && cr->cr_ruid &&
	    cr->cr_uid != chgp->p_ucred->cr_uid &&
	    cr->cr_ruid != chgp->p_ucred->cr_uid)
		return (EPERM);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < chgp->p_nice && priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0))
		return (EACCES);
	chgp->p_nice = n;
	FOREACH_LWP_IN_PROC(lp, chgp) {
		LWPHOLD(lp);
		chgp->p_usched->resetpriority(lp);
		LWPRELE(lp);
	}
	return (0);
}

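/*
 * The ioprio_get/ioprio_set interfaces below mirror getpriority and
 * setpriority but operate on the per-process i/o priority (p_ionice).
 * IOPRIO_MIN - 2 serves as the internal "no match" sentinel while
 * scanning.
 */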
struct ioprio_get_info {
	int high;
	int who;
};

static int ioprio_get_callback(struct proc *p, void *data);

/*
 * MPALMOSTSAFE
 */
int
sys_ioprio_get(struct ioprio_get_args *uap)
{
	struct ioprio_get_info info;
	thread_t curtd = curthread;
	struct proc *curp = curproc;
	struct proc *p;
	struct pgrp *pg;
	int high = IOPRIO_MIN - 2;
	int error;

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			high = curp->p_ionice;
		} else {
			p = pfind(uap->who);
			if (p) {
				lwkt_gettoken_shared(&p->p_token);
				if (PRISON_CHECK(curtd->td_ucred, p->p_ucred))
					high = p->p_ionice;
				lwkt_reltoken(&p->p_token);
				PRELE(p);
			}
		}
		break;
	case PRIO_PGRP:
		if (uap->who == 0) {
			lwkt_gettoken_shared(&curp->p_token);
			pg = curp->p_pgrp;
			pgref(pg);
			lwkt_reltoken(&curp->p_token);
		} else if ((pg = pgfind(uap->who)) == NULL) {
			break;
		} /* else ref held from pgfind */

		lwkt_gettoken_shared(&pg->pg_token);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (PRISON_CHECK(curtd->td_ucred, p->p_ucred) &&
			    p->p_ionice > high)
				high = p->p_ionice;
		}
		lwkt_reltoken(&pg->pg_token);
		pgrel(pg);
		break;
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curtd->td_ucred->cr_uid;
		info.high = high;
		info.who = uap->who;
		allproc_scan(ioprio_get_callback, &info);
		high = info.high;
		break;
	default:
		error = EINVAL;
		goto done;
	}
	if (high == IOPRIO_MIN - 2) {
		error = ESRCH;
		goto done;
	}
	uap->sysmsg_result = high;
	error = 0;
done:
	return (error);
}

/*
 * Figure out the current highest i/o priority for processes owned
 * by the specified user.
 */
static
int
ioprio_get_callback(struct proc *p, void *data)
{
	struct ioprio_get_info *info = data;

	lwkt_gettoken_shared(&p->p_token);
	if (PRISON_CHECK(curthread->td_ucred, p->p_ucred) &&
	    p->p_ucred->cr_uid == info->who &&
	    p->p_ionice > info->high) {
		info->high = p->p_ionice;
	}
	lwkt_reltoken(&p->p_token);
	return(0);
}

struct ioprio_set_info {
	int prio;
	int who;
	int error;
	int found;
};

static int ioprio_set_callback(struct proc *p, void *data);

/*
 * MPALMOSTSAFE
 */
int
sys_ioprio_set(struct ioprio_set_args *uap)
{
	struct ioprio_set_info info;
	thread_t curtd = curthread;
	struct proc *curp = curproc;
	struct proc *p;
	struct pgrp *pg;
	int found = 0, error = 0;

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			lwkt_gettoken(&curp->p_token);
			error = doionice(curp, uap->prio);
			lwkt_reltoken(&curp->p_token);
			found++;
		} else {
			p = pfind(uap->who);
			if (p) {
				lwkt_gettoken(&p->p_token);
				if (PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
					error = doionice(p, uap->prio);
					found++;
				}
				lwkt_reltoken(&p->p_token);
				PRELE(p);
			}
		}
		break;
	case PRIO_PGRP:
		if (uap->who == 0) {
			lwkt_gettoken_shared(&curp->p_token);
			pg = curp->p_pgrp;
			pgref(pg);
			lwkt_reltoken(&curp->p_token);
		} else if ((pg = pgfind(uap->who)) == NULL) {
			break;
		} /* else ref held from pgfind */

		lwkt_gettoken(&pg->pg_token);
restart:
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PHOLD(p);
			lwkt_gettoken(&p->p_token);
			if (p->p_pgrp == pg &&
			    PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
				error = doionice(p, uap->prio);
				found++;
			}
			lwkt_reltoken(&p->p_token);
			if (p->p_pgrp != pg) {
				PRELE(p);
				goto restart;
			}
			PRELE(p);
		}
		lwkt_reltoken(&pg->pg_token);
		pgrel(pg);
		break;
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curtd->td_ucred->cr_uid;
		info.prio = uap->prio;
		info.who = uap->who;
		info.error = 0;
		info.found = 0;
		allproc_scan(ioprio_set_callback, &info);
		error = info.error;
		found = info.found;
		break;
	default:
		error = EINVAL;
		found = 1;
		break;
	}

	if (found == 0)
		error = ESRCH;
	return (error);
}

static
int
ioprio_set_callback(struct proc *p, void *data)
{
	struct ioprio_set_info *info = data;
	int error;

	lwkt_gettoken(&p->p_token);
	if (p->p_ucred->cr_uid == info->who &&
	    PRISON_CHECK(curthread->td_ucred, p->p_ucred)) {
		error = doionice(p, info->prio);
		if (error)
			info->error = error;
		++info->found;
	}
	lwkt_reltoken(&p->p_token);
	return(0);
}

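/*
 * Caller must hold chgp->p_token.  The permission checks match donice():
 * the usual uid restrictions apply, and lowering the value below the
 * current p_ionice requires PRIV_SCHED_SETPRIORITY.
 */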
static int
doionice(struct proc *chgp, int n)
{
	struct ucred *cr = curthread->td_ucred;

	if (cr->cr_uid && cr->cr_ruid &&
	    cr->cr_uid != chgp->p_ucred->cr_uid &&
	    cr->cr_ruid != chgp->p_ucred->cr_uid)
		return (EPERM);
	if (n > IOPRIO_MAX)
		n = IOPRIO_MAX;
	if (n < IOPRIO_MIN)
		n = IOPRIO_MIN;
	if (n < chgp->p_ionice &&
	    priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0))
		return (EACCES);
	chgp->p_ionice = n;

	return (0);
}

/*
 * MPALMOSTSAFE
 */
int
sys_lwp_rtprio(struct lwp_rtprio_args *uap)
{
	struct ucred *cr = curthread->td_ucred;
	struct proc *p;
	struct lwp *lp;
	struct rtprio rtp;
	int error;

	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	if (error)
		return error;
	if (uap->pid < 0)
		return EINVAL;

	if (uap->pid == 0) {
		p = curproc;
		PHOLD(p);
	} else {
		p = pfind(uap->pid);
	}
	if (p == NULL) {
		error = ESRCH;
		goto done;
	}
	lwkt_gettoken(&p->p_token);

	if (uap->tid < -1) {
		error = EINVAL;
		goto done;
	}
	if (uap->tid == -1) {
		/*
		 * sadly, tid can be 0 so we can't use 0 here
		 * like sys_rtprio()
		 */
		lp = curthread->td_lwp;
	} else {
		lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
		if (lp == NULL) {
			error = ESRCH;
			goto done;
		}
	}

	switch (uap->function) {
	case RTP_LOOKUP:
		error = copyout(&lp->lwp_rtprio, uap->rtp,
				sizeof(struct rtprio));
		break;
	case RTP_SET:
		if (cr->cr_uid && cr->cr_ruid &&
		    cr->cr_uid != p->p_ucred->cr_uid &&
		    cr->cr_ruid != p->p_ucred->cr_uid) {
			error = EPERM;
			break;
		}
		/* disallow setting rtprio in most cases if not superuser */
		if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) {
			/* can't set someone else's */
			if (uap->pid) { /* XXX */
				error = EPERM;
				break;
			}
			/* can't set realtime priority */
			/*
			 * Realtime priority has to be restricted for
			 * reasons which should be obvious.  However, for
			 * idle priority, there is a potential for system
			 * deadlock if an idleprio process gains a lock on
			 * a resource that other processes need (and the
			 * idleprio process can't run due to a CPU-bound
			 * normal process).  Fix me!  XXX
			 */
			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
				error = EPERM;
				break;
			}
		}
		switch (rtp.type) {
#ifdef RTP_PRIO_FIFO
		case RTP_PRIO_FIFO:
#endif
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_NORMAL:
		case RTP_PRIO_IDLE:
			if (rtp.prio > RTP_PRIO_MAX) {
				error = EINVAL;
			} else {
				lp->lwp_rtprio = rtp;
				error = 0;
			}
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}

done:
	if (p) {
		lwkt_reltoken(&p->p_token);
		PRELE(p);
	}
	return (error);
}

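/*
 * Note: unlike lwp_rtprio() above, this legacy interface operates only
 * on the first lwp of the target process (see the XXX below).
 */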
/*
 * Set realtime priority
 *
 * MPALMOSTSAFE
 */
int
sys_rtprio(struct rtprio_args *uap)
{
	struct ucred *cr = curthread->td_ucred;
	struct proc *p;
	struct lwp *lp;
	struct rtprio rtp;
	int error;

	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	if (error)
		return (error);

	if (uap->pid == 0) {
		p = curproc;
		PHOLD(p);
	} else {
		p = pfind(uap->pid);
	}

	if (p == NULL) {
		error = ESRCH;
		goto done;
	}
	lwkt_gettoken(&p->p_token);

	/* XXX lwp */
	lp = FIRST_LWP_IN_PROC(p);
	switch (uap->function) {
	case RTP_LOOKUP:
		error = copyout(&lp->lwp_rtprio, uap->rtp,
				sizeof(struct rtprio));
		break;
	case RTP_SET:
		if (cr->cr_uid && cr->cr_ruid &&
		    cr->cr_uid != p->p_ucred->cr_uid &&
		    cr->cr_ruid != p->p_ucred->cr_uid) {
			error = EPERM;
			break;
		}
		/* disallow setting rtprio in most cases if not superuser */
		if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) {
			/* can't set someone else's */
			if (uap->pid) {
				error = EPERM;
				break;
			}
			/* can't set realtime priority */
			/*
			 * Realtime priority has to be restricted for
			 * reasons which should be obvious.  However, for
			 * idle priority, there is a potential for system
			 * deadlock if an idleprio process gains a lock on
			 * a resource that other processes need (and the
			 * idleprio process can't run due to a CPU-bound
			 * normal process).  Fix me!  XXX
			 */
			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
				error = EPERM;
				break;
			}
		}
		switch (rtp.type) {
#ifdef RTP_PRIO_FIFO
		case RTP_PRIO_FIFO:
#endif
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_NORMAL:
		case RTP_PRIO_IDLE:
			if (rtp.prio > RTP_PRIO_MAX) {
				error = EINVAL;
				break;
			}
			lp->lwp_rtprio = rtp;
			error = 0;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}
done:
	if (p) {
		lwkt_reltoken(&p->p_token);
		PRELE(p);
	}

	return (error);
}

/*
 * MPSAFE
 */
int
sys_setrlimit(struct __setrlimit_args *uap)
{
	struct rlimit alim;
	int error;

	error = copyin(uap->rlp, &alim, sizeof(alim));
	if (error)
		return (error);

	error = kern_setrlimit(uap->which, &alim);

	return (error);
}

/*
 * MPSAFE
 */
int
sys_getrlimit(struct __getrlimit_args *uap)
{
	struct rlimit lim;
	int error;

	error = kern_getrlimit(uap->which, &lim);

	if (error == 0)
		error = copyout(&lim, uap->rlp, sizeof(*uap->rlp));
	return error;
}

/*
 * Transform the running time and tick information in lwp lp's thread
 * into user, system, and interrupt time usage.
 *
 * Since we are limited to statclock tick granularity this is a
 * statistical calculation which will be correct over the long haul,
 * but should not be expected to measure fine grained deltas.
 *
 * It is possible to catch a lwp in the midst of being created, so
 * check whether lwp_thread is NULL or not.
 */
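/*
 * td_uticks and td_sticks are accumulated in microsecond units here,
 * hence the division and modulus by 1000000 below to split out the
 * seconds and microseconds fields.
 */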
void
calcru(struct lwp *lp, struct timeval *up, struct timeval *sp)
{
	struct thread *td;

	/*
	 * Calculate at the statclock level.  YYY if the thread is owned by
	 * another cpu we need to forward the request to the other cpu, or
	 * have a token to interlock the information in order to avoid
	 * racing thread destruction.
	 */
	if ((td = lp->lwp_thread) != NULL) {
		crit_enter();
		up->tv_sec = td->td_uticks / 1000000;
		up->tv_usec = td->td_uticks % 1000000;
		sp->tv_sec = td->td_sticks / 1000000;
		sp->tv_usec = td->td_sticks % 1000000;
		crit_exit();
	}
}

/*
 * Aggregate resource statistics of all lwps of a process.
 *
 * proc.p_ru keeps track of all statistics directly related to a proc.  This
 * consists of RSS usage and nswap information and aggregate numbers for all
 * former lwps of this proc.
 *
 * proc.p_cru is the sum of all stats of reaped children.
 *
 * lwp.lwp_ru contains the stats directly related to one specific lwp, meaning
 * packet, scheduler switch or page fault counts, etc.  This information gets
 * added to lwp.lwp_proc.p_ru when the lwp exits.
 */
void
calcru_proc(struct proc *p, struct rusage *ru)
{
	struct timeval upt, spt;
	long *rip1, *rip2;
	struct lwp *lp;

	*ru = p->p_ru;

	FOREACH_LWP_IN_PROC(lp, p) {
		calcru(lp, &upt, &spt);
		timevaladd(&ru->ru_utime, &upt);
		timevaladd(&ru->ru_stime, &spt);
		for (rip1 = &ru->ru_first, rip2 = &lp->lwp_ru.ru_first;
		     rip1 <= &ru->ru_last;
		     rip1++, rip2++)
			*rip1 += *rip2;
	}
}

/*
 * MPALMOSTSAFE
 */
int
sys_getrusage(struct getrusage_args *uap)
{
	struct proc *p = curproc;
	struct rusage ru;
	struct rusage *rup;
	int error;

	lwkt_gettoken(&p->p_token);

	switch (uap->who) {
	case RUSAGE_SELF:
		rup = &ru;
		calcru_proc(p, rup);
		error = 0;
		break;
	case RUSAGE_CHILDREN:
		rup = &p->p_cru;
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}
	lwkt_reltoken(&p->p_token);

	if (error == 0)
		error = copyout(rup, uap->rusage, sizeof(struct rusage));
	return (error);
}

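/*
 * Sum two rusage structures: the time fields are added with
 * timevaladd(), the maximum of the two ru_maxrss values is kept, and
 * the remaining counters are added field by field.
 */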
void
ruadd(struct rusage *ru, struct rusage *ru2)
{
	long *ip, *ip2;
	int i;

	timevaladd(&ru->ru_utime, &ru2->ru_utime);
	timevaladd(&ru->ru_stime, &ru2->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first;
	ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit(void)
{
	spin_init(&uihash_lock, "uihashinit");
	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
}

/*
 * NOTE: Must be called with uihash_lock held
 *
 * MPSAFE
 */
static struct uidinfo *
uilookup(uid_t uid)
{
	struct uihashhead *uipp;
	struct uidinfo *uip;

	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash) {
		if (uip->ui_uid == uid)
			break;
	}
	return (uip);
}

/*
 * Helper function to create a uid that could not be found.
 * This function will properly deal with races.
 *
 * MPSAFE
 */
static struct uidinfo *
uicreate(uid_t uid)
{
	struct uidinfo *uip, *tmp;

	/*
	 * Allocate space and check for a race
	 */
	uip = kmalloc(sizeof(*uip), M_UIDINFO, M_WAITOK|M_ZERO);

	/*
	 * Initialize structure and enter it into the hash table
	 */
	spin_init(&uip->ui_lock, "uicreate");
	uip->ui_uid = uid;
	uip->ui_ref = 1;	/* we're returning a ref */
	varsymset_init(&uip->ui_varsymset, NULL);

	/*
	 * Somebody may have already created the uidinfo for this
	 * uid.  If so, return that instead.
	 */
	spin_lock(&uihash_lock);
	tmp = uilookup(uid);
	if (tmp != NULL) {
		uihold(tmp);
		spin_unlock(&uihash_lock);

		spin_uninit(&uip->ui_lock);
		varsymset_clean(&uip->ui_varsymset);
		kfree(uip, M_UIDINFO);
		uip = tmp;
	} else {
		LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		spin_unlock(&uihash_lock);
	}
	return (uip);
}

/*
 * MPSAFE
 */
struct uidinfo *
uifind(uid_t uid)
{
	struct uidinfo *uip;

	spin_lock(&uihash_lock);
	uip = uilookup(uid);
	if (uip == NULL) {
		spin_unlock(&uihash_lock);
		uip = uicreate(uid);
	} else {
		uihold(uip);
		spin_unlock(&uihash_lock);
	}
	return (uip);
}

/*
 * Helper function to remove a uidinfo whose reference count is
 * transitioning from 1->0.  The reference count is 1 on call.
 *
 * Zero is returned on success, otherwise non-zero and the
 * uip has not been removed.
 *
 * MPSAFE
 */
static __inline int
uifree(struct uidinfo *uip)
{
	/*
	 * If we are still the only holder after acquiring the uihash_lock
	 * we can safely unlink the uip and destroy it.  Otherwise we lost
	 * a race and must fail.
	 */
	spin_lock(&uihash_lock);
	if (uip->ui_ref != 1) {
		spin_unlock(&uihash_lock);
		return(-1);
	}
	LIST_REMOVE(uip, ui_hash);
	spin_unlock(&uihash_lock);

	/*
	 * The uip is now orphaned and we can destroy it at our
	 * leisure.
	 */
	if (uip->ui_sbsize != 0)
		kprintf("freeing uidinfo: uid = %d, sbsize = %jd\n",
			uip->ui_uid, (intmax_t)uip->ui_sbsize);
	if (uip->ui_proccnt != 0)
		kprintf("freeing uidinfo: uid = %d, proccnt = %ld\n",
			uip->ui_uid, uip->ui_proccnt);

	varsymset_clean(&uip->ui_varsymset);
	lockuninit(&uip->ui_varsymset.vx_lock);
	spin_uninit(&uip->ui_lock);
	kfree(uip, M_UIDINFO);
	return(0);
}

/*
 * MPSAFE
 */
void
uihold(struct uidinfo *uip)
{
	atomic_add_int(&uip->ui_ref, 1);
	KKASSERT(uip->ui_ref >= 0);
}

/*
 * NOTE: It is important for us to not drop the ref count to 0
 *	 because this can cause a 2->0/2->0 race with another
 *	 concurrent dropper.  Losing the race in that situation
 *	 can cause uip to become stale for one of the other
 *	 threads.
 *
 * MPSAFE
 */
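/*
 * The loop below snapshots ui_ref and either hands a 1->0 transition
 * to uifree(), or decrements atomically with cmpset, retrying if a
 * concurrent update changed the count in the meantime.
 */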
void
uidrop(struct uidinfo *uip)
{
	int ref;

	KKASSERT(uip->ui_ref > 0);

	for (;;) {
		ref = uip->ui_ref;
		cpu_ccfence();
		if (ref == 1) {
			if (uifree(uip) == 0)
				break;
		} else if (atomic_cmpset_int(&uip->ui_ref, ref, ref - 1)) {
			break;
		}
		/* else retry */
	}
}

void
uireplace(struct uidinfo **puip, struct uidinfo *nuip)
{
	uidrop(*puip);
	*puip = nuip;
}

/*
 * Change the count associated with number of processes
 * a given user is using.  When 'max' is 0, don't enforce a limit.
 */
int
chgproccnt(struct uidinfo *uip, int diff, int max)
{
	int ret;

	spin_lock(&uip->ui_lock);
	/* don't allow them to exceed max, but allow subtraction */
	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
		ret = 0;
	} else {
		uip->ui_proccnt += diff;
		if (uip->ui_proccnt < 0)
			kprintf("negative proccnt for uid = %d\n", uip->ui_uid);
		ret = 1;
	}
	spin_unlock(&uip->ui_lock);
	return ret;
}

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max)
{
	rlim_t new;

#ifdef __x86_64__
	rlim_t sbsize;

	sbsize = atomic_fetchadd_long(&uip->ui_sbsize, to - *hiwat);
	new = sbsize + to - *hiwat;
#else
	spin_lock(&uip->ui_lock);
	new = uip->ui_sbsize + to - *hiwat;
	uip->ui_sbsize = new;
	spin_unlock(&uip->ui_lock);
#endif
	KKASSERT(new >= 0);

	/*
	 * If we are trying to increase the socket buffer size, scale down
	 * the hi-water mark when we exceed the user's allowed socket
	 * buffer space.
	 *
	 * We can't scale down too much or we will blow up atomic packet
	 * operations.
	 */
	if (to > *hiwat && to > MCLBYTES && new > max) {
		to = to * max / new;
		if (to < MCLBYTES)
			to = MCLBYTES;
	}
	*hiwat = to;
	return (1);
}