1 /*- 2 * Copyright (c) 1982, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94 35 * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/sysproto.h> 41 #include <sys/file.h> 42 #include <sys/kern_syscall.h> 43 #include <sys/kernel.h> 44 #include <sys/resourcevar.h> 45 #include <sys/malloc.h> 46 #include <sys/proc.h> 47 #include <sys/priv.h> 48 #include <sys/time.h> 49 #include <sys/lockf.h> 50 51 #include <vm/vm.h> 52 #include <vm/vm_param.h> 53 #include <sys/lock.h> 54 #include <vm/pmap.h> 55 #include <vm/vm_map.h> 56 57 #include <sys/thread2.h> 58 #include <sys/spinlock2.h> 59 60 static int donice (struct proc *chgp, int n); 61 static int doionice (struct proc *chgp, int n); 62 63 static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures"); 64 #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) 65 static struct spinlock uihash_lock; 66 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; 67 static u_long uihash; /* size of hash table - 1 */ 68 69 static struct uidinfo *uicreate (uid_t uid); 70 static struct uidinfo *uilookup (uid_t uid); 71 72 /* 73 * Resource controls and accounting. 74 */ 75 76 struct getpriority_info { 77 int low; 78 int who; 79 }; 80 81 static int getpriority_callback(struct proc *p, void *data); 82 83 /* 84 * MPALMOSTSAFE 85 */ 86 int 87 sys_getpriority(struct getpriority_args *uap) 88 { 89 struct getpriority_info info; 90 thread_t curtd = curthread; 91 struct proc *curp = curproc; 92 struct proc *p; 93 struct pgrp *pg; 94 int low = PRIO_MAX + 1; 95 int error; 96 97 switch (uap->which) { 98 case PRIO_PROCESS: 99 if (uap->who == 0) { 100 low = curp->p_nice; 101 } else { 102 p = pfind(uap->who); 103 if (p) { 104 lwkt_gettoken_shared(&p->p_token); 105 if (PRISON_CHECK(curtd->td_ucred, p->p_ucred)) 106 low = p->p_nice; 107 lwkt_reltoken(&p->p_token); 108 PRELE(p); 109 } 110 } 111 break; 112 case PRIO_PGRP: 113 if (uap->who == 0) { 114 lwkt_gettoken_shared(&curp->p_token); 115 pg = curp->p_pgrp; 116 pgref(pg); 117 lwkt_reltoken(&curp->p_token); 118 } else if ((pg = pgfind(uap->who)) == NULL) { 119 break; 120 } /* else ref held from pgfind */ 121 122 lwkt_gettoken_shared(&pg->pg_token); 123 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 124 if (PRISON_CHECK(curtd->td_ucred, p->p_ucred) && 125 p->p_nice < low) { 126 low = p->p_nice; 127 } 128 } 129 lwkt_reltoken(&pg->pg_token); 130 pgrel(pg); 131 break; 132 case PRIO_USER: 133 if (uap->who == 0) 134 uap->who = curtd->td_ucred->cr_uid; 135 info.low = low; 136 info.who = uap->who; 137 allproc_scan(getpriority_callback, &info, 0); 138 low = info.low; 139 break; 140 141 default: 142 error = EINVAL; 143 goto done; 144 } 145 if (low == PRIO_MAX + 1) { 146 error = ESRCH; 147 goto done; 148 } 149 uap->sysmsg_result = low; 150 error = 0; 151 done: 152 return (error); 153 } 154 155 /* 156 * Figure out the current lowest nice priority for processes owned 157 * by the specified user. 158 */ 159 static 160 int 161 getpriority_callback(struct proc *p, void *data) 162 { 163 struct getpriority_info *info = data; 164 165 lwkt_gettoken_shared(&p->p_token); 166 if (PRISON_CHECK(curthread->td_ucred, p->p_ucred) && 167 p->p_ucred->cr_uid == info->who && 168 p->p_nice < info->low) { 169 info->low = p->p_nice; 170 } 171 lwkt_reltoken(&p->p_token); 172 return(0); 173 } 174 175 struct setpriority_info { 176 int prio; 177 int who; 178 int error; 179 int found; 180 }; 181 182 static int setpriority_callback(struct proc *p, void *data); 183 184 /* 185 * MPALMOSTSAFE 186 */ 187 int 188 sys_setpriority(struct setpriority_args *uap) 189 { 190 struct setpriority_info info; 191 thread_t curtd = curthread; 192 struct proc *curp = curproc; 193 struct proc *p; 194 struct pgrp *pg; 195 int found = 0, error = 0; 196 197 switch (uap->which) { 198 case PRIO_PROCESS: 199 if (uap->who == 0) { 200 lwkt_gettoken(&curp->p_token); 201 error = donice(curp, uap->prio); 202 found++; 203 lwkt_reltoken(&curp->p_token); 204 } else { 205 p = pfind(uap->who); 206 if (p) { 207 lwkt_gettoken(&p->p_token); 208 if (PRISON_CHECK(curtd->td_ucred, p->p_ucred)) { 209 error = donice(p, uap->prio); 210 found++; 211 } 212 lwkt_reltoken(&p->p_token); 213 PRELE(p); 214 } 215 } 216 break; 217 case PRIO_PGRP: 218 if (uap->who == 0) { 219 lwkt_gettoken_shared(&curp->p_token); 220 pg = curp->p_pgrp; 221 pgref(pg); 222 lwkt_reltoken(&curp->p_token); 223 } else if ((pg = pgfind(uap->who)) == NULL) { 224 break; 225 } /* else ref held from pgfind */ 226 227 lwkt_gettoken(&pg->pg_token); 228 restart: 229 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 230 PHOLD(p); 231 lwkt_gettoken(&p->p_token); 232 if (p->p_pgrp == pg && 233 PRISON_CHECK(curtd->td_ucred, p->p_ucred)) { 234 error = donice(p, uap->prio); 235 found++; 236 } 237 lwkt_reltoken(&p->p_token); 238 if (p->p_pgrp != pg) { 239 PRELE(p); 240 goto restart; 241 } 242 PRELE(p); 243 } 244 lwkt_reltoken(&pg->pg_token); 245 pgrel(pg); 246 break; 247 case PRIO_USER: 248 if (uap->who == 0) 249 uap->who = curtd->td_ucred->cr_uid; 250 info.prio = uap->prio; 251 info.who = uap->who; 252 info.error = 0; 253 info.found = 0; 254 allproc_scan(setpriority_callback, &info, 0); 255 error = info.error; 256 found = info.found; 257 break; 258 default: 259 error = EINVAL; 260 found = 1; 261 break; 262 } 263 264 if (found == 0) 265 error = ESRCH; 266 return (error); 267 } 268 269 static 270 int 271 setpriority_callback(struct proc *p, void *data) 272 { 273 struct setpriority_info *info = data; 274 int error; 275 276 lwkt_gettoken(&p->p_token); 277 if (p->p_ucred->cr_uid == info->who && 278 PRISON_CHECK(curthread->td_ucred, p->p_ucred)) { 279 error = donice(p, info->prio); 280 if (error) 281 info->error = error; 282 ++info->found; 283 } 284 lwkt_reltoken(&p->p_token); 285 return(0); 286 } 287 288 /* 289 * Caller must hold chgp->p_token 290 */ 291 static int 292 donice(struct proc *chgp, int n) 293 { 294 struct ucred *cr = curthread->td_ucred; 295 struct lwp *lp; 296 297 if (cr->cr_uid && cr->cr_ruid && 298 cr->cr_uid != chgp->p_ucred->cr_uid && 299 cr->cr_ruid != chgp->p_ucred->cr_uid) 300 return (EPERM); 301 if (n > PRIO_MAX) 302 n = PRIO_MAX; 303 if (n < PRIO_MIN) 304 n = PRIO_MIN; 305 if (n < chgp->p_nice && priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0)) 306 return (EACCES); 307 chgp->p_nice = n; 308 FOREACH_LWP_IN_PROC(lp, chgp) { 309 LWPHOLD(lp); 310 chgp->p_usched->resetpriority(lp); 311 LWPRELE(lp); 312 } 313 return (0); 314 } 315 316 317 struct ioprio_get_info { 318 int high; 319 int who; 320 }; 321 322 static int ioprio_get_callback(struct proc *p, void *data); 323 324 /* 325 * MPALMOSTSAFE 326 */ 327 int 328 sys_ioprio_get(struct ioprio_get_args *uap) 329 { 330 struct ioprio_get_info info; 331 thread_t curtd = curthread; 332 struct proc *curp = curproc; 333 struct proc *p; 334 struct pgrp *pg; 335 int high = IOPRIO_MIN-2; 336 int error; 337 338 switch (uap->which) { 339 case PRIO_PROCESS: 340 if (uap->who == 0) { 341 high = curp->p_ionice; 342 } else { 343 p = pfind(uap->who); 344 if (p) { 345 lwkt_gettoken_shared(&p->p_token); 346 if (PRISON_CHECK(curtd->td_ucred, p->p_ucred)) 347 high = p->p_ionice; 348 lwkt_reltoken(&p->p_token); 349 PRELE(p); 350 } 351 } 352 break; 353 case PRIO_PGRP: 354 if (uap->who == 0) { 355 lwkt_gettoken_shared(&curp->p_token); 356 pg = curp->p_pgrp; 357 pgref(pg); 358 lwkt_reltoken(&curp->p_token); 359 } else if ((pg = pgfind(uap->who)) == NULL) { 360 break; 361 } /* else ref held from pgfind */ 362 363 lwkt_gettoken_shared(&pg->pg_token); 364 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 365 if (PRISON_CHECK(curtd->td_ucred, p->p_ucred) && 366 p->p_nice > high) 367 high = p->p_ionice; 368 } 369 lwkt_reltoken(&pg->pg_token); 370 pgrel(pg); 371 break; 372 case PRIO_USER: 373 if (uap->who == 0) 374 uap->who = curtd->td_ucred->cr_uid; 375 info.high = high; 376 info.who = uap->who; 377 allproc_scan(ioprio_get_callback, &info, 0); 378 high = info.high; 379 break; 380 default: 381 error = EINVAL; 382 goto done; 383 } 384 if (high == IOPRIO_MIN-2) { 385 error = ESRCH; 386 goto done; 387 } 388 uap->sysmsg_result = high; 389 error = 0; 390 done: 391 return (error); 392 } 393 394 /* 395 * Figure out the current lowest nice priority for processes owned 396 * by the specified user. 397 */ 398 static 399 int 400 ioprio_get_callback(struct proc *p, void *data) 401 { 402 struct ioprio_get_info *info = data; 403 404 lwkt_gettoken_shared(&p->p_token); 405 if (PRISON_CHECK(curthread->td_ucred, p->p_ucred) && 406 p->p_ucred->cr_uid == info->who && 407 p->p_ionice > info->high) { 408 info->high = p->p_ionice; 409 } 410 lwkt_reltoken(&p->p_token); 411 return(0); 412 } 413 414 415 struct ioprio_set_info { 416 int prio; 417 int who; 418 int error; 419 int found; 420 }; 421 422 static int ioprio_set_callback(struct proc *p, void *data); 423 424 /* 425 * MPALMOSTSAFE 426 */ 427 int 428 sys_ioprio_set(struct ioprio_set_args *uap) 429 { 430 struct ioprio_set_info info; 431 thread_t curtd = curthread; 432 struct proc *curp = curproc; 433 struct proc *p; 434 struct pgrp *pg; 435 int found = 0, error = 0; 436 437 switch (uap->which) { 438 case PRIO_PROCESS: 439 if (uap->who == 0) { 440 lwkt_gettoken(&curp->p_token); 441 error = doionice(curp, uap->prio); 442 lwkt_reltoken(&curp->p_token); 443 found++; 444 } else { 445 p = pfind(uap->who); 446 if (p) { 447 lwkt_gettoken(&p->p_token); 448 if (PRISON_CHECK(curtd->td_ucred, p->p_ucred)) { 449 error = doionice(p, uap->prio); 450 found++; 451 } 452 lwkt_reltoken(&p->p_token); 453 PRELE(p); 454 } 455 } 456 break; 457 case PRIO_PGRP: 458 if (uap->who == 0) { 459 lwkt_gettoken_shared(&curp->p_token); 460 pg = curp->p_pgrp; 461 pgref(pg); 462 lwkt_reltoken(&curp->p_token); 463 } else if ((pg = pgfind(uap->who)) == NULL) { 464 break; 465 } /* else ref held from pgfind */ 466 467 lwkt_gettoken(&pg->pg_token); 468 restart: 469 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 470 PHOLD(p); 471 lwkt_gettoken(&p->p_token); 472 if (p->p_pgrp == pg && 473 PRISON_CHECK(curtd->td_ucred, p->p_ucred)) { 474 error = doionice(p, uap->prio); 475 found++; 476 } 477 lwkt_reltoken(&p->p_token); 478 if (p->p_pgrp != pg) { 479 PRELE(p); 480 goto restart; 481 } 482 PRELE(p); 483 } 484 lwkt_reltoken(&pg->pg_token); 485 pgrel(pg); 486 break; 487 case PRIO_USER: 488 if (uap->who == 0) 489 uap->who = curtd->td_ucred->cr_uid; 490 info.prio = uap->prio; 491 info.who = uap->who; 492 info.error = 0; 493 info.found = 0; 494 allproc_scan(ioprio_set_callback, &info, 0); 495 error = info.error; 496 found = info.found; 497 break; 498 default: 499 error = EINVAL; 500 found = 1; 501 break; 502 } 503 504 if (found == 0) 505 error = ESRCH; 506 return (error); 507 } 508 509 static 510 int 511 ioprio_set_callback(struct proc *p, void *data) 512 { 513 struct ioprio_set_info *info = data; 514 int error; 515 516 lwkt_gettoken(&p->p_token); 517 if (p->p_ucred->cr_uid == info->who && 518 PRISON_CHECK(curthread->td_ucred, p->p_ucred)) { 519 error = doionice(p, info->prio); 520 if (error) 521 info->error = error; 522 ++info->found; 523 } 524 lwkt_reltoken(&p->p_token); 525 return(0); 526 } 527 528 static int 529 doionice(struct proc *chgp, int n) 530 { 531 struct ucred *cr = curthread->td_ucred; 532 533 if (cr->cr_uid && cr->cr_ruid && 534 cr->cr_uid != chgp->p_ucred->cr_uid && 535 cr->cr_ruid != chgp->p_ucred->cr_uid) 536 return (EPERM); 537 if (n > IOPRIO_MAX) 538 n = IOPRIO_MAX; 539 if (n < IOPRIO_MIN) 540 n = IOPRIO_MIN; 541 if (n < chgp->p_ionice && 542 priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0)) 543 return (EACCES); 544 chgp->p_ionice = n; 545 546 return (0); 547 548 } 549 550 /* 551 * MPALMOSTSAFE 552 */ 553 int 554 sys_lwp_rtprio(struct lwp_rtprio_args *uap) 555 { 556 struct ucred *cr = curthread->td_ucred; 557 struct proc *p; 558 struct lwp *lp; 559 struct rtprio rtp; 560 int error; 561 562 error = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); 563 if (error) 564 return error; 565 if (uap->pid < 0) 566 return EINVAL; 567 568 if (uap->pid == 0) { 569 p = curproc; 570 PHOLD(p); 571 } else { 572 p = pfind(uap->pid); 573 } 574 if (p == NULL) { 575 error = ESRCH; 576 goto done; 577 } 578 lwkt_gettoken(&p->p_token); 579 580 if (uap->tid < -1) { 581 error = EINVAL; 582 goto done; 583 } 584 if (uap->tid == -1) { 585 /* 586 * sadly, tid can be 0 so we can't use 0 here 587 * like sys_rtprio() 588 */ 589 lp = curthread->td_lwp; 590 } else { 591 lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid); 592 if (lp == NULL) { 593 error = ESRCH; 594 goto done; 595 } 596 } 597 598 /* 599 * Make sure that this lwp is not ripped if any of the following 600 * code blocks, e.g. copyout. 601 */ 602 LWPHOLD(lp); 603 switch (uap->function) { 604 case RTP_LOOKUP: 605 error = copyout(&lp->lwp_rtprio, uap->rtp, 606 sizeof(struct rtprio)); 607 break; 608 case RTP_SET: 609 if (cr->cr_uid && cr->cr_ruid && 610 cr->cr_uid != p->p_ucred->cr_uid && 611 cr->cr_ruid != p->p_ucred->cr_uid) { 612 error = EPERM; 613 break; 614 } 615 /* disallow setting rtprio in most cases if not superuser */ 616 if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) { 617 /* can't set someone else's */ 618 if (uap->pid) { /* XXX */ 619 error = EPERM; 620 break; 621 } 622 /* can't set realtime priority */ 623 /* 624 * Realtime priority has to be restricted for reasons which should be 625 * obvious. However, for idle priority, there is a potential for 626 * system deadlock if an idleprio process gains a lock on a resource 627 * that other processes need (and the idleprio process can't run 628 * due to a CPU-bound normal process). Fix me! XXX 629 */ 630 if (RTP_PRIO_IS_REALTIME(rtp.type)) { 631 error = EPERM; 632 break; 633 } 634 } 635 switch (rtp.type) { 636 #ifdef RTP_PRIO_FIFO 637 case RTP_PRIO_FIFO: 638 #endif 639 case RTP_PRIO_REALTIME: 640 case RTP_PRIO_NORMAL: 641 case RTP_PRIO_IDLE: 642 if (rtp.prio > RTP_PRIO_MAX) { 643 error = EINVAL; 644 } else { 645 lp->lwp_rtprio = rtp; 646 error = 0; 647 } 648 break; 649 default: 650 error = EINVAL; 651 break; 652 } 653 break; 654 default: 655 error = EINVAL; 656 break; 657 } 658 LWPRELE(lp); 659 660 done: 661 if (p) { 662 lwkt_reltoken(&p->p_token); 663 PRELE(p); 664 } 665 return (error); 666 } 667 668 /* 669 * Set realtime priority 670 * 671 * MPALMOSTSAFE 672 */ 673 int 674 sys_rtprio(struct rtprio_args *uap) 675 { 676 struct ucred *cr = curthread->td_ucred; 677 struct proc *p; 678 struct lwp *lp; 679 struct rtprio rtp; 680 int error; 681 682 error = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); 683 if (error) 684 return (error); 685 686 if (uap->pid == 0) { 687 p = curproc; 688 PHOLD(p); 689 } else { 690 p = pfind(uap->pid); 691 } 692 693 if (p == NULL) { 694 error = ESRCH; 695 goto done; 696 } 697 lwkt_gettoken(&p->p_token); 698 699 /* XXX lwp */ 700 lp = FIRST_LWP_IN_PROC(p); 701 switch (uap->function) { 702 case RTP_LOOKUP: 703 error = copyout(&lp->lwp_rtprio, uap->rtp, 704 sizeof(struct rtprio)); 705 break; 706 case RTP_SET: 707 if (cr->cr_uid && cr->cr_ruid && 708 cr->cr_uid != p->p_ucred->cr_uid && 709 cr->cr_ruid != p->p_ucred->cr_uid) { 710 error = EPERM; 711 break; 712 } 713 /* disallow setting rtprio in most cases if not superuser */ 714 if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) { 715 /* can't set someone else's */ 716 if (uap->pid) { 717 error = EPERM; 718 break; 719 } 720 /* can't set realtime priority */ 721 /* 722 * Realtime priority has to be restricted for reasons which should be 723 * obvious. However, for idle priority, there is a potential for 724 * system deadlock if an idleprio process gains a lock on a resource 725 * that other processes need (and the idleprio process can't run 726 * due to a CPU-bound normal process). Fix me! XXX 727 */ 728 if (RTP_PRIO_IS_REALTIME(rtp.type)) { 729 error = EPERM; 730 break; 731 } 732 } 733 switch (rtp.type) { 734 #ifdef RTP_PRIO_FIFO 735 case RTP_PRIO_FIFO: 736 #endif 737 case RTP_PRIO_REALTIME: 738 case RTP_PRIO_NORMAL: 739 case RTP_PRIO_IDLE: 740 if (rtp.prio > RTP_PRIO_MAX) { 741 error = EINVAL; 742 break; 743 } 744 lp->lwp_rtprio = rtp; 745 error = 0; 746 break; 747 default: 748 error = EINVAL; 749 break; 750 } 751 break; 752 default: 753 error = EINVAL; 754 break; 755 } 756 done: 757 if (p) { 758 lwkt_reltoken(&p->p_token); 759 PRELE(p); 760 } 761 762 return (error); 763 } 764 765 /* 766 * MPSAFE 767 */ 768 int 769 sys_setrlimit(struct __setrlimit_args *uap) 770 { 771 struct rlimit alim; 772 int error; 773 774 error = copyin(uap->rlp, &alim, sizeof(alim)); 775 if (error) 776 return (error); 777 778 error = kern_setrlimit(uap->which, &alim); 779 780 return (error); 781 } 782 783 /* 784 * MPSAFE 785 */ 786 int 787 sys_getrlimit(struct __getrlimit_args *uap) 788 { 789 struct rlimit lim; 790 int error; 791 792 error = kern_getrlimit(uap->which, &lim); 793 794 if (error == 0) 795 error = copyout(&lim, uap->rlp, sizeof(*uap->rlp)); 796 return error; 797 } 798 799 /* 800 * Transform the running time and tick information in lwp lp's thread into user, 801 * system, and interrupt time usage. 802 * 803 * Since we are limited to statclock tick granularity this is a statisical 804 * calculation which will be correct over the long haul, but should not be 805 * expected to measure fine grained deltas. 806 * 807 * It is possible to catch a lwp in the midst of being created, so 808 * check whether lwp_thread is NULL or not. 809 */ 810 void 811 calcru(struct lwp *lp, struct timeval *up, struct timeval *sp) 812 { 813 struct thread *td; 814 815 /* 816 * Calculate at the statclock level. YYY if the thread is owned by 817 * another cpu we need to forward the request to the other cpu, or 818 * have a token to interlock the information in order to avoid racing 819 * thread destruction. 820 */ 821 if ((td = lp->lwp_thread) != NULL) { 822 crit_enter(); 823 up->tv_sec = td->td_uticks / 1000000; 824 up->tv_usec = td->td_uticks % 1000000; 825 sp->tv_sec = td->td_sticks / 1000000; 826 sp->tv_usec = td->td_sticks % 1000000; 827 crit_exit(); 828 } 829 } 830 831 /* 832 * Aggregate resource statistics of all lwps of a process. 833 * 834 * proc.p_ru keeps track of all statistics directly related to a proc. This 835 * consists of RSS usage and nswap information and aggregate numbers for all 836 * former lwps of this proc. 837 * 838 * proc.p_cru is the sum of all stats of reaped children. 839 * 840 * lwp.lwp_ru contains the stats directly related to one specific lwp, meaning 841 * packet, scheduler switch or page fault counts, etc. This information gets 842 * added to lwp.lwp_proc.p_ru when the lwp exits. 843 */ 844 void 845 calcru_proc(struct proc *p, struct rusage *ru) 846 { 847 struct timeval upt, spt; 848 long *rip1, *rip2; 849 struct lwp *lp; 850 851 *ru = p->p_ru; 852 853 FOREACH_LWP_IN_PROC(lp, p) { 854 calcru(lp, &upt, &spt); 855 timevaladd(&ru->ru_utime, &upt); 856 timevaladd(&ru->ru_stime, &spt); 857 for (rip1 = &ru->ru_first, rip2 = &lp->lwp_ru.ru_first; 858 rip1 <= &ru->ru_last; 859 rip1++, rip2++) 860 *rip1 += *rip2; 861 } 862 } 863 864 865 /* 866 * MPALMOSTSAFE 867 */ 868 int 869 sys_getrusage(struct getrusage_args *uap) 870 { 871 struct proc *p = curproc; 872 struct rusage ru; 873 struct rusage *rup; 874 int error; 875 876 lwkt_gettoken(&p->p_token); 877 878 switch (uap->who) { 879 case RUSAGE_SELF: 880 rup = &ru; 881 calcru_proc(p, rup); 882 error = 0; 883 break; 884 case RUSAGE_CHILDREN: 885 rup = &p->p_cru; 886 error = 0; 887 break; 888 default: 889 error = EINVAL; 890 break; 891 } 892 lwkt_reltoken(&p->p_token); 893 894 if (error == 0) 895 error = copyout(rup, uap->rusage, sizeof(struct rusage)); 896 return (error); 897 } 898 899 void 900 ruadd(struct rusage *ru, struct rusage *ru2) 901 { 902 long *ip, *ip2; 903 int i; 904 905 timevaladd(&ru->ru_utime, &ru2->ru_utime); 906 timevaladd(&ru->ru_stime, &ru2->ru_stime); 907 if (ru->ru_maxrss < ru2->ru_maxrss) 908 ru->ru_maxrss = ru2->ru_maxrss; 909 ip = &ru->ru_first; ip2 = &ru2->ru_first; 910 for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) 911 *ip++ += *ip2++; 912 } 913 914 /* 915 * Find the uidinfo structure for a uid. This structure is used to 916 * track the total resource consumption (process count, socket buffer 917 * size, etc.) for the uid and impose limits. 918 */ 919 void 920 uihashinit(void) 921 { 922 spin_init(&uihash_lock, "uihashinit"); 923 uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash); 924 } 925 926 /* 927 * NOTE: Must be called with uihash_lock held 928 * 929 * MPSAFE 930 */ 931 static struct uidinfo * 932 uilookup(uid_t uid) 933 { 934 struct uihashhead *uipp; 935 struct uidinfo *uip; 936 937 uipp = UIHASH(uid); 938 LIST_FOREACH(uip, uipp, ui_hash) { 939 if (uip->ui_uid == uid) 940 break; 941 } 942 return (uip); 943 } 944 945 /* 946 * Helper function to creat ea uid that could not be found. 947 * This function will properly deal with races. 948 * 949 * MPSAFE 950 */ 951 static struct uidinfo * 952 uicreate(uid_t uid) 953 { 954 struct uidinfo *uip, *tmp; 955 956 /* 957 * Allocate space and check for a race 958 */ 959 uip = kmalloc(sizeof(*uip), M_UIDINFO, M_WAITOK|M_ZERO); 960 961 /* 962 * Initialize structure and enter it into the hash table 963 */ 964 spin_init(&uip->ui_lock, "uicreate"); 965 uip->ui_uid = uid; 966 uip->ui_ref = 1; /* we're returning a ref */ 967 varsymset_init(&uip->ui_varsymset, NULL); 968 969 /* 970 * Somebody may have already created the uidinfo for this 971 * uid. If so, return that instead. 972 */ 973 spin_lock(&uihash_lock); 974 tmp = uilookup(uid); 975 if (tmp != NULL) { 976 uihold(tmp); 977 spin_unlock(&uihash_lock); 978 979 spin_uninit(&uip->ui_lock); 980 varsymset_clean(&uip->ui_varsymset); 981 kfree(uip, M_UIDINFO); 982 uip = tmp; 983 } else { 984 LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash); 985 spin_unlock(&uihash_lock); 986 } 987 return (uip); 988 } 989 990 /* 991 * 992 * 993 * MPSAFE 994 */ 995 struct uidinfo * 996 uifind(uid_t uid) 997 { 998 struct uidinfo *uip; 999 1000 spin_lock(&uihash_lock); 1001 uip = uilookup(uid); 1002 if (uip == NULL) { 1003 spin_unlock(&uihash_lock); 1004 uip = uicreate(uid); 1005 } else { 1006 uihold(uip); 1007 spin_unlock(&uihash_lock); 1008 } 1009 return (uip); 1010 } 1011 1012 /* 1013 * Helper funtion to remove a uidinfo whos reference count is 1014 * transitioning from 1->0. The reference count is 1 on call. 1015 * 1016 * Zero is returned on success, otherwise non-zero and the 1017 * uiphas not been removed. 1018 * 1019 * MPSAFE 1020 */ 1021 static __inline int 1022 uifree(struct uidinfo *uip) 1023 { 1024 /* 1025 * If we are still the only holder after acquiring the uihash_lock 1026 * we can safely unlink the uip and destroy it. Otherwise we lost 1027 * a race and must fail. 1028 */ 1029 spin_lock(&uihash_lock); 1030 if (uip->ui_ref != 1) { 1031 spin_unlock(&uihash_lock); 1032 return(-1); 1033 } 1034 LIST_REMOVE(uip, ui_hash); 1035 spin_unlock(&uihash_lock); 1036 1037 /* 1038 * The uip is now orphaned and we can destroy it at our 1039 * leisure. 1040 */ 1041 if (uip->ui_sbsize != 0) 1042 kprintf("freeing uidinfo: uid = %d, sbsize = %jd\n", 1043 uip->ui_uid, (intmax_t)uip->ui_sbsize); 1044 if (uip->ui_proccnt != 0) 1045 kprintf("freeing uidinfo: uid = %d, proccnt = %ld\n", 1046 uip->ui_uid, uip->ui_proccnt); 1047 1048 varsymset_clean(&uip->ui_varsymset); 1049 lockuninit(&uip->ui_varsymset.vx_lock); 1050 spin_uninit(&uip->ui_lock); 1051 kfree(uip, M_UIDINFO); 1052 return(0); 1053 } 1054 1055 /* 1056 * MPSAFE 1057 */ 1058 void 1059 uihold(struct uidinfo *uip) 1060 { 1061 atomic_add_int(&uip->ui_ref, 1); 1062 KKASSERT(uip->ui_ref >= 0); 1063 } 1064 1065 /* 1066 * NOTE: It is important for us to not drop the ref count to 0 1067 * because this can cause a 2->0/2->0 race with another 1068 * concurrent dropper. Losing the race in that situation 1069 * can cause uip to become stale for one of the other 1070 * threads. 1071 * 1072 * MPSAFE 1073 */ 1074 void 1075 uidrop(struct uidinfo *uip) 1076 { 1077 int ref; 1078 1079 KKASSERT(uip->ui_ref > 0); 1080 1081 for (;;) { 1082 ref = uip->ui_ref; 1083 cpu_ccfence(); 1084 if (ref == 1) { 1085 if (uifree(uip) == 0) 1086 break; 1087 } else if (atomic_cmpset_int(&uip->ui_ref, ref, ref - 1)) { 1088 break; 1089 } 1090 /* else retry */ 1091 } 1092 } 1093 1094 void 1095 uireplace(struct uidinfo **puip, struct uidinfo *nuip) 1096 { 1097 uidrop(*puip); 1098 *puip = nuip; 1099 } 1100 1101 /* 1102 * Change the count associated with number of processes 1103 * a given user is using. When 'max' is 0, don't enforce a limit 1104 */ 1105 int 1106 chgproccnt(struct uidinfo *uip, int diff, int max) 1107 { 1108 int ret; 1109 spin_lock(&uip->ui_lock); 1110 /* don't allow them to exceed max, but allow subtraction */ 1111 if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) { 1112 ret = 0; 1113 } else { 1114 uip->ui_proccnt += diff; 1115 if (uip->ui_proccnt < 0) 1116 kprintf("negative proccnt for uid = %d\n", uip->ui_uid); 1117 ret = 1; 1118 } 1119 spin_unlock(&uip->ui_lock); 1120 return ret; 1121 } 1122 1123 /* 1124 * Change the total socket buffer size a user has used. 1125 */ 1126 int 1127 chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max) 1128 { 1129 rlim_t new; 1130 1131 #ifdef __x86_64__ 1132 rlim_t sbsize; 1133 1134 sbsize = atomic_fetchadd_long(&uip->ui_sbsize, to - *hiwat); 1135 new = sbsize + to - *hiwat; 1136 #else 1137 spin_lock(&uip->ui_lock); 1138 new = uip->ui_sbsize + to - *hiwat; 1139 uip->ui_sbsize = new; 1140 spin_unlock(&uip->ui_lock); 1141 #endif 1142 KKASSERT(new >= 0); 1143 1144 /* 1145 * If we are trying to increase the socket buffer size 1146 * Scale down the hi water mark when we exceed the user's 1147 * allowed socket buffer space. 1148 * 1149 * We can't scale down too much or we will blow up atomic packet 1150 * operations. 1151 */ 1152 if (to > *hiwat && to > MCLBYTES && new > max) { 1153 to = to * max / new; 1154 if (to < MCLBYTES) 1155 to = MCLBYTES; 1156 } 1157 *hiwat = to; 1158 return (1); 1159 } 1160