1 /*- 2 * Copyright (c) 1982, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94 35 * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $ 36 */ 37 38 #include "opt_compat.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/sysproto.h> 43 #include <sys/file.h> 44 #include <sys/kern_syscall.h> 45 #include <sys/kernel.h> 46 #include <sys/resourcevar.h> 47 #include <sys/malloc.h> 48 #include <sys/proc.h> 49 #include <sys/priv.h> 50 #include <sys/time.h> 51 #include <sys/lockf.h> 52 53 #include <vm/vm.h> 54 #include <vm/vm_param.h> 55 #include <sys/lock.h> 56 #include <vm/pmap.h> 57 #include <vm/vm_map.h> 58 59 #include <sys/thread2.h> 60 #include <sys/spinlock2.h> 61 62 static int donice (struct proc *chgp, int n); 63 static int doionice (struct proc *chgp, int n); 64 65 static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures"); 66 #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) 67 static struct spinlock uihash_lock; 68 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; 69 static u_long uihash; /* size of hash table - 1 */ 70 71 static struct uidinfo *uicreate (uid_t uid); 72 static struct uidinfo *uilookup (uid_t uid); 73 74 /* 75 * Resource controls and accounting. 
 */

/*
 * Accumulator passed through allproc_scan() for the PRIO_USER case.
 */
struct getpriority_info {
	int low;		/* lowest nice value found so far */
	int who;		/* uid being matched */
};

static int getpriority_callback(struct proc *p, void *data);

/*
 * getpriority() system call.
 *
 * Returns the lowest nice value among the processes selected by
 * (uap->which, uap->who), or ESRCH if no visible process matched.
 * Jail visibility is enforced via PRISON_CHECK().
 *
 * MPALMOSTSAFE
 */
int
sys_getpriority(struct getpriority_args *uap)
{
	struct getpriority_info info;
	thread_t curtd = curthread;
	struct proc *curp = curproc;
	struct proc *p;
	struct pgrp *pg;
	int low = PRIO_MAX + 1;		/* sentinel: "nothing found yet" */
	int error;

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			low = curp->p_nice;
		} else {
			/* pfind() returns the proc held (PHOLD) */
			p = pfind(uap->who);
			if (p) {
				lwkt_gettoken_shared(&p->p_token);
				if (PRISON_CHECK(curtd->td_ucred, p->p_ucred))
					low = p->p_nice;
				lwkt_reltoken(&p->p_token);
				PRELE(p);
			}
		}
		break;
	case PRIO_PGRP:
		if (uap->who == 0) {
			/* hold curp's token only long enough to ref the pgrp */
			lwkt_gettoken_shared(&curp->p_token);
			pg = curp->p_pgrp;
			pgref(pg);
			lwkt_reltoken(&curp->p_token);
		} else if ((pg = pgfind(uap->who)) == NULL) {
			break;
		} /* else ref held from pgfind */

		lwkt_gettoken_shared(&pg->pg_token);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (PRISON_CHECK(curtd->td_ucred, p->p_ucred) &&
			    p->p_nice < low) {
				low = p->p_nice;
			}
		}
		lwkt_reltoken(&pg->pg_token);
		pgrel(pg);
		break;
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curtd->td_ucred->cr_uid;
		info.low = low;
		info.who = uap->who;
		allproc_scan(getpriority_callback, &info);
		low = info.low;
		break;

	default:
		error = EINVAL;
		goto done;
	}
	/* sentinel untouched means no matching process was visible */
	if (low == PRIO_MAX + 1) {
		error = ESRCH;
		goto done;
	}
	uap->sysmsg_result = low;
	error = 0;
done:
	return (error);
}

/*
 * allproc_scan() callback: figure out the current lowest nice priority
 * for processes owned by the specified user.
 */
static
int
getpriority_callback(struct proc *p, void *data)
{
	struct getpriority_info *info = data;

	lwkt_gettoken_shared(&p->p_token);
	if (PRISON_CHECK(curthread->td_ucred, p->p_ucred) &&
	    p->p_ucred->cr_uid == info->who &&
	    p->p_nice < info->low) {
		info->low = p->p_nice;
	}
	lwkt_reltoken(&p->p_token);
	return(0);
}

/*
 * Accumulator passed through allproc_scan() for the PRIO_USER case.
 */
struct setpriority_info {
	int prio;		/* requested nice value */
	int who;		/* uid being matched */
	int error;		/* last error from donice() */
	int found;		/* number of processes adjusted */
};

static int setpriority_callback(struct proc *p, void *data);

/*
 * setpriority() system call.
 *
 * Applies the requested nice value (via donice()) to every process
 * selected by (uap->which, uap->who).  Returns ESRCH if no process
 * matched, otherwise the last error reported by donice().
 *
 * MPALMOSTSAFE
 */
int
sys_setpriority(struct setpriority_args *uap)
{
	struct setpriority_info info;
	thread_t curtd = curthread;
	struct proc *curp = curproc;
	struct proc *p;
	struct pgrp *pg;
	int found = 0, error = 0;

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			/* donice() requires the target's p_token exclusively */
			lwkt_gettoken(&curp->p_token);
			error = donice(curp, uap->prio);
			found++;
			lwkt_reltoken(&curp->p_token);
		} else {
			p = pfind(uap->who);
			if (p) {
				lwkt_gettoken(&p->p_token);
				if (PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
					error = donice(p, uap->prio);
					found++;
				}
				lwkt_reltoken(&p->p_token);
				PRELE(p);
			}
		}
		break;
	case PRIO_PGRP:
		if (uap->who == 0) {
			lwkt_gettoken_shared(&curp->p_token);
			pg = curp->p_pgrp;
			pgref(pg);
			lwkt_reltoken(&curp->p_token);
		} else if ((pg = pgfind(uap->who)) == NULL) {
			break;
		} /* else ref held from pgfind */

		lwkt_gettoken(&pg->pg_token);
restart:
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PHOLD(p);
			lwkt_gettoken(&p->p_token);
			if (p->p_pgrp == pg &&
			    PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
				error = donice(p, uap->prio);
				found++;
			}
			lwkt_reltoken(&p->p_token);
			/*
			 * If the process moved out of the pgrp while we
			 * held it our list iterator is unsafe; restart.
			 */
			if (p->p_pgrp != pg) {
				PRELE(p);
				goto restart;
			}
			PRELE(p);
		}
		lwkt_reltoken(&pg->pg_token);
		pgrel(pg);
		break;
	case
PRIO_USER:
		if (uap->who == 0)
			uap->who = curtd->td_ucred->cr_uid;
		info.prio = uap->prio;
		info.who = uap->who;
		info.error = 0;
		info.found = 0;
		allproc_scan(setpriority_callback, &info);
		error = info.error;
		found = info.found;
		break;
	default:
		error = EINVAL;
		found = 1;	/* suppress the ESRCH below; report EINVAL */
		break;
	}

	if (found == 0)
		error = ESRCH;
	return (error);
}

/*
 * allproc_scan() callback for sys_setpriority PRIO_USER: apply donice()
 * to every visible process owned by info->who.  Only the last error is
 * retained.
 */
static
int
setpriority_callback(struct proc *p, void *data)
{
	struct setpriority_info *info = data;
	int error;

	lwkt_gettoken(&p->p_token);
	if (p->p_ucred->cr_uid == info->who &&
	    PRISON_CHECK(curthread->td_ucred, p->p_ucred)) {
		error = donice(p, info->prio);
		if (error)
			info->error = error;
		++info->found;
	}
	lwkt_reltoken(&p->p_token);
	return(0);
}

/*
 * Set chgp's nice value to n (clamped to [PRIO_MIN, PRIO_MAX]) and
 * reschedule all of its lwps.  Returns EPERM if the caller's uids do
 * not match the target, EACCES if lowering nice without privilege.
 *
 * Caller must hold chgp->p_token
 */
static int
donice(struct proc *chgp, int n)
{
	struct ucred *cr = curthread->td_ucred;
	struct lwp *lp;

	if (cr->cr_uid && cr->cr_ruid &&
	    cr->cr_uid != chgp->p_ucred->cr_uid &&
	    cr->cr_ruid != chgp->p_ucred->cr_uid)
		return (EPERM);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	/* raising priority (lowering nice) requires privilege */
	if (n < chgp->p_nice && priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0))
		return (EACCES);
	chgp->p_nice = n;
	FOREACH_LWP_IN_PROC(lp, chgp) {
		LWPHOLD(lp);
		chgp->p_usched->resetpriority(lp);
		LWPRELE(lp);
	}
	return (0);
}


/*
 * Accumulator passed through allproc_scan() for the PRIO_USER case.
 */
struct ioprio_get_info {
	int high;		/* highest ioprio found so far */
	int who;		/* uid being matched */
};

static int ioprio_get_callback(struct proc *p, void *data);

/*
 * ioprio_get() system call: return the highest I/O priority (p_ionice)
 * among the selected processes, or ESRCH if none matched.
 *
 * MPALMOSTSAFE
 */
int
sys_ioprio_get(struct ioprio_get_args *uap)
{
	struct ioprio_get_info info;
	thread_t curtd = curthread;
	struct proc *curp = curproc;
	struct proc *p;
	struct pgrp *pg;
	int high = IOPRIO_MIN-2;	/* sentinel: "nothing found yet" */
	int error;

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			high =
curp->p_ionice; 344 } else { 345 p = pfind(uap->who); 346 if (p) { 347 lwkt_gettoken_shared(&p->p_token); 348 if (PRISON_CHECK(curtd->td_ucred, p->p_ucred)) 349 high = p->p_ionice; 350 lwkt_reltoken(&p->p_token); 351 PRELE(p); 352 } 353 } 354 break; 355 case PRIO_PGRP: 356 if (uap->who == 0) { 357 lwkt_gettoken_shared(&curp->p_token); 358 pg = curp->p_pgrp; 359 pgref(pg); 360 lwkt_reltoken(&curp->p_token); 361 } else if ((pg = pgfind(uap->who)) == NULL) { 362 break; 363 } /* else ref held from pgfind */ 364 365 lwkt_gettoken_shared(&pg->pg_token); 366 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 367 if (PRISON_CHECK(curtd->td_ucred, p->p_ucred) && 368 p->p_nice > high) 369 high = p->p_ionice; 370 } 371 lwkt_reltoken(&pg->pg_token); 372 pgrel(pg); 373 break; 374 case PRIO_USER: 375 if (uap->who == 0) 376 uap->who = curtd->td_ucred->cr_uid; 377 info.high = high; 378 info.who = uap->who; 379 allproc_scan(ioprio_get_callback, &info); 380 high = info.high; 381 break; 382 default: 383 error = EINVAL; 384 goto done; 385 } 386 if (high == IOPRIO_MIN-2) { 387 error = ESRCH; 388 goto done; 389 } 390 uap->sysmsg_result = high; 391 error = 0; 392 done: 393 return (error); 394 } 395 396 /* 397 * Figure out the current lowest nice priority for processes owned 398 * by the specified user. 
 */
static
int
ioprio_get_callback(struct proc *p, void *data)
{
	struct ioprio_get_info *info = data;

	lwkt_gettoken_shared(&p->p_token);
	if (PRISON_CHECK(curthread->td_ucred, p->p_ucred) &&
	    p->p_ucred->cr_uid == info->who &&
	    p->p_ionice > info->high) {
		info->high = p->p_ionice;
	}
	lwkt_reltoken(&p->p_token);
	return(0);
}


/*
 * Accumulator passed through allproc_scan() for the PRIO_USER case.
 */
struct ioprio_set_info {
	int prio;		/* requested ioprio value */
	int who;		/* uid being matched */
	int error;		/* last error from doionice() */
	int found;		/* number of processes adjusted */
};

static int ioprio_set_callback(struct proc *p, void *data);

/*
 * ioprio_set() system call: apply the requested I/O priority (via
 * doionice()) to every process selected by (uap->which, uap->who).
 * Returns ESRCH if no process matched.
 *
 * MPALMOSTSAFE
 */
int
sys_ioprio_set(struct ioprio_set_args *uap)
{
	struct ioprio_set_info info;
	thread_t curtd = curthread;
	struct proc *curp = curproc;
	struct proc *p;
	struct pgrp *pg;
	int found = 0, error = 0;

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			/* doionice() requires the target's p_token */
			lwkt_gettoken(&curp->p_token);
			error = doionice(curp, uap->prio);
			lwkt_reltoken(&curp->p_token);
			found++;
		} else {
			p = pfind(uap->who);
			if (p) {
				lwkt_gettoken(&p->p_token);
				if (PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
					error = doionice(p, uap->prio);
					found++;
				}
				lwkt_reltoken(&p->p_token);
				PRELE(p);
			}
		}
		break;
	case PRIO_PGRP:
		if (uap->who == 0) {
			lwkt_gettoken_shared(&curp->p_token);
			pg = curp->p_pgrp;
			pgref(pg);
			lwkt_reltoken(&curp->p_token);
		} else if ((pg = pgfind(uap->who)) == NULL) {
			break;
		} /* else ref held from pgfind */

		lwkt_gettoken(&pg->pg_token);
restart:
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PHOLD(p);
			lwkt_gettoken(&p->p_token);
			if (p->p_pgrp == pg &&
			    PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
				error = doionice(p, uap->prio);
				found++;
			}
			lwkt_reltoken(&p->p_token);
			/* process left the pgrp; iterator unsafe, restart */
			if (p->p_pgrp != pg) {
				PRELE(p);
				goto restart;
			}
			PRELE(p);
		}
		lwkt_reltoken(&pg->pg_token);
		pgrel(pg);
		break;
case PRIO_USER:
		if (uap->who == 0)
			uap->who = curtd->td_ucred->cr_uid;
		info.prio = uap->prio;
		info.who = uap->who;
		info.error = 0;
		info.found = 0;
		allproc_scan(ioprio_set_callback, &info);
		error = info.error;
		found = info.found;
		break;
	default:
		error = EINVAL;
		found = 1;	/* suppress the ESRCH below; report EINVAL */
		break;
	}

	if (found == 0)
		error = ESRCH;
	return (error);
}

/*
 * allproc_scan() callback for sys_ioprio_set PRIO_USER: apply
 * doionice() to every visible process owned by info->who.  Only the
 * last error is retained.
 */
static
int
ioprio_set_callback(struct proc *p, void *data)
{
	struct ioprio_set_info *info = data;
	int error;

	lwkt_gettoken(&p->p_token);
	if (p->p_ucred->cr_uid == info->who &&
	    PRISON_CHECK(curthread->td_ucred, p->p_ucred)) {
		error = doionice(p, info->prio);
		if (error)
			info->error = error;
		++info->found;
	}
	lwkt_reltoken(&p->p_token);
	return(0);
}

/*
 * Set chgp's I/O priority to n (clamped to [IOPRIO_MIN, IOPRIO_MAX]).
 * EPERM if the caller's uids do not match the target, EACCES if
 * lowering the value without privilege.  Mirrors donice().
 */
static int
doionice(struct proc *chgp, int n)
{
	struct ucred *cr = curthread->td_ucred;

	if (cr->cr_uid && cr->cr_ruid &&
	    cr->cr_uid != chgp->p_ucred->cr_uid &&
	    cr->cr_ruid != chgp->p_ucred->cr_uid)
		return (EPERM);
	if (n > IOPRIO_MAX)
		n = IOPRIO_MAX;
	if (n < IOPRIO_MIN)
		n = IOPRIO_MIN;
	if (n < chgp->p_ionice &&
	    priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0))
		return (EACCES);
	chgp->p_ionice = n;

	return (0);

}

/*
 * lwp_rtprio() system call: look up or set the realtime priority of a
 * single lwp (uap->tid) of process uap->pid.  tid == -1 means the
 * calling lwp.
 *
 * MPALMOSTSAFE
 */
int
sys_lwp_rtprio(struct lwp_rtprio_args *uap)
{
	struct ucred *cr = curthread->td_ucred;
	struct proc *p;
	struct lwp *lp;
	struct rtprio rtp;
	int error;

	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	if (error)
		return error;
	if (uap->pid < 0)
		return EINVAL;

	if (uap->pid == 0) {
		p = curproc;
		PHOLD(p);	/* match the PRELE in the done: path */
	} else {
		p = pfind(uap->pid);	/* returns the proc held */
	}
	if (p == NULL) {
		error = ESRCH;
		goto done;
	}
	lwkt_gettoken(&p->p_token);

	if (uap->tid < -1) {
		error = EINVAL;
		goto done;
	}
	if (uap->tid == -1) {
		/*
		 *
sadly, tid can be 0 so we can't use 0 here
		 * like sys_rtprio()
		 */
		lp = curthread->td_lwp;
	} else {
		lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
		if (lp == NULL) {
			error = ESRCH;
			goto done;
		}
	}

	/*
	 * Make sure that this lwp is not ripped if any of the following
	 * code blocks, e.g. copyout.
	 */
	LWPHOLD(lp);
	switch (uap->function) {
	case RTP_LOOKUP:
		error = copyout(&lp->lwp_rtprio, uap->rtp,
				sizeof(struct rtprio));
		break;
	case RTP_SET:
		/* caller's uids must match the target process */
		if (cr->cr_uid && cr->cr_ruid &&
		    cr->cr_uid != p->p_ucred->cr_uid &&
		    cr->cr_ruid != p->p_ucred->cr_uid) {
			error = EPERM;
			break;
		}
		/* disallow setting rtprio in most cases if not superuser */
		if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) {
			/* can't set someone else's */
			if (uap->pid) { /* XXX */
				error = EPERM;
				break;
			}
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious.  However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process).  Fix me!
 XXX
 */
			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
				error = EPERM;
				break;
			}
		}
		switch (rtp.type) {
#ifdef RTP_PRIO_FIFO
		case RTP_PRIO_FIFO:
#endif
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_NORMAL:
		case RTP_PRIO_IDLE:
			if (rtp.prio > RTP_PRIO_MAX) {
				error = EINVAL;
			} else {
				lp->lwp_rtprio = rtp;
				error = 0;
			}
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	LWPRELE(lp);

done:
	if (p) {
		lwkt_reltoken(&p->p_token);
		PRELE(p);
	}
	return (error);
}

/*
 * rtprio() system call: look up or set the realtime priority of a
 * process (operates on the first lwp only -- see XXX below).
 *
 * Set realtime priority
 *
 * MPALMOSTSAFE
 */
int
sys_rtprio(struct rtprio_args *uap)
{
	struct ucred *cr = curthread->td_ucred;
	struct proc *p;
	struct lwp *lp;
	struct rtprio rtp;
	int error;

	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	if (error)
		return (error);

	if (uap->pid == 0) {
		p = curproc;
		PHOLD(p);	/* match the PRELE in the done: path */
	} else {
		p = pfind(uap->pid);	/* returns the proc held */
	}

	if (p == NULL) {
		error = ESRCH;
		goto done;
	}
	lwkt_gettoken(&p->p_token);

	/* XXX lwp */
	lp = FIRST_LWP_IN_PROC(p);
	switch (uap->function) {
	case RTP_LOOKUP:
		error = copyout(&lp->lwp_rtprio, uap->rtp,
				sizeof(struct rtprio));
		break;
	case RTP_SET:
		/* caller's uids must match the target process */
		if (cr->cr_uid && cr->cr_ruid &&
		    cr->cr_uid != p->p_ucred->cr_uid &&
		    cr->cr_ruid != p->p_ucred->cr_uid) {
			error = EPERM;
			break;
		}
		/* disallow setting rtprio in most cases if not superuser */
		if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) {
			/* can't set someone else's */
			if (uap->pid) {
				error = EPERM;
				break;
			}
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious.
However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process).  Fix me!  XXX
 */
			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
				error = EPERM;
				break;
			}
		}
		switch (rtp.type) {
#ifdef RTP_PRIO_FIFO
		case RTP_PRIO_FIFO:
#endif
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_NORMAL:
		case RTP_PRIO_IDLE:
			if (rtp.prio > RTP_PRIO_MAX) {
				error = EINVAL;
				break;
			}
			lp->lwp_rtprio = rtp;
			error = 0;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}
done:
	if (p) {
		lwkt_reltoken(&p->p_token);
		PRELE(p);
	}

	return (error);
}

/*
 * setrlimit() system call: copy in the new limit and hand it to
 * kern_setrlimit() which does the real work.
 *
 * MPSAFE
 */
int
sys_setrlimit(struct __setrlimit_args *uap)
{
	struct rlimit alim;
	int error;

	error = copyin(uap->rlp, &alim, sizeof(alim));
	if (error)
		return (error);

	error = kern_setrlimit(uap->which, &alim);

	return (error);
}

/*
 * getrlimit() system call: fetch the limit via kern_getrlimit() and
 * copy it out to the user.
 *
 * MPSAFE
 */
int
sys_getrlimit(struct __getrlimit_args *uap)
{
	struct rlimit lim;
	int error;

	error = kern_getrlimit(uap->which, &lim);

	if (error == 0)
		error = copyout(&lim, uap->rlp, sizeof(*uap->rlp));
	return error;
}

/*
 * Transform the running time and tick information in lwp lp's thread into user,
 * system, and interrupt time usage.
 *
 * Since we are limited to statclock tick granularity this is a statistical
 * calculation which will be correct over the long haul, but should not be
 * expected to measure fine grained deltas.
 *
 * It is possible to catch a lwp in the midst of being created, so
 * check whether lwp_thread is NULL or not.
 */
void
calcru(struct lwp *lp, struct timeval *up, struct timeval *sp)
{
	struct thread *td;

	/*
	 * Calculate at the statclock level.  YYY if the thread is owned by
	 * another cpu we need to forward the request to the other cpu, or
	 * have a token to interlock the information in order to avoid racing
	 * thread destruction.
	 */
	if ((td = lp->lwp_thread) != NULL) {
		/* td_uticks/td_sticks are in microseconds; split sec/usec */
		crit_enter();
		up->tv_sec = td->td_uticks / 1000000;
		up->tv_usec = td->td_uticks % 1000000;
		sp->tv_sec = td->td_sticks / 1000000;
		sp->tv_usec = td->td_sticks % 1000000;
		crit_exit();
	}
}

/*
 * Aggregate resource statistics of all lwps of a process.
 *
 * proc.p_ru keeps track of all statistics directly related to a proc.  This
 * consists of RSS usage and nswap information and aggregate numbers for all
 * former lwps of this proc.
 *
 * proc.p_cru is the sum of all stats of reaped children.
 *
 * lwp.lwp_ru contains the stats directly related to one specific lwp, meaning
 * packet, scheduler switch or page fault counts, etc.  This information gets
 * added to lwp.lwp_proc.p_ru when the lwp exits.
 */
void
calcru_proc(struct proc *p, struct rusage *ru)
{
	struct timeval upt, spt;
	long *rip1, *rip2;
	struct lwp *lp;

	/* start from the proc-level aggregate, then fold in live lwps */
	*ru = p->p_ru;

	FOREACH_LWP_IN_PROC(lp, p) {
		calcru(lp, &upt, &spt);
		timevaladd(&ru->ru_utime, &upt);
		timevaladd(&ru->ru_stime, &spt);
		/* sum the ru_first..ru_last counter range field by field */
		for (rip1 = &ru->ru_first, rip2 = &lp->lwp_ru.ru_first;
		     rip1 <= &ru->ru_last;
		     rip1++, rip2++)
			*rip1 += *rip2;
	}
}


/*
 * getrusage() system call: copy out either our own aggregated rusage
 * (RUSAGE_SELF) or the accumulated stats of reaped children
 * (RUSAGE_CHILDREN).
 *
 * MPALMOSTSAFE
 */
int
sys_getrusage(struct getrusage_args *uap)
{
	struct proc *p = curproc;
	struct rusage ru;
	struct rusage *rup;
	int error;

	lwkt_gettoken(&p->p_token);

	switch (uap->who) {
	case RUSAGE_SELF:
		rup = &ru;
		calcru_proc(p, rup);
		error = 0;
		break;
	case RUSAGE_CHILDREN:
		rup = &p->p_cru;
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}
	lwkt_reltoken(&p->p_token);

	if (error == 0)
		error = copyout(rup, uap->rusage, sizeof(struct rusage));
	return (error);
}

/*
 * Add ru2 into ru: times are summed, ru_maxrss is the maximum, and the
 * remaining long counters (ru_first..ru_last) are summed field by field.
 */
void
ruadd(struct rusage *ru, struct rusage *ru2)
{
	long *ip, *ip2;
	int i;

	timevaladd(&ru->ru_utime, &ru2->ru_utime);
	timevaladd(&ru->ru_stime, &ru2->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit(void)
{
	spin_init(&uihash_lock, "uihashinit");
	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
}

/*
 * Look up the uidinfo for a uid in the hash table; returns NULL if
 * not present.  No reference is added.
 *
 * NOTE: Must be called with uihash_lock held
 *
 * MPSAFE
 */
static struct uidinfo *
uilookup(uid_t uid)
{
	struct uihashhead *uipp;
	struct uidinfo *uip;

	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash) {
		if (uip->ui_uid == uid)
			break;
	}
	return (uip);
}

/*
 * Helper function to create a uid that could not be found.
 * This function will properly deal with races.
 *
 * MPSAFE
 */
static struct uidinfo *
uicreate(uid_t uid)
{
	struct uidinfo *uip, *tmp;

	/*
	 * Allocate space and check for a race
	 */
	uip = kmalloc(sizeof(*uip), M_UIDINFO, M_WAITOK|M_ZERO);

	/*
	 * Initialize structure and enter it into the hash table
	 */
	spin_init(&uip->ui_lock, "uicreate");
	uip->ui_uid = uid;
	uip->ui_ref = 1;	/* we're returning a ref */
	varsymset_init(&uip->ui_varsymset, NULL);

	/*
	 * Somebody may have already created the uidinfo for this
	 * uid.  If so, return that instead.
	 */
	spin_lock(&uihash_lock);
	tmp = uilookup(uid);
	if (tmp != NULL) {
		/* lost the race: keep the existing entry, discard ours */
		uihold(tmp);
		spin_unlock(&uihash_lock);

		spin_uninit(&uip->ui_lock);
		varsymset_clean(&uip->ui_varsymset);
		kfree(uip, M_UIDINFO);
		uip = tmp;
	} else {
		LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		spin_unlock(&uihash_lock);
	}
	return (uip);
}

/*
 * Return a referenced uidinfo for the uid, creating it if it does not
 * already exist.  The caller is responsible for uidrop()ing the ref.
 *
 * MPSAFE
 */
struct uidinfo *
uifind(uid_t uid)
{
	struct uidinfo *uip;

	spin_lock(&uihash_lock);
	uip = uilookup(uid);
	if (uip == NULL) {
		spin_unlock(&uihash_lock);
		uip = uicreate(uid);	/* handles creation races itself */
	} else {
		uihold(uip);
		spin_unlock(&uihash_lock);
	}
	return (uip);
}

/*
 * Helper function to remove a uidinfo whose reference count is
 * transitioning from 1->0.  The reference count is 1 on call.
 *
 * Zero is returned on success, otherwise non-zero and the
 * uip has not been removed.
 *
 * MPSAFE
 */
static __inline int
uifree(struct uidinfo *uip)
{
	/*
	 * If we are still the only holder after acquiring the uihash_lock
	 * we can safely unlink the uip and destroy it.  Otherwise we lost
	 * a race and must fail.
	 */
	spin_lock(&uihash_lock);
	if (uip->ui_ref != 1) {
		spin_unlock(&uihash_lock);
		return(-1);
	}
	LIST_REMOVE(uip, ui_hash);
	spin_unlock(&uihash_lock);

	/*
	 * The uip is now orphaned and we can destroy it at our
	 * leisure.
	 */
	/* leaked accounting indicates a bug elsewhere; report it */
	if (uip->ui_sbsize != 0)
		kprintf("freeing uidinfo: uid = %d, sbsize = %jd\n",
			uip->ui_uid, (intmax_t)uip->ui_sbsize);
	if (uip->ui_proccnt != 0)
		kprintf("freeing uidinfo: uid = %d, proccnt = %ld\n",
			uip->ui_uid, uip->ui_proccnt);

	varsymset_clean(&uip->ui_varsymset);
	lockuninit(&uip->ui_varsymset.vx_lock);
	spin_uninit(&uip->ui_lock);
	kfree(uip, M_UIDINFO);
	return(0);
}

/*
 * Add a reference to a uidinfo.
 *
 * MPSAFE
 */
void
uihold(struct uidinfo *uip)
{
	atomic_add_int(&uip->ui_ref, 1);
	KKASSERT(uip->ui_ref >= 0);
}

/*
 * Drop a reference to a uidinfo, freeing it on last release.
 *
 * NOTE: It is important for us to not drop the ref count to 0
 *	 because this can cause a 2->0/2->0 race with another
 *	 concurrent dropper.  Losing the race in that situation
 *	 can cause uip to become stale for one of the other
 *	 threads.  Instead, a 1->0 transition is handled entirely
 *	 inside uifree() under the uihash_lock.
 *
 * MPSAFE
 */
void
uidrop(struct uidinfo *uip)
{
	int ref;

	KKASSERT(uip->ui_ref > 0);

	for (;;) {
		ref = uip->ui_ref;
		cpu_ccfence();
		if (ref == 1) {
			if (uifree(uip) == 0)
				break;
		} else if (atomic_cmpset_int(&uip->ui_ref, ref, ref - 1)) {
			break;
		}
		/* else retry */
	}
}

/*
 * Replace *puip with nuip, dropping the reference on the old uidinfo.
 * The caller's reference on nuip is transferred to *puip.
 */
void
uireplace(struct uidinfo **puip, struct uidinfo *nuip)
{
	uidrop(*puip);
	*puip = nuip;
}

/*
 * Change the count associated with number of processes
 * a given user is using.
When 'max' is 0, don't enforce a limit.
 * Returns 1 on success, 0 if the increase would exceed 'max'.
 */
int
chgproccnt(struct uidinfo *uip, int diff, int max)
{
	int ret;
	spin_lock(&uip->ui_lock);
	/* don't allow them to exceed max, but allow subtraction */
	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
		ret = 0;
	} else {
		uip->ui_proccnt += diff;
		if (uip->ui_proccnt < 0)
			kprintf("negative proccnt for uid = %d\n", uip->ui_uid);
		ret = 1;
	}
	spin_unlock(&uip->ui_lock);
	return ret;
}

/*
 * Change the total socket buffer size a user has used.  Always
 * succeeds (returns 1); if the user is over 'max' the requested
 * hiwat is scaled down proportionally instead of being refused.
 */
int
chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max)
{
	rlim_t new;

#ifdef __x86_64__
	/* 64-bit: lockless update via atomic fetch-add */
	rlim_t sbsize;

	sbsize = atomic_fetchadd_long(&uip->ui_sbsize, to - *hiwat);
	new = sbsize + to - *hiwat;
#else
	spin_lock(&uip->ui_lock);
	new = uip->ui_sbsize + to - *hiwat;
	uip->ui_sbsize = new;
	spin_unlock(&uip->ui_lock);
#endif
	KKASSERT(new >= 0);

	/*
	 * If we are trying to increase the socket buffer size
	 * Scale down the hi water mark when we exceed the user's
	 * allowed socket buffer space.
	 *
	 * We can't scale down too much or we will blow up atomic packet
	 * operations.
	 */
	if (to > *hiwat && to > MCLBYTES && new > max) {
		to = to * max / new;
		if (to < MCLBYTES)
			to = MCLBYTES;
	}
	*hiwat = to;
	return (1);
}