1 /*- 2 * Copyright (c) 1982, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $ 40 * $DragonFly: src/sys/kern/kern_resource.c,v 1.34 2007/08/20 05:40:40 dillon Exp $ 41 */ 42 43 #include "opt_compat.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/sysproto.h> 48 #include <sys/file.h> 49 #include <sys/kern_syscall.h> 50 #include <sys/kernel.h> 51 #include <sys/resourcevar.h> 52 #include <sys/malloc.h> 53 #include <sys/proc.h> 54 #include <sys/time.h> 55 #include <sys/lockf.h> 56 57 #include <vm/vm.h> 58 #include <vm/vm_param.h> 59 #include <sys/lock.h> 60 #include <vm/pmap.h> 61 #include <vm/vm_map.h> 62 63 #include <sys/thread2.h> 64 65 static int donice (struct proc *chgp, int n); 66 67 static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures"); 68 #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) 69 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; 70 static u_long uihash; /* size of hash table - 1 */ 71 72 static struct uidinfo *uicreate (uid_t uid); 73 static struct uidinfo *uilookup (uid_t uid); 74 75 /* 76 * Resource controls and accounting. 77 */ 78 79 struct getpriority_info { 80 int low; 81 int who; 82 }; 83 84 static int getpriority_callback(struct proc *p, void *data); 85 86 int 87 sys_getpriority(struct getpriority_args *uap) 88 { 89 struct getpriority_info info; 90 struct proc *curp = curproc; 91 struct proc *p; 92 int low = PRIO_MAX + 1; 93 94 switch (uap->which) { 95 case PRIO_PROCESS: 96 if (uap->who == 0) 97 p = curp; 98 else 99 p = pfind(uap->who); 100 if (p == 0) 101 break; 102 if (!PRISON_CHECK(curp->p_ucred, p->p_ucred)) 103 break; 104 low = p->p_nice; 105 break; 106 107 case PRIO_PGRP: 108 { 109 struct pgrp *pg; 110 111 if (uap->who == 0) 112 pg = curp->p_pgrp; 113 else if ((pg = pgfind(uap->who)) == NULL) 114 break; 115 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 116 if ((PRISON_CHECK(curp->p_ucred, p->p_ucred) && p->p_nice < low)) 117 low = p->p_nice; 118 } 119 break; 120 } 121 case PRIO_USER: 122 if (uap->who == 0) 123 uap->who = curp->p_ucred->cr_uid; 124 info.low = low; 125 info.who = uap->who; 126 allproc_scan(getpriority_callback, &info); 127 low = info.low; 128 break; 129 130 default: 131 return (EINVAL); 132 } 133 if (low == PRIO_MAX + 1) 134 return (ESRCH); 135 uap->sysmsg_result = low; 136 return (0); 137 } 138 139 /* 140 * Figure out the current lowest nice priority for processes owned 141 * by the specified user. 142 */ 143 static 144 int 145 getpriority_callback(struct proc *p, void *data) 146 { 147 struct getpriority_info *info = data; 148 149 if (PRISON_CHECK(curproc->p_ucred, p->p_ucred) && 150 p->p_ucred->cr_uid == info->who && 151 p->p_nice < info->low) { 152 info->low = p->p_nice; 153 } 154 return(0); 155 } 156 157 struct setpriority_info { 158 int prio; 159 int who; 160 int error; 161 int found; 162 }; 163 164 static int setpriority_callback(struct proc *p, void *data); 165 166 int 167 sys_setpriority(struct setpriority_args *uap) 168 { 169 struct setpriority_info info; 170 struct proc *curp = curproc; 171 struct proc *p; 172 int found = 0, error = 0; 173 174 switch (uap->which) { 175 case PRIO_PROCESS: 176 if (uap->who == 0) 177 p = curp; 178 else 179 p = pfind(uap->who); 180 if (p == 0) 181 break; 182 if (!PRISON_CHECK(curp->p_ucred, p->p_ucred)) 183 break; 184 error = donice(p, uap->prio); 185 found++; 186 break; 187 188 case PRIO_PGRP: 189 { 190 struct pgrp *pg; 191 192 if (uap->who == 0) 193 pg = curp->p_pgrp; 194 else if ((pg = pgfind(uap->who)) == NULL) 195 break; 196 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 197 if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) { 198 error = donice(p, uap->prio); 199 found++; 200 } 201 } 202 break; 203 } 204 case PRIO_USER: 205 if (uap->who == 0) 206 uap->who = curp->p_ucred->cr_uid; 207 info.prio = uap->prio; 208 info.who = uap->who; 209 info.error = 0; 210 info.found = 0; 211 allproc_scan(setpriority_callback, &info); 212 error = info.error; 213 found = info.found; 214 break; 215 216 default: 217 return (EINVAL); 218 } 219 if (found == 0) 220 return (ESRCH); 221 return (error); 222 } 223 224 static 225 int 226 setpriority_callback(struct proc *p, void *data) 227 { 228 struct setpriority_info *info = data; 229 int error; 230 231 if (p->p_ucred->cr_uid == info->who && 232 PRISON_CHECK(curproc->p_ucred, p->p_ucred)) { 233 error = donice(p, info->prio); 234 if (error) 235 info->error = error; 236 ++info->found; 237 } 238 return(0); 239 } 240 241 static int 242 donice(struct proc *chgp, int n) 243 { 244 struct proc *curp = curproc; 245 struct ucred *cr = curp->p_ucred; 246 struct lwp *lp; 247 248 if (cr->cr_uid && cr->cr_ruid && 249 cr->cr_uid != chgp->p_ucred->cr_uid && 250 cr->cr_ruid != chgp->p_ucred->cr_uid) 251 return (EPERM); 252 if (n > PRIO_MAX) 253 n = PRIO_MAX; 254 if (n < PRIO_MIN) 255 n = PRIO_MIN; 256 if (n < chgp->p_nice && suser_cred(cr, 0)) 257 return (EACCES); 258 chgp->p_nice = n; 259 FOREACH_LWP_IN_PROC(lp, chgp) 260 chgp->p_usched->resetpriority(lp); 261 return (0); 262 } 263 264 int 265 sys_lwp_rtprio(struct lwp_rtprio_args *uap) 266 { 267 struct proc *p = curproc; 268 struct lwp *lp; 269 struct rtprio rtp; 270 struct ucred *cr = p->p_ucred; 271 int error; 272 273 error = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); 274 if (error) 275 return error; 276 277 if (uap->pid < 0) { 278 return EINVAL; 279 } else if (uap->pid == 0) { 280 /* curproc already loaded on p */ 281 } else { 282 p = pfind(uap->pid); 283 } 284 285 if (p == 0) { 286 return ESRCH; 287 } 288 289 if (uap->tid < -1) { 290 return EINVAL; 291 } else if (uap->tid == -1) { 292 /* 293 * sadly, tid can be 0 so we can't use 0 here 294 * like sys_rtprio() 295 */ 296 lp = curthread->td_lwp; 297 } else { 298 lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid); 299 if (lp == NULL) 300 return ESRCH; 301 } 302 303 switch (uap->function) { 304 case RTP_LOOKUP: 305 return (copyout(&lp->lwp_rtprio, uap->rtp, 306 sizeof(struct rtprio))); 307 case RTP_SET: 308 if (cr->cr_uid && cr->cr_ruid && 309 cr->cr_uid != p->p_ucred->cr_uid && 310 cr->cr_ruid != p->p_ucred->cr_uid) { 311 return EPERM; 312 } 313 /* disallow setting rtprio in most cases if not superuser */ 314 if (suser_cred(cr, 0)) { 315 /* can't set someone else's */ 316 if (uap->pid) { /* XXX */ 317 return EPERM; 318 } 319 /* can't set realtime priority */ 320 /* 321 * Realtime priority has to be restricted for reasons which should be 322 * obvious. However, for idle priority, there is a potential for 323 * system deadlock if an idleprio process gains a lock on a resource 324 * that other processes need (and the idleprio process can't run 325 * due to a CPU-bound normal process). Fix me! XXX 326 */ 327 if (RTP_PRIO_IS_REALTIME(rtp.type)) { 328 return EPERM; 329 } 330 } 331 switch (rtp.type) { 332 #ifdef RTP_PRIO_FIFO 333 case RTP_PRIO_FIFO: 334 #endif 335 case RTP_PRIO_REALTIME: 336 case RTP_PRIO_NORMAL: 337 case RTP_PRIO_IDLE: 338 if (rtp.prio > RTP_PRIO_MAX) 339 return EINVAL; 340 lp->lwp_rtprio = rtp; 341 return 0; 342 default: 343 return EINVAL; 344 } 345 default: 346 return EINVAL; 347 } 348 panic("can't get here"); 349 } 350 351 /* 352 * Set realtime priority 353 */ 354 /* ARGSUSED */ 355 int 356 sys_rtprio(struct rtprio_args *uap) 357 { 358 struct proc *curp = curproc; 359 struct proc *p; 360 struct lwp *lp; 361 struct ucred *cr = curp->p_ucred; 362 struct rtprio rtp; 363 int error; 364 365 error = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); 366 if (error) 367 return (error); 368 369 if (uap->pid == 0) 370 p = curp; 371 else 372 p = pfind(uap->pid); 373 374 if (p == 0) 375 return (ESRCH); 376 377 /* XXX lwp */ 378 lp = FIRST_LWP_IN_PROC(p); 379 switch (uap->function) { 380 case RTP_LOOKUP: 381 return (copyout(&lp->lwp_rtprio, uap->rtp, sizeof(struct rtprio))); 382 case RTP_SET: 383 if (cr->cr_uid && cr->cr_ruid && 384 cr->cr_uid != p->p_ucred->cr_uid && 385 cr->cr_ruid != p->p_ucred->cr_uid) 386 return (EPERM); 387 /* disallow setting rtprio in most cases if not superuser */ 388 if (suser_cred(cr, 0)) { 389 /* can't set someone else's */ 390 if (uap->pid) 391 return (EPERM); 392 /* can't set realtime priority */ 393 /* 394 * Realtime priority has to be restricted for reasons which should be 395 * obvious. However, for idle priority, there is a potential for 396 * system deadlock if an idleprio process gains a lock on a resource 397 * that other processes need (and the idleprio process can't run 398 * due to a CPU-bound normal process). Fix me! XXX 399 */ 400 if (RTP_PRIO_IS_REALTIME(rtp.type)) 401 return (EPERM); 402 } 403 switch (rtp.type) { 404 #ifdef RTP_PRIO_FIFO 405 case RTP_PRIO_FIFO: 406 #endif 407 case RTP_PRIO_REALTIME: 408 case RTP_PRIO_NORMAL: 409 case RTP_PRIO_IDLE: 410 if (rtp.prio > RTP_PRIO_MAX) 411 return (EINVAL); 412 lp->lwp_rtprio = rtp; 413 return (0); 414 default: 415 return (EINVAL); 416 } 417 418 default: 419 return (EINVAL); 420 } 421 } 422 423 int 424 sys_setrlimit(struct __setrlimit_args *uap) 425 { 426 struct rlimit alim; 427 int error; 428 429 error = copyin(uap->rlp, &alim, sizeof(alim)); 430 if (error) 431 return (error); 432 433 error = kern_setrlimit(uap->which, &alim); 434 435 return (error); 436 } 437 438 int 439 sys_getrlimit(struct __getrlimit_args *uap) 440 { 441 struct rlimit lim; 442 int error; 443 444 error = kern_getrlimit(uap->which, &lim); 445 446 if (error == 0) 447 error = copyout(&lim, uap->rlp, sizeof(*uap->rlp)); 448 return error; 449 } 450 451 /* 452 * Transform the running time and tick information in lwp lp's thread into user, 453 * system, and interrupt time usage. 454 * 455 * Since we are limited to statclock tick granularity this is a statisical 456 * calculation which will be correct over the long haul, but should not be 457 * expected to measure fine grained deltas. 458 * 459 * It is possible to catch a lwp in the midst of being created, so 460 * check whether lwp_thread is NULL or not. 461 */ 462 void 463 calcru(struct lwp *lp, struct timeval *up, struct timeval *sp) 464 { 465 struct thread *td; 466 467 /* 468 * Calculate at the statclock level. YYY if the thread is owned by 469 * another cpu we need to forward the request to the other cpu, or 470 * have a token to interlock the information in order to avoid racing 471 * thread destruction. 472 */ 473 if ((td = lp->lwp_thread) != NULL) { 474 crit_enter(); 475 up->tv_sec = td->td_uticks / 1000000; 476 up->tv_usec = td->td_uticks % 1000000; 477 sp->tv_sec = td->td_sticks / 1000000; 478 sp->tv_usec = td->td_sticks % 1000000; 479 crit_exit(); 480 } 481 } 482 483 /* 484 * Aggregate resource statistics of all lwps of a process. 485 * 486 * proc.p_ru keeps track of all statistics directly related to a proc. This 487 * consists of RSS usage and nswap information and aggregate numbers for all 488 * former lwps of this proc. 489 * 490 * proc.p_cru is the sum of all stats of reaped children. 491 * 492 * lwp.lwp_ru contains the stats directly related to one specific lwp, meaning 493 * packet, scheduler switch or page fault counts, etc. This information gets 494 * added to lwp.lwp_proc.p_ru when the lwp exits. 495 */ 496 void 497 calcru_proc(struct proc *p, struct rusage *ru) 498 { 499 struct timeval upt, spt; 500 long *rip1, *rip2; 501 struct lwp *lp; 502 503 *ru = p->p_ru; 504 505 FOREACH_LWP_IN_PROC(lp, p) { 506 calcru(lp, &upt, &spt); 507 timevaladd(&ru->ru_utime, &upt); 508 timevaladd(&ru->ru_stime, &spt); 509 for (rip1 = &ru->ru_first, rip2 = &lp->lwp_ru.ru_first; 510 rip1 <= &ru->ru_last; 511 rip1++, rip2++) 512 *rip1 += *rip2; 513 } 514 } 515 516 517 /* ARGSUSED */ 518 int 519 sys_getrusage(struct getrusage_args *uap) 520 { 521 struct rusage ru; 522 struct rusage *rup; 523 524 switch (uap->who) { 525 526 case RUSAGE_SELF: 527 rup = &ru; 528 calcru_proc(curproc, rup); 529 break; 530 531 case RUSAGE_CHILDREN: 532 rup = &curproc->p_cru; 533 break; 534 535 default: 536 return (EINVAL); 537 } 538 return (copyout((caddr_t)rup, (caddr_t)uap->rusage, 539 sizeof (struct rusage))); 540 } 541 542 void 543 ruadd(struct rusage *ru, struct rusage *ru2) 544 { 545 long *ip, *ip2; 546 int i; 547 548 timevaladd(&ru->ru_utime, &ru2->ru_utime); 549 timevaladd(&ru->ru_stime, &ru2->ru_stime); 550 if (ru->ru_maxrss < ru2->ru_maxrss) 551 ru->ru_maxrss = ru2->ru_maxrss; 552 ip = &ru->ru_first; ip2 = &ru2->ru_first; 553 for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) 554 *ip++ += *ip2++; 555 } 556 557 /* 558 * Find the uidinfo structure for a uid. This structure is used to 559 * track the total resource consumption (process count, socket buffer 560 * size, etc.) for the uid and impose limits. 561 */ 562 void 563 uihashinit(void) 564 { 565 uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash); 566 } 567 568 static struct uidinfo * 569 uilookup(uid_t uid) 570 { 571 struct uihashhead *uipp; 572 struct uidinfo *uip; 573 574 uipp = UIHASH(uid); 575 LIST_FOREACH(uip, uipp, ui_hash) { 576 if (uip->ui_uid == uid) 577 break; 578 } 579 return (uip); 580 } 581 582 static struct uidinfo * 583 uicreate(uid_t uid) 584 { 585 struct uidinfo *uip, *norace; 586 587 /* 588 * Allocate space and check for a race 589 */ 590 MALLOC(uip, struct uidinfo *, sizeof(*uip), M_UIDINFO, M_WAITOK); 591 norace = uilookup(uid); 592 if (norace != NULL) { 593 FREE(uip, M_UIDINFO); 594 return (norace); 595 } 596 597 /* 598 * Initialize structure and enter it into the hash table 599 */ 600 LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash); 601 uip->ui_uid = uid; 602 uip->ui_proccnt = 0; 603 uip->ui_sbsize = 0; 604 uip->ui_ref = 0; 605 uip->ui_posixlocks = 0; 606 varsymset_init(&uip->ui_varsymset, NULL); 607 return (uip); 608 } 609 610 struct uidinfo * 611 uifind(uid_t uid) 612 { 613 struct uidinfo *uip; 614 615 uip = uilookup(uid); 616 if (uip == NULL) 617 uip = uicreate(uid); 618 uip->ui_ref++; 619 return (uip); 620 } 621 622 static __inline void 623 uifree(struct uidinfo *uip) 624 { 625 if (uip->ui_sbsize != 0) 626 /* XXX no %qd in kernel. Truncate. */ 627 kprintf("freeing uidinfo: uid = %d, sbsize = %ld\n", 628 uip->ui_uid, (long)uip->ui_sbsize); 629 if (uip->ui_proccnt != 0) 630 kprintf("freeing uidinfo: uid = %d, proccnt = %ld\n", 631 uip->ui_uid, uip->ui_proccnt); 632 LIST_REMOVE(uip, ui_hash); 633 varsymset_clean(&uip->ui_varsymset); 634 FREE(uip, M_UIDINFO); 635 } 636 637 void 638 uihold(struct uidinfo *uip) 639 { 640 ++uip->ui_ref; 641 KKASSERT(uip->ui_ref > 0); 642 } 643 644 void 645 uidrop(struct uidinfo *uip) 646 { 647 KKASSERT(uip->ui_ref > 0); 648 if (--uip->ui_ref == 0) 649 uifree(uip); 650 } 651 652 void 653 uireplace(struct uidinfo **puip, struct uidinfo *nuip) 654 { 655 uidrop(*puip); 656 *puip = nuip; 657 } 658 659 /* 660 * Change the count associated with number of processes 661 * a given user is using. When 'max' is 0, don't enforce a limit 662 */ 663 int 664 chgproccnt(struct uidinfo *uip, int diff, int max) 665 { 666 /* don't allow them to exceed max, but allow subtraction */ 667 if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) 668 return (0); 669 uip->ui_proccnt += diff; 670 if (uip->ui_proccnt < 0) 671 kprintf("negative proccnt for uid = %d\n", uip->ui_uid); 672 return (1); 673 } 674 675 /* 676 * Change the total socket buffer size a user has used. 677 */ 678 int 679 chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max) 680 { 681 rlim_t new; 682 683 crit_enter(); 684 new = uip->ui_sbsize + to - *hiwat; 685 /* don't allow them to exceed max, but allow subtraction */ 686 if (to > *hiwat && new > max) { 687 crit_exit(); 688 return (0); 689 } 690 uip->ui_sbsize = new; 691 *hiwat = to; 692 if (uip->ui_sbsize < 0) 693 kprintf("negative sbsize for uid = %d\n", uip->ui_uid); 694 crit_exit(); 695 return (1); 696 } 697 698