1 /* $NetBSD: kern_resource.c,v 1.67 2002/10/03 05:18:59 itojun Exp $ */ 2 3 /*- 4 * Copyright (c) 1982, 1986, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * @(#)kern_resource.c 8.8 (Berkeley) 2/14/95 41 */ 42 43 #include <sys/cdefs.h> 44 __KERNEL_RCSID(0, "$NetBSD: kern_resource.c,v 1.67 2002/10/03 05:18:59 itojun Exp $"); 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/file.h> 50 #include <sys/resourcevar.h> 51 #include <sys/malloc.h> 52 #include <sys/pool.h> 53 #include <sys/proc.h> 54 55 #include <sys/mount.h> 56 #include <sys/syscallargs.h> 57 58 #include <uvm/uvm_extern.h> 59 60 /* 61 * Maximum process data and stack limits. 62 * They are variables so they are patchable. 63 * 64 * XXXX Do we really need them to be patchable? 65 */ 66 rlim_t maxdmap = MAXDSIZ; 67 rlim_t maxsmap = MAXSSIZ; 68 69 /* 70 * Resource controls and accounting. 71 */ 72 73 int 74 sys_getpriority(curp, v, retval) 75 struct proc *curp; 76 void *v; 77 register_t *retval; 78 { 79 struct sys_getpriority_args /* { 80 syscallarg(int) which; 81 syscallarg(int) who; 82 } */ *uap = v; 83 struct proc *p; 84 int low = NZERO + PRIO_MAX + 1; 85 86 switch (SCARG(uap, which)) { 87 88 case PRIO_PROCESS: 89 if (SCARG(uap, who) == 0) 90 p = curp; 91 else 92 p = pfind(SCARG(uap, who)); 93 if (p == 0) 94 break; 95 low = p->p_nice; 96 break; 97 98 case PRIO_PGRP: { 99 struct pgrp *pg; 100 101 if (SCARG(uap, who) == 0) 102 pg = curp->p_pgrp; 103 else if ((pg = pgfind(SCARG(uap, who))) == NULL) 104 break; 105 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 106 if (p->p_nice < low) 107 low = p->p_nice; 108 } 109 break; 110 } 111 112 case PRIO_USER: 113 if (SCARG(uap, who) == 0) 114 SCARG(uap, who) = curp->p_ucred->cr_uid; 115 proclist_lock_read(); 116 LIST_FOREACH(p, &allproc, p_list) { 117 if (p->p_ucred->cr_uid == (uid_t) SCARG(uap, who) && 118 p->p_nice < low) 119 low = p->p_nice; 120 } 121 proclist_unlock_read(); 122 break; 123 124 default: 125 return (EINVAL); 126 } 127 if (low == NZERO + PRIO_MAX + 1) 128 return (ESRCH); 129 *retval = low - NZERO; 130 return (0); 131 } 132 133 /* ARGSUSED */ 134 int 135 sys_setpriority(curp, v, retval) 136 struct proc *curp; 137 void *v; 138 register_t *retval; 139 { 140 struct sys_setpriority_args /* { 141 syscallarg(int) which; 142 syscallarg(int) who; 143 syscallarg(int) prio; 144 } */ *uap = v; 145 struct proc *p; 146 int found = 0, error = 0; 147 148 switch (SCARG(uap, which)) { 149 150 case PRIO_PROCESS: 151 if (SCARG(uap, who) == 0) 152 p = curp; 153 else 154 p = pfind(SCARG(uap, who)); 155 if (p == 0) 156 break; 157 error = donice(curp, p, SCARG(uap, prio)); 158 found++; 159 break; 160 161 case PRIO_PGRP: { 162 struct pgrp *pg; 163 164 if (SCARG(uap, who) == 0) 165 pg = curp->p_pgrp; 166 else if ((pg = pgfind(SCARG(uap, who))) == NULL) 167 break; 168 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 169 error = donice(curp, p, SCARG(uap, prio)); 170 found++; 171 } 172 break; 173 } 174 175 case PRIO_USER: 176 if (SCARG(uap, who) == 0) 177 SCARG(uap, who) = curp->p_ucred->cr_uid; 178 proclist_lock_read(); 179 LIST_FOREACH(p, &allproc, p_list) { 180 if (p->p_ucred->cr_uid == (uid_t) SCARG(uap, who)) { 181 error = donice(curp, p, SCARG(uap, prio)); 182 found++; 183 } 184 } 185 proclist_unlock_read(); 186 break; 187 188 default: 189 return (EINVAL); 190 } 191 if (found == 0) 192 return (ESRCH); 193 return (error); 194 } 195 196 int 197 donice(curp, chgp, n) 198 struct proc *curp, *chgp; 199 int n; 200 { 201 struct pcred *pcred = curp->p_cred; 202 int s; 203 204 if (pcred->pc_ucred->cr_uid && pcred->p_ruid && 205 pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid && 206 pcred->p_ruid != chgp->p_ucred->cr_uid) 207 return (EPERM); 208 if (n > PRIO_MAX) 209 n = PRIO_MAX; 210 if (n < PRIO_MIN) 211 n = PRIO_MIN; 212 n += NZERO; 213 if (n < chgp->p_nice && suser(pcred->pc_ucred, &curp->p_acflag)) 214 return (EACCES); 215 chgp->p_nice = n; 216 SCHED_LOCK(s); 217 (void)resetpriority(chgp); 218 SCHED_UNLOCK(s); 219 return (0); 220 } 221 222 /* ARGSUSED */ 223 int 224 sys_setrlimit(p, v, retval) 225 struct proc *p; 226 void *v; 227 register_t *retval; 228 { 229 struct sys_setrlimit_args /* { 230 syscallarg(int) which; 231 syscallarg(const struct rlimit *) rlp; 232 } */ *uap = v; 233 int which = SCARG(uap, which); 234 struct rlimit alim; 235 int error; 236 237 error = copyin(SCARG(uap, rlp), &alim, sizeof(struct rlimit)); 238 if (error) 239 return (error); 240 return (dosetrlimit(p, p->p_cred, which, &alim)); 241 } 242 243 int 244 dosetrlimit(p, cred, which, limp) 245 struct proc *p; 246 struct pcred *cred; 247 int which; 248 struct rlimit *limp; 249 { 250 struct rlimit *alimp; 251 struct plimit *newplim; 252 int error; 253 254 if ((u_int)which >= RLIM_NLIMITS) 255 return (EINVAL); 256 257 if (limp->rlim_cur < 0 || limp->rlim_max < 0) 258 return (EINVAL); 259 260 alimp = &p->p_rlimit[which]; 261 /* if we don't change the value, no need to limcopy() */ 262 if (limp->rlim_cur == alimp->rlim_cur && 263 limp->rlim_max == alimp->rlim_max) 264 return 0; 265 266 if (limp->rlim_cur > limp->rlim_max) { 267 /* 268 * This is programming error. According to SUSv2, we should 269 * return error in this case. 270 */ 271 return (EINVAL); 272 } 273 if (limp->rlim_max > alimp->rlim_max 274 && (error = suser(cred->pc_ucred, &p->p_acflag)) != 0) 275 return (error); 276 277 if (p->p_limit->p_refcnt > 1 && 278 (p->p_limit->p_lflags & PL_SHAREMOD) == 0) { 279 newplim = limcopy(p->p_limit); 280 limfree(p->p_limit); 281 p->p_limit = newplim; 282 alimp = &p->p_rlimit[which]; 283 } 284 285 switch (which) { 286 287 case RLIMIT_DATA: 288 if (limp->rlim_cur > maxdmap) 289 limp->rlim_cur = maxdmap; 290 if (limp->rlim_max > maxdmap) 291 limp->rlim_max = maxdmap; 292 break; 293 294 case RLIMIT_STACK: 295 if (limp->rlim_cur > maxsmap) 296 limp->rlim_cur = maxsmap; 297 if (limp->rlim_max > maxsmap) 298 limp->rlim_max = maxsmap; 299 300 /* 301 * Return EINVAL if the new stack size limit is lower than 302 * current usage. Otherwise, the process would get SIGSEGV the 303 * moment it would try to access anything on it's current stack. 304 * This conforms to SUSv2. 305 */ 306 if (limp->rlim_cur < p->p_vmspace->vm_ssize * PAGE_SIZE 307 || limp->rlim_max < p->p_vmspace->vm_ssize * PAGE_SIZE) 308 return (EINVAL); 309 310 /* 311 * Stack is allocated to the max at exec time with 312 * only "rlim_cur" bytes accessible (In other words, 313 * allocates stack dividing two contiguous regions at 314 * "rlim_cur" bytes boundary). 315 * 316 * Since allocation is done in terms of page, roundup 317 * "rlim_cur" (otherwise, contiguous regions 318 * overlap). If stack limit is going up make more 319 * accessible, if going down make inaccessible. 320 */ 321 limp->rlim_cur = round_page(limp->rlim_cur); 322 if (limp->rlim_cur != alimp->rlim_cur) { 323 vaddr_t addr; 324 vsize_t size; 325 vm_prot_t prot; 326 327 if (limp->rlim_cur > alimp->rlim_cur) { 328 prot = VM_PROT_ALL; 329 size = limp->rlim_cur - alimp->rlim_cur; 330 addr = USRSTACK - limp->rlim_cur; 331 } else { 332 prot = VM_PROT_NONE; 333 size = alimp->rlim_cur - limp->rlim_cur; 334 addr = USRSTACK - alimp->rlim_cur; 335 } 336 (void) uvm_map_protect(&p->p_vmspace->vm_map, 337 addr, addr+size, prot, FALSE); 338 } 339 break; 340 341 case RLIMIT_NOFILE: 342 if (limp->rlim_cur > maxfiles) 343 limp->rlim_cur = maxfiles; 344 if (limp->rlim_max > maxfiles) 345 limp->rlim_max = maxfiles; 346 break; 347 348 case RLIMIT_NPROC: 349 if (limp->rlim_cur > maxproc) 350 limp->rlim_cur = maxproc; 351 if (limp->rlim_max > maxproc) 352 limp->rlim_max = maxproc; 353 break; 354 } 355 *alimp = *limp; 356 return (0); 357 } 358 359 /* ARGSUSED */ 360 int 361 sys_getrlimit(p, v, retval) 362 struct proc *p; 363 void *v; 364 register_t *retval; 365 { 366 struct sys_getrlimit_args /* { 367 syscallarg(int) which; 368 syscallarg(struct rlimit *) rlp; 369 } */ *uap = v; 370 int which = SCARG(uap, which); 371 372 if ((u_int)which >= RLIM_NLIMITS) 373 return (EINVAL); 374 return (copyout(&p->p_rlimit[which], SCARG(uap, rlp), 375 sizeof(struct rlimit))); 376 } 377 378 /* 379 * Transform the running time and tick information in proc p into user, 380 * system, and interrupt time usage. 381 */ 382 void 383 calcru(p, up, sp, ip) 384 struct proc *p; 385 struct timeval *up; 386 struct timeval *sp; 387 struct timeval *ip; 388 { 389 u_quad_t u, st, ut, it, tot; 390 long sec, usec; 391 int s; 392 struct timeval tv; 393 394 s = splstatclock(); 395 st = p->p_sticks; 396 ut = p->p_uticks; 397 it = p->p_iticks; 398 splx(s); 399 400 tot = st + ut + it; 401 if (tot == 0) { 402 up->tv_sec = up->tv_usec = 0; 403 sp->tv_sec = sp->tv_usec = 0; 404 if (ip != NULL) 405 ip->tv_sec = ip->tv_usec = 0; 406 return; 407 } 408 409 sec = p->p_rtime.tv_sec; 410 usec = p->p_rtime.tv_usec; 411 if (p->p_stat == SONPROC) { 412 struct schedstate_percpu *spc; 413 414 KDASSERT(p->p_cpu != NULL); 415 spc = &p->p_cpu->ci_schedstate; 416 417 /* 418 * Adjust for the current time slice. This is actually fairly 419 * important since the error here is on the order of a time 420 * quantum, which is much greater than the sampling error. 421 */ 422 microtime(&tv); 423 sec += tv.tv_sec - spc->spc_runtime.tv_sec; 424 usec += tv.tv_usec - spc->spc_runtime.tv_usec; 425 } 426 u = (u_quad_t) sec * 1000000 + usec; 427 st = (u * st) / tot; 428 sp->tv_sec = st / 1000000; 429 sp->tv_usec = st % 1000000; 430 ut = (u * ut) / tot; 431 up->tv_sec = ut / 1000000; 432 up->tv_usec = ut % 1000000; 433 if (ip != NULL) { 434 it = (u * it) / tot; 435 ip->tv_sec = it / 1000000; 436 ip->tv_usec = it % 1000000; 437 } 438 } 439 440 /* ARGSUSED */ 441 int 442 sys_getrusage(p, v, retval) 443 struct proc *p; 444 void *v; 445 register_t *retval; 446 { 447 struct sys_getrusage_args /* { 448 syscallarg(int) who; 449 syscallarg(struct rusage *) rusage; 450 } */ *uap = v; 451 struct rusage *rup; 452 453 switch (SCARG(uap, who)) { 454 455 case RUSAGE_SELF: 456 rup = &p->p_stats->p_ru; 457 calcru(p, &rup->ru_utime, &rup->ru_stime, NULL); 458 break; 459 460 case RUSAGE_CHILDREN: 461 rup = &p->p_stats->p_cru; 462 break; 463 464 default: 465 return (EINVAL); 466 } 467 return (copyout(rup, SCARG(uap, rusage), sizeof(struct rusage))); 468 } 469 470 void 471 ruadd(ru, ru2) 472 struct rusage *ru, *ru2; 473 { 474 long *ip, *ip2; 475 int i; 476 477 timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime); 478 timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime); 479 if (ru->ru_maxrss < ru2->ru_maxrss) 480 ru->ru_maxrss = ru2->ru_maxrss; 481 ip = &ru->ru_first; ip2 = &ru2->ru_first; 482 for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) 483 *ip++ += *ip2++; 484 } 485 486 /* 487 * Make a copy of the plimit structure. 488 * We share these structures copy-on-write after fork, 489 * and copy when a limit is changed. 490 */ 491 struct plimit * 492 limcopy(lim) 493 struct plimit *lim; 494 { 495 struct plimit *newlim; 496 497 newlim = pool_get(&plimit_pool, PR_WAITOK); 498 memcpy(newlim->pl_rlimit, lim->pl_rlimit, 499 sizeof(struct rlimit) * RLIM_NLIMITS); 500 if (lim->pl_corename == defcorename) { 501 newlim->pl_corename = defcorename; 502 } else { 503 newlim->pl_corename = malloc(strlen(lim->pl_corename)+1, 504 M_TEMP, M_WAITOK); 505 strcpy(newlim->pl_corename, lim->pl_corename); 506 } 507 newlim->p_lflags = 0; 508 newlim->p_refcnt = 1; 509 return (newlim); 510 } 511 512 void 513 limfree(lim) 514 struct plimit *lim; 515 { 516 517 if (--lim->p_refcnt > 0) 518 return; 519 #ifdef DIAGNOSTIC 520 if (lim->p_refcnt < 0) 521 panic("limfree"); 522 #endif 523 if (lim->pl_corename != defcorename) 524 free(lim->pl_corename, M_TEMP); 525 pool_put(&plimit_pool, lim); 526 } 527