1 /* 2 * Copyright (c) 2006 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * Copyright (c) 1982, 1986, 1991, 1993 36 * The Regents of the University of California. All rights reserved. 37 * (c) UNIX System Laboratories, Inc. 
38 * All or some portions of this file are derived from material licensed 39 * to the University of California by American Telephone and Telegraph 40 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 41 * the permission of UNIX System Laboratories, Inc. 42 * 43 * Redistribution and use in source and binary forms, with or without 44 * modification, are permitted provided that the following conditions 45 * are met: 46 * 1. Redistributions of source code must retain the above copyright 47 * notice, this list of conditions and the following disclaimer. 48 * 2. Redistributions in binary form must reproduce the above copyright 49 * notice, this list of conditions and the following disclaimer in the 50 * documentation and/or other materials provided with the distribution. 51 * 3. Neither the name of the University nor the names of its contributors 52 * may be used to endorse or promote products derived from this software 53 * without specific prior written permission. 54 * 55 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 56 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 57 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 58 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 59 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 60 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 61 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 62 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 63 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 65 * SUCH DAMAGE. 
 *
 * @(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $
 */

#include <sys/resource.h>
#include <sys/spinlock.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/file.h>
#include <sys/lockf.h>
#include <sys/kern_syscall.h>

#include <vm/vm_param.h>
#include <vm/vm.h>
#include <vm/vm_map.h>

#include <machine/pmap.h>

#include <sys/spinlock2.h>

static void plimit_copy(struct plimit *olimit, struct plimit *nlimit);

/*
 * Initialize proc0's plimit structure.  All later plimit structures
 * are inherited through fork.
 *
 * Every limit starts out at RLIM_INFINITY; selected limits are then
 * clamped to the global tunables (maxfiles, maxproc) and to the amount
 * of free physical memory (RSS/MEMLOCK).
 */
void
plimit_init0(struct plimit *limit)
{
	int i;
	rlim_t lim;

	for (i = 0; i < RLIM_NLIMITS; ++i) {
		limit->pl_rlimit[i].rlim_cur = RLIM_INFINITY;
		limit->pl_rlimit[i].rlim_max = RLIM_INFINITY;
	}
	limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur = maxfiles;
	limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
	limit->pl_rlimit[RLIMIT_NPROC].rlim_cur = maxproc;
	limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
	/* Base memory limits on the currently free page count */
	lim = ptoa((rlim_t)vmstats.v_free_count);
	limit->pl_rlimit[RLIMIT_RSS].rlim_max = lim;
	limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
	limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
	limit->p_cpulimit = RLIM_INFINITY;
	limit->p_refcnt = 1;
	spin_init(&limit->p_spin, "plimitinit");
}

/*
 * Return a plimit for use by a new forked process given the one
 * contained in the parent process.
 *
 * The structure is normally shared copy-on-write (ref count bumped);
 * a private copy is only made when the parent's structure must remain
 * exclusive.
 *
 * MPSAFE
 */
struct plimit *
plimit_fork(struct proc *p1)
{
	struct plimit *olimit = p1->p_limit;
	struct plimit *nlimit = NULL;
	struct plimit *rlimit;

	/*
	 * If we are exclusive (but not threaded-exclusive), but have only
	 * one reference, we can convert the structure to copy-on-write
	 * again.
	 *
	 * If we were threaded but are no longer threaded we can do the same
	 * thing.
	 */
	if (olimit->p_exclusive == 1) {
		KKASSERT(olimit->p_refcnt == 1);
		olimit->p_exclusive = 0;
	} else if (olimit->p_exclusive == 2 && p1->p_nthreads == 1) {
		KKASSERT(olimit->p_refcnt == 1);
		olimit->p_exclusive = 0;
	}

	/*
	 * Take a short-cut that requires limited spin locks.  If we aren't
	 * exclusive we will not be threaded and we can just bump the ref
	 * count.  If that is true and we also have only one ref then there
	 * can be no other accessors.
	 */
	if (olimit->p_exclusive == 0) {
		if (olimit->p_refcnt == 1) {
			++olimit->p_refcnt;
		} else {
			spin_lock(&olimit->p_spin);
			++olimit->p_refcnt;
			spin_unlock(&olimit->p_spin);
		}
		return(olimit);
	}

	/*
	 * Full-blown code-up.  The spinlock is dropped around the
	 * kmalloc() since it can block; the loop re-checks the state
	 * after re-acquiring the lock.
	 */
	nlimit = NULL;
	spin_lock(&olimit->p_spin);

	for (;;) {
		if (olimit->p_exclusive == 0) {
			++olimit->p_refcnt;
			rlimit = olimit;
			break;
		}
		if (nlimit) {
			plimit_copy(olimit, nlimit);
			rlimit = nlimit;
			nlimit = NULL;
			break;
		}
		spin_unlock(&olimit->p_spin);
		nlimit = kmalloc(sizeof(*nlimit), M_SUBPROC, M_WAITOK);
		spin_lock(&olimit->p_spin);
	}
	spin_unlock(&olimit->p_spin);
	/* Free the preallocated copy if the race resolved without it */
	if (nlimit)
		kfree(nlimit, M_SUBPROC);
	return(rlimit);
}

/*
 * This routine is called when a new LWP is created for a process.  We
 * must force exclusivity (=2) so p->p_limit remains stable.
 *
 * LWPs share the same process structure so this does not bump refcnt.
 */
void
plimit_lwp_fork(struct proc *p)
{
	struct plimit *olimit;

	for (;;) {
		olimit = p->p_limit;
		if (olimit->p_exclusive == 2) {
			KKASSERT(olimit->p_refcnt == 1);
			break;
		}
		if (olimit->p_refcnt == 1) {
			olimit->p_exclusive = 2;
			break;
		}
		/*
		 * Structure is shared; force a private copy, then retry
		 * the exclusivity upgrade.
		 */
		plimit_modify(p, -1, NULL);
	}
}

/*
 * This routine is called to fixup a process's p_limit structure prior
 * to it being modified.  If index >= 0 the specified modification is also
 * made.
 *
 * This routine must make the limit structure exclusive.  A later fork
 * will convert it back to copy-on-write if possible.
 *
 * We can count on p->p_limit being stable since if we had created any
 * threads it will have already been made exclusive (=2).
 *
 * MPSAFE
 */
void
plimit_modify(struct proc *p, int index, struct rlimit *rlim)
{
	struct plimit *olimit;
	struct plimit *nlimit;
	struct plimit *rlimit;

	/*
	 * Shortcut.  If we are not threaded we may be able to trivially
	 * set the structure to exclusive access without needing to acquire
	 * any spinlocks.   The p_limit structure will be stable.
	 */
	olimit = p->p_limit;
	if (p->p_nthreads == 1) {
		if (olimit->p_exclusive == 0 && olimit->p_refcnt == 1)
			olimit->p_exclusive = 1;
		if (olimit->p_exclusive) {
			if (index >= 0)
				p->p_limit->pl_rlimit[index] = *rlim;
			return;
		}
	}

	/*
	 * Full-blown code-up.  Make a copy if we aren't exclusive.  If
	 * we have only one ref we can safely convert the structure to
	 * exclusive without copying.
	 */
	nlimit = NULL;
	spin_lock(&olimit->p_spin);

	for (;;) {
		if (olimit->p_refcnt == 1) {
			if (olimit->p_exclusive == 0)
				olimit->p_exclusive = 1;
			rlimit = olimit;
			break;
		}
		KKASSERT(olimit->p_exclusive == 0);
		if (nlimit) {
			plimit_copy(olimit, nlimit);
			nlimit->p_exclusive = 1;
			p->p_limit = nlimit;
			rlimit = nlimit;
			nlimit = NULL;
			break;
		}
		/* kmalloc() can block: drop the lock, allocate, retry */
		spin_unlock(&olimit->p_spin);
		nlimit = kmalloc(sizeof(*nlimit), M_SUBPROC, M_WAITOK);
		spin_lock(&olimit->p_spin);
	}
	if (index >= 0)
		rlimit->pl_rlimit[index] = *rlim;
	spin_unlock(&olimit->p_spin);
	if (nlimit)
		kfree(nlimit, M_SUBPROC);
}

/*
 * Destroy a process's plimit structure.
 *
 * MPSAFE
 */
void
plimit_free(struct proc *p)
{
	struct plimit *limit;

	if ((limit = p->p_limit) != NULL) {
		p->p_limit = NULL;

		if (limit->p_refcnt == 1) {
			/* poison value helps catch use-after-free */
			limit->p_refcnt = -999;
			kfree(limit, M_SUBPROC);
		} else {
			spin_lock(&limit->p_spin);
			if (--limit->p_refcnt == 0) {
				spin_unlock(&limit->p_spin);
				kfree(limit, M_SUBPROC);
			} else {
				spin_unlock(&limit->p_spin);
			}
		}
	}
}

/*
 * Modify a resource limit (from system call)
 *
 * MPSAFE
 */
int
kern_setrlimit(u_int which, struct rlimit *limp)
{
	struct proc *p = curproc;
	struct plimit *limit;
	struct rlimit *alimp;
	int error;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);

	/*
	 * We will be modifying a resource, make a copy if necessary.
	 */
	plimit_modify(p, -1, NULL);
	limit = p->p_limit;
	alimp = &limit->pl_rlimit[which];

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	/*
	 * Raising either limit above the current hard limit requires
	 * privilege.
	 */
	spin_lock(&limit->p_spin);
	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max) {
		spin_unlock(&limit->p_spin);
		error = priv_check_cred(p->p_ucred, PRIV_PROC_SETRLIMIT, 0);
		if (error)
			return (error);
	} else {
		spin_unlock(&limit->p_spin);
	}
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;

	switch (which) {
	case RLIMIT_CPU:
		spin_lock(&limit->p_spin);
		/* p_cpulimit is in microseconds; guard the multiply */
		if (limp->rlim_cur > RLIM_INFINITY / (rlim_t)1000000)
			limit->p_cpulimit = RLIM_INFINITY;
		else
			limit->p_cpulimit = (rlim_t)1000000 * limp->rlim_cur;
		spin_unlock(&limit->p_spin);
		break;
	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdsiz)
			limp->rlim_cur = maxdsiz;
		if (limp->rlim_max > maxdsiz)
			limp->rlim_max = maxdsiz;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > maxssiz)
			limp->rlim_cur = maxssiz;
		if (limp->rlim_max > maxssiz)
			limp->rlim_max = maxssiz;
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		spin_lock(&limit->p_spin);
		if (limp->rlim_cur != alimp->rlim_cur) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > alimp->rlim_cur) {
				prot = VM_PROT_ALL;
				size = limp->rlim_cur - alimp->rlim_cur;
				addr = USRSTACK - limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
				addr = USRSTACK - alimp->rlim_cur;
			}
			/* drop before vm_map_protect(), which can block */
			spin_unlock(&limit->p_spin);
			addr = trunc_page(addr);
			size = round_page(size);
			vm_map_protect(&p->p_vmspace->vm_map,
				       addr, addr+size, prot, FALSE);
		} else {
			spin_unlock(&limit->p_spin);
		}
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
			limp->rlim_cur = 1;
		if (limp->rlim_max < 1)
			limp->rlim_max = 1;
		break;
	case RLIMIT_POSIXLOCKS:
		if (limp->rlim_cur > maxposixlocksperuid)
			limp->rlim_cur = maxposixlocksperuid;
		if (limp->rlim_max > maxposixlocksperuid)
			limp->rlim_max = maxposixlocksperuid;
		break;
	}
	spin_lock(&limit->p_spin);
	*alimp = *limp;
	spin_unlock(&limit->p_spin);
	return (0);
}

/*
 * The rlimit indexed by which is returned in the second argument.
 *
 * MPSAFE
 */
int
kern_getrlimit(u_int which, struct rlimit *limp)
{
	struct proc *p = curproc;
	struct plimit *limit;

	/*
	 * p is NULL when kern_getrlimit is called from a
	 * kernel thread. In this case as the calling proc
	 * isn't available we just skip the limit check.
	 *
	 * NOTE(review): *limp is left unmodified on this path even
	 * though 0 is returned -- callers presumably pre-initialize
	 * or ignore it; confirm against call sites.
	 */
	if (p == NULL)
		return 0;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);

	limit = p->p_limit;
	spin_lock(&limit->p_spin);
	*limp = p->p_rlimit[which];
	spin_unlock(&limit->p_spin);
	return (0);
}

/*
 * Determine if the cpu limit has been reached and return an operations
 * code for the caller to perform.
 *
 * MPSAFE
 */
int
plimit_testcpulimit(struct plimit *limit, u_int64_t ttime)
{
	struct rlimit *rlim;
	int mode;

	/*
	 * Initial tests without the spinlock.  This is the fast path.
	 * Any 32/64 bit glitches will fall through and retest with
	 * the spinlock.
	 */
	if (limit->p_cpulimit == RLIM_INFINITY)
		return(PLIMIT_TESTCPU_OK);
	if (ttime <= limit->p_cpulimit)
		return(PLIMIT_TESTCPU_OK);

	spin_lock(&limit->p_spin);
	if (ttime > limit->p_cpulimit) {
		rlim = &limit->pl_rlimit[RLIMIT_CPU];
		/* 5 seconds of grace past the hard limit before killing */
		if (ttime / (rlim_t)1000000 >= rlim->rlim_max + 5)
			mode = PLIMIT_TESTCPU_KILL;
		else
			mode = PLIMIT_TESTCPU_XCPU;
	} else {
		mode = PLIMIT_TESTCPU_OK;
	}
	spin_unlock(&limit->p_spin);
	return(mode);
}

/*
 * Helper routine to copy olimit to nlimit and initialize nlimit for
 * use.  nlimit's reference count will be set to 1 and its exclusive bit
 * will be cleared.
 *
 * MPSAFE
 */
static
void
plimit_copy(struct plimit *olimit, struct plimit *nlimit)
{
	*nlimit = *olimit;

	spin_init(&nlimit->p_spin, "plimitcopy");
	nlimit->p_refcnt = 1;
	nlimit->p_exclusive = 0;
}

/*
 * This routine returns the value of a resource, downscaled based on
 * the processes fork depth and chroot depth (up to 50%).  This mechanism
 * is designed to prevent run-aways from blowing up unrelated processes
 * running under the same UID.
 *
 * NOTE: Currently only applicable to RLIMIT_NPROC.
We could also limit 529 * file descriptors but we shouldn't have to as these are allocated 530 * dynamically. 531 */ 532 u_int64_t 533 plimit_getadjvalue(int i) 534 { 535 struct proc *p = curproc; 536 struct plimit *limit; 537 uint64_t v; 538 uint32_t depth; 539 540 limit = p->p_limit; 541 v = limit->pl_rlimit[i].rlim_cur; 542 if (i == RLIMIT_NPROC) { 543 /* 544 * 10% per chroot (around 1/3% per fork depth), with a 545 * maximum of 50% downscaling of the resource limit. 546 */ 547 depth = p->p_depth; 548 if (depth > 32 * 5) 549 depth = 32 * 5; 550 v -= v * depth / 320; 551 } 552 return v; 553 } 554