/*	$OpenBSD: kern_resource.c,v 1.92 2024/10/15 12:26:53 claudio Exp $	*/
/*	$NetBSD: kern_resource.c,v 1.38 1996/10/23 07:19:38 matthias Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/resourcevar.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/ktrace.h>
#include <sys/sched.h>
#include <sys/signalvar.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm.h>

/* Resource usage check interval in msec */
#define RUCHECK_INTERVAL	1000

/* SIGXCPU interval in seconds of process runtime */
#define SIGXCPU_INTERVAL	5
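
/*
 * rucheck() samples the process run time every RUCHECK_INTERVAL msec;
 * once the soft RLIMIT_CPU limit is exceeded, SIGXCPU is re-sent after
 * each further SIGXCPU_INTERVAL seconds of accumulated runtime.
 */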

struct plimit	*lim_copy(struct plimit *);
struct plimit	*lim_write_begin(void);
void		 lim_write_commit(struct plimit *);

void	tuagg_sumup(struct tusage *, const struct tusage *);

/*
 * Patchable maximum data and stack limits.
 */
rlim_t maxdmap = MAXDSIZ;
rlim_t maxsmap = MAXSSIZ;

/*
 * Serializes resource limit updates.
 * This lock has to be held together with ps_mtx when updating
 * the process' ps_limit; rlimit_lock is always taken before ps_mtx
 * (see lim_write_commit()).
 */
struct rwlock rlimit_lock = RWLOCK_INITIALIZER("rlimitlk");

/*
 * Resource controls and accounting.
 */

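/*
 * Note that nice values are stored biased: ps_nice holds the
 * user-visible nice value (PRIO_MIN..PRIO_MAX, i.e. -20..20) plus
 * NZERO, so it ranges over 0..2*NZERO.  getpriority() subtracts NZERO
 * before returning and donice() adds it back after clamping.
 */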
int
sys_getpriority(struct proc *curp, void *v, register_t *retval)
{
	struct sys_getpriority_args /* {
		syscallarg(int) which;
		syscallarg(id_t) who;
	} */ *uap = v;
	struct process *pr;
	int low = NZERO + PRIO_MAX + 1;

	switch (SCARG(uap, which)) {

	case PRIO_PROCESS:
		if (SCARG(uap, who) == 0)
			pr = curp->p_p;
		else
			pr = prfind(SCARG(uap, who));
		if (pr == NULL)
			break;
		if (pr->ps_nice < low)
			low = pr->ps_nice;
		break;

	case PRIO_PGRP: {
		struct pgrp *pg;

		if (SCARG(uap, who) == 0)
			pg = curp->p_p->ps_pgrp;
		else if ((pg = pgfind(SCARG(uap, who))) == NULL)
			break;
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist)
			if (pr->ps_nice < low)
				low = pr->ps_nice;
		break;
	}

	case PRIO_USER:
		if (SCARG(uap, who) == 0)
			SCARG(uap, who) = curp->p_ucred->cr_uid;
		LIST_FOREACH(pr, &allprocess, ps_list)
			if (pr->ps_ucred->cr_uid == SCARG(uap, who) &&
			    pr->ps_nice < low)
				low = pr->ps_nice;
		break;

	default:
		return (EINVAL);
	}
	if (low == NZERO + PRIO_MAX + 1)
		return (ESRCH);
	*retval = low - NZERO;
	return (0);
}

int
sys_setpriority(struct proc *curp, void *v, register_t *retval)
{
	struct sys_setpriority_args /* {
		syscallarg(int) which;
		syscallarg(id_t) who;
		syscallarg(int) prio;
	} */ *uap = v;
	struct process *pr;
	int found = 0, error = 0;

	switch (SCARG(uap, which)) {

	case PRIO_PROCESS:
		if (SCARG(uap, who) == 0)
			pr = curp->p_p;
		else
			pr = prfind(SCARG(uap, who));
		if (pr == NULL)
			break;
		error = donice(curp, pr, SCARG(uap, prio));
		found = 1;
		break;

	case PRIO_PGRP: {
		struct pgrp *pg;

		if (SCARG(uap, who) == 0)
			pg = curp->p_p->ps_pgrp;
		else if ((pg = pgfind(SCARG(uap, who))) == NULL)
			break;
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist) {
			error = donice(curp, pr, SCARG(uap, prio));
			found = 1;
		}
		break;
	}

	case PRIO_USER:
		if (SCARG(uap, who) == 0)
			SCARG(uap, who) = curp->p_ucred->cr_uid;
		LIST_FOREACH(pr, &allprocess, ps_list)
			if (pr->ps_ucred->cr_uid == SCARG(uap, who)) {
				error = donice(curp, pr, SCARG(uap, prio));
				found = 1;
			}
		break;

	default:
		return (EINVAL);
	}
	if (!found)
		return (ESRCH);
	return (error);
}

int
donice(struct proc *curp, struct process *chgpr, int n)
{
	struct ucred *ucred = curp->p_ucred;
	struct proc *p;

	if (ucred->cr_uid != 0 && ucred->cr_ruid != 0 &&
	    ucred->cr_uid != chgpr->ps_ucred->cr_uid &&
	    ucred->cr_ruid != chgpr->ps_ucred->cr_uid)
		return (EPERM);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	n += NZERO;
	if (n < chgpr->ps_nice && suser(curp))
		return (EACCES);
	chgpr->ps_nice = n;
	mtx_enter(&chgpr->ps_mtx);
	SCHED_LOCK();
	TAILQ_FOREACH(p, &chgpr->ps_threads, p_thr_link) {
		setpriority(p, p->p_estcpu, n);
	}
	SCHED_UNLOCK();
	mtx_leave(&chgpr->ps_mtx);
	return (0);
}

int
sys_setrlimit(struct proc *p, void *v, register_t *retval)
{
	struct sys_setrlimit_args /* {
		syscallarg(int) which;
		syscallarg(const struct rlimit *) rlp;
	} */ *uap = v;
	struct rlimit alim;
	int error;

	error = copyin((caddr_t)SCARG(uap, rlp), (caddr_t)&alim,
	    sizeof (struct rlimit));
	if (error)
		return (error);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_STRUCT))
		ktrrlimit(p, &alim);
#endif
	return (dosetrlimit(p, SCARG(uap, which), &alim));
}

int
dosetrlimit(struct proc *p, u_int which, struct rlimit *limp)
{
	struct rlimit *alimp;
	struct plimit *limit;
	rlim_t maxlim;
	int error;

	if (which >= RLIM_NLIMITS || limp->rlim_cur > limp->rlim_max)
		return (EINVAL);

	rw_enter_write(&rlimit_lock);

	alimp = &p->p_p->ps_limit->pl_rlimit[which];
	if (limp->rlim_max > alimp->rlim_max) {
		if ((error = suser(p)) != 0) {
			rw_exit_write(&rlimit_lock);
			return (error);
		}
	}

	/* Get exclusive write access to the limit structure. */
	limit = lim_write_begin();
	alimp = &limit->pl_rlimit[which];

	switch (which) {
	case RLIMIT_DATA:
		maxlim = maxdmap;
		break;
	case RLIMIT_STACK:
		maxlim = maxsmap;
		break;
	case RLIMIT_NOFILE:
		maxlim = atomic_load_int(&maxfiles);
		break;
	case RLIMIT_NPROC:
		maxlim = atomic_load_int(&maxprocess);
		break;
	default:
		maxlim = RLIM_INFINITY;
		break;
	}

	if (limp->rlim_max > maxlim)
		limp->rlim_max = maxlim;
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;

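	/*
	 * When a finite CPU limit is set for the first time, arm the
	 * periodic rucheck() timeout that enforces it.
	 */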
	if (which == RLIMIT_CPU && limp->rlim_cur != RLIM_INFINITY &&
	    alimp->rlim_cur == RLIM_INFINITY)
		timeout_add_msec(&p->p_p->ps_rucheck_to, RUCHECK_INTERVAL);

	if (which == RLIMIT_STACK) {
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If the stack limit is
		 * going up, make more accessible; if going down, make
		 * inaccessible.
		 */
		if (limp->rlim_cur != alimp->rlim_cur) {
			vaddr_t addr;
			vsize_t size;
			vm_prot_t prot;
			struct vmspace *vm = p->p_vmspace;

			if (limp->rlim_cur > alimp->rlim_cur) {
				prot = PROT_READ | PROT_WRITE;
				size = limp->rlim_cur - alimp->rlim_cur;
#ifdef MACHINE_STACK_GROWS_UP
				addr = (vaddr_t)vm->vm_maxsaddr +
				    alimp->rlim_cur;
#else
				addr = (vaddr_t)vm->vm_minsaddr -
				    limp->rlim_cur;
#endif
			} else {
				prot = PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
#ifdef MACHINE_STACK_GROWS_UP
				addr = (vaddr_t)vm->vm_maxsaddr +
				    limp->rlim_cur;
#else
				addr = (vaddr_t)vm->vm_minsaddr -
				    alimp->rlim_cur;
#endif
			}
			addr = trunc_page(addr);
			size = round_page(size);
			KERNEL_LOCK();
			(void) uvm_map_protect(&vm->vm_map, addr,
			    addr + size, prot, UVM_ET_STACK, FALSE, FALSE);
			KERNEL_UNLOCK();
		}
	}

	*alimp = *limp;

	lim_write_commit(limit);
	rw_exit_write(&rlimit_lock);

	return (0);
}

int
sys_getrlimit(struct proc *p, void *v, register_t *retval)
{
	struct sys_getrlimit_args /* {
		syscallarg(int) which;
		syscallarg(struct rlimit *) rlp;
	} */ *uap = v;
	struct plimit *limit;
	struct rlimit alimp;
	int error;

	if (SCARG(uap, which) < 0 || SCARG(uap, which) >= RLIM_NLIMITS)
		return (EINVAL);
	limit = lim_read_enter();
	alimp = limit->pl_rlimit[SCARG(uap, which)];
	lim_read_leave(limit);
	error = copyout(&alimp, SCARG(uap, rlp), sizeof(struct rlimit));
#ifdef KTRACE
	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
		ktrrlimit(p, &alimp);
#endif
	return (error);
}

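/*
 * struct tusage counters are protected by a seqlock-style generation
 * number: tu_enter() bumps tu_gen to an odd value before an update and
 * tu_leave() makes it even again afterwards.  Readers spin while tu_gen
 * is odd and retry the copy if the generation changed underneath them.
 */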
/* Add the counts from *from to *tu, ensuring a consistent read of *from. */
void
tuagg_sumup(struct tusage *tu, const struct tusage *from)
{
	struct tusage tmp;
	uint64_t enter, leave;

	enter = from->tu_gen;
	for (;;) {
		/* the generation number is odd during an update */
		while (enter & 1) {
			CPU_BUSY_CYCLE();
			enter = from->tu_gen;
		}

		membar_consumer();
		tmp = *from;
		membar_consumer();
		leave = from->tu_gen;

		if (enter == leave)
			break;
		enter = leave;
	}

	tu->tu_uticks += tmp.tu_uticks;
	tu->tu_sticks += tmp.tu_sticks;
	tu->tu_iticks += tmp.tu_iticks;
	timespecadd(&tu->tu_runtime, &tmp.tu_runtime, &tu->tu_runtime);
}

void
tuagg_get_proc(struct tusage *tu, struct proc *p)
{
	memset(tu, 0, sizeof(*tu));
	tuagg_sumup(tu, &p->p_tu);
}

void
tuagg_get_process(struct tusage *tu, struct process *pr)
{
	struct proc *q;

	memset(tu, 0, sizeof(*tu));

	mtx_enter(&pr->ps_mtx);
	tuagg_sumup(tu, &pr->ps_tu);
	/* add on all living threads */
	TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link)
		tuagg_sumup(tu, &q->p_tu);
	mtx_leave(&pr->ps_mtx);
}

/*
 * Fold proc p's usage into the process ps_tu totals, then reset
 * p's own times.
 * This requires that p is either curproc or SDEAD and that the
 * IPL is higher than IPL_STATCLOCK.  ps_mtx uses IPL_HIGH so
 * this should always be the case.
 */
void
tuagg_add_process(struct process *pr, struct proc *p)
{
	MUTEX_ASSERT_LOCKED(&pr->ps_mtx);
	KASSERT(curproc == p || p->p_stat == SDEAD);

	tu_enter(&pr->ps_tu);
	tuagg_sumup(&pr->ps_tu, &p->p_tu);
	tu_leave(&pr->ps_tu);

	/* Now reset CPU time usage for the thread. */
	timespecclear(&p->p_tu.tu_runtime);
	p->p_tu.tu_uticks = p->p_tu.tu_sticks = p->p_tu.tu_iticks = 0;
}

void
tuagg_add_runtime(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *p = curproc;
	struct timespec ts, delta;

	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	nanouptime(&ts);
	if (timespeccmp(&ts, &spc->spc_runtime, <)) {
#if 0
		printf("uptime is not monotonic! "
		    "ts=%lld.%09lu, runtime=%lld.%09lu\n",
		    (long long)ts.tv_sec, ts.tv_nsec,
		    (long long)spc->spc_runtime.tv_sec,
		    spc->spc_runtime.tv_nsec);
#endif
		timespecclear(&delta);
	} else {
		timespecsub(&ts, &spc->spc_runtime, &delta);
	}
	/* update spc_runtime */
	spc->spc_runtime = ts;
	tu_enter(&p->p_tu);
	timespecadd(&p->p_tu.tu_runtime, &delta, &p->p_tu.tu_runtime);
	tu_leave(&p->p_tu);
}

/*
 * Transform the running time and tick information in a struct tusage
 * into user, system, and interrupt time usage.
 */
void
calctsru(struct tusage *tup, struct timespec *up, struct timespec *sp,
    struct timespec *ip)
{
	u_quad_t st, ut, it;

	st = tup->tu_sticks;
	ut = tup->tu_uticks;
	it = tup->tu_iticks;

	if (st + ut + it == 0) {
		timespecclear(up);
		timespecclear(sp);
		if (ip != NULL)
			timespecclear(ip);
		return;
	}

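	/*
	 * Scale statclock ticks to nanoseconds and split the result
	 * into a timespec.  E.g. with stathz = 100, 250 system ticks
	 * become 250 * 10^9 / 100 = 2500000000 ns = { 2, 500000000 }.
	 */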
	st = st * 1000000000 / stathz;
	sp->tv_sec = st / 1000000000;
	sp->tv_nsec = st % 1000000000;
	ut = ut * 1000000000 / stathz;
	up->tv_sec = ut / 1000000000;
	up->tv_nsec = ut % 1000000000;
	if (ip != NULL) {
		it = it * 1000000000 / stathz;
		ip->tv_sec = it / 1000000000;
		ip->tv_nsec = it % 1000000000;
	}
}

void
calcru(struct tusage *tup, struct timeval *up, struct timeval *sp,
    struct timeval *ip)
{
	struct timespec u, s, i;

	calctsru(tup, &u, &s, ip != NULL ? &i : NULL);
	TIMESPEC_TO_TIMEVAL(up, &u);
	TIMESPEC_TO_TIMEVAL(sp, &s);
	if (ip != NULL)
		TIMESPEC_TO_TIMEVAL(ip, &i);
}

int
sys_getrusage(struct proc *p, void *v, register_t *retval)
{
	struct sys_getrusage_args /* {
		syscallarg(int) who;
		syscallarg(struct rusage *) rusage;
	} */ *uap = v;
	struct rusage ru;
	int error;

	error = dogetrusage(p, SCARG(uap, who), &ru);
	if (error == 0) {
		error = copyout(&ru, SCARG(uap, rusage), sizeof(ru));
#ifdef KTRACE
		if (error == 0 && KTRPOINT(p, KTR_STRUCT))
			ktrrusage(p, &ru);
#endif
	}
	return (error);
}

int
dogetrusage(struct proc *p, int who, struct rusage *rup)
{
	struct process *pr = p->p_p;
	struct proc *q;
	struct tusage tu = { 0 };

	KERNEL_ASSERT_LOCKED();

	switch (who) {
	case RUSAGE_SELF:
		/* start with the sum of dead threads, if any */
		if (pr->ps_ru != NULL)
			*rup = *pr->ps_ru;
		else
			memset(rup, 0, sizeof(*rup));
		tuagg_sumup(&tu, &pr->ps_tu);

		/* add on all living threads */
		TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
			ruadd(rup, &q->p_ru);
			tuagg_sumup(&tu, &q->p_tu);
		}

		calcru(&tu, &rup->ru_utime, &rup->ru_stime, NULL);
		break;

	case RUSAGE_THREAD:
		*rup = p->p_ru;
		calcru(&p->p_tu, &rup->ru_utime, &rup->ru_stime, NULL);
		break;

	case RUSAGE_CHILDREN:
		*rup = pr->ps_cru;
		break;

	default:
		return (EINVAL);
	}
	return (0);
}

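/*
 * From ru_first to ru_last, struct rusage is a contiguous run of longs
 * (see the ru_first/ru_last macros in <sys/resource.h>), so all fields
 * past the times and maxrss can be summed with a single pointer walk.
 */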
void
ruadd(struct rusage *ru, const struct rusage *ru2)
{
	long *ip;
	const long *ip2;
	int i;

	timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
	timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Check if the process exceeds its cpu resource allocation.
 * If over max, kill it.
 */
void
rucheck(void *arg)
{
	struct rlimit rlim;
	struct tusage tu = { 0 };
	struct process *pr = arg;
	struct proc *q;
	time_t runtime;

	KERNEL_ASSERT_LOCKED();

	mtx_enter(&pr->ps_mtx);
	rlim = pr->ps_limit->pl_rlimit[RLIMIT_CPU];
	tuagg_sumup(&tu, &pr->ps_tu);
	TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link)
		tuagg_sumup(&tu, &q->p_tu);
	mtx_leave(&pr->ps_mtx);

	runtime = tu.tu_runtime.tv_sec;

	if ((rlim_t)runtime >= rlim.rlim_cur) {
		if ((rlim_t)runtime >= rlim.rlim_max) {
			prsignal(pr, SIGKILL);
		} else if (runtime >= pr->ps_nextxcpu) {
			prsignal(pr, SIGXCPU);
			pr->ps_nextxcpu = runtime + SIGXCPU_INTERVAL;
		}
	}

	timeout_add_msec(&pr->ps_rucheck_to, RUCHECK_INTERVAL);
}

struct pool plimit_pool;

void
lim_startup(struct plimit *limit0)
{
	rlim_t lim;
	int i;

	pool_init(&plimit_pool, sizeof(struct plimit), 0, IPL_MPFLOOR,
	    PR_WAITOK, "plimitpl", NULL);

	for (i = 0; i < nitems(limit0->pl_rlimit); i++)
		limit0->pl_rlimit[i].rlim_cur =
		    limit0->pl_rlimit[i].rlim_max = RLIM_INFINITY;
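
	/*
	 * Default open-file limits: a soft limit of NOFILE descriptors;
	 * the hard limit is capped at NOFILE_MAX and leaves NOFILE
	 * descriptors of headroom below the global maxfiles, but never
	 * drops below NOFILE itself.
	 */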
	limit0->pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
	limit0->pl_rlimit[RLIMIT_NOFILE].rlim_max = MIN(NOFILE_MAX,
	    (maxfiles - NOFILE > NOFILE) ? maxfiles - NOFILE : NOFILE);
	limit0->pl_rlimit[RLIMIT_NPROC].rlim_cur = MAXUPRC;
	lim = ptoa(uvmexp.free);
	limit0->pl_rlimit[RLIMIT_RSS].rlim_max = lim;
	lim = ptoa(64*1024);		/* Default to very low */
	limit0->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
	limit0->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
	refcnt_init(&limit0->pl_refcnt);
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork,
 * and copy when a limit is changed.
 */
struct plimit *
lim_copy(struct plimit *lim)
{
	struct plimit *newlim;

	newlim = pool_get(&plimit_pool, PR_WAITOK);
	memcpy(newlim->pl_rlimit, lim->pl_rlimit,
	    sizeof(struct rlimit) * RLIM_NLIMITS);
	refcnt_init(&newlim->pl_refcnt);
	return (newlim);
}

void
lim_free(struct plimit *lim)
{
	if (refcnt_rele(&lim->pl_refcnt) == 0)
		return;
	pool_put(&plimit_pool, lim);
}

void
lim_fork(struct process *parent, struct process *child)
{
	struct plimit *limit;

	mtx_enter(&parent->ps_mtx);
	limit = parent->ps_limit;
	refcnt_take(&limit->pl_refcnt);
	mtx_leave(&parent->ps_mtx);

	child->ps_limit = limit;

	if (limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY)
		timeout_add_msec(&child->ps_rucheck_to, RUCHECK_INTERVAL);
}

/*
 * Return an exclusive write reference to the process' resource limit
 * structure.  The caller has to release the structure by calling
 * lim_write_commit().
 *
 * This invalidates any plimit read reference held by the calling thread.
 */
struct plimit *
lim_write_begin(void)
{
	struct plimit *limit;
	struct proc *p = curproc;

	rw_assert_wrlock(&rlimit_lock);

	if (p->p_limit != NULL)
		lim_free(p->p_limit);
	p->p_limit = NULL;

	/*
	 * It is safe to access ps_limit here without holding ps_mtx
	 * because rlimit_lock excludes other writers.
	 */

	limit = p->p_p->ps_limit;
	if (P_HASSIBLING(p) || refcnt_shared(&limit->pl_refcnt))
		limit = lim_copy(limit);

	return (limit);
}

/*
 * Finish exclusive write access to the plimit structure.
 * This makes the structure visible to other threads in the process.
 */
void
lim_write_commit(struct plimit *limit)
{
	struct plimit *olimit;
	struct proc *p = curproc;

	rw_assert_wrlock(&rlimit_lock);

	if (limit != p->p_p->ps_limit) {
		mtx_enter(&p->p_p->ps_mtx);
		olimit = p->p_p->ps_limit;
		p->p_p->ps_limit = limit;
		mtx_leave(&p->p_p->ps_mtx);

		lim_free(olimit);
	}
}

/*
 * Begin read access to the process' resource limit structure.
 * The access has to be finished by calling lim_read_leave().
 *
 * Sections denoted by lim_read_enter() and lim_read_leave() cannot nest.
 */
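/*
 * Each thread caches a counted reference in p->p_limit so the common
 * case avoids taking ps_mtx; the cache is refreshed whenever it no
 * longer matches the process' current ps_limit.
 */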
struct plimit *
lim_read_enter(void)
{
	struct plimit *limit;
	struct proc *p = curproc;
	struct process *pr = p->p_p;

	/*
	 * This thread might not observe the latest value of ps_limit
	 * if another thread updated the limits very recently on another CPU.
	 * However, the anomaly should disappear quickly, especially if
	 * there is any synchronization activity between the threads (or
	 * the CPUs).
	 */

	limit = p->p_limit;
	if (limit != pr->ps_limit) {
		mtx_enter(&pr->ps_mtx);
		limit = pr->ps_limit;
		refcnt_take(&limit->pl_refcnt);
		mtx_leave(&pr->ps_mtx);
		if (p->p_limit != NULL)
			lim_free(p->p_limit);
		p->p_limit = limit;
	}
	KASSERT(limit != NULL);
	return (limit);
}

/*
 * Get the value of the resource limit in the given process.
 */
rlim_t
lim_cur_proc(struct proc *p, int which)
{
	struct process *pr = p->p_p;
	rlim_t val;

	KASSERT(which >= 0 && which < RLIM_NLIMITS);

	mtx_enter(&pr->ps_mtx);
	val = pr->ps_limit->pl_rlimit[which].rlim_cur;
	mtx_leave(&pr->ps_mtx);
	return (val);
}