1 /* $OpenBSD: kern_resource.c,v 1.93 2024/11/10 06:45:36 jsg Exp $ */
2 /* $NetBSD: kern_resource.c,v 1.38 1996/10/23 07:19:38 matthias Exp $ */
3
4 /*-
5 * Copyright (c) 1982, 1986, 1991, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94
38 */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/file.h>
44 #include <sys/resourcevar.h>
45 #include <sys/pool.h>
46 #include <sys/proc.h>
47 #include <sys/ktrace.h>
48 #include <sys/sched.h>
49 #include <sys/signalvar.h>
50
51 #include <sys/mount.h>
52 #include <sys/syscallargs.h>
53
54 #include <uvm/uvm.h>
55
56 /* Resource usage check interval in msec */
57 #define RUCHECK_INTERVAL 1000
58
59 /* SIGXCPU interval in seconds of process runtime */
60 #define SIGXCPU_INTERVAL 5
61
62 struct plimit *lim_copy(struct plimit *);
63 struct plimit *lim_write_begin(void);
64 void lim_write_commit(struct plimit *);
65
66 void tuagg_sumup(struct tusage *, const struct tusage *);
67
68 /*
69 * Patchable maximum data and stack limits.
70 */
71 rlim_t maxdmap = MAXDSIZ;
72 rlim_t maxsmap = MAXSSIZ;
73
74 /*
75 * Serializes resource limit updates.
76 * This lock has to be held together with ps_mtx when updating
77 * the process' ps_limit.
78 */
79 struct rwlock rlimit_lock = RWLOCK_INITIALIZER("rlimitlk");
80
81 /*
82 * Resource controls and accounting.
83 */
84
85 int
sys_getpriority(struct proc * curp,void * v,register_t * retval)86 sys_getpriority(struct proc *curp, void *v, register_t *retval)
87 {
88 struct sys_getpriority_args /* {
89 syscallarg(int) which;
90 syscallarg(id_t) who;
91 } */ *uap = v;
92 struct process *pr;
93 int low = NZERO + PRIO_MAX + 1;
94
95 switch (SCARG(uap, which)) {
96
97 case PRIO_PROCESS:
98 if (SCARG(uap, who) == 0)
99 pr = curp->p_p;
100 else
101 pr = prfind(SCARG(uap, who));
102 if (pr == NULL)
103 break;
104 if (pr->ps_nice < low)
105 low = pr->ps_nice;
106 break;
107
108 case PRIO_PGRP: {
109 struct pgrp *pg;
110
111 if (SCARG(uap, who) == 0)
112 pg = curp->p_p->ps_pgrp;
113 else if ((pg = pgfind(SCARG(uap, who))) == NULL)
114 break;
115 LIST_FOREACH(pr, &pg->pg_members, ps_pglist)
116 if (pr->ps_nice < low)
117 low = pr->ps_nice;
118 break;
119 }
120
121 case PRIO_USER:
122 if (SCARG(uap, who) == 0)
123 SCARG(uap, who) = curp->p_ucred->cr_uid;
124 LIST_FOREACH(pr, &allprocess, ps_list)
125 if (pr->ps_ucred->cr_uid == SCARG(uap, who) &&
126 pr->ps_nice < low)
127 low = pr->ps_nice;
128 break;
129
130 default:
131 return (EINVAL);
132 }
133 if (low == NZERO + PRIO_MAX + 1)
134 return (ESRCH);
135 *retval = low - NZERO;
136 return (0);
137 }
138
139 int
sys_setpriority(struct proc * curp,void * v,register_t * retval)140 sys_setpriority(struct proc *curp, void *v, register_t *retval)
141 {
142 struct sys_setpriority_args /* {
143 syscallarg(int) which;
144 syscallarg(id_t) who;
145 syscallarg(int) prio;
146 } */ *uap = v;
147 struct process *pr;
148 int found = 0, error = 0;
149
150 switch (SCARG(uap, which)) {
151
152 case PRIO_PROCESS:
153 if (SCARG(uap, who) == 0)
154 pr = curp->p_p;
155 else
156 pr = prfind(SCARG(uap, who));
157 if (pr == NULL)
158 break;
159 error = donice(curp, pr, SCARG(uap, prio));
160 found = 1;
161 break;
162
163 case PRIO_PGRP: {
164 struct pgrp *pg;
165
166 if (SCARG(uap, who) == 0)
167 pg = curp->p_p->ps_pgrp;
168 else if ((pg = pgfind(SCARG(uap, who))) == NULL)
169 break;
170 LIST_FOREACH(pr, &pg->pg_members, ps_pglist) {
171 error = donice(curp, pr, SCARG(uap, prio));
172 found = 1;
173 }
174 break;
175 }
176
177 case PRIO_USER:
178 if (SCARG(uap, who) == 0)
179 SCARG(uap, who) = curp->p_ucred->cr_uid;
180 LIST_FOREACH(pr, &allprocess, ps_list)
181 if (pr->ps_ucred->cr_uid == SCARG(uap, who)) {
182 error = donice(curp, pr, SCARG(uap, prio));
183 found = 1;
184 }
185 break;
186
187 default:
188 return (EINVAL);
189 }
190 if (!found)
191 return (ESRCH);
192 return (error);
193 }
194
195 int
donice(struct proc * curp,struct process * chgpr,int n)196 donice(struct proc *curp, struct process *chgpr, int n)
197 {
198 struct ucred *ucred = curp->p_ucred;
199 struct proc *p;
200
201 if (ucred->cr_uid != 0 && ucred->cr_ruid != 0 &&
202 ucred->cr_uid != chgpr->ps_ucred->cr_uid &&
203 ucred->cr_ruid != chgpr->ps_ucred->cr_uid)
204 return (EPERM);
205 if (n > PRIO_MAX)
206 n = PRIO_MAX;
207 if (n < PRIO_MIN)
208 n = PRIO_MIN;
209 n += NZERO;
210 if (n < chgpr->ps_nice && suser(curp))
211 return (EACCES);
212 chgpr->ps_nice = n;
213 mtx_enter(&chgpr->ps_mtx);
214 SCHED_LOCK();
215 TAILQ_FOREACH(p, &chgpr->ps_threads, p_thr_link) {
216 setpriority(p, p->p_estcpu, n);
217 }
218 SCHED_UNLOCK();
219 mtx_leave(&chgpr->ps_mtx);
220 return (0);
221 }
222
223 int
sys_setrlimit(struct proc * p,void * v,register_t * retval)224 sys_setrlimit(struct proc *p, void *v, register_t *retval)
225 {
226 struct sys_setrlimit_args /* {
227 syscallarg(int) which;
228 syscallarg(const struct rlimit *) rlp;
229 } */ *uap = v;
230 struct rlimit alim;
231 int error;
232
233 error = copyin((caddr_t)SCARG(uap, rlp), (caddr_t)&alim,
234 sizeof (struct rlimit));
235 if (error)
236 return (error);
237 #ifdef KTRACE
238 if (KTRPOINT(p, KTR_STRUCT))
239 ktrrlimit(p, &alim);
240 #endif
241 return (dosetrlimit(p, SCARG(uap, which), &alim));
242 }
243
244 int
dosetrlimit(struct proc * p,u_int which,struct rlimit * limp)245 dosetrlimit(struct proc *p, u_int which, struct rlimit *limp)
246 {
247 struct rlimit *alimp;
248 struct plimit *limit;
249 rlim_t maxlim;
250 int error;
251
252 if (which >= RLIM_NLIMITS || limp->rlim_cur > limp->rlim_max)
253 return (EINVAL);
254
255 rw_enter_write(&rlimit_lock);
256
257 alimp = &p->p_p->ps_limit->pl_rlimit[which];
258 if (limp->rlim_max > alimp->rlim_max) {
259 if ((error = suser(p)) != 0) {
260 rw_exit_write(&rlimit_lock);
261 return (error);
262 }
263 }
264
265 /* Get exclusive write access to the limit structure. */
266 limit = lim_write_begin();
267 alimp = &limit->pl_rlimit[which];
268
269 switch (which) {
270 case RLIMIT_DATA:
271 maxlim = maxdmap;
272 break;
273 case RLIMIT_STACK:
274 maxlim = maxsmap;
275 break;
276 case RLIMIT_NOFILE:
277 maxlim = atomic_load_int(&maxfiles);
278 break;
279 case RLIMIT_NPROC:
280 maxlim = atomic_load_int(&maxprocess);
281 break;
282 default:
283 maxlim = RLIM_INFINITY;
284 break;
285 }
286
287 if (limp->rlim_max > maxlim)
288 limp->rlim_max = maxlim;
289 if (limp->rlim_cur > limp->rlim_max)
290 limp->rlim_cur = limp->rlim_max;
291
292 if (which == RLIMIT_CPU && limp->rlim_cur != RLIM_INFINITY &&
293 alimp->rlim_cur == RLIM_INFINITY)
294 timeout_add_msec(&p->p_p->ps_rucheck_to, RUCHECK_INTERVAL);
295
296 if (which == RLIMIT_STACK) {
297 /*
298 * Stack is allocated to the max at exec time with only
299 * "rlim_cur" bytes accessible. If stack limit is going
300 * up make more accessible, if going down make inaccessible.
301 */
302 if (limp->rlim_cur != alimp->rlim_cur) {
303 vaddr_t addr;
304 vsize_t size;
305 vm_prot_t prot;
306 struct vmspace *vm = p->p_vmspace;
307
308 if (limp->rlim_cur > alimp->rlim_cur) {
309 prot = PROT_READ | PROT_WRITE;
310 size = limp->rlim_cur - alimp->rlim_cur;
311 #ifdef MACHINE_STACK_GROWS_UP
312 addr = (vaddr_t)vm->vm_maxsaddr +
313 alimp->rlim_cur;
314 #else
315 addr = (vaddr_t)vm->vm_minsaddr -
316 limp->rlim_cur;
317 #endif
318 } else {
319 prot = PROT_NONE;
320 size = alimp->rlim_cur - limp->rlim_cur;
321 #ifdef MACHINE_STACK_GROWS_UP
322 addr = (vaddr_t)vm->vm_maxsaddr +
323 limp->rlim_cur;
324 #else
325 addr = (vaddr_t)vm->vm_minsaddr -
326 alimp->rlim_cur;
327 #endif
328 }
329 addr = trunc_page(addr);
330 size = round_page(size);
331 KERNEL_LOCK();
332 (void) uvm_map_protect(&vm->vm_map, addr,
333 addr+size, prot, UVM_ET_STACK, FALSE, FALSE);
334 KERNEL_UNLOCK();
335 }
336 }
337
338 *alimp = *limp;
339
340 lim_write_commit(limit);
341 rw_exit_write(&rlimit_lock);
342
343 return (0);
344 }
345
346 int
sys_getrlimit(struct proc * p,void * v,register_t * retval)347 sys_getrlimit(struct proc *p, void *v, register_t *retval)
348 {
349 struct sys_getrlimit_args /* {
350 syscallarg(int) which;
351 syscallarg(struct rlimit *) rlp;
352 } */ *uap = v;
353 struct plimit *limit;
354 struct rlimit alimp;
355 int error;
356
357 if (SCARG(uap, which) < 0 || SCARG(uap, which) >= RLIM_NLIMITS)
358 return (EINVAL);
359 limit = lim_read_enter();
360 alimp = limit->pl_rlimit[SCARG(uap, which)];
361 lim_read_leave(limit);
362 error = copyout(&alimp, SCARG(uap, rlp), sizeof(struct rlimit));
363 #ifdef KTRACE
364 if (error == 0 && KTRPOINT(p, KTR_STRUCT))
365 ktrrlimit(p, &alimp);
366 #endif
367 return (error);
368 }
369
370 /* Add the counts from *from to *tu, ensuring a consistent read of *from. */
371 void
tuagg_sumup(struct tusage * tu,const struct tusage * from)372 tuagg_sumup(struct tusage *tu, const struct tusage *from)
373 {
374 struct tusage tmp;
375 uint64_t enter, leave;
376
377 enter = from->tu_gen;
378 for (;;) {
379 /* the generation number is odd during an update */
380 while (enter & 1) {
381 CPU_BUSY_CYCLE();
382 enter = from->tu_gen;
383 }
384
385 membar_consumer();
386 tmp = *from;
387 membar_consumer();
388 leave = from->tu_gen;
389
390 if (enter == leave)
391 break;
392 enter = leave;
393 }
394
395 tu->tu_uticks += tmp.tu_uticks;
396 tu->tu_sticks += tmp.tu_sticks;
397 tu->tu_iticks += tmp.tu_iticks;
398 timespecadd(&tu->tu_runtime, &tmp.tu_runtime, &tu->tu_runtime);
399 }
400
401 void
tuagg_get_proc(struct tusage * tu,struct proc * p)402 tuagg_get_proc(struct tusage *tu, struct proc *p)
403 {
404 memset(tu, 0, sizeof(*tu));
405 tuagg_sumup(tu, &p->p_tu);
406 }
407
408 void
tuagg_get_process(struct tusage * tu,struct process * pr)409 tuagg_get_process(struct tusage *tu, struct process *pr)
410 {
411 struct proc *q;
412
413 memset(tu, 0, sizeof(*tu));
414
415 mtx_enter(&pr->ps_mtx);
416 tuagg_sumup(tu, &pr->ps_tu);
417 /* add on all living threads */
418 TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link)
419 tuagg_sumup(tu, &q->p_tu);
420 mtx_leave(&pr->ps_mtx);
421 }
422
423 /*
424 * Update the process ps_tu usage with the values from proc p while
425 * doing so the times for proc p are reset.
426 * This requires that p is either curproc or SDEAD and that the
427 * IPL is higher than IPL_STATCLOCK. ps_mtx uses IPL_HIGH so
428 * this should always be the case.
429 */
430 void
tuagg_add_process(struct process * pr,struct proc * p)431 tuagg_add_process(struct process *pr, struct proc *p)
432 {
433 MUTEX_ASSERT_LOCKED(&pr->ps_mtx);
434 KASSERT(curproc == p || p->p_stat == SDEAD);
435
436 tu_enter(&pr->ps_tu);
437 tuagg_sumup(&pr->ps_tu, &p->p_tu);
438 tu_leave(&pr->ps_tu);
439
440 /* Now reset CPU time usage for the thread. */
441 timespecclear(&p->p_tu.tu_runtime);
442 p->p_tu.tu_uticks = p->p_tu.tu_sticks = p->p_tu.tu_iticks = 0;
443 }
444
445 void
tuagg_add_runtime(void)446 tuagg_add_runtime(void)
447 {
448 struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
449 struct proc *p = curproc;
450 struct timespec ts, delta;
451
452 /*
453 * Compute the amount of time during which the current
454 * process was running, and add that to its total so far.
455 */
456 nanouptime(&ts);
457 if (timespeccmp(&ts, &spc->spc_runtime, <)) {
458 #if 0
459 printf("uptime is not monotonic! "
460 "ts=%lld.%09lu, runtime=%lld.%09lu\n",
461 (long long)tv.tv_sec, tv.tv_nsec,
462 (long long)spc->spc_runtime.tv_sec,
463 spc->spc_runtime.tv_nsec);
464 #endif
465 timespecclear(&delta);
466 } else {
467 timespecsub(&ts, &spc->spc_runtime, &delta);
468 }
469 /* update spc_runtime */
470 spc->spc_runtime = ts;
471 tu_enter(&p->p_tu);
472 timespecadd(&p->p_tu.tu_runtime, &delta, &p->p_tu.tu_runtime);
473 tu_leave(&p->p_tu);
474 }
475
476 /*
477 * Transform the running time and tick information in a struct tusage
478 * into user, system, and interrupt time usage.
479 */
480 void
calctsru(struct tusage * tup,struct timespec * up,struct timespec * sp,struct timespec * ip)481 calctsru(struct tusage *tup, struct timespec *up, struct timespec *sp,
482 struct timespec *ip)
483 {
484 u_quad_t st, ut, it;
485
486 st = tup->tu_sticks;
487 ut = tup->tu_uticks;
488 it = tup->tu_iticks;
489
490 if (st + ut + it == 0) {
491 timespecclear(up);
492 timespecclear(sp);
493 if (ip != NULL)
494 timespecclear(ip);
495 return;
496 }
497
498 st = st * 1000000000 / stathz;
499 sp->tv_sec = st / 1000000000;
500 sp->tv_nsec = st % 1000000000;
501 ut = ut * 1000000000 / stathz;
502 up->tv_sec = ut / 1000000000;
503 up->tv_nsec = ut % 1000000000;
504 if (ip != NULL) {
505 it = it * 1000000000 / stathz;
506 ip->tv_sec = it / 1000000000;
507 ip->tv_nsec = it % 1000000000;
508 }
509 }
510
511 void
calcru(struct tusage * tup,struct timeval * up,struct timeval * sp,struct timeval * ip)512 calcru(struct tusage *tup, struct timeval *up, struct timeval *sp,
513 struct timeval *ip)
514 {
515 struct timespec u, s, i;
516
517 calctsru(tup, &u, &s, ip != NULL ? &i : NULL);
518 TIMESPEC_TO_TIMEVAL(up, &u);
519 TIMESPEC_TO_TIMEVAL(sp, &s);
520 if (ip != NULL)
521 TIMESPEC_TO_TIMEVAL(ip, &i);
522 }
523
524 int
sys_getrusage(struct proc * p,void * v,register_t * retval)525 sys_getrusage(struct proc *p, void *v, register_t *retval)
526 {
527 struct sys_getrusage_args /* {
528 syscallarg(int) who;
529 syscallarg(struct rusage *) rusage;
530 } */ *uap = v;
531 struct rusage ru;
532 int error;
533
534 error = dogetrusage(p, SCARG(uap, who), &ru);
535 if (error == 0) {
536 error = copyout(&ru, SCARG(uap, rusage), sizeof(ru));
537 #ifdef KTRACE
538 if (error == 0 && KTRPOINT(p, KTR_STRUCT))
539 ktrrusage(p, &ru);
540 #endif
541 }
542 return (error);
543 }
544
545 int
dogetrusage(struct proc * p,int who,struct rusage * rup)546 dogetrusage(struct proc *p, int who, struct rusage *rup)
547 {
548 struct process *pr = p->p_p;
549 struct proc *q;
550 struct tusage tu = { 0 };
551
552 KERNEL_ASSERT_LOCKED();
553
554 switch (who) {
555 case RUSAGE_SELF:
556 /* start with the sum of dead threads, if any */
557 if (pr->ps_ru != NULL)
558 *rup = *pr->ps_ru;
559 else
560 memset(rup, 0, sizeof(*rup));
561 tuagg_sumup(&tu, &pr->ps_tu);
562
563 /* add on all living threads */
564 TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
565 ruadd(rup, &q->p_ru);
566 tuagg_sumup(&tu, &q->p_tu);
567 }
568
569 calcru(&tu, &rup->ru_utime, &rup->ru_stime, NULL);
570 break;
571
572 case RUSAGE_THREAD:
573 *rup = p->p_ru;
574 calcru(&p->p_tu, &rup->ru_utime, &rup->ru_stime, NULL);
575 break;
576
577 case RUSAGE_CHILDREN:
578 *rup = pr->ps_cru;
579 break;
580
581 default:
582 return (EINVAL);
583 }
584 return (0);
585 }
586
587 void
ruadd(struct rusage * ru,const struct rusage * ru2)588 ruadd(struct rusage *ru, const struct rusage *ru2)
589 {
590 long *ip;
591 const long *ip2;
592 int i;
593
594 timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
595 timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
596 if (ru->ru_maxrss < ru2->ru_maxrss)
597 ru->ru_maxrss = ru2->ru_maxrss;
598 ip = &ru->ru_first; ip2 = &ru2->ru_first;
599 for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
600 *ip++ += *ip2++;
601 }
602
603 /*
604 * Check if the process exceeds its cpu resource allocation.
605 * If over max, kill it.
606 */
607 void
rucheck(void * arg)608 rucheck(void *arg)
609 {
610 struct rlimit rlim;
611 struct tusage tu = { 0 };
612 struct process *pr = arg;
613 struct proc *q;
614 time_t runtime;
615
616 KERNEL_ASSERT_LOCKED();
617
618 mtx_enter(&pr->ps_mtx);
619 rlim = pr->ps_limit->pl_rlimit[RLIMIT_CPU];
620 tuagg_sumup(&tu, &pr->ps_tu);
621 TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link)
622 tuagg_sumup(&tu, &q->p_tu);
623 mtx_leave(&pr->ps_mtx);
624
625 runtime = tu.tu_runtime.tv_sec;
626
627 if ((rlim_t)runtime >= rlim.rlim_cur) {
628 if ((rlim_t)runtime >= rlim.rlim_max) {
629 prsignal(pr, SIGKILL);
630 } else if (runtime >= pr->ps_nextxcpu) {
631 prsignal(pr, SIGXCPU);
632 pr->ps_nextxcpu = runtime + SIGXCPU_INTERVAL;
633 }
634 }
635
636 timeout_add_msec(&pr->ps_rucheck_to, RUCHECK_INTERVAL);
637 }
638
639 struct pool plimit_pool;
640
641 void
lim_startup(struct plimit * limit0)642 lim_startup(struct plimit *limit0)
643 {
644 rlim_t lim;
645 int i;
646
647 pool_init(&plimit_pool, sizeof(struct plimit), 0, IPL_MPFLOOR,
648 PR_WAITOK, "plimitpl", NULL);
649
650 for (i = 0; i < nitems(limit0->pl_rlimit); i++)
651 limit0->pl_rlimit[i].rlim_cur =
652 limit0->pl_rlimit[i].rlim_max = RLIM_INFINITY;
653 limit0->pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
654 limit0->pl_rlimit[RLIMIT_NOFILE].rlim_max = MIN(NOFILE_MAX,
655 (maxfiles - NOFILE > NOFILE) ? maxfiles - NOFILE : NOFILE);
656 limit0->pl_rlimit[RLIMIT_NPROC].rlim_cur = MAXUPRC;
657 lim = ptoa(uvmexp.free);
658 limit0->pl_rlimit[RLIMIT_RSS].rlim_max = lim;
659 lim = ptoa(64*1024); /* Default to very low */
660 limit0->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
661 limit0->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
662 refcnt_init(&limit0->pl_refcnt);
663 }
664
665 /*
666 * Make a copy of the plimit structure.
667 * We share these structures copy-on-write after fork,
668 * and copy when a limit is changed.
669 */
670 struct plimit *
lim_copy(struct plimit * lim)671 lim_copy(struct plimit *lim)
672 {
673 struct plimit *newlim;
674
675 newlim = pool_get(&plimit_pool, PR_WAITOK);
676 memcpy(newlim->pl_rlimit, lim->pl_rlimit,
677 sizeof(struct rlimit) * RLIM_NLIMITS);
678 refcnt_init(&newlim->pl_refcnt);
679 return (newlim);
680 }
681
682 void
lim_free(struct plimit * lim)683 lim_free(struct plimit *lim)
684 {
685 if (refcnt_rele(&lim->pl_refcnt) == 0)
686 return;
687 pool_put(&plimit_pool, lim);
688 }
689
690 void
lim_fork(struct process * parent,struct process * child)691 lim_fork(struct process *parent, struct process *child)
692 {
693 struct plimit *limit;
694
695 mtx_enter(&parent->ps_mtx);
696 limit = parent->ps_limit;
697 refcnt_take(&limit->pl_refcnt);
698 mtx_leave(&parent->ps_mtx);
699
700 child->ps_limit = limit;
701
702 if (limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY)
703 timeout_add_msec(&child->ps_rucheck_to, RUCHECK_INTERVAL);
704 }
705
706 /*
707 * Return an exclusive write reference to the process' resource limit structure.
708 * The caller has to release the structure by calling lim_write_commit().
709 *
710 * This invalidates any plimit read reference held by the calling thread.
711 */
712 struct plimit *
lim_write_begin(void)713 lim_write_begin(void)
714 {
715 struct plimit *limit;
716 struct proc *p = curproc;
717
718 rw_assert_wrlock(&rlimit_lock);
719
720 if (p->p_limit != NULL)
721 lim_free(p->p_limit);
722 p->p_limit = NULL;
723
724 /*
725 * It is safe to access ps_limit here without holding ps_mtx
726 * because rlimit_lock excludes other writers.
727 */
728
729 limit = p->p_p->ps_limit;
730 if (P_HASSIBLING(p) || refcnt_shared(&limit->pl_refcnt))
731 limit = lim_copy(limit);
732
733 return (limit);
734 }
735
736 /*
737 * Finish exclusive write access to the plimit structure.
738 * This makes the structure visible to other threads in the process.
739 */
740 void
lim_write_commit(struct plimit * limit)741 lim_write_commit(struct plimit *limit)
742 {
743 struct plimit *olimit;
744 struct proc *p = curproc;
745
746 rw_assert_wrlock(&rlimit_lock);
747
748 if (limit != p->p_p->ps_limit) {
749 mtx_enter(&p->p_p->ps_mtx);
750 olimit = p->p_p->ps_limit;
751 p->p_p->ps_limit = limit;
752 mtx_leave(&p->p_p->ps_mtx);
753
754 lim_free(olimit);
755 }
756 }
757
758 /*
759 * Begin read access to the process' resource limit structure.
760 * The access has to be finished by calling lim_read_leave().
761 *
762 * Sections denoted by lim_read_enter() and lim_read_leave() cannot nest.
763 */
764 struct plimit *
lim_read_enter(void)765 lim_read_enter(void)
766 {
767 struct plimit *limit;
768 struct proc *p = curproc;
769 struct process *pr = p->p_p;
770
771 /*
772 * This thread might not observe the latest value of ps_limit
773 * if another thread updated the limits very recently on another CPU.
774 * However, the anomaly should disappear quickly, especially if
775 * there is any synchronization activity between the threads (or
776 * the CPUs).
777 */
778
779 limit = p->p_limit;
780 if (limit != pr->ps_limit) {
781 mtx_enter(&pr->ps_mtx);
782 limit = pr->ps_limit;
783 refcnt_take(&limit->pl_refcnt);
784 mtx_leave(&pr->ps_mtx);
785 if (p->p_limit != NULL)
786 lim_free(p->p_limit);
787 p->p_limit = limit;
788 }
789 KASSERT(limit != NULL);
790 return (limit);
791 }
792
793 /*
794 * Get the value of the resource limit in given process.
795 */
796 rlim_t
lim_cur_proc(struct proc * p,int which)797 lim_cur_proc(struct proc *p, int which)
798 {
799 struct process *pr = p->p_p;
800 rlim_t val;
801
802 KASSERT(which >= 0 && which < RLIM_NLIMITS);
803
804 mtx_enter(&pr->ps_mtx);
805 val = pr->ps_limit->pl_rlimit[which].rlim_cur;
806 mtx_leave(&pr->ps_mtx);
807 return (val);
808 }
809