xref: /openbsd/sys/kern/kern_resource.c (revision 29d5b944)
1 /*	$OpenBSD: kern_resource.c,v 1.93 2024/11/10 06:45:36 jsg Exp $	*/
2 /*	$NetBSD: kern_resource.c,v 1.38 1996/10/23 07:19:38 matthias Exp $	*/
3 
4 /*-
5  * Copyright (c) 1982, 1986, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/file.h>
44 #include <sys/resourcevar.h>
45 #include <sys/pool.h>
46 #include <sys/proc.h>
47 #include <sys/ktrace.h>
48 #include <sys/sched.h>
49 #include <sys/signalvar.h>
50 
51 #include <sys/mount.h>
52 #include <sys/syscallargs.h>
53 
54 #include <uvm/uvm.h>
55 
/* Resource usage check interval in msec; rucheck() re-arms with it. */
#define RUCHECK_INTERVAL	1000

/*
 * SIGXCPU interval in seconds of process runtime: after posting SIGXCPU,
 * rucheck() waits for this much additional runtime before the next one.
 */
#define SIGXCPU_INTERVAL	5

/* Helpers used by dosetrlimit() to replace a process' struct plimit. */
struct plimit	*lim_copy(struct plimit *);
struct plimit	*lim_write_begin(void);
void		 lim_write_commit(struct plimit *);

/* Adds a consistent snapshot of one struct tusage onto another. */
void	tuagg_sumup(struct tusage *, const struct tusage *);

/*
 * Patchable maximum data and stack limits.
 * dosetrlimit() clamps RLIMIT_DATA and RLIMIT_STACK requests to these.
 */
rlim_t maxdmap = MAXDSIZ;
rlim_t maxsmap = MAXSSIZ;

/*
 * Serializes resource limit updates.
 * This lock has to be held together with ps_mtx when updating
 * the process' ps_limit.
 */
struct rwlock rlimit_lock = RWLOCK_INITIALIZER("rlimitlk");
80 
81 /*
82  * Resource controls and accounting.
83  */
84 
85 int
sys_getpriority(struct proc * curp,void * v,register_t * retval)86 sys_getpriority(struct proc *curp, void *v, register_t *retval)
87 {
88 	struct sys_getpriority_args /* {
89 		syscallarg(int) which;
90 		syscallarg(id_t) who;
91 	} */ *uap = v;
92 	struct process *pr;
93 	int low = NZERO + PRIO_MAX + 1;
94 
95 	switch (SCARG(uap, which)) {
96 
97 	case PRIO_PROCESS:
98 		if (SCARG(uap, who) == 0)
99 			pr = curp->p_p;
100 		else
101 			pr = prfind(SCARG(uap, who));
102 		if (pr == NULL)
103 			break;
104 		if (pr->ps_nice < low)
105 			low = pr->ps_nice;
106 		break;
107 
108 	case PRIO_PGRP: {
109 		struct pgrp *pg;
110 
111 		if (SCARG(uap, who) == 0)
112 			pg = curp->p_p->ps_pgrp;
113 		else if ((pg = pgfind(SCARG(uap, who))) == NULL)
114 			break;
115 		LIST_FOREACH(pr, &pg->pg_members, ps_pglist)
116 			if (pr->ps_nice < low)
117 				low = pr->ps_nice;
118 		break;
119 	}
120 
121 	case PRIO_USER:
122 		if (SCARG(uap, who) == 0)
123 			SCARG(uap, who) = curp->p_ucred->cr_uid;
124 		LIST_FOREACH(pr, &allprocess, ps_list)
125 			if (pr->ps_ucred->cr_uid == SCARG(uap, who) &&
126 			    pr->ps_nice < low)
127 				low = pr->ps_nice;
128 		break;
129 
130 	default:
131 		return (EINVAL);
132 	}
133 	if (low == NZERO + PRIO_MAX + 1)
134 		return (ESRCH);
135 	*retval = low - NZERO;
136 	return (0);
137 }
138 
139 int
sys_setpriority(struct proc * curp,void * v,register_t * retval)140 sys_setpriority(struct proc *curp, void *v, register_t *retval)
141 {
142 	struct sys_setpriority_args /* {
143 		syscallarg(int) which;
144 		syscallarg(id_t) who;
145 		syscallarg(int) prio;
146 	} */ *uap = v;
147 	struct process *pr;
148 	int found = 0, error = 0;
149 
150 	switch (SCARG(uap, which)) {
151 
152 	case PRIO_PROCESS:
153 		if (SCARG(uap, who) == 0)
154 			pr = curp->p_p;
155 		else
156 			pr = prfind(SCARG(uap, who));
157 		if (pr == NULL)
158 			break;
159 		error = donice(curp, pr, SCARG(uap, prio));
160 		found = 1;
161 		break;
162 
163 	case PRIO_PGRP: {
164 		struct pgrp *pg;
165 
166 		if (SCARG(uap, who) == 0)
167 			pg = curp->p_p->ps_pgrp;
168 		else if ((pg = pgfind(SCARG(uap, who))) == NULL)
169 			break;
170 		LIST_FOREACH(pr, &pg->pg_members, ps_pglist) {
171 			error = donice(curp, pr, SCARG(uap, prio));
172 			found = 1;
173 		}
174 		break;
175 	}
176 
177 	case PRIO_USER:
178 		if (SCARG(uap, who) == 0)
179 			SCARG(uap, who) = curp->p_ucred->cr_uid;
180 		LIST_FOREACH(pr, &allprocess, ps_list)
181 			if (pr->ps_ucred->cr_uid == SCARG(uap, who)) {
182 				error = donice(curp, pr, SCARG(uap, prio));
183 				found = 1;
184 			}
185 		break;
186 
187 	default:
188 		return (EINVAL);
189 	}
190 	if (!found)
191 		return (ESRCH);
192 	return (error);
193 }
194 
195 int
donice(struct proc * curp,struct process * chgpr,int n)196 donice(struct proc *curp, struct process *chgpr, int n)
197 {
198 	struct ucred *ucred = curp->p_ucred;
199 	struct proc *p;
200 
201 	if (ucred->cr_uid != 0 && ucred->cr_ruid != 0 &&
202 	    ucred->cr_uid != chgpr->ps_ucred->cr_uid &&
203 	    ucred->cr_ruid != chgpr->ps_ucred->cr_uid)
204 		return (EPERM);
205 	if (n > PRIO_MAX)
206 		n = PRIO_MAX;
207 	if (n < PRIO_MIN)
208 		n = PRIO_MIN;
209 	n += NZERO;
210 	if (n < chgpr->ps_nice && suser(curp))
211 		return (EACCES);
212 	chgpr->ps_nice = n;
213 	mtx_enter(&chgpr->ps_mtx);
214 	SCHED_LOCK();
215 	TAILQ_FOREACH(p, &chgpr->ps_threads, p_thr_link) {
216 		setpriority(p, p->p_estcpu, n);
217 	}
218 	SCHED_UNLOCK();
219 	mtx_leave(&chgpr->ps_mtx);
220 	return (0);
221 }
222 
223 int
sys_setrlimit(struct proc * p,void * v,register_t * retval)224 sys_setrlimit(struct proc *p, void *v, register_t *retval)
225 {
226 	struct sys_setrlimit_args /* {
227 		syscallarg(int) which;
228 		syscallarg(const struct rlimit *) rlp;
229 	} */ *uap = v;
230 	struct rlimit alim;
231 	int error;
232 
233 	error = copyin((caddr_t)SCARG(uap, rlp), (caddr_t)&alim,
234 		       sizeof (struct rlimit));
235 	if (error)
236 		return (error);
237 #ifdef KTRACE
238 	if (KTRPOINT(p, KTR_STRUCT))
239 		ktrrlimit(p, &alim);
240 #endif
241 	return (dosetrlimit(p, SCARG(uap, which), &alim));
242 }
243 
244 int
dosetrlimit(struct proc * p,u_int which,struct rlimit * limp)245 dosetrlimit(struct proc *p, u_int which, struct rlimit *limp)
246 {
247 	struct rlimit *alimp;
248 	struct plimit *limit;
249 	rlim_t maxlim;
250 	int error;
251 
252 	if (which >= RLIM_NLIMITS || limp->rlim_cur > limp->rlim_max)
253 		return (EINVAL);
254 
255 	rw_enter_write(&rlimit_lock);
256 
257 	alimp = &p->p_p->ps_limit->pl_rlimit[which];
258 	if (limp->rlim_max > alimp->rlim_max) {
259 		if ((error = suser(p)) != 0) {
260 			rw_exit_write(&rlimit_lock);
261 			return (error);
262 		}
263 	}
264 
265 	/* Get exclusive write access to the limit structure. */
266 	limit = lim_write_begin();
267 	alimp = &limit->pl_rlimit[which];
268 
269 	switch (which) {
270 	case RLIMIT_DATA:
271 		maxlim = maxdmap;
272 		break;
273 	case RLIMIT_STACK:
274 		maxlim = maxsmap;
275 		break;
276 	case RLIMIT_NOFILE:
277 		maxlim = atomic_load_int(&maxfiles);
278 		break;
279 	case RLIMIT_NPROC:
280 		maxlim = atomic_load_int(&maxprocess);
281 		break;
282 	default:
283 		maxlim = RLIM_INFINITY;
284 		break;
285 	}
286 
287 	if (limp->rlim_max > maxlim)
288 		limp->rlim_max = maxlim;
289 	if (limp->rlim_cur > limp->rlim_max)
290 		limp->rlim_cur = limp->rlim_max;
291 
292 	if (which == RLIMIT_CPU && limp->rlim_cur != RLIM_INFINITY &&
293 	    alimp->rlim_cur == RLIM_INFINITY)
294 		timeout_add_msec(&p->p_p->ps_rucheck_to, RUCHECK_INTERVAL);
295 
296 	if (which == RLIMIT_STACK) {
297 		/*
298 		 * Stack is allocated to the max at exec time with only
299 		 * "rlim_cur" bytes accessible.  If stack limit is going
300 		 * up make more accessible, if going down make inaccessible.
301 		 */
302 		if (limp->rlim_cur != alimp->rlim_cur) {
303 			vaddr_t addr;
304 			vsize_t size;
305 			vm_prot_t prot;
306 			struct vmspace *vm = p->p_vmspace;
307 
308 			if (limp->rlim_cur > alimp->rlim_cur) {
309 				prot = PROT_READ | PROT_WRITE;
310 				size = limp->rlim_cur - alimp->rlim_cur;
311 #ifdef MACHINE_STACK_GROWS_UP
312 				addr = (vaddr_t)vm->vm_maxsaddr +
313 				    alimp->rlim_cur;
314 #else
315 				addr = (vaddr_t)vm->vm_minsaddr -
316 				    limp->rlim_cur;
317 #endif
318 			} else {
319 				prot = PROT_NONE;
320 				size = alimp->rlim_cur - limp->rlim_cur;
321 #ifdef MACHINE_STACK_GROWS_UP
322 				addr = (vaddr_t)vm->vm_maxsaddr +
323 				    limp->rlim_cur;
324 #else
325 				addr = (vaddr_t)vm->vm_minsaddr -
326 				    alimp->rlim_cur;
327 #endif
328 			}
329 			addr = trunc_page(addr);
330 			size = round_page(size);
331 			KERNEL_LOCK();
332 			(void) uvm_map_protect(&vm->vm_map, addr,
333 			    addr+size, prot, UVM_ET_STACK, FALSE, FALSE);
334 			KERNEL_UNLOCK();
335 		}
336 	}
337 
338 	*alimp = *limp;
339 
340 	lim_write_commit(limit);
341 	rw_exit_write(&rlimit_lock);
342 
343 	return (0);
344 }
345 
346 int
sys_getrlimit(struct proc * p,void * v,register_t * retval)347 sys_getrlimit(struct proc *p, void *v, register_t *retval)
348 {
349 	struct sys_getrlimit_args /* {
350 		syscallarg(int) which;
351 		syscallarg(struct rlimit *) rlp;
352 	} */ *uap = v;
353 	struct plimit *limit;
354 	struct rlimit alimp;
355 	int error;
356 
357 	if (SCARG(uap, which) < 0 || SCARG(uap, which) >= RLIM_NLIMITS)
358 		return (EINVAL);
359 	limit = lim_read_enter();
360 	alimp = limit->pl_rlimit[SCARG(uap, which)];
361 	lim_read_leave(limit);
362 	error = copyout(&alimp, SCARG(uap, rlp), sizeof(struct rlimit));
363 #ifdef KTRACE
364 	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
365 		ktrrlimit(p, &alimp);
366 #endif
367 	return (error);
368 }
369 
370 /* Add the counts from *from to *tu, ensuring a consistent read of *from. */
371 void
tuagg_sumup(struct tusage * tu,const struct tusage * from)372 tuagg_sumup(struct tusage *tu, const struct tusage *from)
373 {
374 	struct tusage	tmp;
375 	uint64_t	enter, leave;
376 
377 	enter = from->tu_gen;
378 	for (;;) {
379 		/* the generation number is odd during an update */
380 		while (enter & 1) {
381 			CPU_BUSY_CYCLE();
382 			enter = from->tu_gen;
383 		}
384 
385 		membar_consumer();
386 		tmp = *from;
387 		membar_consumer();
388 		leave = from->tu_gen;
389 
390 		if (enter == leave)
391 			break;
392 		enter = leave;
393 	}
394 
395 	tu->tu_uticks += tmp.tu_uticks;
396 	tu->tu_sticks += tmp.tu_sticks;
397 	tu->tu_iticks += tmp.tu_iticks;
398 	timespecadd(&tu->tu_runtime, &tmp.tu_runtime, &tu->tu_runtime);
399 }
400 
401 void
tuagg_get_proc(struct tusage * tu,struct proc * p)402 tuagg_get_proc(struct tusage *tu, struct proc *p)
403 {
404 	memset(tu, 0, sizeof(*tu));
405 	tuagg_sumup(tu, &p->p_tu);
406 }
407 
408 void
tuagg_get_process(struct tusage * tu,struct process * pr)409 tuagg_get_process(struct tusage *tu, struct process *pr)
410 {
411 	struct proc *q;
412 
413 	memset(tu, 0, sizeof(*tu));
414 
415 	mtx_enter(&pr->ps_mtx);
416 	tuagg_sumup(tu, &pr->ps_tu);
417 	/* add on all living threads */
418 	TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link)
419 		tuagg_sumup(tu, &q->p_tu);
420 	mtx_leave(&pr->ps_mtx);
421 }
422 
423 /*
424  * Update the process ps_tu usage with the values from proc p while
425  * doing so the times for proc p are reset.
426  * This requires that p is either curproc or SDEAD and that the
427  * IPL is higher than IPL_STATCLOCK. ps_mtx uses IPL_HIGH so
428  * this should always be the case.
429  */
430 void
tuagg_add_process(struct process * pr,struct proc * p)431 tuagg_add_process(struct process *pr, struct proc *p)
432 {
433 	MUTEX_ASSERT_LOCKED(&pr->ps_mtx);
434 	KASSERT(curproc == p || p->p_stat == SDEAD);
435 
436 	tu_enter(&pr->ps_tu);
437 	tuagg_sumup(&pr->ps_tu, &p->p_tu);
438 	tu_leave(&pr->ps_tu);
439 
440 	/* Now reset CPU time usage for the thread. */
441 	timespecclear(&p->p_tu.tu_runtime);
442 	p->p_tu.tu_uticks = p->p_tu.tu_sticks = p->p_tu.tu_iticks = 0;
443 }
444 
445 void
tuagg_add_runtime(void)446 tuagg_add_runtime(void)
447 {
448 	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
449 	struct proc *p = curproc;
450 	struct timespec ts, delta;
451 
452 	/*
453 	 * Compute the amount of time during which the current
454 	 * process was running, and add that to its total so far.
455 	 */
456 	nanouptime(&ts);
457 	if (timespeccmp(&ts, &spc->spc_runtime, <)) {
458 #if 0
459 		printf("uptime is not monotonic! "
460 		    "ts=%lld.%09lu, runtime=%lld.%09lu\n",
461 		    (long long)tv.tv_sec, tv.tv_nsec,
462 		    (long long)spc->spc_runtime.tv_sec,
463 		    spc->spc_runtime.tv_nsec);
464 #endif
465 		timespecclear(&delta);
466 	} else {
467 		timespecsub(&ts, &spc->spc_runtime, &delta);
468 	}
469 	/* update spc_runtime */
470 	spc->spc_runtime = ts;
471 	tu_enter(&p->p_tu);
472 	timespecadd(&p->p_tu.tu_runtime, &delta, &p->p_tu.tu_runtime);
473 	tu_leave(&p->p_tu);
474 }
475 
476 /*
477  * Transform the running time and tick information in a struct tusage
478  * into user, system, and interrupt time usage.
479  */
480 void
calctsru(struct tusage * tup,struct timespec * up,struct timespec * sp,struct timespec * ip)481 calctsru(struct tusage *tup, struct timespec *up, struct timespec *sp,
482     struct timespec *ip)
483 {
484 	u_quad_t st, ut, it;
485 
486 	st = tup->tu_sticks;
487 	ut = tup->tu_uticks;
488 	it = tup->tu_iticks;
489 
490 	if (st + ut + it == 0) {
491 		timespecclear(up);
492 		timespecclear(sp);
493 		if (ip != NULL)
494 			timespecclear(ip);
495 		return;
496 	}
497 
498 	st = st * 1000000000 / stathz;
499 	sp->tv_sec = st / 1000000000;
500 	sp->tv_nsec = st % 1000000000;
501 	ut = ut * 1000000000 / stathz;
502 	up->tv_sec = ut / 1000000000;
503 	up->tv_nsec = ut % 1000000000;
504 	if (ip != NULL) {
505 		it = it * 1000000000 / stathz;
506 		ip->tv_sec = it / 1000000000;
507 		ip->tv_nsec = it % 1000000000;
508 	}
509 }
510 
511 void
calcru(struct tusage * tup,struct timeval * up,struct timeval * sp,struct timeval * ip)512 calcru(struct tusage *tup, struct timeval *up, struct timeval *sp,
513     struct timeval *ip)
514 {
515 	struct timespec u, s, i;
516 
517 	calctsru(tup, &u, &s, ip != NULL ? &i : NULL);
518 	TIMESPEC_TO_TIMEVAL(up, &u);
519 	TIMESPEC_TO_TIMEVAL(sp, &s);
520 	if (ip != NULL)
521 		TIMESPEC_TO_TIMEVAL(ip, &i);
522 }
523 
524 int
sys_getrusage(struct proc * p,void * v,register_t * retval)525 sys_getrusage(struct proc *p, void *v, register_t *retval)
526 {
527 	struct sys_getrusage_args /* {
528 		syscallarg(int) who;
529 		syscallarg(struct rusage *) rusage;
530 	} */ *uap = v;
531 	struct rusage ru;
532 	int error;
533 
534 	error = dogetrusage(p, SCARG(uap, who), &ru);
535 	if (error == 0) {
536 		error = copyout(&ru, SCARG(uap, rusage), sizeof(ru));
537 #ifdef KTRACE
538 		if (error == 0 && KTRPOINT(p, KTR_STRUCT))
539 			ktrrusage(p, &ru);
540 #endif
541 	}
542 	return (error);
543 }
544 
545 int
dogetrusage(struct proc * p,int who,struct rusage * rup)546 dogetrusage(struct proc *p, int who, struct rusage *rup)
547 {
548 	struct process *pr = p->p_p;
549 	struct proc *q;
550 	struct tusage tu = { 0 };
551 
552 	KERNEL_ASSERT_LOCKED();
553 
554 	switch (who) {
555 	case RUSAGE_SELF:
556 		/* start with the sum of dead threads, if any */
557 		if (pr->ps_ru != NULL)
558 			*rup = *pr->ps_ru;
559 		else
560 			memset(rup, 0, sizeof(*rup));
561 		tuagg_sumup(&tu, &pr->ps_tu);
562 
563 		/* add on all living threads */
564 		TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
565 			ruadd(rup, &q->p_ru);
566 			tuagg_sumup(&tu, &q->p_tu);
567 		}
568 
569 		calcru(&tu, &rup->ru_utime, &rup->ru_stime, NULL);
570 		break;
571 
572 	case RUSAGE_THREAD:
573 		*rup = p->p_ru;
574 		calcru(&p->p_tu, &rup->ru_utime, &rup->ru_stime, NULL);
575 		break;
576 
577 	case RUSAGE_CHILDREN:
578 		*rup = pr->ps_cru;
579 		break;
580 
581 	default:
582 		return (EINVAL);
583 	}
584 	return (0);
585 }
586 
587 void
ruadd(struct rusage * ru,const struct rusage * ru2)588 ruadd(struct rusage *ru, const struct rusage *ru2)
589 {
590 	long *ip;
591 	const long *ip2;
592 	int i;
593 
594 	timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
595 	timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
596 	if (ru->ru_maxrss < ru2->ru_maxrss)
597 		ru->ru_maxrss = ru2->ru_maxrss;
598 	ip = &ru->ru_first; ip2 = &ru2->ru_first;
599 	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
600 		*ip++ += *ip2++;
601 }
602 
603 /*
604  * Check if the process exceeds its cpu resource allocation.
605  * If over max, kill it.
606  */
607 void
rucheck(void * arg)608 rucheck(void *arg)
609 {
610 	struct rlimit rlim;
611 	struct tusage tu = { 0 };
612 	struct process *pr = arg;
613 	struct proc *q;
614 	time_t runtime;
615 
616 	KERNEL_ASSERT_LOCKED();
617 
618 	mtx_enter(&pr->ps_mtx);
619 	rlim = pr->ps_limit->pl_rlimit[RLIMIT_CPU];
620 	tuagg_sumup(&tu, &pr->ps_tu);
621 	TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link)
622 		tuagg_sumup(&tu, &q->p_tu);
623 	mtx_leave(&pr->ps_mtx);
624 
625 	runtime = tu.tu_runtime.tv_sec;
626 
627 	if ((rlim_t)runtime >= rlim.rlim_cur) {
628 		if ((rlim_t)runtime >= rlim.rlim_max) {
629 			prsignal(pr, SIGKILL);
630 		} else if (runtime >= pr->ps_nextxcpu) {
631 			prsignal(pr, SIGXCPU);
632 			pr->ps_nextxcpu = runtime + SIGXCPU_INTERVAL;
633 		}
634 	}
635 
636 	timeout_add_msec(&pr->ps_rucheck_to, RUCHECK_INTERVAL);
637 }
638 
639 struct pool plimit_pool;
640 
641 void
lim_startup(struct plimit * limit0)642 lim_startup(struct plimit *limit0)
643 {
644 	rlim_t lim;
645 	int i;
646 
647 	pool_init(&plimit_pool, sizeof(struct plimit), 0, IPL_MPFLOOR,
648 	    PR_WAITOK, "plimitpl", NULL);
649 
650 	for (i = 0; i < nitems(limit0->pl_rlimit); i++)
651 		limit0->pl_rlimit[i].rlim_cur =
652 		    limit0->pl_rlimit[i].rlim_max = RLIM_INFINITY;
653 	limit0->pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
654 	limit0->pl_rlimit[RLIMIT_NOFILE].rlim_max = MIN(NOFILE_MAX,
655 	    (maxfiles - NOFILE > NOFILE) ? maxfiles - NOFILE : NOFILE);
656 	limit0->pl_rlimit[RLIMIT_NPROC].rlim_cur = MAXUPRC;
657 	lim = ptoa(uvmexp.free);
658 	limit0->pl_rlimit[RLIMIT_RSS].rlim_max = lim;
659 	lim = ptoa(64*1024);		/* Default to very low */
660 	limit0->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
661 	limit0->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
662 	refcnt_init(&limit0->pl_refcnt);
663 }
664 
665 /*
666  * Make a copy of the plimit structure.
667  * We share these structures copy-on-write after fork,
668  * and copy when a limit is changed.
669  */
670 struct plimit *
lim_copy(struct plimit * lim)671 lim_copy(struct plimit *lim)
672 {
673 	struct plimit *newlim;
674 
675 	newlim = pool_get(&plimit_pool, PR_WAITOK);
676 	memcpy(newlim->pl_rlimit, lim->pl_rlimit,
677 	    sizeof(struct rlimit) * RLIM_NLIMITS);
678 	refcnt_init(&newlim->pl_refcnt);
679 	return (newlim);
680 }
681 
682 void
lim_free(struct plimit * lim)683 lim_free(struct plimit *lim)
684 {
685 	if (refcnt_rele(&lim->pl_refcnt) == 0)
686 		return;
687 	pool_put(&plimit_pool, lim);
688 }
689 
690 void
lim_fork(struct process * parent,struct process * child)691 lim_fork(struct process *parent, struct process *child)
692 {
693 	struct plimit *limit;
694 
695 	mtx_enter(&parent->ps_mtx);
696 	limit = parent->ps_limit;
697 	refcnt_take(&limit->pl_refcnt);
698 	mtx_leave(&parent->ps_mtx);
699 
700 	child->ps_limit = limit;
701 
702 	if (limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY)
703 		timeout_add_msec(&child->ps_rucheck_to, RUCHECK_INTERVAL);
704 }
705 
706 /*
707  * Return an exclusive write reference to the process' resource limit structure.
708  * The caller has to release the structure by calling lim_write_commit().
709  *
710  * This invalidates any plimit read reference held by the calling thread.
711  */
712 struct plimit *
lim_write_begin(void)713 lim_write_begin(void)
714 {
715 	struct plimit *limit;
716 	struct proc *p = curproc;
717 
718 	rw_assert_wrlock(&rlimit_lock);
719 
720 	if (p->p_limit != NULL)
721 		lim_free(p->p_limit);
722 	p->p_limit = NULL;
723 
724 	/*
725 	 * It is safe to access ps_limit here without holding ps_mtx
726 	 * because rlimit_lock excludes other writers.
727 	 */
728 
729 	limit = p->p_p->ps_limit;
730 	if (P_HASSIBLING(p) || refcnt_shared(&limit->pl_refcnt))
731 		limit = lim_copy(limit);
732 
733 	return (limit);
734 }
735 
736 /*
737  * Finish exclusive write access to the plimit structure.
738  * This makes the structure visible to other threads in the process.
739  */
740 void
lim_write_commit(struct plimit * limit)741 lim_write_commit(struct plimit *limit)
742 {
743 	struct plimit *olimit;
744 	struct proc *p = curproc;
745 
746 	rw_assert_wrlock(&rlimit_lock);
747 
748 	if (limit != p->p_p->ps_limit) {
749 		mtx_enter(&p->p_p->ps_mtx);
750 		olimit = p->p_p->ps_limit;
751 		p->p_p->ps_limit = limit;
752 		mtx_leave(&p->p_p->ps_mtx);
753 
754 		lim_free(olimit);
755 	}
756 }
757 
758 /*
759  * Begin read access to the process' resource limit structure.
760  * The access has to be finished by calling lim_read_leave().
761  *
762  * Sections denoted by lim_read_enter() and lim_read_leave() cannot nest.
763  */
764 struct plimit *
lim_read_enter(void)765 lim_read_enter(void)
766 {
767 	struct plimit *limit;
768 	struct proc *p = curproc;
769 	struct process *pr = p->p_p;
770 
771 	/*
772 	 * This thread might not observe the latest value of ps_limit
773 	 * if another thread updated the limits very recently on another CPU.
774 	 * However, the anomaly should disappear quickly, especially if
775 	 * there is any synchronization activity between the threads (or
776 	 * the CPUs).
777 	 */
778 
779 	limit = p->p_limit;
780 	if (limit != pr->ps_limit) {
781 		mtx_enter(&pr->ps_mtx);
782 		limit = pr->ps_limit;
783 		refcnt_take(&limit->pl_refcnt);
784 		mtx_leave(&pr->ps_mtx);
785 		if (p->p_limit != NULL)
786 			lim_free(p->p_limit);
787 		p->p_limit = limit;
788 	}
789 	KASSERT(limit != NULL);
790 	return (limit);
791 }
792 
793 /*
794  * Get the value of the resource limit in given process.
795  */
796 rlim_t
lim_cur_proc(struct proc * p,int which)797 lim_cur_proc(struct proc *p, int which)
798 {
799 	struct process *pr = p->p_p;
800 	rlim_t val;
801 
802 	KASSERT(which >= 0 && which < RLIM_NLIMITS);
803 
804 	mtx_enter(&pr->ps_mtx);
805 	val = pr->ps_limit->pl_rlimit[which].rlim_cur;
806 	mtx_leave(&pr->ps_mtx);
807 	return (val);
808 }
809