xref: /dragonfly/sys/kern/kern_resource.c (revision f746689a)
1 /*-
2  * Copyright (c) 1982, 1986, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
39  * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $
40  * $DragonFly: src/sys/kern/kern_resource.c,v 1.35 2008/05/27 05:25:34 dillon Exp $
41  */
42 
43 #include "opt_compat.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/sysproto.h>
48 #include <sys/file.h>
49 #include <sys/kern_syscall.h>
50 #include <sys/kernel.h>
51 #include <sys/resourcevar.h>
52 #include <sys/malloc.h>
53 #include <sys/proc.h>
54 #include <sys/priv.h>
55 #include <sys/time.h>
56 #include <sys/lockf.h>
57 
58 #include <vm/vm.h>
59 #include <vm/vm_param.h>
60 #include <sys/lock.h>
61 #include <vm/pmap.h>
62 #include <vm/vm_map.h>
63 
64 #include <sys/thread2.h>
65 
66 static int donice (struct proc *chgp, int n);
67 
68 static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
69 #define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
70 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
71 static u_long uihash;		/* size of hash table - 1 */
72 
73 static struct uidinfo	*uicreate (uid_t uid);
74 static struct uidinfo	*uilookup (uid_t uid);
75 
76 /*
77  * Resource controls and accounting.
78  */
79 
/*
 * State handed to getpriority_callback() while sys_getpriority()
 * scans all processes for the PRIO_USER case.
 */
struct getpriority_info {
	int low;	/* lowest p_nice seen so far */
	int who;	/* uid whose processes are examined */
};

static int getpriority_callback(struct proc *p, void *data);
86 
87 int
88 sys_getpriority(struct getpriority_args *uap)
89 {
90 	struct getpriority_info info;
91 	struct proc *curp = curproc;
92 	struct proc *p;
93 	int low = PRIO_MAX + 1;
94 
95 	switch (uap->which) {
96 	case PRIO_PROCESS:
97 		if (uap->who == 0)
98 			p = curp;
99 		else
100 			p = pfind(uap->who);
101 		if (p == 0)
102 			break;
103 		if (!PRISON_CHECK(curp->p_ucred, p->p_ucred))
104 			break;
105 		low = p->p_nice;
106 		break;
107 
108 	case PRIO_PGRP:
109 	{
110 		struct pgrp *pg;
111 
112 		if (uap->who == 0)
113 			pg = curp->p_pgrp;
114 		else if ((pg = pgfind(uap->who)) == NULL)
115 			break;
116 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
117 			if ((PRISON_CHECK(curp->p_ucred, p->p_ucred) && p->p_nice < low))
118 				low = p->p_nice;
119 		}
120 		break;
121 	}
122 	case PRIO_USER:
123 		if (uap->who == 0)
124 			uap->who = curp->p_ucred->cr_uid;
125 		info.low = low;
126 		info.who = uap->who;
127 		allproc_scan(getpriority_callback, &info);
128 		low = info.low;
129 		break;
130 
131 	default:
132 		return (EINVAL);
133 	}
134 	if (low == PRIO_MAX + 1)
135 		return (ESRCH);
136 	uap->sysmsg_result = low;
137 	return (0);
138 }
139 
140 /*
141  * Figure out the current lowest nice priority for processes owned
142  * by the specified user.
143  */
144 static
145 int
146 getpriority_callback(struct proc *p, void *data)
147 {
148 	struct getpriority_info *info = data;
149 
150 	if (PRISON_CHECK(curproc->p_ucred, p->p_ucred) &&
151 	    p->p_ucred->cr_uid == info->who &&
152 	    p->p_nice < info->low) {
153 		info->low = p->p_nice;
154 	}
155 	return(0);
156 }
157 
/*
 * State handed to setpriority_callback() while sys_setpriority()
 * scans all processes for the PRIO_USER case.
 */
struct setpriority_info {
	int prio;	/* nice value to apply */
	int who;	/* uid whose processes are changed */
	int error;	/* most recent non-zero donice() result, else 0 */
	int found;	/* number of processes donice() was applied to */
};

static int setpriority_callback(struct proc *p, void *data);
166 
167 int
168 sys_setpriority(struct setpriority_args *uap)
169 {
170 	struct setpriority_info info;
171 	struct proc *curp = curproc;
172 	struct proc *p;
173 	int found = 0, error = 0;
174 
175 	switch (uap->which) {
176 	case PRIO_PROCESS:
177 		if (uap->who == 0)
178 			p = curp;
179 		else
180 			p = pfind(uap->who);
181 		if (p == 0)
182 			break;
183 		if (!PRISON_CHECK(curp->p_ucred, p->p_ucred))
184 			break;
185 		error = donice(p, uap->prio);
186 		found++;
187 		break;
188 
189 	case PRIO_PGRP:
190 	{
191 		struct pgrp *pg;
192 
193 		if (uap->who == 0)
194 			pg = curp->p_pgrp;
195 		else if ((pg = pgfind(uap->who)) == NULL)
196 			break;
197 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
198 			if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
199 				error = donice(p, uap->prio);
200 				found++;
201 			}
202 		}
203 		break;
204 	}
205 	case PRIO_USER:
206 		if (uap->who == 0)
207 			uap->who = curp->p_ucred->cr_uid;
208 		info.prio = uap->prio;
209 		info.who = uap->who;
210 		info.error = 0;
211 		info.found = 0;
212 		allproc_scan(setpriority_callback, &info);
213 		error = info.error;
214 		found = info.found;
215 		break;
216 
217 	default:
218 		return (EINVAL);
219 	}
220 	if (found == 0)
221 		return (ESRCH);
222 	return (error);
223 }
224 
225 static
226 int
227 setpriority_callback(struct proc *p, void *data)
228 {
229 	struct setpriority_info *info = data;
230 	int error;
231 
232 	if (p->p_ucred->cr_uid == info->who &&
233 	    PRISON_CHECK(curproc->p_ucred, p->p_ucred)) {
234 		error = donice(p, info->prio);
235 		if (error)
236 			info->error = error;
237 		++info->found;
238 	}
239 	return(0);
240 }
241 
242 static int
243 donice(struct proc *chgp, int n)
244 {
245 	struct proc *curp = curproc;
246 	struct ucred *cr = curp->p_ucred;
247 	struct lwp *lp;
248 
249 	if (cr->cr_uid && cr->cr_ruid &&
250 	    cr->cr_uid != chgp->p_ucred->cr_uid &&
251 	    cr->cr_ruid != chgp->p_ucred->cr_uid)
252 		return (EPERM);
253 	if (n > PRIO_MAX)
254 		n = PRIO_MAX;
255 	if (n < PRIO_MIN)
256 		n = PRIO_MIN;
257 	if (n < chgp->p_nice && priv_check_cred(cr, PRIV_ROOT, 0))
258 		return (EACCES);
259 	chgp->p_nice = n;
260 	FOREACH_LWP_IN_PROC(lp, chgp)
261 		chgp->p_usched->resetpriority(lp);
262 	return (0);
263 }
264 
265 int
266 sys_lwp_rtprio(struct lwp_rtprio_args *uap)
267 {
268 	struct proc *p = curproc;
269 	struct lwp *lp;
270 	struct rtprio rtp;
271 	struct ucred *cr = p->p_ucred;
272 	int error;
273 
274 	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
275 	if (error)
276 		return error;
277 
278 	if (uap->pid < 0) {
279 		return EINVAL;
280 	} else if (uap->pid == 0) {
281 		/* curproc already loaded on p */
282 	} else {
283 		p = pfind(uap->pid);
284 	}
285 
286 	if (p == 0) {
287 		return ESRCH;
288 	}
289 
290 	if (uap->tid < -1) {
291 		return EINVAL;
292 	} else if (uap->tid == -1) {
293 		/*
294 		 * sadly, tid can be 0 so we can't use 0 here
295 		 * like sys_rtprio()
296 		 */
297 		lp = curthread->td_lwp;
298 	} else {
299 		lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
300 		if (lp == NULL)
301 			return ESRCH;
302 	}
303 
304 	switch (uap->function) {
305 	case RTP_LOOKUP:
306 		return (copyout(&lp->lwp_rtprio, uap->rtp,
307 				sizeof(struct rtprio)));
308 	case RTP_SET:
309 		if (cr->cr_uid && cr->cr_ruid &&
310 		    cr->cr_uid != p->p_ucred->cr_uid &&
311 		    cr->cr_ruid != p->p_ucred->cr_uid) {
312 			return EPERM;
313 		}
314 		/* disallow setting rtprio in most cases if not superuser */
315 		if (priv_check_cred(cr, PRIV_ROOT, 0)) {
316 			/* can't set someone else's */
317 			if (uap->pid) { /* XXX */
318 				return EPERM;
319 			}
320 			/* can't set realtime priority */
321 /*
322  * Realtime priority has to be restricted for reasons which should be
323  * obvious. However, for idle priority, there is a potential for
324  * system deadlock if an idleprio process gains a lock on a resource
325  * that other processes need (and the idleprio process can't run
326  * due to a CPU-bound normal process). Fix me! XXX
327  */
328  			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
329 				return EPERM;
330 			}
331 		}
332 		switch (rtp.type) {
333 #ifdef RTP_PRIO_FIFO
334 		case RTP_PRIO_FIFO:
335 #endif
336 		case RTP_PRIO_REALTIME:
337 		case RTP_PRIO_NORMAL:
338 		case RTP_PRIO_IDLE:
339 			if (rtp.prio > RTP_PRIO_MAX)
340 				return EINVAL;
341 			lp->lwp_rtprio = rtp;
342 			return 0;
343 		default:
344 			return EINVAL;
345 		}
346 	default:
347 		return EINVAL;
348 	}
349 	panic("can't get here");
350 }
351 
352 /*
353  * Set realtime priority
354  */
355 /* ARGSUSED */
356 int
357 sys_rtprio(struct rtprio_args *uap)
358 {
359 	struct proc *curp = curproc;
360 	struct proc *p;
361 	struct lwp *lp;
362 	struct ucred *cr = curp->p_ucred;
363 	struct rtprio rtp;
364 	int error;
365 
366 	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
367 	if (error)
368 		return (error);
369 
370 	if (uap->pid == 0)
371 		p = curp;
372 	else
373 		p = pfind(uap->pid);
374 
375 	if (p == 0)
376 		return (ESRCH);
377 
378 	/* XXX lwp */
379 	lp = FIRST_LWP_IN_PROC(p);
380 	switch (uap->function) {
381 	case RTP_LOOKUP:
382 		return (copyout(&lp->lwp_rtprio, uap->rtp, sizeof(struct rtprio)));
383 	case RTP_SET:
384 		if (cr->cr_uid && cr->cr_ruid &&
385 		    cr->cr_uid != p->p_ucred->cr_uid &&
386 		    cr->cr_ruid != p->p_ucred->cr_uid)
387 		        return (EPERM);
388 		/* disallow setting rtprio in most cases if not superuser */
389 		if (priv_check_cred(cr, PRIV_ROOT, 0)) {
390 			/* can't set someone else's */
391 			if (uap->pid)
392 				return (EPERM);
393 			/* can't set realtime priority */
394 /*
395  * Realtime priority has to be restricted for reasons which should be
396  * obvious. However, for idle priority, there is a potential for
397  * system deadlock if an idleprio process gains a lock on a resource
398  * that other processes need (and the idleprio process can't run
399  * due to a CPU-bound normal process). Fix me! XXX
400  */
401  			if (RTP_PRIO_IS_REALTIME(rtp.type))
402 				return (EPERM);
403 		}
404 		switch (rtp.type) {
405 #ifdef RTP_PRIO_FIFO
406 		case RTP_PRIO_FIFO:
407 #endif
408 		case RTP_PRIO_REALTIME:
409 		case RTP_PRIO_NORMAL:
410 		case RTP_PRIO_IDLE:
411 			if (rtp.prio > RTP_PRIO_MAX)
412 				return (EINVAL);
413 			lp->lwp_rtprio = rtp;
414 			return (0);
415 		default:
416 			return (EINVAL);
417 		}
418 
419 	default:
420 		return (EINVAL);
421 	}
422 }
423 
424 int
425 sys_setrlimit(struct __setrlimit_args *uap)
426 {
427 	struct rlimit alim;
428 	int error;
429 
430 	error = copyin(uap->rlp, &alim, sizeof(alim));
431 	if (error)
432 		return (error);
433 
434 	error = kern_setrlimit(uap->which, &alim);
435 
436 	return (error);
437 }
438 
439 int
440 sys_getrlimit(struct __getrlimit_args *uap)
441 {
442 	struct rlimit lim;
443 	int error;
444 
445 	error = kern_getrlimit(uap->which, &lim);
446 
447 	if (error == 0)
448 		error = copyout(&lim, uap->rlp, sizeof(*uap->rlp));
449 	return error;
450 }
451 
452 /*
453  * Transform the running time and tick information in lwp lp's thread into user,
454  * system, and interrupt time usage.
455  *
456  * Since we are limited to statclock tick granularity this is a statisical
457  * calculation which will be correct over the long haul, but should not be
458  * expected to measure fine grained deltas.
459  *
460  * It is possible to catch a lwp in the midst of being created, so
461  * check whether lwp_thread is NULL or not.
462  */
463 void
464 calcru(struct lwp *lp, struct timeval *up, struct timeval *sp)
465 {
466 	struct thread *td;
467 
468 	/*
469 	 * Calculate at the statclock level.  YYY if the thread is owned by
470 	 * another cpu we need to forward the request to the other cpu, or
471 	 * have a token to interlock the information in order to avoid racing
472 	 * thread destruction.
473 	 */
474 	if ((td = lp->lwp_thread) != NULL) {
475 		crit_enter();
476 		up->tv_sec = td->td_uticks / 1000000;
477 		up->tv_usec = td->td_uticks % 1000000;
478 		sp->tv_sec = td->td_sticks / 1000000;
479 		sp->tv_usec = td->td_sticks % 1000000;
480 		crit_exit();
481 	}
482 }
483 
484 /*
485  * Aggregate resource statistics of all lwps of a process.
486  *
487  * proc.p_ru keeps track of all statistics directly related to a proc.  This
488  * consists of RSS usage and nswap information and aggregate numbers for all
489  * former lwps of this proc.
490  *
491  * proc.p_cru is the sum of all stats of reaped children.
492  *
493  * lwp.lwp_ru contains the stats directly related to one specific lwp, meaning
494  * packet, scheduler switch or page fault counts, etc.  This information gets
495  * added to lwp.lwp_proc.p_ru when the lwp exits.
496  */
497 void
498 calcru_proc(struct proc *p, struct rusage *ru)
499 {
500 	struct timeval upt, spt;
501 	long *rip1, *rip2;
502 	struct lwp *lp;
503 
504 	*ru = p->p_ru;
505 
506 	FOREACH_LWP_IN_PROC(lp, p) {
507 		calcru(lp, &upt, &spt);
508 		timevaladd(&ru->ru_utime, &upt);
509 		timevaladd(&ru->ru_stime, &spt);
510 		for (rip1 = &ru->ru_first, rip2 = &lp->lwp_ru.ru_first;
511 		     rip1 <= &ru->ru_last;
512 		     rip1++, rip2++)
513 			*rip1 += *rip2;
514 	}
515 }
516 
517 
518 /* ARGSUSED */
519 int
520 sys_getrusage(struct getrusage_args *uap)
521 {
522 	struct rusage ru;
523 	struct rusage *rup;
524 
525 	switch (uap->who) {
526 
527 	case RUSAGE_SELF:
528 		rup = &ru;
529 		calcru_proc(curproc, rup);
530 		break;
531 
532 	case RUSAGE_CHILDREN:
533 		rup = &curproc->p_cru;
534 		break;
535 
536 	default:
537 		return (EINVAL);
538 	}
539 	return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
540 	    sizeof (struct rusage)));
541 }
542 
543 void
544 ruadd(struct rusage *ru, struct rusage *ru2)
545 {
546 	long *ip, *ip2;
547 	int i;
548 
549 	timevaladd(&ru->ru_utime, &ru2->ru_utime);
550 	timevaladd(&ru->ru_stime, &ru2->ru_stime);
551 	if (ru->ru_maxrss < ru2->ru_maxrss)
552 		ru->ru_maxrss = ru2->ru_maxrss;
553 	ip = &ru->ru_first; ip2 = &ru2->ru_first;
554 	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
555 		*ip++ += *ip2++;
556 }
557 
558 /*
559  * Find the uidinfo structure for a uid.  This structure is used to
560  * track the total resource consumption (process count, socket buffer
561  * size, etc.) for the uid and impose limits.
562  */
563 void
564 uihashinit(void)
565 {
566 	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
567 }
568 
569 static struct uidinfo *
570 uilookup(uid_t uid)
571 {
572 	struct	uihashhead *uipp;
573 	struct	uidinfo *uip;
574 
575 	uipp = UIHASH(uid);
576 	LIST_FOREACH(uip, uipp, ui_hash) {
577 		if (uip->ui_uid == uid)
578 			break;
579 	}
580 	return (uip);
581 }
582 
583 static struct uidinfo *
584 uicreate(uid_t uid)
585 {
586 	struct	uidinfo *uip, *norace;
587 
588 	/*
589 	 * Allocate space and check for a race
590 	 */
591 	MALLOC(uip, struct uidinfo *, sizeof(*uip), M_UIDINFO, M_WAITOK);
592 	norace = uilookup(uid);
593 	if (norace != NULL) {
594 		FREE(uip, M_UIDINFO);
595 		return (norace);
596 	}
597 
598 	/*
599 	 * Initialize structure and enter it into the hash table
600 	 */
601 	LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
602 	uip->ui_uid = uid;
603 	uip->ui_proccnt = 0;
604 	uip->ui_sbsize = 0;
605 	uip->ui_ref = 0;
606 	uip->ui_posixlocks = 0;
607 	varsymset_init(&uip->ui_varsymset, NULL);
608 	return (uip);
609 }
610 
611 struct uidinfo *
612 uifind(uid_t uid)
613 {
614 	struct	uidinfo *uip;
615 
616 	uip = uilookup(uid);
617 	if (uip == NULL)
618 		uip = uicreate(uid);
619 	uip->ui_ref++;
620 	return (uip);
621 }
622 
623 static __inline void
624 uifree(struct uidinfo *uip)
625 {
626 	if (uip->ui_sbsize != 0)
627 		/* XXX no %qd in kernel.  Truncate. */
628 		kprintf("freeing uidinfo: uid = %d, sbsize = %ld\n",
629 		    uip->ui_uid, (long)uip->ui_sbsize);
630 	if (uip->ui_proccnt != 0)
631 		kprintf("freeing uidinfo: uid = %d, proccnt = %ld\n",
632 		    uip->ui_uid, uip->ui_proccnt);
633 	LIST_REMOVE(uip, ui_hash);
634 	varsymset_clean(&uip->ui_varsymset);
635 	FREE(uip, M_UIDINFO);
636 }
637 
638 void
639 uihold(struct uidinfo *uip)
640 {
641 	++uip->ui_ref;
642 	KKASSERT(uip->ui_ref > 0);
643 }
644 
645 void
646 uidrop(struct uidinfo *uip)
647 {
648 	KKASSERT(uip->ui_ref > 0);
649 	if (--uip->ui_ref == 0)
650 		uifree(uip);
651 }
652 
/*
 * Replace the uidinfo pointer stored at *puip with nuip, dropping the
 * reference held on the previous value.  No reference is taken on nuip
 * here.
 */
void
uireplace(struct uidinfo **puip, struct uidinfo *nuip)
{
	struct uidinfo *old = *puip;

	uidrop(old);
	*puip = nuip;
}
659 
660 /*
661  * Change the count associated with number of processes
662  * a given user is using.  When 'max' is 0, don't enforce a limit
663  */
664 int
665 chgproccnt(struct uidinfo *uip, int diff, int max)
666 {
667 	/* don't allow them to exceed max, but allow subtraction */
668 	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0)
669 		return (0);
670 	uip->ui_proccnt += diff;
671 	if (uip->ui_proccnt < 0)
672 		kprintf("negative proccnt for uid = %d\n", uip->ui_uid);
673 	return (1);
674 }
675 
676 /*
677  * Change the total socket buffer size a user has used.
678  */
679 int
680 chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max)
681 {
682 	rlim_t new;
683 
684 	crit_enter();
685 	new = uip->ui_sbsize + to - *hiwat;
686 
687 	/*
688 	 * If we are trying to increase the socket buffer size
689 	 * Scale down the hi water mark when we exceed the user's
690 	 * allowed socket buffer space.
691 	 *
692 	 * We can't scale down too much or we will blow up atomic packet
693 	 * operations.
694 	 */
695 	if (to > *hiwat && to > MCLBYTES && new > max) {
696 		to = to * max / new;
697 		if (to < MCLBYTES)
698 			to = MCLBYTES;
699 	}
700 	uip->ui_sbsize = new;
701 	*hiwat = to;
702 	if (uip->ui_sbsize < 0)
703 		kprintf("negative sbsize for uid = %d\n", uip->ui_uid);
704 	crit_exit();
705 	return (1);
706 }
707 
708