xref: /dragonfly/sys/kern/kern_resource.c (revision 0cfebe3d)
1 /*-
2  * Copyright (c) 1982, 1986, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
39  * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $
40  * $DragonFly: src/sys/kern/kern_resource.c,v 1.34 2007/08/20 05:40:40 dillon Exp $
41  */
42 
43 #include "opt_compat.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/sysproto.h>
48 #include <sys/file.h>
49 #include <sys/kern_syscall.h>
50 #include <sys/kernel.h>
51 #include <sys/resourcevar.h>
52 #include <sys/malloc.h>
53 #include <sys/proc.h>
54 #include <sys/time.h>
55 #include <sys/lockf.h>
56 
57 #include <vm/vm.h>
58 #include <vm/vm_param.h>
59 #include <sys/lock.h>
60 #include <vm/pmap.h>
61 #include <vm/vm_map.h>
62 
63 #include <sys/thread2.h>
64 
65 static int donice (struct proc *chgp, int n);
66 
67 static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
68 #define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
69 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
70 static u_long uihash;		/* size of hash table - 1 */
71 
72 static struct uidinfo	*uicreate (uid_t uid);
73 static struct uidinfo	*uilookup (uid_t uid);
74 
75 /*
76  * Resource controls and accounting.
77  */
78 
/*
 * State passed to getpriority_callback() while scanning all processes
 * for the PRIO_USER case of sys_getpriority().
 */
struct getpriority_info {
	int low;	/* lowest nice value seen so far */
	int who;	/* uid whose processes are being examined */
};

static int getpriority_callback(struct proc *p, void *data);
85 
86 int
87 sys_getpriority(struct getpriority_args *uap)
88 {
89 	struct getpriority_info info;
90 	struct proc *curp = curproc;
91 	struct proc *p;
92 	int low = PRIO_MAX + 1;
93 
94 	switch (uap->which) {
95 	case PRIO_PROCESS:
96 		if (uap->who == 0)
97 			p = curp;
98 		else
99 			p = pfind(uap->who);
100 		if (p == 0)
101 			break;
102 		if (!PRISON_CHECK(curp->p_ucred, p->p_ucred))
103 			break;
104 		low = p->p_nice;
105 		break;
106 
107 	case PRIO_PGRP:
108 	{
109 		struct pgrp *pg;
110 
111 		if (uap->who == 0)
112 			pg = curp->p_pgrp;
113 		else if ((pg = pgfind(uap->who)) == NULL)
114 			break;
115 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
116 			if ((PRISON_CHECK(curp->p_ucred, p->p_ucred) && p->p_nice < low))
117 				low = p->p_nice;
118 		}
119 		break;
120 	}
121 	case PRIO_USER:
122 		if (uap->who == 0)
123 			uap->who = curp->p_ucred->cr_uid;
124 		info.low = low;
125 		info.who = uap->who;
126 		allproc_scan(getpriority_callback, &info);
127 		low = info.low;
128 		break;
129 
130 	default:
131 		return (EINVAL);
132 	}
133 	if (low == PRIO_MAX + 1)
134 		return (ESRCH);
135 	uap->sysmsg_result = low;
136 	return (0);
137 }
138 
139 /*
140  * Figure out the current lowest nice priority for processes owned
141  * by the specified user.
142  */
143 static
144 int
145 getpriority_callback(struct proc *p, void *data)
146 {
147 	struct getpriority_info *info = data;
148 
149 	if (PRISON_CHECK(curproc->p_ucred, p->p_ucred) &&
150 	    p->p_ucred->cr_uid == info->who &&
151 	    p->p_nice < info->low) {
152 		info->low = p->p_nice;
153 	}
154 	return(0);
155 }
156 
/*
 * State passed to setpriority_callback() while scanning all processes
 * for the PRIO_USER case of sys_setpriority().
 */
struct setpriority_info {
	int prio;	/* nice value to apply */
	int who;	/* uid whose processes are being changed */
	int error;	/* last non-zero donice() result, else 0 */
	int found;	/* number of processes donice() was applied to */
};

static int setpriority_callback(struct proc *p, void *data);
165 
166 int
167 sys_setpriority(struct setpriority_args *uap)
168 {
169 	struct setpriority_info info;
170 	struct proc *curp = curproc;
171 	struct proc *p;
172 	int found = 0, error = 0;
173 
174 	switch (uap->which) {
175 	case PRIO_PROCESS:
176 		if (uap->who == 0)
177 			p = curp;
178 		else
179 			p = pfind(uap->who);
180 		if (p == 0)
181 			break;
182 		if (!PRISON_CHECK(curp->p_ucred, p->p_ucred))
183 			break;
184 		error = donice(p, uap->prio);
185 		found++;
186 		break;
187 
188 	case PRIO_PGRP:
189 	{
190 		struct pgrp *pg;
191 
192 		if (uap->who == 0)
193 			pg = curp->p_pgrp;
194 		else if ((pg = pgfind(uap->who)) == NULL)
195 			break;
196 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
197 			if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
198 				error = donice(p, uap->prio);
199 				found++;
200 			}
201 		}
202 		break;
203 	}
204 	case PRIO_USER:
205 		if (uap->who == 0)
206 			uap->who = curp->p_ucred->cr_uid;
207 		info.prio = uap->prio;
208 		info.who = uap->who;
209 		info.error = 0;
210 		info.found = 0;
211 		allproc_scan(setpriority_callback, &info);
212 		error = info.error;
213 		found = info.found;
214 		break;
215 
216 	default:
217 		return (EINVAL);
218 	}
219 	if (found == 0)
220 		return (ESRCH);
221 	return (error);
222 }
223 
224 static
225 int
226 setpriority_callback(struct proc *p, void *data)
227 {
228 	struct setpriority_info *info = data;
229 	int error;
230 
231 	if (p->p_ucred->cr_uid == info->who &&
232 	    PRISON_CHECK(curproc->p_ucred, p->p_ucred)) {
233 		error = donice(p, info->prio);
234 		if (error)
235 			info->error = error;
236 		++info->found;
237 	}
238 	return(0);
239 }
240 
241 static int
242 donice(struct proc *chgp, int n)
243 {
244 	struct proc *curp = curproc;
245 	struct ucred *cr = curp->p_ucred;
246 	struct lwp *lp;
247 
248 	if (cr->cr_uid && cr->cr_ruid &&
249 	    cr->cr_uid != chgp->p_ucred->cr_uid &&
250 	    cr->cr_ruid != chgp->p_ucred->cr_uid)
251 		return (EPERM);
252 	if (n > PRIO_MAX)
253 		n = PRIO_MAX;
254 	if (n < PRIO_MIN)
255 		n = PRIO_MIN;
256 	if (n < chgp->p_nice && suser_cred(cr, 0))
257 		return (EACCES);
258 	chgp->p_nice = n;
259 	FOREACH_LWP_IN_PROC(lp, chgp)
260 		chgp->p_usched->resetpriority(lp);
261 	return (0);
262 }
263 
264 int
265 sys_lwp_rtprio(struct lwp_rtprio_args *uap)
266 {
267 	struct proc *p = curproc;
268 	struct lwp *lp;
269 	struct rtprio rtp;
270 	struct ucred *cr = p->p_ucred;
271 	int error;
272 
273 	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
274 	if (error)
275 		return error;
276 
277 	if (uap->pid < 0) {
278 		return EINVAL;
279 	} else if (uap->pid == 0) {
280 		/* curproc already loaded on p */
281 	} else {
282 		p = pfind(uap->pid);
283 	}
284 
285 	if (p == 0) {
286 		return ESRCH;
287 	}
288 
289 	if (uap->tid < -1) {
290 		return EINVAL;
291 	} else if (uap->tid == -1) {
292 		/*
293 		 * sadly, tid can be 0 so we can't use 0 here
294 		 * like sys_rtprio()
295 		 */
296 		lp = curthread->td_lwp;
297 	} else {
298 		lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
299 		if (lp == NULL)
300 			return ESRCH;
301 	}
302 
303 	switch (uap->function) {
304 	case RTP_LOOKUP:
305 		return (copyout(&lp->lwp_rtprio, uap->rtp,
306 				sizeof(struct rtprio)));
307 	case RTP_SET:
308 		if (cr->cr_uid && cr->cr_ruid &&
309 		    cr->cr_uid != p->p_ucred->cr_uid &&
310 		    cr->cr_ruid != p->p_ucred->cr_uid) {
311 			return EPERM;
312 		}
313 		/* disallow setting rtprio in most cases if not superuser */
314 		if (suser_cred(cr, 0)) {
315 			/* can't set someone else's */
316 			if (uap->pid) { /* XXX */
317 				return EPERM;
318 			}
319 			/* can't set realtime priority */
320 /*
321  * Realtime priority has to be restricted for reasons which should be
322  * obvious. However, for idle priority, there is a potential for
323  * system deadlock if an idleprio process gains a lock on a resource
324  * that other processes need (and the idleprio process can't run
325  * due to a CPU-bound normal process). Fix me! XXX
326  */
327  			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
328 				return EPERM;
329 			}
330 		}
331 		switch (rtp.type) {
332 #ifdef RTP_PRIO_FIFO
333 		case RTP_PRIO_FIFO:
334 #endif
335 		case RTP_PRIO_REALTIME:
336 		case RTP_PRIO_NORMAL:
337 		case RTP_PRIO_IDLE:
338 			if (rtp.prio > RTP_PRIO_MAX)
339 				return EINVAL;
340 			lp->lwp_rtprio = rtp;
341 			return 0;
342 		default:
343 			return EINVAL;
344 		}
345 	default:
346 		return EINVAL;
347 	}
348 	panic("can't get here");
349 }
350 
351 /*
352  * Set realtime priority
353  */
354 /* ARGSUSED */
355 int
356 sys_rtprio(struct rtprio_args *uap)
357 {
358 	struct proc *curp = curproc;
359 	struct proc *p;
360 	struct lwp *lp;
361 	struct ucred *cr = curp->p_ucred;
362 	struct rtprio rtp;
363 	int error;
364 
365 	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
366 	if (error)
367 		return (error);
368 
369 	if (uap->pid == 0)
370 		p = curp;
371 	else
372 		p = pfind(uap->pid);
373 
374 	if (p == 0)
375 		return (ESRCH);
376 
377 	/* XXX lwp */
378 	lp = FIRST_LWP_IN_PROC(p);
379 	switch (uap->function) {
380 	case RTP_LOOKUP:
381 		return (copyout(&lp->lwp_rtprio, uap->rtp, sizeof(struct rtprio)));
382 	case RTP_SET:
383 		if (cr->cr_uid && cr->cr_ruid &&
384 		    cr->cr_uid != p->p_ucred->cr_uid &&
385 		    cr->cr_ruid != p->p_ucred->cr_uid)
386 		        return (EPERM);
387 		/* disallow setting rtprio in most cases if not superuser */
388 		if (suser_cred(cr, 0)) {
389 			/* can't set someone else's */
390 			if (uap->pid)
391 				return (EPERM);
392 			/* can't set realtime priority */
393 /*
394  * Realtime priority has to be restricted for reasons which should be
395  * obvious. However, for idle priority, there is a potential for
396  * system deadlock if an idleprio process gains a lock on a resource
397  * that other processes need (and the idleprio process can't run
398  * due to a CPU-bound normal process). Fix me! XXX
399  */
400  			if (RTP_PRIO_IS_REALTIME(rtp.type))
401 				return (EPERM);
402 		}
403 		switch (rtp.type) {
404 #ifdef RTP_PRIO_FIFO
405 		case RTP_PRIO_FIFO:
406 #endif
407 		case RTP_PRIO_REALTIME:
408 		case RTP_PRIO_NORMAL:
409 		case RTP_PRIO_IDLE:
410 			if (rtp.prio > RTP_PRIO_MAX)
411 				return (EINVAL);
412 			lp->lwp_rtprio = rtp;
413 			return (0);
414 		default:
415 			return (EINVAL);
416 		}
417 
418 	default:
419 		return (EINVAL);
420 	}
421 }
422 
423 int
424 sys_setrlimit(struct __setrlimit_args *uap)
425 {
426 	struct rlimit alim;
427 	int error;
428 
429 	error = copyin(uap->rlp, &alim, sizeof(alim));
430 	if (error)
431 		return (error);
432 
433 	error = kern_setrlimit(uap->which, &alim);
434 
435 	return (error);
436 }
437 
438 int
439 sys_getrlimit(struct __getrlimit_args *uap)
440 {
441 	struct rlimit lim;
442 	int error;
443 
444 	error = kern_getrlimit(uap->which, &lim);
445 
446 	if (error == 0)
447 		error = copyout(&lim, uap->rlp, sizeof(*uap->rlp));
448 	return error;
449 }
450 
451 /*
452  * Transform the running time and tick information in lwp lp's thread into user,
453  * system, and interrupt time usage.
454  *
455  * Since we are limited to statclock tick granularity this is a statisical
456  * calculation which will be correct over the long haul, but should not be
457  * expected to measure fine grained deltas.
458  *
459  * It is possible to catch a lwp in the midst of being created, so
460  * check whether lwp_thread is NULL or not.
461  */
462 void
463 calcru(struct lwp *lp, struct timeval *up, struct timeval *sp)
464 {
465 	struct thread *td;
466 
467 	/*
468 	 * Calculate at the statclock level.  YYY if the thread is owned by
469 	 * another cpu we need to forward the request to the other cpu, or
470 	 * have a token to interlock the information in order to avoid racing
471 	 * thread destruction.
472 	 */
473 	if ((td = lp->lwp_thread) != NULL) {
474 		crit_enter();
475 		up->tv_sec = td->td_uticks / 1000000;
476 		up->tv_usec = td->td_uticks % 1000000;
477 		sp->tv_sec = td->td_sticks / 1000000;
478 		sp->tv_usec = td->td_sticks % 1000000;
479 		crit_exit();
480 	}
481 }
482 
483 /*
484  * Aggregate resource statistics of all lwps of a process.
485  *
486  * proc.p_ru keeps track of all statistics directly related to a proc.  This
487  * consists of RSS usage and nswap information and aggregate numbers for all
488  * former lwps of this proc.
489  *
490  * proc.p_cru is the sum of all stats of reaped children.
491  *
492  * lwp.lwp_ru contains the stats directly related to one specific lwp, meaning
493  * packet, scheduler switch or page fault counts, etc.  This information gets
494  * added to lwp.lwp_proc.p_ru when the lwp exits.
495  */
496 void
497 calcru_proc(struct proc *p, struct rusage *ru)
498 {
499 	struct timeval upt, spt;
500 	long *rip1, *rip2;
501 	struct lwp *lp;
502 
503 	*ru = p->p_ru;
504 
505 	FOREACH_LWP_IN_PROC(lp, p) {
506 		calcru(lp, &upt, &spt);
507 		timevaladd(&ru->ru_utime, &upt);
508 		timevaladd(&ru->ru_stime, &spt);
509 		for (rip1 = &ru->ru_first, rip2 = &lp->lwp_ru.ru_first;
510 		     rip1 <= &ru->ru_last;
511 		     rip1++, rip2++)
512 			*rip1 += *rip2;
513 	}
514 }
515 
516 
517 /* ARGSUSED */
518 int
519 sys_getrusage(struct getrusage_args *uap)
520 {
521 	struct rusage ru;
522 	struct rusage *rup;
523 
524 	switch (uap->who) {
525 
526 	case RUSAGE_SELF:
527 		rup = &ru;
528 		calcru_proc(curproc, rup);
529 		break;
530 
531 	case RUSAGE_CHILDREN:
532 		rup = &curproc->p_cru;
533 		break;
534 
535 	default:
536 		return (EINVAL);
537 	}
538 	return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
539 	    sizeof (struct rusage)));
540 }
541 
542 void
543 ruadd(struct rusage *ru, struct rusage *ru2)
544 {
545 	long *ip, *ip2;
546 	int i;
547 
548 	timevaladd(&ru->ru_utime, &ru2->ru_utime);
549 	timevaladd(&ru->ru_stime, &ru2->ru_stime);
550 	if (ru->ru_maxrss < ru2->ru_maxrss)
551 		ru->ru_maxrss = ru2->ru_maxrss;
552 	ip = &ru->ru_first; ip2 = &ru2->ru_first;
553 	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
554 		*ip++ += *ip2++;
555 }
556 
557 /*
558  * Find the uidinfo structure for a uid.  This structure is used to
559  * track the total resource consumption (process count, socket buffer
560  * size, etc.) for the uid and impose limits.
561  */
562 void
563 uihashinit(void)
564 {
565 	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
566 }
567 
568 static struct uidinfo *
569 uilookup(uid_t uid)
570 {
571 	struct	uihashhead *uipp;
572 	struct	uidinfo *uip;
573 
574 	uipp = UIHASH(uid);
575 	LIST_FOREACH(uip, uipp, ui_hash) {
576 		if (uip->ui_uid == uid)
577 			break;
578 	}
579 	return (uip);
580 }
581 
582 static struct uidinfo *
583 uicreate(uid_t uid)
584 {
585 	struct	uidinfo *uip, *norace;
586 
587 	/*
588 	 * Allocate space and check for a race
589 	 */
590 	MALLOC(uip, struct uidinfo *, sizeof(*uip), M_UIDINFO, M_WAITOK);
591 	norace = uilookup(uid);
592 	if (norace != NULL) {
593 		FREE(uip, M_UIDINFO);
594 		return (norace);
595 	}
596 
597 	/*
598 	 * Initialize structure and enter it into the hash table
599 	 */
600 	LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
601 	uip->ui_uid = uid;
602 	uip->ui_proccnt = 0;
603 	uip->ui_sbsize = 0;
604 	uip->ui_ref = 0;
605 	uip->ui_posixlocks = 0;
606 	varsymset_init(&uip->ui_varsymset, NULL);
607 	return (uip);
608 }
609 
610 struct uidinfo *
611 uifind(uid_t uid)
612 {
613 	struct	uidinfo *uip;
614 
615 	uip = uilookup(uid);
616 	if (uip == NULL)
617 		uip = uicreate(uid);
618 	uip->ui_ref++;
619 	return (uip);
620 }
621 
622 static __inline void
623 uifree(struct uidinfo *uip)
624 {
625 	if (uip->ui_sbsize != 0)
626 		/* XXX no %qd in kernel.  Truncate. */
627 		kprintf("freeing uidinfo: uid = %d, sbsize = %ld\n",
628 		    uip->ui_uid, (long)uip->ui_sbsize);
629 	if (uip->ui_proccnt != 0)
630 		kprintf("freeing uidinfo: uid = %d, proccnt = %ld\n",
631 		    uip->ui_uid, uip->ui_proccnt);
632 	LIST_REMOVE(uip, ui_hash);
633 	varsymset_clean(&uip->ui_varsymset);
634 	FREE(uip, M_UIDINFO);
635 }
636 
637 void
638 uihold(struct uidinfo *uip)
639 {
640 	++uip->ui_ref;
641 	KKASSERT(uip->ui_ref > 0);
642 }
643 
644 void
645 uidrop(struct uidinfo *uip)
646 {
647 	KKASSERT(uip->ui_ref > 0);
648 	if (--uip->ui_ref == 0)
649 		uifree(uip);
650 }
651 
/*
 * Replace the uidinfo referenced by *puip with nuip.  The reference on
 * the old structure is dropped; nuip's reference count is not changed
 * here.
 */
void
uireplace(struct uidinfo **puip, struct uidinfo *nuip)
{
	struct uidinfo *old = *puip;

	uidrop(old);
	*puip = nuip;
}
658 
659 /*
660  * Change the count associated with number of processes
661  * a given user is using.  When 'max' is 0, don't enforce a limit
662  */
663 int
664 chgproccnt(struct uidinfo *uip, int diff, int max)
665 {
666 	/* don't allow them to exceed max, but allow subtraction */
667 	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0)
668 		return (0);
669 	uip->ui_proccnt += diff;
670 	if (uip->ui_proccnt < 0)
671 		kprintf("negative proccnt for uid = %d\n", uip->ui_uid);
672 	return (1);
673 }
674 
675 /*
676  * Change the total socket buffer size a user has used.
677  */
678 int
679 chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max)
680 {
681 	rlim_t new;
682 
683 	crit_enter();
684 	new = uip->ui_sbsize + to - *hiwat;
685 	/* don't allow them to exceed max, but allow subtraction */
686 	if (to > *hiwat && new > max) {
687 		crit_exit();
688 		return (0);
689 	}
690 	uip->ui_sbsize = new;
691 	*hiwat = to;
692 	if (uip->ui_sbsize < 0)
693 		kprintf("negative sbsize for uid = %d\n", uip->ui_uid);
694 	crit_exit();
695 	return (1);
696 }
697 
698