xref: /dragonfly/sys/kern/kern_resource.c (revision 277350a0)
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $
 */

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kern_syscall.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/time.h>
#include <sys/lockf.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

#include <sys/thread2.h>
#include <sys/spinlock2.h>

static int donice (struct proc *chgp, int n);
static int doionice (struct proc *chgp, int n);

static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct spinlock uihash_lock;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

static struct uidinfo	*uicreate (uid_t uid);
static struct uidinfo	*uilookup (uid_t uid);

/*
 * Resource controls and accounting.
 */

struct getpriority_info {
	int low;
	int who;
};

static int getpriority_callback(struct proc *p, void *data);

/*
 * MPALMOSTSAFE
 */
int
sys_getpriority(struct getpriority_args *uap)
{
	struct getpriority_info info;
	thread_t curtd = curthread;
	struct proc *curp = curproc;
	struct proc *p;
	struct pgrp *pg;
	int low = PRIO_MAX + 1;
	int error;

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			low = curp->p_nice;
		} else {
			p = pfind(uap->who);
			if (p) {
				lwkt_gettoken_shared(&p->p_token);
				if (PRISON_CHECK(curtd->td_ucred, p->p_ucred))
					low = p->p_nice;
				lwkt_reltoken(&p->p_token);
				PRELE(p);
			}
		}
		break;
	case PRIO_PGRP:
		if (uap->who == 0) {
			lwkt_gettoken_shared(&curp->p_token);
			pg = curp->p_pgrp;
			pgref(pg);
			lwkt_reltoken(&curp->p_token);
		} else if ((pg = pgfind(uap->who)) == NULL) {
			break;
		} /* else ref held from pgfind */

		lwkt_gettoken_shared(&pg->pg_token);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (PRISON_CHECK(curtd->td_ucred, p->p_ucred) &&
			    p->p_nice < low) {
				low = p->p_nice;
			}
		}
		lwkt_reltoken(&pg->pg_token);
		pgrel(pg);
		break;
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curtd->td_ucred->cr_uid;
		info.low = low;
		info.who = uap->who;
		allproc_scan(getpriority_callback, &info);
		low = info.low;
		break;

	default:
		error = EINVAL;
		goto done;
	}
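	/*
	 * low still at its PRIO_MAX + 1 sentinel means no matching
	 * process was found.
	 */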
	if (low == PRIO_MAX + 1) {
		error = ESRCH;
		goto done;
	}
	uap->sysmsg_result = low;
	error = 0;
done:
	return (error);
}

/*
 * Figure out the current lowest nice priority for processes owned
 * by the specified user.
 */
static
int
getpriority_callback(struct proc *p, void *data)
{
	struct getpriority_info *info = data;

	lwkt_gettoken_shared(&p->p_token);
	if (PRISON_CHECK(curthread->td_ucred, p->p_ucred) &&
	    p->p_ucred->cr_uid == info->who &&
	    p->p_nice < info->low) {
		info->low = p->p_nice;
	}
	lwkt_reltoken(&p->p_token);
	return(0);
}

struct setpriority_info {
	int prio;
	int who;
	int error;
	int found;
};

static int setpriority_callback(struct proc *p, void *data);

/*
 * MPALMOSTSAFE
 */
int
sys_setpriority(struct setpriority_args *uap)
{
	struct setpriority_info info;
	thread_t curtd = curthread;
	struct proc *curp = curproc;
	struct proc *p;
	struct pgrp *pg;
	int found = 0, error = 0;

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			lwkt_gettoken(&curp->p_token);
			error = donice(curp, uap->prio);
			found++;
			lwkt_reltoken(&curp->p_token);
		} else {
			p = pfind(uap->who);
			if (p) {
				lwkt_gettoken(&p->p_token);
				if (PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
					error = donice(p, uap->prio);
					found++;
				}
				lwkt_reltoken(&p->p_token);
				PRELE(p);
			}
		}
		break;
	case PRIO_PGRP:
		if (uap->who == 0) {
			lwkt_gettoken_shared(&curp->p_token);
			pg = curp->p_pgrp;
			pgref(pg);
			lwkt_reltoken(&curp->p_token);
		} else if ((pg = pgfind(uap->who)) == NULL) {
			break;
		} /* else ref held from pgfind */

		lwkt_gettoken(&pg->pg_token);
restart:
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PHOLD(p);
			lwkt_gettoken(&p->p_token);
			if (p->p_pgrp == pg &&
			    PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
				error = donice(p, uap->prio);
				found++;
			}
			lwkt_reltoken(&p->p_token);
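			/*
			 * If the process changed process groups while we
			 * blocked, our p_pglist linkage is no longer
			 * trustworthy; rescan the group from the start.
			 */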
			if (p->p_pgrp != pg) {
				PRELE(p);
				goto restart;
			}
			PRELE(p);
		}
		lwkt_reltoken(&pg->pg_token);
		pgrel(pg);
		break;
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curtd->td_ucred->cr_uid;
		info.prio = uap->prio;
		info.who = uap->who;
		info.error = 0;
		info.found = 0;
		allproc_scan(setpriority_callback, &info);
		error = info.error;
		found = info.found;
		break;
	default:
		error = EINVAL;
		found = 1;
		break;
	}

	if (found == 0)
		error = ESRCH;
	return (error);
}

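/*
 * allproc_scan() callback to renice all processes owned by the
 * specified user, accumulating any error and a match count in the
 * setpriority_info structure.
 */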
static
int
setpriority_callback(struct proc *p, void *data)
{
	struct setpriority_info *info = data;
	int error;

	lwkt_gettoken(&p->p_token);
	if (p->p_ucred->cr_uid == info->who &&
	    PRISON_CHECK(curthread->td_ucred, p->p_ucred)) {
		error = donice(p, info->prio);
		if (error)
			info->error = error;
		++info->found;
	}
	lwkt_reltoken(&p->p_token);
	return(0);
}

/*
 * Caller must hold chgp->p_token
 */
static int
donice(struct proc *chgp, int n)
{
	struct ucred *cr = curthread->td_ucred;
	struct lwp *lp;

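	/*
	 * A caller that is not root (neither the effective nor the real
	 * uid is 0) may only renice processes whose effective uid matches
	 * the caller's effective or real uid.
	 */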
	if (cr->cr_uid && cr->cr_ruid &&
	    cr->cr_uid != chgp->p_ucred->cr_uid &&
	    cr->cr_ruid != chgp->p_ucred->cr_uid)
		return (EPERM);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
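	/* lowering the nice value requires PRIV_SCHED_SETPRIORITY */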
	if (n < chgp->p_nice && priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0))
		return (EACCES);
	chgp->p_nice = n;
	FOREACH_LWP_IN_PROC(lp, chgp) {
		LWPHOLD(lp);
		chgp->p_usched->resetpriority(lp);
		LWPRELE(lp);
	}
	return (0);
}

struct ioprio_get_info {
	int high;
	int who;
};

static int ioprio_get_callback(struct proc *p, void *data);

/*
 * MPALMOSTSAFE
 */
int
sys_ioprio_get(struct ioprio_get_args *uap)
{
	struct ioprio_get_info info;
	thread_t curtd = curthread;
	struct proc *curp = curproc;
	struct proc *p;
	struct pgrp *pg;
	int high = IOPRIO_MIN - 2;
	int error;

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			high = curp->p_ionice;
		} else {
			p = pfind(uap->who);
			if (p) {
				lwkt_gettoken_shared(&p->p_token);
				if (PRISON_CHECK(curtd->td_ucred, p->p_ucred))
					high = p->p_ionice;
				lwkt_reltoken(&p->p_token);
				PRELE(p);
			}
		}
		break;
	case PRIO_PGRP:
		if (uap->who == 0) {
			lwkt_gettoken_shared(&curp->p_token);
			pg = curp->p_pgrp;
			pgref(pg);
			lwkt_reltoken(&curp->p_token);
		} else if ((pg = pgfind(uap->who)) == NULL) {
			break;
		} /* else ref held from pgfind */

		lwkt_gettoken_shared(&pg->pg_token);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (PRISON_CHECK(curtd->td_ucred, p->p_ucred) &&
			    p->p_ionice > high)
				high = p->p_ionice;
		}
		lwkt_reltoken(&pg->pg_token);
		pgrel(pg);
		break;
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curtd->td_ucred->cr_uid;
		info.high = high;
		info.who = uap->who;
		allproc_scan(ioprio_get_callback, &info);
		high = info.high;
		break;
	default:
		error = EINVAL;
		goto done;
	}
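	/*
	 * high still at its IOPRIO_MIN - 2 sentinel means no matching
	 * process was found.
	 */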
	if (high == IOPRIO_MIN - 2) {
		error = ESRCH;
		goto done;
	}
	uap->sysmsg_result = high;
	error = 0;
done:
	return (error);
}

/*
 * Figure out the current highest I/O priority for processes owned
 * by the specified user.
 */
static
int
ioprio_get_callback(struct proc *p, void *data)
{
	struct ioprio_get_info *info = data;

	lwkt_gettoken_shared(&p->p_token);
	if (PRISON_CHECK(curthread->td_ucred, p->p_ucred) &&
	    p->p_ucred->cr_uid == info->who &&
	    p->p_ionice > info->high) {
		info->high = p->p_ionice;
	}
	lwkt_reltoken(&p->p_token);
	return(0);
}

struct ioprio_set_info {
	int prio;
	int who;
	int error;
	int found;
};

static int ioprio_set_callback(struct proc *p, void *data);

/*
 * MPALMOSTSAFE
 */
int
sys_ioprio_set(struct ioprio_set_args *uap)
{
	struct ioprio_set_info info;
	thread_t curtd = curthread;
	struct proc *curp = curproc;
	struct proc *p;
	struct pgrp *pg;
	int found = 0, error = 0;

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			lwkt_gettoken(&curp->p_token);
			error = doionice(curp, uap->prio);
			lwkt_reltoken(&curp->p_token);
			found++;
		} else {
			p = pfind(uap->who);
			if (p) {
				lwkt_gettoken(&p->p_token);
				if (PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
					error = doionice(p, uap->prio);
					found++;
				}
				lwkt_reltoken(&p->p_token);
				PRELE(p);
			}
		}
		break;
	case PRIO_PGRP:
		if (uap->who == 0) {
			lwkt_gettoken_shared(&curp->p_token);
			pg = curp->p_pgrp;
			pgref(pg);
			lwkt_reltoken(&curp->p_token);
		} else if ((pg = pgfind(uap->who)) == NULL) {
			break;
		} /* else ref held from pgfind */

		lwkt_gettoken(&pg->pg_token);
restart:
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PHOLD(p);
			lwkt_gettoken(&p->p_token);
			if (p->p_pgrp == pg &&
			    PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
				error = doionice(p, uap->prio);
				found++;
			}
			lwkt_reltoken(&p->p_token);
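			/*
			 * If the process changed process groups while we
			 * blocked, our p_pglist linkage is no longer
			 * trustworthy; rescan the group from the start.
			 */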
			if (p->p_pgrp != pg) {
				PRELE(p);
				goto restart;
			}
			PRELE(p);
		}
		lwkt_reltoken(&pg->pg_token);
		pgrel(pg);
		break;
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curtd->td_ucred->cr_uid;
		info.prio = uap->prio;
		info.who = uap->who;
		info.error = 0;
		info.found = 0;
		allproc_scan(ioprio_set_callback, &info);
		error = info.error;
		found = info.found;
		break;
	default:
		error = EINVAL;
		found = 1;
		break;
	}

	if (found == 0)
		error = ESRCH;
	return (error);
}

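/*
 * allproc_scan() callback to set the I/O priority of all processes
 * owned by the specified user, accumulating any error and a match
 * count in the ioprio_set_info structure.
 */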
static
int
ioprio_set_callback(struct proc *p, void *data)
{
	struct ioprio_set_info *info = data;
	int error;

	lwkt_gettoken(&p->p_token);
	if (p->p_ucred->cr_uid == info->who &&
	    PRISON_CHECK(curthread->td_ucred, p->p_ucred)) {
		error = doionice(p, info->prio);
		if (error)
			info->error = error;
		++info->found;
	}
	lwkt_reltoken(&p->p_token);
	return(0);
}

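/*
 * Clamp and set the I/O priority (ionice value) of a process.  The
 * same permission rules as donice() apply.  Caller must hold
 * chgp->p_token.
 */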
static int
doionice(struct proc *chgp, int n)
{
	struct ucred *cr = curthread->td_ucred;

	if (cr->cr_uid && cr->cr_ruid &&
	    cr->cr_uid != chgp->p_ucred->cr_uid &&
	    cr->cr_ruid != chgp->p_ucred->cr_uid)
		return (EPERM);
	if (n > IOPRIO_MAX)
		n = IOPRIO_MAX;
	if (n < IOPRIO_MIN)
		n = IOPRIO_MIN;
	if (n < chgp->p_ionice &&
	    priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0))
		return (EACCES);
	chgp->p_ionice = n;

	return (0);
}

/*
 * MPALMOSTSAFE
 */
int
sys_lwp_rtprio(struct lwp_rtprio_args *uap)
{
	struct ucred *cr = curthread->td_ucred;
	struct proc *p;
	struct lwp *lp;
	struct rtprio rtp;
	int error;

	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	if (error)
		return error;
	if (uap->pid < 0)
		return EINVAL;

	if (uap->pid == 0) {
		p = curproc;
		PHOLD(p);
	} else {
		p = pfind(uap->pid);
	}
	if (p == NULL) {
		error = ESRCH;
		goto done;
	}
	lwkt_gettoken(&p->p_token);

	if (uap->tid < -1) {
		error = EINVAL;
		goto done;
	}
	if (uap->tid == -1) {
		/*
		 * sadly, tid can be 0 so we can't use 0 here
		 * like sys_rtprio()
		 */
		lp = curthread->td_lwp;
	} else {
		lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
		if (lp == NULL) {
			error = ESRCH;
			goto done;
		}
	}

	/*
	 * Make sure that this lwp is not torn down while any of the
	 * following code potentially blocks, e.g. the copyout.
	 */
	LWPHOLD(lp);
	switch (uap->function) {
	case RTP_LOOKUP:
		error = copyout(&lp->lwp_rtprio, uap->rtp,
				sizeof(struct rtprio));
		break;
	case RTP_SET:
		if (cr->cr_uid && cr->cr_ruid &&
		    cr->cr_uid != p->p_ucred->cr_uid &&
		    cr->cr_ruid != p->p_ucred->cr_uid) {
			error = EPERM;
			break;
		}
		/* disallow setting rtprio in most cases if not superuser */
		if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) {
			/* can't set someone else's */
			if (uap->pid) { /* XXX */
				error = EPERM;
				break;
			}
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious. However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process). Fix me! XXX
 */
			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
				error = EPERM;
				break;
			}
		}
		switch (rtp.type) {
#ifdef RTP_PRIO_FIFO
		case RTP_PRIO_FIFO:
#endif
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_NORMAL:
		case RTP_PRIO_IDLE:
			if (rtp.prio > RTP_PRIO_MAX) {
				error = EINVAL;
			} else {
				lp->lwp_rtprio = rtp;
				error = 0;
			}
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	LWPRELE(lp);

done:
	if (p) {
		lwkt_reltoken(&p->p_token);
		PRELE(p);
	}
	return (error);
}

/*
 * Set realtime priority
 *
 * MPALMOSTSAFE
 */
int
sys_rtprio(struct rtprio_args *uap)
{
	struct ucred *cr = curthread->td_ucred;
	struct proc *p;
	struct lwp *lp;
	struct rtprio rtp;
	int error;

	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	if (error)
		return (error);

	if (uap->pid == 0) {
		p = curproc;
		PHOLD(p);
	} else {
		p = pfind(uap->pid);
	}

	if (p == NULL) {
		error = ESRCH;
		goto done;
	}
	lwkt_gettoken(&p->p_token);

	/* XXX lwp */
	lp = FIRST_LWP_IN_PROC(p);
	switch (uap->function) {
	case RTP_LOOKUP:
		error = copyout(&lp->lwp_rtprio, uap->rtp,
				sizeof(struct rtprio));
		break;
	case RTP_SET:
		if (cr->cr_uid && cr->cr_ruid &&
		    cr->cr_uid != p->p_ucred->cr_uid &&
		    cr->cr_ruid != p->p_ucred->cr_uid) {
			error = EPERM;
			break;
		}
		/* disallow setting rtprio in most cases if not superuser */
		if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) {
			/* can't set someone else's */
			if (uap->pid) {
				error = EPERM;
				break;
			}
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious. However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process). Fix me! XXX
 */
			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
				error = EPERM;
				break;
			}
		}
		switch (rtp.type) {
#ifdef RTP_PRIO_FIFO
		case RTP_PRIO_FIFO:
#endif
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_NORMAL:
		case RTP_PRIO_IDLE:
			if (rtp.prio > RTP_PRIO_MAX) {
				error = EINVAL;
				break;
			}
			lp->lwp_rtprio = rtp;
			error = 0;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}
done:
	if (p) {
		lwkt_reltoken(&p->p_token);
		PRELE(p);
	}

	return (error);
}

/*
 * MPSAFE
 */
int
sys_setrlimit(struct __setrlimit_args *uap)
{
	struct rlimit alim;
	int error;

	error = copyin(uap->rlp, &alim, sizeof(alim));
	if (error)
		return (error);

	error = kern_setrlimit(uap->which, &alim);

	return (error);
}

/*
 * MPSAFE
 */
int
sys_getrlimit(struct __getrlimit_args *uap)
{
	struct rlimit lim;
	int error;

	error = kern_getrlimit(uap->which, &lim);

	if (error == 0)
		error = copyout(&lim, uap->rlp, sizeof(*uap->rlp));
	return error;
}

/*
 * Transform the running time and tick information in lwp lp's thread
 * into user and system time usage.
 *
 * Since we are limited to statclock tick granularity this is a
 * statistical calculation which will be correct over the long haul,
 * but should not be expected to measure fine grained deltas.
 *
 * It is possible to catch a lwp in the midst of being created, so
 * check whether lwp_thread is NULL or not.
 */
void
calcru(struct lwp *lp, struct timeval *up, struct timeval *sp)
{
	struct thread *td;

	/*
	 * Calculate at the statclock level.  YYY if the thread is owned by
	 * another cpu we need to forward the request to the other cpu, or
	 * have a token to interlock the information in order to avoid racing
	 * thread destruction.
	 */
	if ((td = lp->lwp_thread) != NULL) {
		crit_enter();
		up->tv_sec = td->td_uticks / 1000000;
		up->tv_usec = td->td_uticks % 1000000;
		sp->tv_sec = td->td_sticks / 1000000;
		sp->tv_usec = td->td_sticks % 1000000;
		crit_exit();
	}
}

/*
 * Aggregate resource statistics of all lwps of a process.
 *
 * proc.p_ru keeps track of all statistics directly related to a proc.  This
 * consists of RSS usage and nswap information and aggregate numbers for all
 * former lwps of this proc.
 *
 * proc.p_cru is the sum of all stats of reaped children.
 *
 * lwp.lwp_ru contains the stats directly related to one specific lwp, meaning
 * packet, scheduler switch or page fault counts, etc.  This information gets
 * added to lwp.lwp_proc.p_ru when the lwp exits.
 */
void
calcru_proc(struct proc *p, struct rusage *ru)
{
	struct timeval upt, spt;
	long *rip1, *rip2;
	struct lwp *lp;

	*ru = p->p_ru;

	FOREACH_LWP_IN_PROC(lp, p) {
		calcru(lp, &upt, &spt);
		timevaladd(&ru->ru_utime, &upt);
		timevaladd(&ru->ru_stime, &spt);
		for (rip1 = &ru->ru_first, rip2 = &lp->lwp_ru.ru_first;
		     rip1 <= &ru->ru_last;
		     rip1++, rip2++)
			*rip1 += *rip2;
	}
}

/*
 * MPALMOSTSAFE
 */
int
sys_getrusage(struct getrusage_args *uap)
{
	struct proc *p = curproc;
	struct rusage ru;
	struct rusage *rup;
	int error;

	lwkt_gettoken(&p->p_token);

	switch (uap->who) {
	case RUSAGE_SELF:
		rup = &ru;
		calcru_proc(p, rup);
		error = 0;
		break;
	case RUSAGE_CHILDREN:
		rup = &p->p_cru;
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}
	lwkt_reltoken(&p->p_token);

	if (error == 0)
		error = copyout(rup, uap->rusage, sizeof(struct rusage));
	return (error);
}

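/*
 * Add the resource usage counters of ru2 into ru: sum the utime and
 * stime timevals, take the larger of the two maxrss values, and sum
 * the remaining fields pairwise.
 */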
void
ruadd(struct rusage *ru, struct rusage *ru2)
{
	long *ip, *ip2;
	int i;

	timevaladd(&ru->ru_utime, &ru2->ru_utime);
	timevaladd(&ru->ru_stime, &ru2->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit(void)
{
	spin_init(&uihash_lock, "uihashinit");
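	/*
	 * hashinit() sizes the table to a power of 2 and stores
	 * size - 1 in uihash for use by the UIHASH() macro.
	 */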
	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
}

/*
 * NOTE: Must be called with uihash_lock held
 *
 * MPSAFE
 */
static struct uidinfo *
uilookup(uid_t uid)
{
	struct	uihashhead *uipp;
	struct	uidinfo *uip;

	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash) {
		if (uip->ui_uid == uid)
			break;
	}
	return (uip);
}

/*
 * Helper function to create the uidinfo for a uid that could not be
 * found.  This function will properly deal with races.
 *
 * MPSAFE
 */
static struct uidinfo *
uicreate(uid_t uid)
{
	struct	uidinfo *uip, *tmp;

	/*
	 * Allocate space.  The creation race is checked for below,
	 * after we acquire the hash lock.
	 */
	uip = kmalloc(sizeof(*uip), M_UIDINFO, M_WAITOK|M_ZERO);

	/*
	 * Initialize structure and enter it into the hash table
	 */
	spin_init(&uip->ui_lock, "uicreate");
	uip->ui_uid = uid;
	uip->ui_ref = 1;	/* we're returning a ref */
	varsymset_init(&uip->ui_varsymset, NULL);

	/*
	 * Somebody may have already created the uidinfo for this
	 * uid. If so, return that instead.
	 */
	spin_lock(&uihash_lock);
	tmp = uilookup(uid);
	if (tmp != NULL) {
		uihold(tmp);
		spin_unlock(&uihash_lock);

		spin_uninit(&uip->ui_lock);
		varsymset_clean(&uip->ui_varsymset);
		kfree(uip, M_UIDINFO);
		uip = tmp;
	} else {
		LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		spin_unlock(&uihash_lock);
	}
	return (uip);
}

/*
 * Find or create the uidinfo for the specified uid.  A new reference
 * is returned which the caller must release with uidrop().
 *
 * MPSAFE
 */
struct uidinfo *
uifind(uid_t uid)
{
	struct	uidinfo *uip;

	spin_lock(&uihash_lock);
	uip = uilookup(uid);
	if (uip == NULL) {
		spin_unlock(&uihash_lock);
		uip = uicreate(uid);
	} else {
		uihold(uip);
		spin_unlock(&uihash_lock);
	}
	return (uip);
}
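
/*
 * A minimal usage sketch (the credential and the limit variable shown
 * here are hypothetical; a real caller keeps the charge until the
 * resource is released):
 *
 *	struct uidinfo *uip;
 *
 *	uip = uifind(cr->cr_uid);		/- returns a held ref
 *	if (chgproccnt(uip, 1, some_limit) == 0)
 *		;				/- limit would be exceeded
 *	uidrop(uip);				/- drop the ref when done
 */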

/*
 * Helper function to remove a uidinfo whose reference count is
 * transitioning from 1->0.  The reference count is 1 on call.
 *
 * Zero is returned on success, otherwise non-zero and the
 * uip has not been removed.
 *
 * MPSAFE
 */
static __inline int
uifree(struct uidinfo *uip)
{
	/*
	 * If we are still the only holder after acquiring the uihash_lock
	 * we can safely unlink the uip and destroy it.  Otherwise we lost
	 * a race and must fail.
	 */
	spin_lock(&uihash_lock);
	if (uip->ui_ref != 1) {
		spin_unlock(&uihash_lock);
		return(-1);
	}
	LIST_REMOVE(uip, ui_hash);
	spin_unlock(&uihash_lock);

	/*
	 * The uip is now orphaned and we can destroy it at our
	 * leisure.
	 */
	if (uip->ui_sbsize != 0)
		kprintf("freeing uidinfo: uid = %d, sbsize = %jd\n",
		    uip->ui_uid, (intmax_t)uip->ui_sbsize);
	if (uip->ui_proccnt != 0)
		kprintf("freeing uidinfo: uid = %d, proccnt = %ld\n",
		    uip->ui_uid, uip->ui_proccnt);

	varsymset_clean(&uip->ui_varsymset);
	lockuninit(&uip->ui_varsymset.vx_lock);
	spin_uninit(&uip->ui_lock);
	kfree(uip, M_UIDINFO);
	return(0);
}

/*
 * MPSAFE
 */
void
uihold(struct uidinfo *uip)
{
	atomic_add_int(&uip->ui_ref, 1);
	KKASSERT(uip->ui_ref >= 0);
}

/*
 * NOTE: It is important for us to not drop the ref count to 0
 *	 because this can cause a 2->0/2->0 race with another
 *	 concurrent dropper.  Losing the race in that situation
 *	 can cause uip to become stale for one of the other
 *	 threads.
 *
 * MPSAFE
 */
void
uidrop(struct uidinfo *uip)
{
	int ref;

	KKASSERT(uip->ui_ref > 0);

	for (;;) {
		ref = uip->ui_ref;
		cpu_ccfence();
		if (ref == 1) {
			if (uifree(uip) == 0)
				break;
		} else if (atomic_cmpset_int(&uip->ui_ref, ref, ref - 1)) {
			break;
		}
		/* else retry */
	}
}

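/*
 * Replace the uidinfo referenced by *puip with nuip, dropping the
 * reference held on the old uidinfo.  The caller's reference on nuip
 * is transferred to *puip.
 */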
void
uireplace(struct uidinfo **puip, struct uidinfo *nuip)
{
	uidrop(*puip);
	*puip = nuip;
}

/*
 * Change the count of processes a given user is using.  When 'max'
 * is 0 no limit is enforced.  Returns 0 if the change would have
 * exceeded the limit, non-zero on success.
 */
int
chgproccnt(struct uidinfo *uip, int diff, int max)
{
	int ret;

	spin_lock(&uip->ui_lock);
	/* don't allow them to exceed max, but allow subtraction */
	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
		ret = 0;
	} else {
		uip->ui_proccnt += diff;
		if (uip->ui_proccnt < 0)
			kprintf("negative proccnt for uid = %d\n", uip->ui_uid);
		ret = 1;
	}
	spin_unlock(&uip->ui_lock);
	return ret;
}

/*
 * Change the total socket buffer size a user has used.  This routine
 * always succeeds (returns 1); when the user would exceed 'max' the
 * requested high water mark is scaled down instead.
 */
int
chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max)
{
	rlim_t new;

#ifdef __x86_64__
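	/*
	 * On 64-bit platforms the rlim_t accumulator can be updated
	 * with a single atomic op, avoiding the per-uid spinlock.
	 */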
	rlim_t sbsize;

	sbsize = atomic_fetchadd_long(&uip->ui_sbsize, to - *hiwat);
	new = sbsize + to - *hiwat;
#else
	spin_lock(&uip->ui_lock);
	new = uip->ui_sbsize + to - *hiwat;
	uip->ui_sbsize = new;
	spin_unlock(&uip->ui_lock);
#endif
	KKASSERT(new >= 0);

	/*
	 * If we are trying to increase the socket buffer size, scale down
	 * the high water mark when we exceed the user's allowed socket
	 * buffer space.  For example, if the new total would be twice the
	 * allowed max, 'to' is scaled to roughly half its requested value.
	 *
	 * We can't scale down too much or we will blow up atomic packet
	 * operations.
	 */
	if (to > *hiwat && to > MCLBYTES && new > max) {
		to = to * max / new;
		if (to < MCLBYTES)
			to = MCLBYTES;
	}
	*hiwat = to;
	return (1);
}