xref: /dragonfly/sys/kern/kern_resource.c (revision dc71b7ab)
1 /*-
2  * Copyright (c) 1982, 1986, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
35  * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $
36  */
37 
38 #include "opt_compat.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/sysproto.h>
43 #include <sys/file.h>
44 #include <sys/kern_syscall.h>
45 #include <sys/kernel.h>
46 #include <sys/resourcevar.h>
47 #include <sys/malloc.h>
48 #include <sys/proc.h>
49 #include <sys/priv.h>
50 #include <sys/time.h>
51 #include <sys/lockf.h>
52 
53 #include <vm/vm.h>
54 #include <vm/vm_param.h>
55 #include <sys/lock.h>
56 #include <vm/pmap.h>
57 #include <vm/vm_map.h>
58 
59 #include <sys/thread2.h>
60 #include <sys/spinlock2.h>
61 
62 static int donice (struct proc *chgp, int n);
63 static int doionice (struct proc *chgp, int n);
64 
65 static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
66 #define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
67 static struct spinlock uihash_lock;
68 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
69 static u_long uihash;		/* size of hash table - 1 */
70 
71 static struct uidinfo	*uicreate (uid_t uid);
72 static struct uidinfo	*uilookup (uid_t uid);
73 
74 /*
75  * Resource controls and accounting.
76  */
77 
78 struct getpriority_info {
79 	int low;
80 	int who;
81 };
82 
83 static int getpriority_callback(struct proc *p, void *data);
84 
/*
 * getpriority() system call: return the lowest (most favorable) nice
 * value among the processes selected by uap->which / uap->who.
 *
 * 'which' selects the scope: PRIO_PROCESS (single pid), PRIO_PGRP
 * (process group) or PRIO_USER (all processes of a uid); who == 0
 * means the caller's own pid/pgrp/uid.  Jailed callers only see
 * processes that pass PRISON_CHECK().
 *
 * Returns 0 with the nice value in uap->sysmsg_result, ESRCH if no
 * matching process was visible, EINVAL for an unknown 'which'.
 *
 * NOTE(review): unlike sys_setpriority()/sys_ioprio_get() this
 * function does not take proc_token around the lookups/traversal —
 * confirm that is intentional and safe.
 *
 * MPALMOSTSAFE
 */
int
sys_getpriority(struct getpriority_args *uap)
{
	struct getpriority_info info;
	struct proc *curp = curproc;
	struct proc *p;
	int low = PRIO_MAX + 1;		/* sentinel: nothing found yet */
	int error;

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			p = curp;
			PHOLD(p);
		} else {
			/*
			 * NOTE(review): the unconditional PRELE() below
			 * implies pfind() returns a held process — confirm.
			 */
			p = pfind(uap->who);
		}
		if (p) {
			if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
				low = p->p_nice;
			}
			PRELE(p);
		}
		break;

	case PRIO_PGRP:
	{
		struct pgrp *pg;

		if (uap->who == 0) {
			pg = curp->p_pgrp;
			pgref(pg);
		} else if ((pg = pgfind(uap->who)) == NULL) {
			break;
		} /* else ref held from pgfind */

		/* Take the minimum nice over all visible group members */
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (PRISON_CHECK(curp->p_ucred, p->p_ucred) &&
			    p->p_nice < low) {
				low = p->p_nice;
			}
		}
		pgrel(pg);
		break;
	}
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		info.low = low;
		info.who = uap->who;
		allproc_scan(getpriority_callback, &info);
		low = info.low;
		break;

	default:
		error = EINVAL;
		goto done;
	}
	if (low == PRIO_MAX + 1) {
		error = ESRCH;
		goto done;
	}
	uap->sysmsg_result = low;
	error = 0;
done:
	return (error);
}
155 
156 /*
157  * Figure out the current lowest nice priority for processes owned
158  * by the specified user.
159  */
160 static
161 int
162 getpriority_callback(struct proc *p, void *data)
163 {
164 	struct getpriority_info *info = data;
165 
166 	if (PRISON_CHECK(curproc->p_ucred, p->p_ucred) &&
167 	    p->p_ucred->cr_uid == info->who &&
168 	    p->p_nice < info->low) {
169 		info->low = p->p_nice;
170 	}
171 	return(0);
172 }
173 
174 struct setpriority_info {
175 	int prio;
176 	int who;
177 	int error;
178 	int found;
179 };
180 
181 static int setpriority_callback(struct proc *p, void *data);
182 
/*
 * setpriority() system call: apply nice value uap->prio to the
 * processes selected by uap->which / uap->who (same selection rules
 * as getpriority()).  Per-target permission checks and clamping are
 * performed by donice().
 *
 * Returns the last error reported by donice() (0 on success), ESRCH
 * if no matching process was visible, EINVAL for an unknown 'which'.
 *
 * MPALMOSTSAFE
 */
int
sys_setpriority(struct setpriority_args *uap)
{
	struct setpriority_info info;
	struct proc *curp = curproc;
	struct proc *p;
	int found = 0, error = 0;

	/* proc_token interlocks the process/pgrp lookups and traversal */
	lwkt_gettoken(&proc_token);

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			p = curp;
			PHOLD(p);
		} else {
			p = pfind(uap->who);
		}
		if (p) {
			if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
				error = donice(p, uap->prio);
				found++;
			}
			PRELE(p);
		}
		break;

	case PRIO_PGRP:
	{
		struct pgrp *pg;

		if (uap->who == 0) {
			pg = curp->p_pgrp;
			pgref(pg);
		} else if ((pg = pgfind(uap->who)) == NULL) {
			break;
		} /* else ref held from pgfind */

		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
				error = donice(p, uap->prio);
				found++;
			}
		}
		pgrel(pg);
		break;
	}
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		info.prio = uap->prio;
		info.who = uap->who;
		info.error = 0;
		info.found = 0;
		allproc_scan(setpriority_callback, &info);
		error = info.error;
		found = info.found;
		break;

	default:
		error = EINVAL;
		found = 1;	/* suppress the ESRCH override below */
		break;
	}

	lwkt_reltoken(&proc_token);

	if (found == 0)
		error = ESRCH;
	return (error);
}
257 
258 static
259 int
260 setpriority_callback(struct proc *p, void *data)
261 {
262 	struct setpriority_info *info = data;
263 	int error;
264 
265 	if (p->p_ucred->cr_uid == info->who &&
266 	    PRISON_CHECK(curproc->p_ucred, p->p_ucred)) {
267 		error = donice(p, info->prio);
268 		if (error)
269 			info->error = error;
270 		++info->found;
271 	}
272 	return(0);
273 }
274 
/*
 * Apply nice value 'n' to process 'chgp' on behalf of the current
 * process.
 *
 * Permission: allowed when the caller's effective or real uid matches
 * the target's effective uid; a zero uid/ruid (root) short-circuits
 * the test.  Lowering the value below the target's current nice
 * additionally requires PRIV_SCHED_SETPRIORITY.
 *
 * 'n' is silently clamped to [PRIO_MIN, PRIO_MAX].  On success the
 * userland scheduler is told to recompute the priority of every lwp
 * in the target process.
 *
 * Returns 0, EPERM or EACCES.
 */
static int
donice(struct proc *chgp, int n)
{
	struct proc *curp = curproc;
	struct ucred *cr = curp->p_ucred;
	struct lwp *lp;

	if (cr->cr_uid && cr->cr_ruid &&
	    cr->cr_uid != chgp->p_ucred->cr_uid &&
	    cr->cr_ruid != chgp->p_ucred->cr_uid)
		return (EPERM);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < chgp->p_nice && priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0))
		return (EACCES);
	chgp->p_nice = n;
	FOREACH_LWP_IN_PROC(lp, chgp) {
		LWPHOLD(lp);	/* keep lwp stable across resetpriority */
		chgp->p_usched->resetpriority(lp);
		LWPRELE(lp);
	}
	return (0);
}
300 
301 
302 struct ioprio_get_info {
303 	int high;
304 	int who;
305 };
306 
307 static int ioprio_get_callback(struct proc *p, void *data);
308 
309 /*
310  * MPALMOSTSAFE
311  */
312 int
313 sys_ioprio_get(struct ioprio_get_args *uap)
314 {
315 	struct ioprio_get_info info;
316 	struct proc *curp = curproc;
317 	struct proc *p;
318 	int high = IOPRIO_MIN-2;
319 	int error;
320 
321 	lwkt_gettoken(&proc_token);
322 
323 	switch (uap->which) {
324 	case PRIO_PROCESS:
325 		if (uap->who == 0) {
326 			p = curp;
327 			PHOLD(p);
328 		} else {
329 			p = pfind(uap->who);
330 		}
331 		if (p) {
332 			if (PRISON_CHECK(curp->p_ucred, p->p_ucred))
333 				high = p->p_ionice;
334 			PRELE(p);
335 		}
336 		break;
337 
338 	case PRIO_PGRP:
339 	{
340 		struct pgrp *pg;
341 
342 		if (uap->who == 0) {
343 			pg = curp->p_pgrp;
344 			pgref(pg);
345 		} else if ((pg = pgfind(uap->who)) == NULL) {
346 			break;
347 		} /* else ref held from pgfind */
348 
349 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
350 			if (PRISON_CHECK(curp->p_ucred, p->p_ucred) &&
351 			    p->p_nice > high)
352 				high = p->p_ionice;
353 		}
354 		pgrel(pg);
355 		break;
356 	}
357 	case PRIO_USER:
358 		if (uap->who == 0)
359 			uap->who = curp->p_ucred->cr_uid;
360 		info.high = high;
361 		info.who = uap->who;
362 		allproc_scan(ioprio_get_callback, &info);
363 		high = info.high;
364 		break;
365 
366 	default:
367 		error = EINVAL;
368 		goto done;
369 	}
370 	if (high == IOPRIO_MIN-2) {
371 		error = ESRCH;
372 		goto done;
373 	}
374 	uap->sysmsg_result = high;
375 	error = 0;
376 done:
377 	lwkt_reltoken(&proc_token);
378 
379 	return (error);
380 }
381 
382 /*
383  * Figure out the current lowest nice priority for processes owned
384  * by the specified user.
385  */
386 static
387 int
388 ioprio_get_callback(struct proc *p, void *data)
389 {
390 	struct ioprio_get_info *info = data;
391 
392 	if (PRISON_CHECK(curproc->p_ucred, p->p_ucred) &&
393 	    p->p_ucred->cr_uid == info->who &&
394 	    p->p_ionice > info->high) {
395 		info->high = p->p_ionice;
396 	}
397 	return(0);
398 }
399 
400 
401 struct ioprio_set_info {
402 	int prio;
403 	int who;
404 	int error;
405 	int found;
406 };
407 
408 static int ioprio_set_callback(struct proc *p, void *data);
409 
/*
 * ioprio_set() system call: apply I/O nice value uap->prio to the
 * processes selected by uap->which / uap->who (same selection rules
 * as setpriority()).  Per-target permission checks and clamping are
 * performed by doionice().
 *
 * Returns the last error reported by doionice() (0 on success),
 * ESRCH if no matching process was visible, EINVAL for an unknown
 * 'which'.
 *
 * MPALMOSTSAFE
 */
int
sys_ioprio_set(struct ioprio_set_args *uap)
{
	struct ioprio_set_info info;
	struct proc *curp = curproc;
	struct proc *p;
	int found = 0, error = 0;

	/* proc_token interlocks the process/pgrp lookups and traversal */
	lwkt_gettoken(&proc_token);

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			p = curp;
			PHOLD(p);
		} else {
			p = pfind(uap->who);
		}
		if (p) {
			if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
				error = doionice(p, uap->prio);
				found++;
			}
			PRELE(p);
		}
		break;

	case PRIO_PGRP:
	{
		struct pgrp *pg;

		if (uap->who == 0) {
			pg = curp->p_pgrp;
			pgref(pg);
		} else if ((pg = pgfind(uap->who)) == NULL) {
			break;
		} /* else ref held from pgfind */

		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
				error = doionice(p, uap->prio);
				found++;
			}
		}
		pgrel(pg);
		break;
	}
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		info.prio = uap->prio;
		info.who = uap->who;
		info.error = 0;
		info.found = 0;
		allproc_scan(ioprio_set_callback, &info);
		error = info.error;
		found = info.found;
		break;

	default:
		error = EINVAL;
		found = 1;	/* suppress the ESRCH override below */
		break;
	}

	lwkt_reltoken(&proc_token);

	if (found == 0)
		error = ESRCH;
	return (error);
}
484 
485 static
486 int
487 ioprio_set_callback(struct proc *p, void *data)
488 {
489 	struct ioprio_set_info *info = data;
490 	int error;
491 
492 	if (p->p_ucred->cr_uid == info->who &&
493 	    PRISON_CHECK(curproc->p_ucred, p->p_ucred)) {
494 		error = doionice(p, info->prio);
495 		if (error)
496 			info->error = error;
497 		++info->found;
498 	}
499 	return(0);
500 }
501 
502 int
503 doionice(struct proc *chgp, int n)
504 {
505 	struct proc *curp = curproc;
506 	struct ucred *cr = curp->p_ucred;
507 
508 	if (cr->cr_uid && cr->cr_ruid &&
509 	    cr->cr_uid != chgp->p_ucred->cr_uid &&
510 	    cr->cr_ruid != chgp->p_ucred->cr_uid)
511 		return (EPERM);
512 	if (n > IOPRIO_MAX)
513 		n = IOPRIO_MAX;
514 	if (n < IOPRIO_MIN)
515 		n = IOPRIO_MIN;
516 	if (n < chgp->p_ionice && priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0))
517 		return (EACCES);
518 	chgp->p_ionice = n;
519 
520 	return (0);
521 
522 }
523 
/*
 * lwp_rtprio() system call: look up or set the realtime priority of a
 * single lwp, selected by (pid, tid).  pid == 0 means the current
 * process; tid == -1 means the calling thread's lwp (tid can
 * legitimately be 0, so 0 cannot serve as the "self" sentinel the way
 * it does in sys_rtprio()).
 *
 * NOTE(review): when tid == -1 the lwp used is always the *caller's*
 * lwp, even if pid selected a different process — confirm this is the
 * intended semantic.
 *
 * MPALMOSTSAFE
 */
int
sys_lwp_rtprio(struct lwp_rtprio_args *uap)
{
	struct proc *p;
	struct lwp *lp;
	struct rtprio rtp;
	struct ucred *cr = curthread->td_ucred;
	int error;

	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	if (error)
		return error;
	if (uap->pid < 0)
		return EINVAL;

	/* proc_token interlocks the process/lwp lookups */
	lwkt_gettoken(&proc_token);

	if (uap->pid == 0) {
		p = curproc;
		PHOLD(p);
	} else {
		p = pfind(uap->pid);
	}

	if (p == NULL) {
		error = ESRCH;
		goto done;
	}

	if (uap->tid < -1) {
		error = EINVAL;
		goto done;
	}
	if (uap->tid == -1) {
		/*
		 * sadly, tid can be 0 so we can't use 0 here
		 * like sys_rtprio()
		 */
		lp = curthread->td_lwp;
	} else {
		lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
		if (lp == NULL) {
			error = ESRCH;
			goto done;
		}
	}

	switch (uap->function) {
	case RTP_LOOKUP:
		error = copyout(&lp->lwp_rtprio, uap->rtp,
				sizeof(struct rtprio));
		break;
	case RTP_SET:
		/* uid rules as in donice(); root short-circuits */
		if (cr->cr_uid && cr->cr_ruid &&
		    cr->cr_uid != p->p_ucred->cr_uid &&
		    cr->cr_ruid != p->p_ucred->cr_uid) {
			error = EPERM;
			break;
		}
		/* disallow setting rtprio in most cases if not superuser */
		if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) {
			/* can't set someone else's */
			if (uap->pid) { /* XXX */
				error = EPERM;
				break;
			}
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious. However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process). Fix me! XXX
 */
			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
				error = EPERM;
				break;
			}
		}
		switch (rtp.type) {
#ifdef RTP_PRIO_FIFO
		case RTP_PRIO_FIFO:
#endif
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_NORMAL:
		case RTP_PRIO_IDLE:
			if (rtp.prio > RTP_PRIO_MAX) {
				error = EINVAL;
			} else {
				lp->lwp_rtprio = rtp;
				error = 0;
			}
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}

done:
	if (p)
		PRELE(p);
	lwkt_reltoken(&proc_token);

	return (error);
}
637 
/*
 * rtprio() system call: look up or set the realtime priority of a
 * process (pid == 0 selects the current process).  Unlike
 * lwp_rtprio(), only the process's first lwp is operated on.
 *
 * NOTE(review): FIRST_LWP_IN_PROC(p) is dereferenced without a NULL
 * check — confirm a process found via pfind() always has at least
 * one lwp here.
 *
 * MPALMOSTSAFE
 */
int
sys_rtprio(struct rtprio_args *uap)
{
	struct proc *p;
	struct lwp *lp;
	struct ucred *cr = curthread->td_ucred;
	struct rtprio rtp;
	int error;

	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	if (error)
		return (error);

	/* proc_token interlocks the process lookup */
	lwkt_gettoken(&proc_token);

	if (uap->pid == 0) {
		p = curproc;
		PHOLD(p);
	} else {
		p = pfind(uap->pid);
	}

	if (p == NULL) {
		error = ESRCH;
		goto done;
	}

	/* XXX lwp */
	lp = FIRST_LWP_IN_PROC(p);
	switch (uap->function) {
	case RTP_LOOKUP:
		error = copyout(&lp->lwp_rtprio, uap->rtp,
				sizeof(struct rtprio));
		break;
	case RTP_SET:
		/* uid rules as in donice(); root short-circuits */
		if (cr->cr_uid && cr->cr_ruid &&
		    cr->cr_uid != p->p_ucred->cr_uid &&
		    cr->cr_ruid != p->p_ucred->cr_uid) {
			error = EPERM;
			break;
		}
		/* disallow setting rtprio in most cases if not superuser */
		if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) {
			/* can't set someone else's */
			if (uap->pid) {
				error = EPERM;
				break;
			}
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious. However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process). Fix me! XXX
 */
			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
				error = EPERM;
				break;
			}
		}
		switch (rtp.type) {
#ifdef RTP_PRIO_FIFO
		case RTP_PRIO_FIFO:
#endif
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_NORMAL:
		case RTP_PRIO_IDLE:
			if (rtp.prio > RTP_PRIO_MAX) {
				error = EINVAL;
				break;
			}
			lp->lwp_rtprio = rtp;
			error = 0;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}
done:
	if (p)
		PRELE(p);
	lwkt_reltoken(&proc_token);

	return (error);
}
734 
735 /*
736  * MPSAFE
737  */
738 int
739 sys_setrlimit(struct __setrlimit_args *uap)
740 {
741 	struct rlimit alim;
742 	int error;
743 
744 	error = copyin(uap->rlp, &alim, sizeof(alim));
745 	if (error)
746 		return (error);
747 
748 	error = kern_setrlimit(uap->which, &alim);
749 
750 	return (error);
751 }
752 
753 /*
754  * MPSAFE
755  */
756 int
757 sys_getrlimit(struct __getrlimit_args *uap)
758 {
759 	struct rlimit lim;
760 	int error;
761 
762 	error = kern_getrlimit(uap->which, &lim);
763 
764 	if (error == 0)
765 		error = copyout(&lim, uap->rlp, sizeof(*uap->rlp));
766 	return error;
767 }
768 
/*
 * Transform the running time and tick information in lwp lp's thread into user,
 * system, and interrupt time usage.
 *
 * Since we are limited to statclock tick granularity this is a statisical
 * calculation which will be correct over the long haul, but should not be
 * expected to measure fine grained deltas.
 *
 * It is possible to catch a lwp in the midst of being created, so
 * check whether lwp_thread is NULL or not.
 *
 * NOTE(review): the 1000000 divisors imply td_uticks/td_sticks are
 * kept in microseconds — confirm against struct thread.  If
 * lwp_thread is NULL, *up and *sp are left untouched; callers are
 * expected to have initialized them.
 */
void
calcru(struct lwp *lp, struct timeval *up, struct timeval *sp)
{
	struct thread *td;

	/*
	 * Calculate at the statclock level.  YYY if the thread is owned by
	 * another cpu we need to forward the request to the other cpu, or
	 * have a token to interlock the information in order to avoid racing
	 * thread destruction.
	 */
	if ((td = lp->lwp_thread) != NULL) {
		crit_enter();
		up->tv_sec = td->td_uticks / 1000000;
		up->tv_usec = td->td_uticks % 1000000;
		sp->tv_sec = td->td_sticks / 1000000;
		sp->tv_usec = td->td_sticks % 1000000;
		crit_exit();
	}
}
800 
/*
 * Aggregate resource statistics of all lwps of a process.
 *
 * proc.p_ru keeps track of all statistics directly related to a proc.  This
 * consists of RSS usage and nswap information and aggregate numbers for all
 * former lwps of this proc.
 *
 * proc.p_cru is the sum of all stats of reaped children.
 *
 * lwp.lwp_ru contains the stats directly related to one specific lwp, meaning
 * packet, scheduler switch or page fault counts, etc.  This information gets
 * added to lwp.lwp_proc.p_ru when the lwp exits.
 */
void
calcru_proc(struct proc *p, struct rusage *ru)
{
	struct timeval upt, spt;
	long *rip1, *rip2;
	struct lwp *lp;

	/* start from the proc-level aggregate, then fold in live lwps */
	*ru = p->p_ru;

	FOREACH_LWP_IN_PROC(lp, p) {
		calcru(lp, &upt, &spt);
		timevaladd(&ru->ru_utime, &upt);
		timevaladd(&ru->ru_stime, &spt);
		/*
		 * Sum the contiguous run of long counters between
		 * ru_first and ru_last (see struct rusage).
		 */
		for (rip1 = &ru->ru_first, rip2 = &lp->lwp_ru.ru_first;
		     rip1 <= &ru->ru_last;
		     rip1++, rip2++)
			*rip1 += *rip2;
	}
}
833 
834 
835 /*
836  * MPALMOSTSAFE
837  */
838 int
839 sys_getrusage(struct getrusage_args *uap)
840 {
841 	struct rusage ru;
842 	struct rusage *rup;
843 	int error;
844 
845 	lwkt_gettoken(&proc_token);
846 
847 	switch (uap->who) {
848 	case RUSAGE_SELF:
849 		rup = &ru;
850 		calcru_proc(curproc, rup);
851 		error = 0;
852 		break;
853 	case RUSAGE_CHILDREN:
854 		rup = &curproc->p_cru;
855 		error = 0;
856 		break;
857 	default:
858 		error = EINVAL;
859 		break;
860 	}
861 	if (error == 0)
862 		error = copyout(rup, uap->rusage, sizeof(struct rusage));
863 	lwkt_reltoken(&proc_token);
864 	return (error);
865 }
866 
867 void
868 ruadd(struct rusage *ru, struct rusage *ru2)
869 {
870 	long *ip, *ip2;
871 	int i;
872 
873 	timevaladd(&ru->ru_utime, &ru2->ru_utime);
874 	timevaladd(&ru->ru_stime, &ru2->ru_stime);
875 	if (ru->ru_maxrss < ru2->ru_maxrss)
876 		ru->ru_maxrss = ru2->ru_maxrss;
877 	ip = &ru->ru_first; ip2 = &ru2->ru_first;
878 	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
879 		*ip++ += *ip2++;
880 }
881 
882 /*
883  * Find the uidinfo structure for a uid.  This structure is used to
884  * track the total resource consumption (process count, socket buffer
885  * size, etc.) for the uid and impose limits.
886  */
/*
 * One-time initialization of the uid hash table used for per-uid
 * resource accounting.  Sized relative to maxproc; hashinit() stores
 * the table mask (size - 1) in uihash, which UIHASH() uses.
 */
void
uihashinit(void)
{
	spin_init(&uihash_lock);
	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
}
893 
894 /*
895  * NOTE: Must be called with uihash_lock held
896  *
897  * MPSAFE
898  */
899 static struct uidinfo *
900 uilookup(uid_t uid)
901 {
902 	struct	uihashhead *uipp;
903 	struct	uidinfo *uip;
904 
905 	uipp = UIHASH(uid);
906 	LIST_FOREACH(uip, uipp, ui_hash) {
907 		if (uip->ui_uid == uid)
908 			break;
909 	}
910 	return (uip);
911 }
912 
913 /*
 * Helper function to create a uidinfo entry that could not be found.
915  * This function will properly deal with races.
916  *
917  * MPSAFE
918  */
static struct uidinfo *
uicreate(uid_t uid)
{
	struct	uidinfo *uip, *tmp;

	/*
	 * Allocate space and check for a race
	 */
	uip = kmalloc(sizeof(*uip), M_UIDINFO, M_WAITOK|M_ZERO);

	/*
	 * Initialize structure and enter it into the hash table
	 */
	spin_init(&uip->ui_lock);
	uip->ui_uid = uid;
	uip->ui_ref = 1;	/* we're returning a ref */
	varsymset_init(&uip->ui_varsymset, NULL);

	/*
	 * Somebody may have already created the uidinfo for this
	 * uid. If so, return that instead.  The lookup is redone
	 * under uihash_lock because the allocation above may have
	 * blocked, opening a race window.
	 */
	spin_lock(&uihash_lock);
	tmp = uilookup(uid);
	if (tmp != NULL) {
		/* Lost the race: take a ref on the winner, undo ours */
		uihold(tmp);
		spin_unlock(&uihash_lock);

		spin_uninit(&uip->ui_lock);
		varsymset_clean(&uip->ui_varsymset);
		kfree(uip, M_UIDINFO);
		uip = tmp;
	} else {
		LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		spin_unlock(&uihash_lock);
	}
	return (uip);
}
957 
958 /*
959  *
960  *
961  * MPSAFE
962  */
963 struct uidinfo *
964 uifind(uid_t uid)
965 {
966 	struct	uidinfo *uip;
967 
968 	spin_lock(&uihash_lock);
969 	uip = uilookup(uid);
970 	if (uip == NULL) {
971 		spin_unlock(&uihash_lock);
972 		uip = uicreate(uid);
973 	} else {
974 		uihold(uip);
975 		spin_unlock(&uihash_lock);
976 	}
977 	return (uip);
978 }
979 
980 /*
 * Helper function to remove a uidinfo whose reference count is
982  * transitioning from 1->0.  The reference count is 1 on call.
983  *
984  * Zero is returned on success, otherwise non-zero and the
 * uip has not been removed.
986  *
987  * MPSAFE
988  */
static __inline int
uifree(struct uidinfo *uip)
{
	/*
	 * If we are still the only holder after acquiring the uihash_lock
	 * we can safely unlink the uip and destroy it.  Otherwise we lost
	 * a race and must fail.
	 */
	spin_lock(&uihash_lock);
	if (uip->ui_ref != 1) {
		spin_unlock(&uihash_lock);
		return(-1);
	}
	LIST_REMOVE(uip, ui_hash);
	spin_unlock(&uihash_lock);

	/*
	 * The uip is now orphaned and we can destroy it at our
	 * leisure.
	 */
	/* Non-zero counters at teardown indicate an accounting leak */
	if (uip->ui_sbsize != 0)
		kprintf("freeing uidinfo: uid = %d, sbsize = %jd\n",
		    uip->ui_uid, (intmax_t)uip->ui_sbsize);
	if (uip->ui_proccnt != 0)
		kprintf("freeing uidinfo: uid = %d, proccnt = %ld\n",
		    uip->ui_uid, uip->ui_proccnt);

	varsymset_clean(&uip->ui_varsymset);
	lockuninit(&uip->ui_varsymset.vx_lock);
	spin_uninit(&uip->ui_lock);
	kfree(uip, M_UIDINFO);
	return(0);
}
1022 
/*
 * Add a reference to a uidinfo structure.  The caller must already
 * hold a reference (or uihash_lock) to keep uip stable.
 *
 * MPSAFE
 */
void
uihold(struct uidinfo *uip)
{
	atomic_add_int(&uip->ui_ref, 1);
	KKASSERT(uip->ui_ref >= 0);	/* catch wrap/corruption */
}
1032 
/*
 * Drop a reference on a uidinfo structure, tearing it down on last
 * release.
 *
 * NOTE: It is important for us to not drop the ref count to 0
 *	 because this can cause a 2->0/2->0 race with another
 *	 concurrent dropper.  Losing the race in that situation
 *	 can cause uip to become stale for one of the other
 *	 threads.
 *
 * MPSAFE
 */
void
uidrop(struct uidinfo *uip)
{
	int ref;

	KKASSERT(uip->ui_ref > 0);

	for (;;) {
		ref = uip->ui_ref;
		cpu_ccfence();	/* force ui_ref to be re-read each pass */
		if (ref == 1) {
			/*
			 * Possibly the last ref: uifree() revalidates
			 * the count under uihash_lock and returns
			 * non-zero if someone re-acquired a reference,
			 * in which case we retry.
			 */
			if (uifree(uip) == 0)
				break;
		} else if (atomic_cmpset_int(&uip->ui_ref, ref, ref - 1)) {
			break;
		}
		/* else retry */
	}
}
1061 
/*
 * Replace *puip with nuip, dropping the reference held on the old
 * uidinfo.  Ownership of nuip's reference transfers to *puip.
 */
void
uireplace(struct uidinfo **puip, struct uidinfo *nuip)
{
	struct uidinfo *ouip = *puip;

	*puip = nuip;
	uidrop(ouip);
}
1068 
1069 /*
1070  * Change the count associated with number of processes
1071  * a given user is using.  When 'max' is 0, don't enforce a limit
1072  */
1073 int
1074 chgproccnt(struct uidinfo *uip, int diff, int max)
1075 {
1076 	int ret;
1077 	spin_lock(&uip->ui_lock);
1078 	/* don't allow them to exceed max, but allow subtraction */
1079 	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
1080 		ret = 0;
1081 	} else {
1082 		uip->ui_proccnt += diff;
1083 		if (uip->ui_proccnt < 0)
1084 			kprintf("negative proccnt for uid = %d\n", uip->ui_uid);
1085 		ret = 1;
1086 	}
1087 	spin_unlock(&uip->ui_lock);
1088 	return ret;
1089 }
1090 
/*
 * Change the total socket buffer size a user has used.
 *
 * 'hiwat' points at the buffer's current hi-water mark, 'to' is the
 * requested new mark and 'max' the per-user aggregate limit.  The
 * user's ui_sbsize is adjusted by the delta.  Always returns 1
 * (success), but 'to' may be scaled down when the aggregate would
 * exceed 'max'.
 */
int
chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max)
{
	rlim_t new;

	spin_lock(&uip->ui_lock);
	new = uip->ui_sbsize + to - *hiwat;
	KKASSERT(new >= 0);

	/*
	 * If we are trying to increase the socket buffer size
	 * Scale down the hi water mark when we exceed the user's
	 * allowed socket buffer space.
	 *
	 * We can't scale down too much or we will blow up atomic packet
	 * operations.
	 */
	if (to > *hiwat && to > MCLBYTES && new > max) {
		to = to * max / new;
		if (to < MCLBYTES)
			to = MCLBYTES;	/* floor for atomic packet ops */
	}
	uip->ui_sbsize = new;
	*hiwat = to;
	spin_unlock(&uip->ui_lock);
	return (1);
}
1121 
1122