/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $
 * $DragonFly: src/sys/kern/kern_resource.c,v 1.35 2008/05/27 05:25:34 dillon Exp $
 */

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kern_syscall.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/time.h>
#include <sys/lockf.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

#include <sys/thread2.h>
#include <sys/spinlock2.h>
#include <sys/mplock2.h>

static int donice (struct proc *chgp, int n);
static int doionice (struct proc *chgp, int n);

static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct spinlock uihash_lock;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

static struct uidinfo	*uicreate (uid_t uid);
static struct uidinfo	*uilookup (uid_t uid);

/*
 * Resource controls and accounting.
 */

struct getpriority_info {
	int low;
	int who;
};

static int getpriority_callback(struct proc *p, void *data);

/*
 * MPALMOSTSAFE
 */
int
sys_getpriority(struct getpriority_args *uap)
{
	struct getpriority_info info;
	struct proc *curp = curproc;
	struct proc *p;
	int low = PRIO_MAX + 1;
	int error;

	get_mplock();

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0)
			p = curp;
		else
			p = pfind(uap->who);
		if (p == NULL)
			break;
		if (!PRISON_CHECK(curp->p_ucred, p->p_ucred))
			break;
		low = p->p_nice;
		break;

	case PRIO_PGRP:
	{
		struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (PRISON_CHECK(curp->p_ucred, p->p_ucred) &&
			    p->p_nice < low)
				low = p->p_nice;
		}
		break;
	}
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		info.low = low;
		info.who = uap->who;
		allproc_scan(getpriority_callback, &info);
		low = info.low;
		break;

	default:
		error = EINVAL;
		goto done;
	}
	if (low == PRIO_MAX + 1) {
		error = ESRCH;
		goto done;
	}
	uap->sysmsg_result = low;
	error = 0;
done:
	rel_mplock();
	return (error);
}

/*
 * Figure out the current lowest nice priority for processes owned
 * by the specified user.
 */
static
int
getpriority_callback(struct proc *p, void *data)
{
	struct getpriority_info *info = data;

	if (PRISON_CHECK(curproc->p_ucred, p->p_ucred) &&
	    p->p_ucred->cr_uid == info->who &&
	    p->p_nice < info->low) {
		info->low = p->p_nice;
	}
	return(0);
}

struct setpriority_info {
	int prio;
	int who;
	int error;
	int found;
};

static int setpriority_callback(struct proc *p, void *data);

/*
 * MPALMOSTSAFE
 */
int
sys_setpriority(struct setpriority_args *uap)
{
	struct setpriority_info info;
	struct proc *curp = curproc;
	struct proc *p;
	int found = 0, error = 0;

	get_mplock();

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0)
			p = curp;
		else
			p = pfind(uap->who);
		if (p == NULL)
			break;
		if (!PRISON_CHECK(curp->p_ucred, p->p_ucred))
			break;
		error = donice(p, uap->prio);
		found++;
		break;

	case PRIO_PGRP:
	{
		struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
				error = donice(p, uap->prio);
				found++;
			}
		}
		break;
	}
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		info.prio = uap->prio;
		info.who = uap->who;
		info.error = 0;
		info.found = 0;
		allproc_scan(setpriority_callback, &info);
		error = info.error;
		found = info.found;
		break;

	default:
		error = EINVAL;
		found = 1;
		break;
	}

	rel_mplock();
	if (found == 0)
		error = ESRCH;
	return (error);
}

static
int
setpriority_callback(struct proc *p, void *data)
{
	struct setpriority_info *info = data;
	int error;

	if (p->p_ucred->cr_uid == info->who &&
	    PRISON_CHECK(curproc->p_ucred, p->p_ucred)) {
		error = donice(p, info->prio);
		if (error)
			info->error = error;
		++info->found;
	}
	return(0);
}

static int
donice(struct proc *chgp, int n)
{
	struct proc *curp = curproc;
	struct ucred *cr = curp->p_ucred;
	struct lwp *lp;

	if (cr->cr_uid && cr->cr_ruid &&
	    cr->cr_uid != chgp->p_ucred->cr_uid &&
	    cr->cr_ruid != chgp->p_ucred->cr_uid)
		return (EPERM);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < chgp->p_nice && priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0))
		return (EACCES);
	chgp->p_nice = n;
	FOREACH_LWP_IN_PROC(lp, chgp)
		chgp->p_usched->resetpriority(lp);
	return (0);
}
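
/*
 * Illustrative sketch (added here for clarity; not part of the original
 * file): how donice()'s rules look from userland via setpriority(2).
 * Raising your own nice value needs no privilege, lowering it requires
 * PRIV_SCHED_SETPRIORITY, and out-of-range values are silently clamped
 * to [PRIO_MIN, PRIO_MAX]:
 *
 *	#include <sys/resource.h>
 *	#include <err.h>
 *	#include <errno.h>
 *
 *	setpriority(PRIO_PROCESS, 0, 1000);	(clamped to PRIO_MAX, ok)
 *	if (setpriority(PRIO_PROCESS, 0, -5) < 0 && errno == EACCES)
 *		warnx("lowering nice requires privilege");
 */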


struct ioprio_get_info {
	int high;
	int who;
};

static int ioprio_get_callback(struct proc *p, void *data);

/*
 * MPALMOSTSAFE
 */
int
sys_ioprio_get(struct ioprio_get_args *uap)
{
	struct ioprio_get_info info;
	struct proc *curp = curproc;
	struct proc *p;
	int high = IOPRIO_MIN-2;
	int error;

	get_mplock();

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0)
			p = curp;
		else
			p = pfind(uap->who);
		if (p == NULL)
			break;
		if (!PRISON_CHECK(curp->p_ucred, p->p_ucred))
			break;
		high = p->p_ionice;
		break;

	case PRIO_PGRP:
	{
		struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (PRISON_CHECK(curp->p_ucred, p->p_ucred) &&
			    p->p_ionice > high)
				high = p->p_ionice;
		}
		break;
	}
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		info.high = high;
		info.who = uap->who;
		allproc_scan(ioprio_get_callback, &info);
		high = info.high;
		break;

	default:
		error = EINVAL;
		goto done;
	}
	if (high == IOPRIO_MIN-2) {
		error = ESRCH;
		goto done;
	}
	uap->sysmsg_result = high;
	error = 0;
done:
	rel_mplock();
	return (error);
}

/*
 * Figure out the current highest I/O priority for processes owned
 * by the specified user.
 */
static
int
ioprio_get_callback(struct proc *p, void *data)
{
	struct ioprio_get_info *info = data;

	if (PRISON_CHECK(curproc->p_ucred, p->p_ucred) &&
	    p->p_ucred->cr_uid == info->who &&
	    p->p_ionice > info->high) {
		info->high = p->p_ionice;
	}
	return(0);
}


struct ioprio_set_info {
	int prio;
	int who;
	int error;
	int found;
};

static int ioprio_set_callback(struct proc *p, void *data);

/*
 * MPALMOSTSAFE
 */
int
sys_ioprio_set(struct ioprio_set_args *uap)
{
	struct ioprio_set_info info;
	struct proc *curp = curproc;
	struct proc *p;
	int found = 0, error = 0;

	get_mplock();

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0)
			p = curp;
		else
			p = pfind(uap->who);
		if (p == NULL)
			break;
		if (!PRISON_CHECK(curp->p_ucred, p->p_ucred))
			break;
		error = doionice(p, uap->prio);
		found++;
		break;

	case PRIO_PGRP:
	{
		struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
				error = doionice(p, uap->prio);
				found++;
			}
		}
		break;
	}
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		info.prio = uap->prio;
		info.who = uap->who;
		info.error = 0;
		info.found = 0;
		allproc_scan(ioprio_set_callback, &info);
		error = info.error;
		found = info.found;
		break;

	default:
		error = EINVAL;
		found = 1;
		break;
	}

	rel_mplock();
	if (found == 0)
		error = ESRCH;
	return (error);
}

static
int
ioprio_set_callback(struct proc *p, void *data)
{
	struct ioprio_set_info *info = data;
	int error;

	if (p->p_ucred->cr_uid == info->who &&
	    PRISON_CHECK(curproc->p_ucred, p->p_ucred)) {
		error = doionice(p, info->prio);
		if (error)
			info->error = error;
		++info->found;
	}
	return(0);
}

int
doionice(struct proc *chgp, int n)
{
	struct proc *curp = curproc;
	struct ucred *cr = curp->p_ucred;

	if (cr->cr_uid && cr->cr_ruid &&
	    cr->cr_uid != chgp->p_ucred->cr_uid &&
	    cr->cr_ruid != chgp->p_ucred->cr_uid)
		return (EPERM);
	if (n > IOPRIO_MAX)
		n = IOPRIO_MAX;
	if (n < IOPRIO_MIN)
		n = IOPRIO_MIN;
	if (n < chgp->p_ionice && priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0))
		return (EACCES);
	chgp->p_ionice = n;

	return (0);
}


/*
 * MPALMOSTSAFE
 */
int
sys_lwp_rtprio(struct lwp_rtprio_args *uap)
{
	struct proc *p = curproc;
	struct lwp *lp;
	struct rtprio rtp;
	struct ucred *cr = curthread->td_ucred;
	int error;

	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	if (error)
		return error;
	if (uap->pid < 0)
		return EINVAL;

	get_mplock();
	if (uap->pid == 0) {
		/* curproc already loaded on p */
	} else {
		p = pfind(uap->pid);
	}

	if (p == NULL) {
		error = ESRCH;
		goto done;
	}

	if (uap->tid < -1) {
		error = EINVAL;
		goto done;
	}
	if (uap->tid == -1) {
		/*
		 * sadly, tid can be 0 so we can't use 0 here
		 * like sys_rtprio()
		 */
		lp = curthread->td_lwp;
	} else {
		lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
		if (lp == NULL) {
			error = ESRCH;
			goto done;
		}
	}

	switch (uap->function) {
	case RTP_LOOKUP:
		error = copyout(&lp->lwp_rtprio, uap->rtp,
				sizeof(struct rtprio));
		break;
	case RTP_SET:
		if (cr->cr_uid && cr->cr_ruid &&
		    cr->cr_uid != p->p_ucred->cr_uid &&
		    cr->cr_ruid != p->p_ucred->cr_uid) {
			error = EPERM;
			break;
		}
		/* disallow setting rtprio in most cases if not superuser */
		if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) {
			/* can't set someone else's */
			if (uap->pid) { /* XXX */
				error = EPERM;
				break;
			}
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious. However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process). Fix me! XXX
 */
			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
				error = EPERM;
				break;
			}
		}
		switch (rtp.type) {
#ifdef RTP_PRIO_FIFO
		case RTP_PRIO_FIFO:
#endif
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_NORMAL:
		case RTP_PRIO_IDLE:
			if (rtp.prio > RTP_PRIO_MAX) {
				error = EINVAL;
			} else {
				lp->lwp_rtprio = rtp;
				error = 0;
			}
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}

done:
	rel_mplock();
	return (error);
}

/*
 * Set realtime priority
 *
 * MPALMOSTSAFE
 */
int
sys_rtprio(struct rtprio_args *uap)
{
	struct proc *curp = curproc;
	struct proc *p;
	struct lwp *lp;
	struct ucred *cr = curthread->td_ucred;
	struct rtprio rtp;
	int error;

	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	if (error)
		return (error);

	get_mplock();
	if (uap->pid == 0)
		p = curp;
	else
		p = pfind(uap->pid);

	if (p == NULL) {
		error = ESRCH;
		goto done;
	}

	/* XXX lwp */
	lp = FIRST_LWP_IN_PROC(p);
	switch (uap->function) {
	case RTP_LOOKUP:
		error = copyout(&lp->lwp_rtprio, uap->rtp,
				sizeof(struct rtprio));
		break;
	case RTP_SET:
		if (cr->cr_uid && cr->cr_ruid &&
		    cr->cr_uid != p->p_ucred->cr_uid &&
		    cr->cr_ruid != p->p_ucred->cr_uid) {
			error = EPERM;
			break;
		}
		/* disallow setting rtprio in most cases if not superuser */
		if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) {
			/* can't set someone else's */
			if (uap->pid) {
				error = EPERM;
				break;
			}
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious. However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process). Fix me! XXX
 */
			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
				error = EPERM;
				break;
			}
		}
		switch (rtp.type) {
#ifdef RTP_PRIO_FIFO
		case RTP_PRIO_FIFO:
#endif
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_NORMAL:
		case RTP_PRIO_IDLE:
			if (rtp.prio > RTP_PRIO_MAX) {
				error = EINVAL;
				break;
			}
			lp->lwp_rtprio = rtp;
			error = 0;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}
done:
	rel_mplock();
	return (error);
}

/*
 * MPSAFE
 */
int
sys_setrlimit(struct __setrlimit_args *uap)
{
	struct rlimit alim;
	int error;

	error = copyin(uap->rlp, &alim, sizeof(alim));
	if (error)
		return (error);

	error = kern_setrlimit(uap->which, &alim);

	return (error);
}

/*
 * MPSAFE
 */
int
sys_getrlimit(struct __getrlimit_args *uap)
{
	struct rlimit lim;
	int error;

	error = kern_getrlimit(uap->which, &lim);

	if (error == 0)
		error = copyout(&lim, uap->rlp, sizeof(*uap->rlp));
	return error;
}

/*
 * Transform the running time and tick information in lwp lp's thread into
 * user, system, and interrupt time usage.
 *
 * Since we are limited to statclock tick granularity this is a statistical
 * calculation which will be correct over the long haul, but should not be
 * expected to measure fine-grained deltas.
 *
 * It is possible to catch a lwp in the midst of being created, so
 * check whether lwp_thread is NULL or not.
 */
void
calcru(struct lwp *lp, struct timeval *up, struct timeval *sp)
{
	struct thread *td;

	/*
	 * Calculate at the statclock level.  YYY if the thread is owned by
	 * another cpu we need to forward the request to the other cpu, or
	 * have a token to interlock the information in order to avoid racing
	 * thread destruction.
	 */
	if ((td = lp->lwp_thread) != NULL) {
		crit_enter();
		up->tv_sec = td->td_uticks / 1000000;
		up->tv_usec = td->td_uticks % 1000000;
		sp->tv_sec = td->td_sticks / 1000000;
		sp->tv_usec = td->td_sticks % 1000000;
		crit_exit();
	}
}
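
/*
 * Worked example (added for clarity; not part of the original file).
 * The tick counters appear to accumulate microseconds, given the
 * divisions by 1000000 above.  Assuming td_uticks == 2500000:
 *
 *	up->tv_sec  = 2500000 / 1000000 = 2
 *	up->tv_usec = 2500000 % 1000000 = 500000
 *
 * i.e. 2.5 seconds of user time, accurate only to statclock granularity.
 */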

/*
 * Aggregate resource statistics of all lwps of a process.
 *
 * proc.p_ru keeps track of all statistics directly related to a proc.  This
 * consists of RSS usage and nswap information and aggregate numbers for all
 * former lwps of this proc.
 *
 * proc.p_cru is the sum of all stats of reaped children.
 *
 * lwp.lwp_ru contains the stats directly related to one specific lwp, meaning
 * packet, scheduler switch or page fault counts, etc.  This information gets
 * added to lwp.lwp_proc.p_ru when the lwp exits.
 */
void
calcru_proc(struct proc *p, struct rusage *ru)
{
	struct timeval upt, spt;
	long *rip1, *rip2;
	struct lwp *lp;

	*ru = p->p_ru;

	FOREACH_LWP_IN_PROC(lp, p) {
		calcru(lp, &upt, &spt);
		timevaladd(&ru->ru_utime, &upt);
		timevaladd(&ru->ru_stime, &spt);
		for (rip1 = &ru->ru_first, rip2 = &lp->lwp_ru.ru_first;
		     rip1 <= &ru->ru_last;
		     rip1++, rip2++)
			*rip1 += *rip2;
	}
}


/*
 * MPALMOSTSAFE
 */
int
sys_getrusage(struct getrusage_args *uap)
{
	struct rusage ru;
	struct rusage *rup;
	int error;

	get_mplock();

	switch (uap->who) {
	case RUSAGE_SELF:
		rup = &ru;
		calcru_proc(curproc, rup);
		error = 0;
		break;
	case RUSAGE_CHILDREN:
		rup = &curproc->p_cru;
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}
	if (error == 0)
		error = copyout(rup, uap->rusage, sizeof(struct rusage));
	rel_mplock();
	return (error);
}

void
ruadd(struct rusage *ru, struct rusage *ru2)
{
	long *ip, *ip2;
	int i;

	timevaladd(&ru->ru_utime, &ru2->ru_utime);
	timevaladd(&ru->ru_stime, &ru2->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit(void)
{
	spin_init(&uihash_lock);
	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
}

/*
 * NOTE: Must be called with uihash_lock held
 *
 * MPSAFE
 */
static struct uidinfo *
uilookup(uid_t uid)
{
	struct	uihashhead *uipp;
	struct	uidinfo *uip;

	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash) {
		if (uip->ui_uid == uid)
			break;
	}
	return (uip);
}

/*
 * Helper function to create the uidinfo for a uid that could not be
 * found.  This function will properly deal with races.
 *
 * MPSAFE
 */
static struct uidinfo *
uicreate(uid_t uid)
{
	struct	uidinfo *uip, *tmp;

	/*
	 * Allocate space and check for a race
	 */
	uip = kmalloc(sizeof(*uip), M_UIDINFO, M_WAITOK|M_ZERO);

	/*
	 * Initialize structure and enter it into the hash table
	 */
	spin_init(&uip->ui_lock);
	uip->ui_uid = uid;
	uip->ui_ref = 1;	/* we're returning a ref */
	varsymset_init(&uip->ui_varsymset, NULL);

	/*
	 * Somebody may have already created the uidinfo for this
	 * uid. If so, return that instead.
	 */
	spin_lock(&uihash_lock);
	tmp = uilookup(uid);
	if (tmp != NULL) {
		uihold(tmp);
		spin_unlock(&uihash_lock);

		spin_uninit(&uip->ui_lock);
		varsymset_clean(&uip->ui_varsymset);
		FREE(uip, M_UIDINFO);
		uip = tmp;
	} else {
		LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		spin_unlock(&uihash_lock);
	}
	return (uip);
}

/*
 * MPSAFE
 */
struct uidinfo *
uifind(uid_t uid)
{
	struct	uidinfo *uip;

	spin_lock(&uihash_lock);
	uip = uilookup(uid);
	if (uip == NULL) {
		spin_unlock(&uihash_lock);
		uip = uicreate(uid);
	} else {
		uihold(uip);
		spin_unlock(&uihash_lock);
	}
	return (uip);
}
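
/*
 * Illustrative sketch (added for clarity; not part of the original
 * file): the reference discipline around uifind().  uifind() always
 * returns a referenced uidinfo, so every lookup must eventually be
 * balanced by uidrop():
 *
 *	struct uidinfo *uip;
 *
 *	uip = uifind(cred->cr_uid);		(returns with a ref held)
 *	... inspect uip->ui_proccnt, uip->ui_sbsize, etc ...
 *	uidrop(uip);				(may free on last ref)
 */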

/*
 * Helper function to remove a uidinfo whose reference count is
 * transitioning from 1->0.  The reference count is 1 on call.
 *
 * Zero is returned on success, otherwise non-zero and the
 * uip has not been removed.
 *
 * MPSAFE
 */
static __inline int
uifree(struct uidinfo *uip)
{
	/*
	 * If we are still the only holder after acquiring the uihash_lock
	 * we can safely unlink the uip and destroy it.  Otherwise we lost
	 * a race and must fail.
	 */
	spin_lock(&uihash_lock);
	if (uip->ui_ref != 1) {
		spin_unlock(&uihash_lock);
		return(-1);
	}
	LIST_REMOVE(uip, ui_hash);
	spin_unlock(&uihash_lock);

	/*
	 * The uip is now orphaned and we can destroy it at our
	 * leisure.
	 */
	if (uip->ui_sbsize != 0)
		kprintf("freeing uidinfo: uid = %d, sbsize = %jd\n",
		    uip->ui_uid, (intmax_t)uip->ui_sbsize);
	if (uip->ui_proccnt != 0)
		kprintf("freeing uidinfo: uid = %d, proccnt = %ld\n",
		    uip->ui_uid, uip->ui_proccnt);

	varsymset_clean(&uip->ui_varsymset);
	lockuninit(&uip->ui_varsymset.vx_lock);
	spin_uninit(&uip->ui_lock);
	FREE(uip, M_UIDINFO);
	return(0);
}

/*
 * MPSAFE
 */
void
uihold(struct uidinfo *uip)
{
	atomic_add_int(&uip->ui_ref, 1);
	KKASSERT(uip->ui_ref >= 0);
}

/*
 * NOTE: It is important for us to not drop the ref count to 0
 *	 because this can cause a 2->0/2->0 race with another
 *	 concurrent dropper.  Losing the race in that situation
 *	 can cause uip to become stale for one of the other
 *	 threads.
 *
 * MPSAFE
 */
void
uidrop(struct uidinfo *uip)
{
	int ref;

	KKASSERT(uip->ui_ref > 0);

	for (;;) {
		ref = uip->ui_ref;
		cpu_ccfence();
		if (ref == 1) {
			if (uifree(uip) == 0)
				break;
		} else if (atomic_cmpset_int(&uip->ui_ref, ref, ref - 1)) {
			break;
		}
		/* else retry */
	}
}
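
/*
 * Illustration (added for clarity; not part of the original file) of
 * why the loop above never decrements the count to 0 directly.  If it
 * did, a concurrent uifind() could look the uidinfo up and re-reference
 * it between our decrement and the free, leaving that thread with a
 * stale pointer.  Instead the 1->0 transition is delegated to uifree(),
 * which re-checks ui_ref under uihash_lock so the unlink and destroy
 * are atomic with respect to uilookup()/uifind().
 */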

void
uireplace(struct uidinfo **puip, struct uidinfo *nuip)
{
	uidrop(*puip);
	*puip = nuip;
}

/*
 * Change the count associated with number of processes
 * a given user is using.  When 'max' is 0, don't enforce a limit.
 */
int
chgproccnt(struct uidinfo *uip, int diff, int max)
{
	int ret;

	spin_lock(&uip->ui_lock);
	/* don't allow them to exceed max, but allow subtraction */
	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
		ret = 0;
	} else {
		uip->ui_proccnt += diff;
		if (uip->ui_proccnt < 0)
			kprintf("negative proccnt for uid = %d\n", uip->ui_uid);
		ret = 1;
	}
	spin_unlock(&uip->ui_lock);
	return ret;
}
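
/*
 * Illustrative sketch (added for clarity; not part of the original
 * file): how a fork-like path might use chgproccnt() to enforce a
 * per-user process limit.  The RLIMIT_NPROC lookup shown is a plausible
 * form only; the real callers live elsewhere in the kernel.
 *
 *	uip = uifind(p->p_ucred->cr_uid);
 *	if (!chgproccnt(uip, 1, p->p_rlimit[RLIMIT_NPROC].rlim_cur)) {
 *		uidrop(uip);
 *		return (EAGAIN);	(per-user process limit hit)
 *	}
 *	...
 *	chgproccnt(uip, -1, 0);		(undo on failure or at exit)
 *	uidrop(uip);
 */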

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max)
{
	rlim_t new;

	spin_lock(&uip->ui_lock);
	new = uip->ui_sbsize + to - *hiwat;
	KKASSERT(new >= 0);

	/*
	 * If we are trying to increase the socket buffer size, scale down
	 * the hi water mark when we exceed the user's allowed socket
	 * buffer space.
	 *
	 * We can't scale down too much or we will blow up atomic packet
	 * operations.
	 */
	if (to > *hiwat && to > MCLBYTES && new > max) {
		to = to * max / new;
		if (to < MCLBYTES)
			to = MCLBYTES;
	}
	uip->ui_sbsize = new;
	*hiwat = to;
	spin_unlock(&uip->ui_lock);
	return (1);
}
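
/*
 * Worked example (added for clarity; not part of the original file) of
 * the scale-down above.  Assume *hiwat = 0, to = 65536, max = 131072 and
 * the user has already charged ui_sbsize = 98304:
 *
 *	new = 98304 + 65536 - 0 = 163840	(exceeds max)
 *	to  = 65536 * 131072 / 163840 = 52428	(scaled down)
 *
 * to is then floored at MCLBYTES so small atomic packet operations keep
 * working, and the full 'new' total is still charged to ui_sbsize.
 */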