1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2010 The FreeBSD Foundation
5 *
6 * This software was developed by Edward Tomasz Napierala under sponsorship
7 * from the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #ifdef RCTL
32
33 #include <sys/param.h>
34 #include <sys/devctl.h>
35 #include <sys/malloc.h>
36 #include <sys/queue.h>
37 #include <sys/refcount.h>
38 #include <sys/jail.h>
39 #include <sys/kernel.h>
40 #include <sys/limits.h>
41 #include <sys/loginclass.h>
42 #include <sys/malloc.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/racct.h>
46 #include <sys/rctl.h>
47 #include <sys/resourcevar.h>
48 #include <sys/sx.h>
49 #include <sys/sysproto.h>
50 #include <sys/systm.h>
51 #include <sys/types.h>
52 #include <sys/eventhandler.h>
53 #include <sys/lock.h>
54 #include <sys/mutex.h>
55 #include <sys/rwlock.h>
56 #include <sys/sbuf.h>
57 #include <sys/taskqueue.h>
58 #include <sys/tree.h>
59 #include <vm/uma.h>
60
/*
 * Fail the build early when RCTL is configured without RACCT.
 */
#ifndef RACCT
#error "The RCTL option requires the RACCT option"
#endif

/* Declare the "rctl" kernel feature with its human-readable description. */
FEATURE(rctl, "Resource Limits");
66
/*
 * HRF_* flag values.
 * NOTE(review): no user of these constants is visible in this part of the
 * file; confirm their meaning against the code that consumes them.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/*
 * Buffer size limits.  RCTL_MAX_OUTBUFSIZE is the default for the
 * kern.racct.rctl.maxbufsize knob and RCTL_LOG_BUFSIZE is the size of the
 * scratch buffer used when formatting a rule for a log or devctl message.
 *
 * The products are parenthesized so that the macros expand to a single
 * operand; without the parentheses an expression such as
 * "len / RCTL_MAX_INBUFSIZE" would be evaluated as "(len / 4) * 1024".
 */
#define	RCTL_MAX_INBUFSIZE	(4 * 1024)
#define	RCTL_MAX_OUTBUFSIZE	(16 * 1024 * 1024)
#define	RCTL_LOG_BUFSIZE	128

/*
 * Margin subtracted from the available %CPU in rctl_pcpu_available() when
 * the limit is larger than twice this value.
 */
#define	RCTL_PCPU_SHIFT		(10 * 1000000)
76
/*
 * Backing storage and defaults for the kern.racct.rctl.maxbufsize,
 * log_rate_limit and devctl_rate_limit knobs registered below.
 */
static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;
static int rctl_log_rate_limit = 10;
static int rctl_devctl_rate_limit = 10;

/*
 * Values below are initialized in rctl_init(); -1 is only a placeholder
 * until then.
 */
static int rctl_throttle_min = -1;
static int rctl_throttle_max = -1;
static int rctl_throttle_pct = -1;
static int rctl_throttle_pct2 = -1;

/*
 * Handlers for the throttle_* knobs.  Each one validates the new value
 * before storing it, which is why these knobs are SYSCTL_PROCs rather than
 * plain integer sysctls.
 */
static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS);
static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS);
static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS);
static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS);

/* Everything below lives under the kern.racct.rctl node. */
SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Resource Limits");
SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
    &rctl_maxbufsize, 0, "Maximum output buffer size");
SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW,
    &rctl_log_rate_limit, 0, "Maximum number of log messages per second");
SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN,
    &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second");
SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min,
    CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
    &rctl_throttle_min_sysctl, "IU",
    "Shortest throttling duration, in hz");
TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min);
SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max,
    CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
    &rctl_throttle_max_sysctl, "IU",
    "Longest throttling duration, in hz");
TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max);
SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct,
    CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
    &rctl_throttle_pct_sysctl, "IU",
    "Throttling penalty for process consumption, in percent");
TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct);
SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2,
    CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
    &rctl_throttle_pct2_sysctl, "IU",
    "Throttling penalty for container consumption, in percent");
TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2);
122
/*
 * 'rctl_rule_link' connects a rule with every racct it's related to.
 * For example, rule 'user:X:openfiles:deny=N/process' is linked
 * with uidinfo for user X, and to each process of that user.
 */
struct rctl_rule_link {
	/* Linkage on the owning racct's r_rule_links list. */
	LIST_ENTRY(rctl_rule_link) rrl_next;
	/* The rule this link refers to; the link holds a reference on it. */
	struct rctl_rule *rrl_rule;
	/*
	 * Set to 1 by rctl_enforce() once a log, devctl or signal action
	 * has been carried out for this link, and reset to 0 when the rule
	 * is found not to be exceeded.
	 */
	int rrl_exceeded;
};
133
/*
 * One entry of a name <-> numeric value lookup table.  Tables of these are
 * terminated by an entry whose d_name is NULL.
 */
struct dict {
	/* Textual name; NULL marks the end of a table. */
	const char *d_name;
	/* Numeric constant the name stands for. */
	int d_value;
};
138
139 static struct dict subjectnames[] = {
140 { "process", RCTL_SUBJECT_TYPE_PROCESS },
141 { "user", RCTL_SUBJECT_TYPE_USER },
142 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
143 { "jail", RCTL_SUBJECT_TYPE_JAIL },
144 { NULL, -1 }};
145
146 static struct dict resourcenames[] = {
147 { "cputime", RACCT_CPU },
148 { "datasize", RACCT_DATA },
149 { "stacksize", RACCT_STACK },
150 { "coredumpsize", RACCT_CORE },
151 { "memoryuse", RACCT_RSS },
152 { "memorylocked", RACCT_MEMLOCK },
153 { "maxproc", RACCT_NPROC },
154 { "openfiles", RACCT_NOFILE },
155 { "vmemoryuse", RACCT_VMEM },
156 { "pseudoterminals", RACCT_NPTS },
157 { "swapuse", RACCT_SWAP },
158 { "nthr", RACCT_NTHR },
159 { "msgqqueued", RACCT_MSGQQUEUED },
160 { "msgqsize", RACCT_MSGQSIZE },
161 { "nmsgq", RACCT_NMSGQ },
162 { "nsem", RACCT_NSEM },
163 { "nsemop", RACCT_NSEMOP },
164 { "nshm", RACCT_NSHM },
165 { "shmsize", RACCT_SHMSIZE },
166 { "wallclock", RACCT_WALLCLOCK },
167 { "pcpu", RACCT_PCTCPU },
168 { "readbps", RACCT_READBPS },
169 { "writebps", RACCT_WRITEBPS },
170 { "readiops", RACCT_READIOPS },
171 { "writeiops", RACCT_WRITEIOPS },
172 { NULL, -1 }};
173
174 static struct dict actionnames[] = {
175 { "sighup", RCTL_ACTION_SIGHUP },
176 { "sigint", RCTL_ACTION_SIGINT },
177 { "sigquit", RCTL_ACTION_SIGQUIT },
178 { "sigill", RCTL_ACTION_SIGILL },
179 { "sigtrap", RCTL_ACTION_SIGTRAP },
180 { "sigabrt", RCTL_ACTION_SIGABRT },
181 { "sigemt", RCTL_ACTION_SIGEMT },
182 { "sigfpe", RCTL_ACTION_SIGFPE },
183 { "sigkill", RCTL_ACTION_SIGKILL },
184 { "sigbus", RCTL_ACTION_SIGBUS },
185 { "sigsegv", RCTL_ACTION_SIGSEGV },
186 { "sigsys", RCTL_ACTION_SIGSYS },
187 { "sigpipe", RCTL_ACTION_SIGPIPE },
188 { "sigalrm", RCTL_ACTION_SIGALRM },
189 { "sigterm", RCTL_ACTION_SIGTERM },
190 { "sigurg", RCTL_ACTION_SIGURG },
191 { "sigstop", RCTL_ACTION_SIGSTOP },
192 { "sigtstp", RCTL_ACTION_SIGTSTP },
193 { "sigchld", RCTL_ACTION_SIGCHLD },
194 { "sigttin", RCTL_ACTION_SIGTTIN },
195 { "sigttou", RCTL_ACTION_SIGTTOU },
196 { "sigio", RCTL_ACTION_SIGIO },
197 { "sigxcpu", RCTL_ACTION_SIGXCPU },
198 { "sigxfsz", RCTL_ACTION_SIGXFSZ },
199 { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
200 { "sigprof", RCTL_ACTION_SIGPROF },
201 { "sigwinch", RCTL_ACTION_SIGWINCH },
202 { "siginfo", RCTL_ACTION_SIGINFO },
203 { "sigusr1", RCTL_ACTION_SIGUSR1 },
204 { "sigusr2", RCTL_ACTION_SIGUSR2 },
205 { "sigthr", RCTL_ACTION_SIGTHR },
206 { "deny", RCTL_ACTION_DENY },
207 { "log", RCTL_ACTION_LOG },
208 { "devctl", RCTL_ACTION_DEVCTL },
209 { "throttle", RCTL_ACTION_THROTTLE },
210 { NULL, -1 }};
211
/* rctl_init() is registered to run at SI_SUB_RACCT, SI_ORDER_FIRST. */
static void rctl_init(void);
SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);

/* UMA zones that rules and rule links are allocated from. */
static uma_zone_t rctl_rule_zone;
static uma_zone_t rctl_rule_link_zone;

/* Forward declarations for helpers that are used before their definition. */
static int rctl_rule_fully_specified(const struct rctl_rule *rule);
static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);

/* Malloc type for temporary buffers, e.g. the ones in rctl_enforce(). */
static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
222
rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)223 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)
224 {
225 int error, val = rctl_throttle_min;
226
227 error = sysctl_handle_int(oidp, &val, 0, req);
228 if (error || !req->newptr)
229 return (error);
230 if (val < 1 || val > rctl_throttle_max)
231 return (EINVAL);
232
233 RACCT_LOCK();
234 rctl_throttle_min = val;
235 RACCT_UNLOCK();
236
237 return (0);
238 }
239
rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)240 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)
241 {
242 int error, val = rctl_throttle_max;
243
244 error = sysctl_handle_int(oidp, &val, 0, req);
245 if (error || !req->newptr)
246 return (error);
247 if (val < rctl_throttle_min)
248 return (EINVAL);
249
250 RACCT_LOCK();
251 rctl_throttle_max = val;
252 RACCT_UNLOCK();
253
254 return (0);
255 }
256
rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)257 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)
258 {
259 int error, val = rctl_throttle_pct;
260
261 error = sysctl_handle_int(oidp, &val, 0, req);
262 if (error || !req->newptr)
263 return (error);
264 if (val < 0)
265 return (EINVAL);
266
267 RACCT_LOCK();
268 rctl_throttle_pct = val;
269 RACCT_UNLOCK();
270
271 return (0);
272 }
273
rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)274 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)
275 {
276 int error, val = rctl_throttle_pct2;
277
278 error = sysctl_handle_int(oidp, &val, 0, req);
279 if (error || !req->newptr)
280 return (error);
281 if (val < 0)
282 return (EINVAL);
283
284 RACCT_LOCK();
285 rctl_throttle_pct2 = val;
286 RACCT_UNLOCK();
287
288 return (0);
289 }
290
291 static const char *
rctl_subject_type_name(int subject)292 rctl_subject_type_name(int subject)
293 {
294 int i;
295
296 for (i = 0; subjectnames[i].d_name != NULL; i++) {
297 if (subjectnames[i].d_value == subject)
298 return (subjectnames[i].d_name);
299 }
300
301 panic("rctl_subject_type_name: unknown subject type %d", subject);
302 }
303
304 static const char *
rctl_action_name(int action)305 rctl_action_name(int action)
306 {
307 int i;
308
309 for (i = 0; actionnames[i].d_name != NULL; i++) {
310 if (actionnames[i].d_value == action)
311 return (actionnames[i].d_name);
312 }
313
314 panic("rctl_action_name: unknown action %d", action);
315 }
316
317 const char *
rctl_resource_name(int resource)318 rctl_resource_name(int resource)
319 {
320 int i;
321
322 for (i = 0; resourcenames[i].d_name != NULL; i++) {
323 if (resourcenames[i].d_value == resource)
324 return (resourcenames[i].d_name);
325 }
326
327 panic("rctl_resource_name: unknown resource %d", resource);
328 }
329
330 static struct racct *
rctl_proc_rule_to_racct(const struct proc * p,const struct rctl_rule * rule)331 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
332 {
333 struct ucred *cred = p->p_ucred;
334
335 ASSERT_RACCT_ENABLED();
336 RACCT_LOCK_ASSERT();
337
338 switch (rule->rr_per) {
339 case RCTL_SUBJECT_TYPE_PROCESS:
340 return (p->p_racct);
341 case RCTL_SUBJECT_TYPE_USER:
342 return (cred->cr_ruidinfo->ui_racct);
343 case RCTL_SUBJECT_TYPE_LOGINCLASS:
344 return (cred->cr_loginclass->lc_racct);
345 case RCTL_SUBJECT_TYPE_JAIL:
346 return (cred->cr_prison->pr_prison_racct->prr_racct);
347 default:
348 panic("%s: unknown per %d", __func__, rule->rr_per);
349 }
350 }
351
352 /*
353 * Return the amount of resource that can be allocated by 'p' before
354 * hitting 'rule'.
355 */
356 static int64_t
rctl_available_resource(const struct proc * p,const struct rctl_rule * rule)357 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
358 {
359 const struct racct *racct;
360 int64_t available;
361
362 ASSERT_RACCT_ENABLED();
363 RACCT_LOCK_ASSERT();
364
365 racct = rctl_proc_rule_to_racct(p, rule);
366 available = rule->rr_amount - racct->r_resources[rule->rr_resource];
367
368 return (available);
369 }
370
371 /*
372 * Called every second for proc, uidinfo, loginclass, and jail containers.
373 * If the limit isn't exceeded, it decreases the usage amount to zero.
374 * Otherwise, it decreases it by the value of the limit. This way
375 * resource consumption exceeding the limit "carries over" to the next
376 * period.
377 */
378 void
rctl_throttle_decay(struct racct * racct,int resource)379 rctl_throttle_decay(struct racct *racct, int resource)
380 {
381 struct rctl_rule *rule;
382 struct rctl_rule_link *link;
383 int64_t minavailable;
384
385 ASSERT_RACCT_ENABLED();
386 RACCT_LOCK_ASSERT();
387
388 minavailable = INT64_MAX;
389
390 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
391 rule = link->rrl_rule;
392
393 if (rule->rr_resource != resource)
394 continue;
395 if (rule->rr_action != RCTL_ACTION_THROTTLE)
396 continue;
397
398 if (rule->rr_amount < minavailable)
399 minavailable = rule->rr_amount;
400 }
401
402 if (racct->r_resources[resource] < minavailable) {
403 racct->r_resources[resource] = 0;
404 } else {
405 /*
406 * Cap utilization counter at ten times the limit. Otherwise,
407 * if we changed the rule lowering the allowed amount, it could
408 * take unreasonably long time for the accumulated resource
409 * usage to drop.
410 */
411 if (racct->r_resources[resource] > minavailable * 10)
412 racct->r_resources[resource] = minavailable * 10;
413
414 racct->r_resources[resource] -= minavailable;
415 }
416 }
417
418 /*
419 * Special version of rctl_get_available() for the %CPU resource.
420 * We slightly cheat here and return less than we normally would.
421 */
422 int64_t
rctl_pcpu_available(const struct proc * p)423 rctl_pcpu_available(const struct proc *p) {
424 struct rctl_rule *rule;
425 struct rctl_rule_link *link;
426 int64_t available, minavailable, limit;
427
428 ASSERT_RACCT_ENABLED();
429 RACCT_LOCK_ASSERT();
430
431 minavailable = INT64_MAX;
432 limit = 0;
433
434 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
435 rule = link->rrl_rule;
436 if (rule->rr_resource != RACCT_PCTCPU)
437 continue;
438 if (rule->rr_action != RCTL_ACTION_DENY)
439 continue;
440 available = rctl_available_resource(p, rule);
441 if (available < minavailable) {
442 minavailable = available;
443 limit = rule->rr_amount;
444 }
445 }
446
447 /*
448 * Return slightly less than actual value of the available
449 * %cpu resource. This makes %cpu throttling more aggressive
450 * and lets us act sooner than the limits are already exceeded.
451 */
452 if (limit != 0) {
453 if (limit > 2 * RCTL_PCPU_SHIFT)
454 minavailable -= RCTL_PCPU_SHIFT;
455 else
456 minavailable -= (limit / 2);
457 }
458
459 return (minavailable);
460 }
461
/*
 * Saturating addition: returns a + b, or UINT64_MAX if the sum does not
 * fit in 64 bits.
 */
static uint64_t
xadd(uint64_t a, uint64_t b)
{

	/* The sum overflows exactly when 'a' exceeds the room left by 'b'. */
	if (a > UINT64_MAX - b)
		return (UINT64_MAX);

	return (a + b);
}
477
/*
 * Saturating multiplication: returns a * b, or UINT64_MAX if the product
 * does not fit in 64 bits.
 */
static uint64_t
xmul(uint64_t a, uint64_t b)
{

	/* A zero factor cannot overflow; it also keeps the division safe. */
	if (b == 0)
		return (0);

	if (a > UINT64_MAX / b)
		return (UINT64_MAX);

	return (a * b);
}
487
/*
 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
 * to what it keeps allocated now.  Returns non-zero if the allocation should
 * be denied, 0 otherwise.
 *
 * Every rule linked to the process' racct that concerns 'resource' and
 * would be exceeded by the allocation has its action carried out here:
 * deny, log (printf), devctl notification, throttle, or a signal.  Log,
 * devctl and signal actions are skipped while the link's rrl_exceeded flag
 * is set; the flag is set once the action has been carried out and cleared
 * again when the rule is found not to be exceeded.  Log and devctl
 * messages are additionally rate-limited with ppsratecheck().
 *
 * Asserts that the racct lock is held.
 */
int
rctl_enforce(struct proc *p, int resource, uint64_t amount)
{
	/* Rate-limiting state, shared by all callers. */
	static struct timeval log_lasttime, devctl_lasttime;
	static int log_curtime = 0, devctl_curtime = 0;
	struct rctl_rule *rule;
	struct rctl_rule_link *link;
	struct sbuf sb;
	char *buf;
	int64_t available;
	uint64_t sleep_ms, sleep_ratio;
	int should_deny = 0;

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();

	/*
	 * There may be more than one matching rule; go through all of them.
	 * Denial should be done last, after logging and sending signals.
	 */
	LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
		rule = link->rrl_rule;
		if (rule->rr_resource != resource)
			continue;

		/* The rule is not exceeded; re-arm its one-shot actions. */
		available = rctl_available_resource(p, rule);
		if (available >= (int64_t)amount) {
			link->rrl_exceeded = 0;
			continue;
		}

		/*
		 * Note that 'continue' inside the switch below moves on to
		 * the next rule link.
		 */
		switch (rule->rr_action) {
		case RCTL_ACTION_DENY:
			should_deny = 1;
			continue;
		case RCTL_ACTION_LOG:
			/*
			 * If rrl_exceeded != 0, it means we've already
			 * logged a warning for this process.
			 */
			if (link->rrl_exceeded != 0)
				continue;

			/*
			 * If the process state is not fully initialized yet,
			 * we can't access most of the required fields, e.g.
			 * p->p_comm.  This happens when called from fork1().
			 * Ignore this rule for now; it will be processed just
			 * after fork, when called from racct_proc_fork_done().
			 */
			if (p->p_state != PRS_NORMAL)
				continue;

			if (!ppsratecheck(&log_lasttime, &log_curtime,
			    rctl_log_rate_limit))
				continue;

			/* M_NOWAIT: on failure the message is dropped. */
			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
			if (buf == NULL) {
				printf("rctl_enforce: out of memory\n");
				continue;
			}
			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
			rctl_rule_to_sbuf(&sb, rule);
			sbuf_finish(&sb);
			printf("rctl: rule \"%s\" matched by pid %d "
			    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
			    p->p_pid, p->p_comm, p->p_ucred->cr_uid,
			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
			sbuf_delete(&sb);
			free(buf, M_RCTL);
			link->rrl_exceeded = 1;
			continue;
		case RCTL_ACTION_DEVCTL:
			/* Same gating as for RCTL_ACTION_LOG above. */
			if (link->rrl_exceeded != 0)
				continue;

			if (p->p_state != PRS_NORMAL)
				continue;

			if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
			    rctl_devctl_rate_limit))
				continue;

			buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
			if (buf == NULL) {
				printf("rctl_enforce: out of memory\n");
				continue;
			}
			sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
			sbuf_cat(&sb, "rule=");
			rctl_rule_to_sbuf(&sb, rule);
			sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
			    p->p_pid, p->p_ucred->cr_ruid,
			    p->p_ucred->cr_prison->pr_prison_racct->prr_name);
			sbuf_finish(&sb);
			devctl_notify("RCTL", "rule", "matched",
			    sbuf_data(&sb));
			sbuf_delete(&sb);
			free(buf, M_RCTL);
			link->rrl_exceeded = 1;
			continue;
		case RCTL_ACTION_THROTTLE:
			if (p->p_state != PRS_NORMAL)
				continue;

			/*
			 * A zero limit would make the division below
			 * impossible; throttle for the maximum duration.
			 */
			if (rule->rr_amount == 0) {
				racct_proc_throttle(p, rctl_throttle_max);
				continue;
			}

			/*
			 * Make the process sleep for a fraction of second
			 * proportional to the ratio of process' resource
			 * utilization compared to the limit.  The point is
			 * to penalize resource hogs: processes that consume
			 * more of the available resources sleep for longer.
			 *
			 * We're trying to defer division until the very end,
			 * to minimize the rounding effects.  The following
			 * calculation could have been written in a clearer
			 * way like this:
			 *
			 * sleep_ms = hz * p->p_racct->r_resources[resource] /
			 *     rule->rr_amount;
			 * sleep_ms *= rctl_throttle_pct / 100;
			 * if (sleep_ms < rctl_throttle_min)
			 *         sleep_ms = rctl_throttle_min;
			 *
			 * Until the final division sleep_ms is therefore
			 * scaled by rule->rr_amount, which is why the minimum
			 * is multiplied by it below.  xmul() and xadd()
			 * saturate at UINT64_MAX instead of wrapping.
			 */
			sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
			sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100;
			if (sleep_ms < rctl_throttle_min * rule->rr_amount)
				sleep_ms = rctl_throttle_min * rule->rr_amount;

			/*
			 * Multiply that by the ratio of the resource
			 * consumption for the container compared to the limit,
			 * squared.  In other words, a process in a container
			 * that is two times over the limit will be throttled
			 * four times as much for hitting the same rule.  The
			 * point is to penalize processes more if the container
			 * itself (eg certain UID or jail) is above the limit.
			 */
			if (available < 0)
				sleep_ratio = -available / rule->rr_amount;
			else
				sleep_ratio = 0;
			sleep_ratio = xmul(sleep_ratio, sleep_ratio);
			sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
			sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));

			/*
			 * Finally the division.
			 */
			sleep_ms /= rule->rr_amount;

			if (sleep_ms > rctl_throttle_max)
				sleep_ms = rctl_throttle_max;
#if 0
			printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
			    __func__, p->p_pid, p->p_comm,
			    p->p_racct->r_resources[resource],
			    rule->rr_amount, (uintmax_t)sleep_ms,
			    (uintmax_t)sleep_ratio, (intmax_t)available);
#endif

			KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
			    __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
			racct_proc_throttle(p, sleep_ms);
			continue;
		default:
			/* Anything else must be one of the signal actions. */
			if (link->rrl_exceeded != 0)
				continue;

			if (p->p_state != PRS_NORMAL)
				continue;

			KASSERT(rule->rr_action > 0 &&
			    rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
			    ("rctl_enforce: unknown action %d",
			     rule->rr_action));

			/*
			 * We're using the fact that RCTL_ACTION_SIG* values
			 * are equal to their counterparts from sys/signal.h.
			 */
			kern_psignal(p, rule->rr_action);
			link->rrl_exceeded = 1;
			continue;
		}
	}

	if (should_deny) {
		/*
		 * Return fake error code; the caller should change it
		 * into one proper for the situation, e.g. EFBIG or ENOMEM.
		 */
		return (EDOOFUS);
	}

	return (0);
}
696
697 uint64_t
rctl_get_limit(struct proc * p,int resource)698 rctl_get_limit(struct proc *p, int resource)
699 {
700 struct rctl_rule *rule;
701 struct rctl_rule_link *link;
702 uint64_t amount = UINT64_MAX;
703
704 ASSERT_RACCT_ENABLED();
705 RACCT_LOCK_ASSERT();
706
707 /*
708 * There may be more than one matching rule; go through all of them.
709 * Denial should be done last, after logging and sending signals.
710 */
711 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
712 rule = link->rrl_rule;
713 if (rule->rr_resource != resource)
714 continue;
715 if (rule->rr_action != RCTL_ACTION_DENY)
716 continue;
717 if (rule->rr_amount < amount)
718 amount = rule->rr_amount;
719 }
720
721 return (amount);
722 }
723
724 uint64_t
rctl_get_available(struct proc * p,int resource)725 rctl_get_available(struct proc *p, int resource)
726 {
727 struct rctl_rule *rule;
728 struct rctl_rule_link *link;
729 int64_t available, minavailable, allocated;
730
731 minavailable = INT64_MAX;
732
733 ASSERT_RACCT_ENABLED();
734 RACCT_LOCK_ASSERT();
735
736 /*
737 * There may be more than one matching rule; go through all of them.
738 * Denial should be done last, after logging and sending signals.
739 */
740 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
741 rule = link->rrl_rule;
742 if (rule->rr_resource != resource)
743 continue;
744 if (rule->rr_action != RCTL_ACTION_DENY)
745 continue;
746 available = rctl_available_resource(p, rule);
747 if (available < minavailable)
748 minavailable = available;
749 }
750
751 /*
752 * XXX: Think about this _hard_.
753 */
754 allocated = p->p_racct->r_resources[resource];
755 if (minavailable < INT64_MAX - allocated)
756 minavailable += allocated;
757 if (minavailable < 0)
758 minavailable = 0;
759
760 return (minavailable);
761 }
762
763 static int
rctl_rule_matches(const struct rctl_rule * rule,const struct rctl_rule * filter)764 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
765 {
766
767 ASSERT_RACCT_ENABLED();
768
769 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
770 if (rule->rr_subject_type != filter->rr_subject_type)
771 return (0);
772
773 switch (filter->rr_subject_type) {
774 case RCTL_SUBJECT_TYPE_PROCESS:
775 if (filter->rr_subject.rs_proc != NULL &&
776 rule->rr_subject.rs_proc !=
777 filter->rr_subject.rs_proc)
778 return (0);
779 break;
780 case RCTL_SUBJECT_TYPE_USER:
781 if (filter->rr_subject.rs_uip != NULL &&
782 rule->rr_subject.rs_uip !=
783 filter->rr_subject.rs_uip)
784 return (0);
785 break;
786 case RCTL_SUBJECT_TYPE_LOGINCLASS:
787 if (filter->rr_subject.rs_loginclass != NULL &&
788 rule->rr_subject.rs_loginclass !=
789 filter->rr_subject.rs_loginclass)
790 return (0);
791 break;
792 case RCTL_SUBJECT_TYPE_JAIL:
793 if (filter->rr_subject.rs_prison_racct != NULL &&
794 rule->rr_subject.rs_prison_racct !=
795 filter->rr_subject.rs_prison_racct)
796 return (0);
797 break;
798 default:
799 panic("rctl_rule_matches: unknown subject type %d",
800 filter->rr_subject_type);
801 }
802 }
803
804 if (filter->rr_resource != RACCT_UNDEFINED) {
805 if (rule->rr_resource != filter->rr_resource)
806 return (0);
807 }
808
809 if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
810 if (rule->rr_action != filter->rr_action)
811 return (0);
812 }
813
814 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
815 if (rule->rr_amount != filter->rr_amount)
816 return (0);
817 }
818
819 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
820 if (rule->rr_per != filter->rr_per)
821 return (0);
822 }
823
824 return (1);
825 }
826
827 static int
str2value(const char * str,int * value,struct dict * table)828 str2value(const char *str, int *value, struct dict *table)
829 {
830 int i;
831
832 if (value == NULL)
833 return (EINVAL);
834
835 for (i = 0; table[i].d_name != NULL; i++) {
836 if (strcasecmp(table[i].d_name, str) == 0) {
837 *value = table[i].d_value;
838 return (0);
839 }
840 }
841
842 return (EINVAL);
843 }
844
845 static int
str2id(const char * str,id_t * value)846 str2id(const char *str, id_t *value)
847 {
848 char *end;
849
850 if (str == NULL)
851 return (EINVAL);
852
853 *value = strtoul(str, &end, 10);
854 if ((size_t)(end - str) != strlen(str))
855 return (EINVAL);
856
857 return (0);
858 }
859
/*
 * Parse 'str' as a decimal number into '*value'.  Returns EINVAL if 'str'
 * is NULL or if anything is left over after the number, ERANGE if the
 * parsed value ends up negative once stored in an int64_t, and 0
 * otherwise.  Note that '*value' is written even when the call fails.
 *
 * NOTE(review): the parser is strtoul(), whose result is an unsigned long;
 * on targets where that is narrower than 64 bits larger inputs cannot be
 * represented -- confirm whether a 64-bit parser is wanted here.
 */
static int
str2int64(const char *str, int64_t *value)
{
	char *stop;

	if (str == NULL)
		return (EINVAL);

	*value = strtoul(str, &stop, 10);

	/* The whole string must have been consumed. */
	if (*stop != '\0')
		return (EINVAL);

	return (*value < 0 ? ERANGE : 0);
}
877
878 /*
879 * Connect the rule to the racct, increasing refcount for the rule.
880 */
881 static void
rctl_racct_add_rule(struct racct * racct,struct rctl_rule * rule)882 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
883 {
884 struct rctl_rule_link *link;
885
886 ASSERT_RACCT_ENABLED();
887 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
888
889 rctl_rule_acquire(rule);
890 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
891 link->rrl_rule = rule;
892 link->rrl_exceeded = 0;
893
894 RACCT_LOCK();
895 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
896 RACCT_UNLOCK();
897 }
898
899 static int
rctl_racct_add_rule_locked(struct racct * racct,struct rctl_rule * rule)900 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
901 {
902 struct rctl_rule_link *link;
903
904 ASSERT_RACCT_ENABLED();
905 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
906 RACCT_LOCK_ASSERT();
907
908 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
909 if (link == NULL)
910 return (ENOMEM);
911 rctl_rule_acquire(rule);
912 link->rrl_rule = rule;
913 link->rrl_exceeded = 0;
914
915 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
916
917 return (0);
918 }
919
920 /*
921 * Remove limits for a rules matching the filter and release
922 * the refcounts for the rules, possibly freeing them. Returns
923 * the number of limit structures removed.
924 */
925 static int
rctl_racct_remove_rules(struct racct * racct,const struct rctl_rule * filter)926 rctl_racct_remove_rules(struct racct *racct,
927 const struct rctl_rule *filter)
928 {
929 struct rctl_rule_link *link, *linktmp;
930 int removed = 0;
931
932 ASSERT_RACCT_ENABLED();
933 RACCT_LOCK_ASSERT();
934
935 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
936 if (!rctl_rule_matches(link->rrl_rule, filter))
937 continue;
938
939 LIST_REMOVE(link, rrl_next);
940 rctl_rule_release(link->rrl_rule);
941 uma_zfree(rctl_rule_link_zone, link);
942 removed++;
943 }
944 return (removed);
945 }
946
947 static void
rctl_rule_acquire_subject(struct rctl_rule * rule)948 rctl_rule_acquire_subject(struct rctl_rule *rule)
949 {
950
951 ASSERT_RACCT_ENABLED();
952
953 switch (rule->rr_subject_type) {
954 case RCTL_SUBJECT_TYPE_UNDEFINED:
955 case RCTL_SUBJECT_TYPE_PROCESS:
956 break;
957 case RCTL_SUBJECT_TYPE_JAIL:
958 if (rule->rr_subject.rs_prison_racct != NULL)
959 prison_racct_hold(rule->rr_subject.rs_prison_racct);
960 break;
961 case RCTL_SUBJECT_TYPE_USER:
962 if (rule->rr_subject.rs_uip != NULL)
963 uihold(rule->rr_subject.rs_uip);
964 break;
965 case RCTL_SUBJECT_TYPE_LOGINCLASS:
966 if (rule->rr_subject.rs_loginclass != NULL)
967 loginclass_hold(rule->rr_subject.rs_loginclass);
968 break;
969 default:
970 panic("rctl_rule_acquire_subject: unknown subject type %d",
971 rule->rr_subject_type);
972 }
973 }
974
975 static void
rctl_rule_release_subject(struct rctl_rule * rule)976 rctl_rule_release_subject(struct rctl_rule *rule)
977 {
978
979 ASSERT_RACCT_ENABLED();
980
981 switch (rule->rr_subject_type) {
982 case RCTL_SUBJECT_TYPE_UNDEFINED:
983 case RCTL_SUBJECT_TYPE_PROCESS:
984 break;
985 case RCTL_SUBJECT_TYPE_JAIL:
986 if (rule->rr_subject.rs_prison_racct != NULL)
987 prison_racct_free(rule->rr_subject.rs_prison_racct);
988 break;
989 case RCTL_SUBJECT_TYPE_USER:
990 if (rule->rr_subject.rs_uip != NULL)
991 uifree(rule->rr_subject.rs_uip);
992 break;
993 case RCTL_SUBJECT_TYPE_LOGINCLASS:
994 if (rule->rr_subject.rs_loginclass != NULL)
995 loginclass_free(rule->rr_subject.rs_loginclass);
996 break;
997 default:
998 panic("rctl_rule_release_subject: unknown subject type %d",
999 rule->rr_subject_type);
1000 }
1001 }
1002
1003 struct rctl_rule *
rctl_rule_alloc(int flags)1004 rctl_rule_alloc(int flags)
1005 {
1006 struct rctl_rule *rule;
1007
1008 ASSERT_RACCT_ENABLED();
1009
1010 rule = uma_zalloc(rctl_rule_zone, flags);
1011 if (rule == NULL)
1012 return (NULL);
1013 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1014 rule->rr_subject.rs_proc = NULL;
1015 rule->rr_subject.rs_uip = NULL;
1016 rule->rr_subject.rs_loginclass = NULL;
1017 rule->rr_subject.rs_prison_racct = NULL;
1018 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1019 rule->rr_resource = RACCT_UNDEFINED;
1020 rule->rr_action = RCTL_ACTION_UNDEFINED;
1021 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1022 refcount_init(&rule->rr_refcount, 1);
1023
1024 return (rule);
1025 }
1026
1027 struct rctl_rule *
rctl_rule_duplicate(const struct rctl_rule * rule,int flags)1028 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
1029 {
1030 struct rctl_rule *copy;
1031
1032 ASSERT_RACCT_ENABLED();
1033
1034 copy = uma_zalloc(rctl_rule_zone, flags);
1035 if (copy == NULL)
1036 return (NULL);
1037 copy->rr_subject_type = rule->rr_subject_type;
1038 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
1039 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
1040 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
1041 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
1042 copy->rr_per = rule->rr_per;
1043 copy->rr_resource = rule->rr_resource;
1044 copy->rr_action = rule->rr_action;
1045 copy->rr_amount = rule->rr_amount;
1046 refcount_init(©->rr_refcount, 1);
1047 rctl_rule_acquire_subject(copy);
1048
1049 return (copy);
1050 }
1051
1052 void
rctl_rule_acquire(struct rctl_rule * rule)1053 rctl_rule_acquire(struct rctl_rule *rule)
1054 {
1055
1056 ASSERT_RACCT_ENABLED();
1057 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1058
1059 refcount_acquire(&rule->rr_refcount);
1060 }
1061
1062 static void
rctl_rule_free(void * context,int pending)1063 rctl_rule_free(void *context, int pending)
1064 {
1065 struct rctl_rule *rule;
1066
1067 rule = (struct rctl_rule *)context;
1068
1069 ASSERT_RACCT_ENABLED();
1070 KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
1071
1072 /*
1073 * We don't need locking here; rule is guaranteed to be inaccessible.
1074 */
1075
1076 rctl_rule_release_subject(rule);
1077 uma_zfree(rctl_rule_zone, rule);
1078 }
1079
1080 void
rctl_rule_release(struct rctl_rule * rule)1081 rctl_rule_release(struct rctl_rule *rule)
1082 {
1083
1084 ASSERT_RACCT_ENABLED();
1085 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1086
1087 if (refcount_release(&rule->rr_refcount)) {
1088 /*
1089 * rctl_rule_release() is often called when iterating
1090 * over all the uidinfo structures in the system,
1091 * holding uihashtbl_lock. Since rctl_rule_free()
1092 * might end up calling uifree(), this would lead
1093 * to lock recursion. Use taskqueue to avoid this.
1094 */
1095 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
1096 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
1097 }
1098 }
1099
1100 static int
rctl_rule_fully_specified(const struct rctl_rule * rule)1101 rctl_rule_fully_specified(const struct rctl_rule *rule)
1102 {
1103
1104 ASSERT_RACCT_ENABLED();
1105
1106 switch (rule->rr_subject_type) {
1107 case RCTL_SUBJECT_TYPE_UNDEFINED:
1108 return (0);
1109 case RCTL_SUBJECT_TYPE_PROCESS:
1110 if (rule->rr_subject.rs_proc == NULL)
1111 return (0);
1112 break;
1113 case RCTL_SUBJECT_TYPE_USER:
1114 if (rule->rr_subject.rs_uip == NULL)
1115 return (0);
1116 break;
1117 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1118 if (rule->rr_subject.rs_loginclass == NULL)
1119 return (0);
1120 break;
1121 case RCTL_SUBJECT_TYPE_JAIL:
1122 if (rule->rr_subject.rs_prison_racct == NULL)
1123 return (0);
1124 break;
1125 default:
1126 panic("rctl_rule_fully_specified: unknown subject type %d",
1127 rule->rr_subject_type);
1128 }
1129 if (rule->rr_resource == RACCT_UNDEFINED)
1130 return (0);
1131 if (rule->rr_action == RCTL_ACTION_UNDEFINED)
1132 return (0);
1133 if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
1134 return (0);
1135 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
1136 return (0);
1137
1138 return (1);
1139 }
1140
1141 static int
rctl_string_to_rule(char * rulestr,struct rctl_rule ** rulep)1142 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
1143 {
1144 struct rctl_rule *rule;
1145 char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
1146 *amountstr, *perstr;
1147 id_t id;
1148 int error = 0;
1149
1150 ASSERT_RACCT_ENABLED();
1151
1152 rule = rctl_rule_alloc(M_WAITOK);
1153
1154 subjectstr = strsep(&rulestr, ":");
1155 subject_idstr = strsep(&rulestr, ":");
1156 resourcestr = strsep(&rulestr, ":");
1157 actionstr = strsep(&rulestr, "=/");
1158 amountstr = strsep(&rulestr, "/");
1159 perstr = rulestr;
1160
1161 if (subjectstr == NULL || subjectstr[0] == '\0')
1162 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1163 else {
1164 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
1165 if (error != 0)
1166 goto out;
1167 }
1168
1169 if (subject_idstr == NULL || subject_idstr[0] == '\0') {
1170 rule->rr_subject.rs_proc = NULL;
1171 rule->rr_subject.rs_uip = NULL;
1172 rule->rr_subject.rs_loginclass = NULL;
1173 rule->rr_subject.rs_prison_racct = NULL;
1174 } else {
1175 switch (rule->rr_subject_type) {
1176 case RCTL_SUBJECT_TYPE_UNDEFINED:
1177 error = EINVAL;
1178 goto out;
1179 case RCTL_SUBJECT_TYPE_PROCESS:
1180 error = str2id(subject_idstr, &id);
1181 if (error != 0)
1182 goto out;
1183 sx_assert(&allproc_lock, SA_LOCKED);
1184 rule->rr_subject.rs_proc = pfind(id);
1185 if (rule->rr_subject.rs_proc == NULL) {
1186 error = ESRCH;
1187 goto out;
1188 }
1189 PROC_UNLOCK(rule->rr_subject.rs_proc);
1190 break;
1191 case RCTL_SUBJECT_TYPE_USER:
1192 error = str2id(subject_idstr, &id);
1193 if (error != 0)
1194 goto out;
1195 rule->rr_subject.rs_uip = uifind(id);
1196 break;
1197 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1198 rule->rr_subject.rs_loginclass =
1199 loginclass_find(subject_idstr);
1200 if (rule->rr_subject.rs_loginclass == NULL) {
1201 error = ENAMETOOLONG;
1202 goto out;
1203 }
1204 break;
1205 case RCTL_SUBJECT_TYPE_JAIL:
1206 rule->rr_subject.rs_prison_racct =
1207 prison_racct_find(subject_idstr);
1208 if (rule->rr_subject.rs_prison_racct == NULL) {
1209 error = ENAMETOOLONG;
1210 goto out;
1211 }
1212 break;
1213 default:
1214 panic("rctl_string_to_rule: unknown subject type %d",
1215 rule->rr_subject_type);
1216 }
1217 }
1218
1219 if (resourcestr == NULL || resourcestr[0] == '\0')
1220 rule->rr_resource = RACCT_UNDEFINED;
1221 else {
1222 error = str2value(resourcestr, &rule->rr_resource,
1223 resourcenames);
1224 if (error != 0)
1225 goto out;
1226 }
1227
1228 if (actionstr == NULL || actionstr[0] == '\0')
1229 rule->rr_action = RCTL_ACTION_UNDEFINED;
1230 else {
1231 error = str2value(actionstr, &rule->rr_action, actionnames);
1232 if (error != 0)
1233 goto out;
1234 }
1235
1236 if (amountstr == NULL || amountstr[0] == '\0')
1237 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1238 else {
1239 error = str2int64(amountstr, &rule->rr_amount);
1240 if (error != 0)
1241 goto out;
1242 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) {
1243 if (rule->rr_amount > INT64_MAX / 1000000) {
1244 error = ERANGE;
1245 goto out;
1246 }
1247 rule->rr_amount *= 1000000;
1248 }
1249 }
1250
1251 if (perstr == NULL || perstr[0] == '\0')
1252 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1253 else {
1254 error = str2value(perstr, &rule->rr_per, subjectnames);
1255 if (error != 0)
1256 goto out;
1257 }
1258
1259 out:
1260 if (error == 0)
1261 *rulep = rule;
1262 else
1263 rctl_rule_release(rule);
1264
1265 return (error);
1266 }
1267
1268 /*
1269 * Link a rule with all the subjects it applies to.
1270 */
1271 int
rctl_rule_add(struct rctl_rule * rule)1272 rctl_rule_add(struct rctl_rule *rule)
1273 {
1274 struct proc *p;
1275 struct ucred *cred;
1276 struct uidinfo *uip;
1277 struct prison *pr;
1278 struct prison_racct *prr;
1279 struct loginclass *lc;
1280 struct rctl_rule *rule2;
1281 int match;
1282
1283 ASSERT_RACCT_ENABLED();
1284 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1285
1286 /*
1287 * Some rules just don't make sense, like "deny" rule for an undeniable
1288 * resource. The exception are the RSS and %CPU resources - they are
1289 * not deniable in the racct sense, but the limit is enforced in
1290 * a different way.
1291 */
1292 if (rule->rr_action == RCTL_ACTION_DENY &&
1293 !RACCT_IS_DENIABLE(rule->rr_resource) &&
1294 rule->rr_resource != RACCT_RSS &&
1295 rule->rr_resource != RACCT_PCTCPU) {
1296 return (EOPNOTSUPP);
1297 }
1298
1299 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1300 !RACCT_IS_DECAYING(rule->rr_resource)) {
1301 return (EOPNOTSUPP);
1302 }
1303
1304 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1305 rule->rr_resource == RACCT_PCTCPU) {
1306 return (EOPNOTSUPP);
1307 }
1308
1309 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1310 RACCT_IS_SLOPPY(rule->rr_resource)) {
1311 return (EOPNOTSUPP);
1312 }
1313
1314 /*
1315 * Make sure there are no duplicated rules. Also, for the "deny"
1316 * rules, remove ones differing only by "amount".
1317 */
1318 if (rule->rr_action == RCTL_ACTION_DENY) {
1319 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1320 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1321 rctl_rule_remove(rule2);
1322 rctl_rule_release(rule2);
1323 } else
1324 rctl_rule_remove(rule);
1325
1326 switch (rule->rr_subject_type) {
1327 case RCTL_SUBJECT_TYPE_PROCESS:
1328 p = rule->rr_subject.rs_proc;
1329 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1330
1331 rctl_racct_add_rule(p->p_racct, rule);
1332 /*
1333 * In case of per-process rule, we don't have anything more
1334 * to do.
1335 */
1336 return (0);
1337
1338 case RCTL_SUBJECT_TYPE_USER:
1339 uip = rule->rr_subject.rs_uip;
1340 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1341 rctl_racct_add_rule(uip->ui_racct, rule);
1342 break;
1343
1344 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1345 lc = rule->rr_subject.rs_loginclass;
1346 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1347 rctl_racct_add_rule(lc->lc_racct, rule);
1348 break;
1349
1350 case RCTL_SUBJECT_TYPE_JAIL:
1351 prr = rule->rr_subject.rs_prison_racct;
1352 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1353 rctl_racct_add_rule(prr->prr_racct, rule);
1354 break;
1355
1356 default:
1357 panic("rctl_rule_add: unknown subject type %d",
1358 rule->rr_subject_type);
1359 }
1360
1361 /*
1362 * Now go through all the processes and add the new rule to the ones
1363 * it applies to.
1364 */
1365 sx_assert(&allproc_lock, SA_LOCKED);
1366 FOREACH_PROC_IN_SYSTEM(p) {
1367 cred = p->p_ucred;
1368 switch (rule->rr_subject_type) {
1369 case RCTL_SUBJECT_TYPE_USER:
1370 if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1371 cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1372 break;
1373 continue;
1374 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1375 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1376 break;
1377 continue;
1378 case RCTL_SUBJECT_TYPE_JAIL:
1379 match = 0;
1380 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1381 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1382 match = 1;
1383 break;
1384 }
1385 }
1386 if (match)
1387 break;
1388 continue;
1389 default:
1390 panic("rctl_rule_add: unknown subject type %d",
1391 rule->rr_subject_type);
1392 }
1393
1394 rctl_racct_add_rule(p->p_racct, rule);
1395 }
1396
1397 return (0);
1398 }
1399
/*
 * "Pre" hook passed to the *_racct_foreach() iterators: takes the racct
 * lock.
 */
static void
rctl_rule_pre_callback(void)
{

	RACCT_LOCK();
}
1406
/*
 * "Post" hook passed to the *_racct_foreach() iterators: drops the racct
 * lock.
 */
static void
rctl_rule_post_callback(void)
{

	RACCT_UNLOCK();
}
1413
/*
 * Per-racct callback for rctl_rule_remove().  'arg2' is the filter rule and
 * 'arg3' points to an int that accumulates the value returned by
 * rctl_racct_remove_rules().  Runs with the racct lock held (asserted).
 */
static void
rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
{
	struct rctl_rule *filter = arg2;
	int *foundp = arg3;

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();

	*foundp += rctl_racct_remove_rules(racct, filter);
}
1427
1428 /*
1429 * Remove all rules that match the filter.
1430 */
1431 int
rctl_rule_remove(struct rctl_rule * filter)1432 rctl_rule_remove(struct rctl_rule *filter)
1433 {
1434 struct proc *p;
1435 int found = 0;
1436
1437 ASSERT_RACCT_ENABLED();
1438
1439 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1440 filter->rr_subject.rs_proc != NULL) {
1441 p = filter->rr_subject.rs_proc;
1442 RACCT_LOCK();
1443 found = rctl_racct_remove_rules(p->p_racct, filter);
1444 RACCT_UNLOCK();
1445 if (found)
1446 return (0);
1447 return (ESRCH);
1448 }
1449
1450 loginclass_racct_foreach(rctl_rule_remove_callback,
1451 rctl_rule_pre_callback, rctl_rule_post_callback,
1452 filter, (void *)&found);
1453 ui_racct_foreach(rctl_rule_remove_callback,
1454 rctl_rule_pre_callback, rctl_rule_post_callback,
1455 filter, (void *)&found);
1456 prison_racct_foreach(rctl_rule_remove_callback,
1457 rctl_rule_pre_callback, rctl_rule_post_callback,
1458 filter, (void *)&found);
1459
1460 sx_assert(&allproc_lock, SA_LOCKED);
1461 RACCT_LOCK();
1462 FOREACH_PROC_IN_SYSTEM(p) {
1463 found += rctl_racct_remove_rules(p->p_racct, filter);
1464 }
1465 RACCT_UNLOCK();
1466
1467 if (found)
1468 return (0);
1469 return (ESRCH);
1470 }
1471
1472 /*
1473 * Appends a rule to the sbuf.
1474 */
1475 static void
rctl_rule_to_sbuf(struct sbuf * sb,const struct rctl_rule * rule)1476 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1477 {
1478 int64_t amount;
1479
1480 ASSERT_RACCT_ENABLED();
1481
1482 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1483
1484 switch (rule->rr_subject_type) {
1485 case RCTL_SUBJECT_TYPE_PROCESS:
1486 if (rule->rr_subject.rs_proc == NULL)
1487 sbuf_putc(sb, ':');
1488 else
1489 sbuf_printf(sb, "%d:",
1490 rule->rr_subject.rs_proc->p_pid);
1491 break;
1492 case RCTL_SUBJECT_TYPE_USER:
1493 if (rule->rr_subject.rs_uip == NULL)
1494 sbuf_putc(sb, ':');
1495 else
1496 sbuf_printf(sb, "%d:",
1497 rule->rr_subject.rs_uip->ui_uid);
1498 break;
1499 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1500 if (rule->rr_subject.rs_loginclass == NULL)
1501 sbuf_putc(sb, ':');
1502 else
1503 sbuf_printf(sb, "%s:",
1504 rule->rr_subject.rs_loginclass->lc_name);
1505 break;
1506 case RCTL_SUBJECT_TYPE_JAIL:
1507 if (rule->rr_subject.rs_prison_racct == NULL)
1508 sbuf_putc(sb, ':');
1509 else
1510 sbuf_printf(sb, "%s:",
1511 rule->rr_subject.rs_prison_racct->prr_name);
1512 break;
1513 default:
1514 panic("rctl_rule_to_sbuf: unknown subject type %d",
1515 rule->rr_subject_type);
1516 }
1517
1518 amount = rule->rr_amount;
1519 if (amount != RCTL_AMOUNT_UNDEFINED &&
1520 RACCT_IS_IN_MILLIONS(rule->rr_resource))
1521 amount /= 1000000;
1522
1523 sbuf_printf(sb, "%s:%s=%jd",
1524 rctl_resource_name(rule->rr_resource),
1525 rctl_action_name(rule->rr_action),
1526 amount);
1527
1528 if (rule->rr_per != rule->rr_subject_type)
1529 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1530 }
1531
1532 /*
1533 * Routine used by RCTL syscalls to read in input string.
1534 */
1535 static int
rctl_read_inbuf(char ** inputstr,const char * inbufp,size_t inbuflen)1536 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1537 {
1538 char *str;
1539 int error;
1540
1541 ASSERT_RACCT_ENABLED();
1542
1543 if (inbuflen <= 0)
1544 return (EINVAL);
1545 if (inbuflen > RCTL_MAX_INBUFSIZE)
1546 return (E2BIG);
1547
1548 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1549 error = copyinstr(inbufp, str, inbuflen, NULL);
1550 if (error != 0) {
1551 free(str, M_RCTL);
1552 return (error);
1553 }
1554
1555 *inputstr = str;
1556
1557 return (0);
1558 }
1559
1560 /*
1561 * Routine used by RCTL syscalls to write out output string.
1562 */
1563 static int
rctl_write_outbuf(struct sbuf * outputsbuf,char * outbufp,size_t outbuflen)1564 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1565 {
1566 int error;
1567
1568 ASSERT_RACCT_ENABLED();
1569
1570 if (outputsbuf == NULL)
1571 return (0);
1572
1573 sbuf_finish(outputsbuf);
1574 if (outbuflen < sbuf_len(outputsbuf) + 1) {
1575 sbuf_delete(outputsbuf);
1576 return (ERANGE);
1577 }
1578 error = copyout(sbuf_data(outputsbuf), outbufp,
1579 sbuf_len(outputsbuf) + 1);
1580 sbuf_delete(outputsbuf);
1581 return (error);
1582 }
1583
1584 static struct sbuf *
rctl_racct_to_sbuf(struct racct * racct,int sloppy)1585 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1586 {
1587 struct sbuf *sb;
1588 int64_t amount;
1589 int i;
1590
1591 ASSERT_RACCT_ENABLED();
1592
1593 sb = sbuf_new_auto();
1594 for (i = 0; i <= RACCT_MAX; i++) {
1595 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1596 continue;
1597 RACCT_LOCK();
1598 amount = racct->r_resources[i];
1599 RACCT_UNLOCK();
1600 if (RACCT_IS_IN_MILLIONS(i))
1601 amount /= 1000000;
1602 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1603 }
1604 sbuf_setpos(sb, sbuf_len(sb) - 1);
1605 return (sb);
1606 }
1607
1608 int
sys_rctl_get_racct(struct thread * td,struct rctl_get_racct_args * uap)1609 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1610 {
1611 struct rctl_rule *filter;
1612 struct sbuf *outputsbuf = NULL;
1613 struct proc *p;
1614 struct uidinfo *uip;
1615 struct loginclass *lc;
1616 struct prison_racct *prr;
1617 char *inputstr;
1618 int error;
1619
1620 if (!racct_enable)
1621 return (ENOSYS);
1622
1623 error = priv_check(td, PRIV_RCTL_GET_RACCT);
1624 if (error != 0)
1625 return (error);
1626
1627 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1628 if (error != 0)
1629 return (error);
1630
1631 sx_slock(&allproc_lock);
1632 error = rctl_string_to_rule(inputstr, &filter);
1633 free(inputstr, M_RCTL);
1634 if (error != 0) {
1635 sx_sunlock(&allproc_lock);
1636 return (error);
1637 }
1638
1639 switch (filter->rr_subject_type) {
1640 case RCTL_SUBJECT_TYPE_PROCESS:
1641 p = filter->rr_subject.rs_proc;
1642 if (p == NULL) {
1643 error = EINVAL;
1644 goto out;
1645 }
1646 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1647 break;
1648 case RCTL_SUBJECT_TYPE_USER:
1649 uip = filter->rr_subject.rs_uip;
1650 if (uip == NULL) {
1651 error = EINVAL;
1652 goto out;
1653 }
1654 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1655 break;
1656 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1657 lc = filter->rr_subject.rs_loginclass;
1658 if (lc == NULL) {
1659 error = EINVAL;
1660 goto out;
1661 }
1662 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1663 break;
1664 case RCTL_SUBJECT_TYPE_JAIL:
1665 prr = filter->rr_subject.rs_prison_racct;
1666 if (prr == NULL) {
1667 error = EINVAL;
1668 goto out;
1669 }
1670 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1671 break;
1672 default:
1673 error = EINVAL;
1674 }
1675 out:
1676 rctl_rule_release(filter);
1677 sx_sunlock(&allproc_lock);
1678 if (error != 0)
1679 return (error);
1680
1681 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1682
1683 return (error);
1684 }
1685
1686 static void
rctl_get_rules_callback(struct racct * racct,void * arg2,void * arg3)1687 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1688 {
1689 struct rctl_rule *filter = (struct rctl_rule *)arg2;
1690 struct rctl_rule_link *link;
1691 struct sbuf *sb = (struct sbuf *)arg3;
1692
1693 ASSERT_RACCT_ENABLED();
1694 RACCT_LOCK_ASSERT();
1695
1696 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1697 if (!rctl_rule_matches(link->rrl_rule, filter))
1698 continue;
1699 rctl_rule_to_sbuf(sb, link->rrl_rule);
1700 sbuf_putc(sb, ',');
1701 }
1702 }
1703
1704 int
sys_rctl_get_rules(struct thread * td,struct rctl_get_rules_args * uap)1705 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1706 {
1707 struct sbuf *sb;
1708 struct rctl_rule *filter;
1709 struct rctl_rule_link *link;
1710 struct proc *p;
1711 char *inputstr, *buf;
1712 size_t bufsize;
1713 int error;
1714
1715 if (!racct_enable)
1716 return (ENOSYS);
1717
1718 error = priv_check(td, PRIV_RCTL_GET_RULES);
1719 if (error != 0)
1720 return (error);
1721
1722 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1723 if (error != 0)
1724 return (error);
1725
1726 sx_slock(&allproc_lock);
1727 error = rctl_string_to_rule(inputstr, &filter);
1728 free(inputstr, M_RCTL);
1729 if (error != 0) {
1730 sx_sunlock(&allproc_lock);
1731 return (error);
1732 }
1733
1734 bufsize = uap->outbuflen;
1735 if (bufsize > rctl_maxbufsize) {
1736 sx_sunlock(&allproc_lock);
1737 return (E2BIG);
1738 }
1739
1740 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1741 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1742 KASSERT(sb != NULL, ("sbuf_new failed"));
1743
1744 FOREACH_PROC_IN_SYSTEM(p) {
1745 RACCT_LOCK();
1746 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1747 /*
1748 * Non-process rules will be added to the buffer later.
1749 * Adding them here would result in duplicated output.
1750 */
1751 if (link->rrl_rule->rr_subject_type !=
1752 RCTL_SUBJECT_TYPE_PROCESS)
1753 continue;
1754 if (!rctl_rule_matches(link->rrl_rule, filter))
1755 continue;
1756 rctl_rule_to_sbuf(sb, link->rrl_rule);
1757 sbuf_putc(sb, ',');
1758 }
1759 RACCT_UNLOCK();
1760 }
1761
1762 loginclass_racct_foreach(rctl_get_rules_callback,
1763 rctl_rule_pre_callback, rctl_rule_post_callback,
1764 filter, sb);
1765 ui_racct_foreach(rctl_get_rules_callback,
1766 rctl_rule_pre_callback, rctl_rule_post_callback,
1767 filter, sb);
1768 prison_racct_foreach(rctl_get_rules_callback,
1769 rctl_rule_pre_callback, rctl_rule_post_callback,
1770 filter, sb);
1771 if (sbuf_error(sb) == ENOMEM) {
1772 error = ERANGE;
1773 goto out;
1774 }
1775
1776 /*
1777 * Remove trailing ",".
1778 */
1779 if (sbuf_len(sb) > 0)
1780 sbuf_setpos(sb, sbuf_len(sb) - 1);
1781
1782 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1783 out:
1784 rctl_rule_release(filter);
1785 sx_sunlock(&allproc_lock);
1786 free(buf, M_RCTL);
1787 return (error);
1788 }
1789
1790 int
sys_rctl_get_limits(struct thread * td,struct rctl_get_limits_args * uap)1791 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1792 {
1793 struct sbuf *sb;
1794 struct rctl_rule *filter;
1795 struct rctl_rule_link *link;
1796 char *inputstr, *buf;
1797 size_t bufsize;
1798 int error;
1799
1800 if (!racct_enable)
1801 return (ENOSYS);
1802
1803 error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1804 if (error != 0)
1805 return (error);
1806
1807 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1808 if (error != 0)
1809 return (error);
1810
1811 sx_slock(&allproc_lock);
1812 error = rctl_string_to_rule(inputstr, &filter);
1813 free(inputstr, M_RCTL);
1814 if (error != 0) {
1815 sx_sunlock(&allproc_lock);
1816 return (error);
1817 }
1818
1819 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1820 rctl_rule_release(filter);
1821 sx_sunlock(&allproc_lock);
1822 return (EINVAL);
1823 }
1824 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1825 rctl_rule_release(filter);
1826 sx_sunlock(&allproc_lock);
1827 return (EOPNOTSUPP);
1828 }
1829 if (filter->rr_subject.rs_proc == NULL) {
1830 rctl_rule_release(filter);
1831 sx_sunlock(&allproc_lock);
1832 return (EINVAL);
1833 }
1834
1835 bufsize = uap->outbuflen;
1836 if (bufsize > rctl_maxbufsize) {
1837 rctl_rule_release(filter);
1838 sx_sunlock(&allproc_lock);
1839 return (E2BIG);
1840 }
1841
1842 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1843 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1844 KASSERT(sb != NULL, ("sbuf_new failed"));
1845
1846 RACCT_LOCK();
1847 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1848 rrl_next) {
1849 rctl_rule_to_sbuf(sb, link->rrl_rule);
1850 sbuf_putc(sb, ',');
1851 }
1852 RACCT_UNLOCK();
1853 if (sbuf_error(sb) == ENOMEM) {
1854 error = ERANGE;
1855 sbuf_delete(sb);
1856 goto out;
1857 }
1858
1859 /*
1860 * Remove trailing ",".
1861 */
1862 if (sbuf_len(sb) > 0)
1863 sbuf_setpos(sb, sbuf_len(sb) - 1);
1864
1865 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1866 out:
1867 rctl_rule_release(filter);
1868 sx_sunlock(&allproc_lock);
1869 free(buf, M_RCTL);
1870 return (error);
1871 }
1872
1873 int
sys_rctl_add_rule(struct thread * td,struct rctl_add_rule_args * uap)1874 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1875 {
1876 struct rctl_rule *rule;
1877 char *inputstr;
1878 int error;
1879
1880 if (!racct_enable)
1881 return (ENOSYS);
1882
1883 error = priv_check(td, PRIV_RCTL_ADD_RULE);
1884 if (error != 0)
1885 return (error);
1886
1887 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1888 if (error != 0)
1889 return (error);
1890
1891 sx_slock(&allproc_lock);
1892 error = rctl_string_to_rule(inputstr, &rule);
1893 free(inputstr, M_RCTL);
1894 if (error != 0) {
1895 sx_sunlock(&allproc_lock);
1896 return (error);
1897 }
1898 /*
1899 * The 'per' part of a rule is optional.
1900 */
1901 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1902 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1903 rule->rr_per = rule->rr_subject_type;
1904
1905 if (!rctl_rule_fully_specified(rule)) {
1906 error = EINVAL;
1907 goto out;
1908 }
1909
1910 error = rctl_rule_add(rule);
1911
1912 out:
1913 rctl_rule_release(rule);
1914 sx_sunlock(&allproc_lock);
1915 return (error);
1916 }
1917
1918 int
sys_rctl_remove_rule(struct thread * td,struct rctl_remove_rule_args * uap)1919 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1920 {
1921 struct rctl_rule *filter;
1922 char *inputstr;
1923 int error;
1924
1925 if (!racct_enable)
1926 return (ENOSYS);
1927
1928 error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1929 if (error != 0)
1930 return (error);
1931
1932 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1933 if (error != 0)
1934 return (error);
1935
1936 sx_slock(&allproc_lock);
1937 error = rctl_string_to_rule(inputstr, &filter);
1938 free(inputstr, M_RCTL);
1939 if (error != 0) {
1940 sx_sunlock(&allproc_lock);
1941 return (error);
1942 }
1943
1944 error = rctl_rule_remove(filter);
1945 rctl_rule_release(filter);
1946 sx_sunlock(&allproc_lock);
1947
1948 return (error);
1949 }
1950
1951 /*
1952 * Update RCTL rule list after credential change.
1953 */
1954 void
rctl_proc_ucred_changed(struct proc * p,struct ucred * newcred)1955 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1956 {
1957 LIST_HEAD(, rctl_rule_link) newrules;
1958 struct rctl_rule_link *link, *newlink;
1959 struct uidinfo *newuip;
1960 struct loginclass *newlc;
1961 struct prison_racct *newprr;
1962 int rulecnt, i;
1963
1964 if (!racct_enable)
1965 return;
1966
1967 PROC_LOCK_ASSERT(p, MA_NOTOWNED);
1968
1969 newuip = newcred->cr_ruidinfo;
1970 newlc = newcred->cr_loginclass;
1971 newprr = newcred->cr_prison->pr_prison_racct;
1972
1973 LIST_INIT(&newrules);
1974
1975 again:
1976 /*
1977 * First, count the rules that apply to the process with new
1978 * credentials.
1979 */
1980 rulecnt = 0;
1981 RACCT_LOCK();
1982 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1983 if (link->rrl_rule->rr_subject_type ==
1984 RCTL_SUBJECT_TYPE_PROCESS)
1985 rulecnt++;
1986 }
1987 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1988 rulecnt++;
1989 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1990 rulecnt++;
1991 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1992 rulecnt++;
1993 RACCT_UNLOCK();
1994
1995 /*
1996 * Create temporary list. We've dropped the rctl_lock in order
1997 * to use M_WAITOK.
1998 */
1999 for (i = 0; i < rulecnt; i++) {
2000 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
2001 newlink->rrl_rule = NULL;
2002 newlink->rrl_exceeded = 0;
2003 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
2004 }
2005
2006 newlink = LIST_FIRST(&newrules);
2007
2008 /*
2009 * Assign rules to the newly allocated list entries.
2010 */
2011 RACCT_LOCK();
2012 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
2013 if (link->rrl_rule->rr_subject_type ==
2014 RCTL_SUBJECT_TYPE_PROCESS) {
2015 if (newlink == NULL)
2016 goto goaround;
2017 rctl_rule_acquire(link->rrl_rule);
2018 newlink->rrl_rule = link->rrl_rule;
2019 newlink->rrl_exceeded = link->rrl_exceeded;
2020 newlink = LIST_NEXT(newlink, rrl_next);
2021 rulecnt--;
2022 }
2023 }
2024
2025 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
2026 if (newlink == NULL)
2027 goto goaround;
2028 rctl_rule_acquire(link->rrl_rule);
2029 newlink->rrl_rule = link->rrl_rule;
2030 newlink->rrl_exceeded = link->rrl_exceeded;
2031 newlink = LIST_NEXT(newlink, rrl_next);
2032 rulecnt--;
2033 }
2034
2035 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
2036 if (newlink == NULL)
2037 goto goaround;
2038 rctl_rule_acquire(link->rrl_rule);
2039 newlink->rrl_rule = link->rrl_rule;
2040 newlink->rrl_exceeded = link->rrl_exceeded;
2041 newlink = LIST_NEXT(newlink, rrl_next);
2042 rulecnt--;
2043 }
2044
2045 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
2046 if (newlink == NULL)
2047 goto goaround;
2048 rctl_rule_acquire(link->rrl_rule);
2049 newlink->rrl_rule = link->rrl_rule;
2050 newlink->rrl_exceeded = link->rrl_exceeded;
2051 newlink = LIST_NEXT(newlink, rrl_next);
2052 rulecnt--;
2053 }
2054
2055 if (rulecnt == 0) {
2056 /*
2057 * Free the old rule list.
2058 */
2059 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
2060 link = LIST_FIRST(&p->p_racct->r_rule_links);
2061 LIST_REMOVE(link, rrl_next);
2062 rctl_rule_release(link->rrl_rule);
2063 uma_zfree(rctl_rule_link_zone, link);
2064 }
2065
2066 /*
2067 * Replace lists and we're done.
2068 *
2069 * XXX: Is there any way to switch list heads instead
2070 * of iterating here?
2071 */
2072 while (!LIST_EMPTY(&newrules)) {
2073 newlink = LIST_FIRST(&newrules);
2074 LIST_REMOVE(newlink, rrl_next);
2075 LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
2076 newlink, rrl_next);
2077 }
2078
2079 RACCT_UNLOCK();
2080
2081 return;
2082 }
2083
2084 goaround:
2085 RACCT_UNLOCK();
2086
2087 /*
2088 * Rule list changed while we were not holding the rctl_lock.
2089 * Free the new list and try again.
2090 */
2091 while (!LIST_EMPTY(&newrules)) {
2092 newlink = LIST_FIRST(&newrules);
2093 LIST_REMOVE(newlink, rrl_next);
2094 if (newlink->rrl_rule != NULL)
2095 rctl_rule_release(newlink->rrl_rule);
2096 uma_zfree(rctl_rule_link_zone, newlink);
2097 }
2098
2099 goto again;
2100 }
2101
2102 /*
2103 * Assign RCTL rules to the newly created process.
2104 */
2105 int
rctl_proc_fork(struct proc * parent,struct proc * child)2106 rctl_proc_fork(struct proc *parent, struct proc *child)
2107 {
2108 struct rctl_rule *rule;
2109 struct rctl_rule_link *link;
2110 int error;
2111
2112 ASSERT_RACCT_ENABLED();
2113 RACCT_LOCK_ASSERT();
2114 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
2115
2116 LIST_INIT(&child->p_racct->r_rule_links);
2117
2118 /*
2119 * Go through limits applicable to the parent and assign them
2120 * to the child. Rules with 'process' subject have to be duplicated
2121 * in order to make their rr_subject point to the new process.
2122 */
2123 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
2124 if (link->rrl_rule->rr_subject_type ==
2125 RCTL_SUBJECT_TYPE_PROCESS) {
2126 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
2127 if (rule == NULL)
2128 goto fail;
2129 KASSERT(rule->rr_subject.rs_proc == parent,
2130 ("rule->rr_subject.rs_proc != parent"));
2131 rule->rr_subject.rs_proc = child;
2132 error = rctl_racct_add_rule_locked(child->p_racct,
2133 rule);
2134 rctl_rule_release(rule);
2135 if (error != 0)
2136 goto fail;
2137 } else {
2138 error = rctl_racct_add_rule_locked(child->p_racct,
2139 link->rrl_rule);
2140 if (error != 0)
2141 goto fail;
2142 }
2143 }
2144
2145 return (0);
2146
2147 fail:
2148 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
2149 link = LIST_FIRST(&child->p_racct->r_rule_links);
2150 LIST_REMOVE(link, rrl_next);
2151 rctl_rule_release(link->rrl_rule);
2152 uma_zfree(rctl_rule_link_zone, link);
2153 }
2154
2155 return (EAGAIN);
2156 }
2157
2158 /*
2159 * Release rules attached to the racct.
2160 */
2161 void
rctl_racct_release(struct racct * racct)2162 rctl_racct_release(struct racct *racct)
2163 {
2164 struct rctl_rule_link *link;
2165
2166 ASSERT_RACCT_ENABLED();
2167 RACCT_LOCK_ASSERT();
2168
2169 while (!LIST_EMPTY(&racct->r_rule_links)) {
2170 link = LIST_FIRST(&racct->r_rule_links);
2171 LIST_REMOVE(link, rrl_next);
2172 rctl_rule_release(link->rrl_rule);
2173 uma_zfree(rctl_rule_link_zone, link);
2174 }
2175 }
2176
2177 static void
rctl_init(void)2178 rctl_init(void)
2179 {
2180
2181 if (!racct_enable)
2182 return;
2183
2184 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
2185 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2186 rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
2187 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
2188 UMA_ALIGN_PTR, 0);
2189
2190 /*
2191 * Set default values, making sure not to overwrite the ones
2192 * fetched from tunables. Most of those could be set at the
2193 * declaration, except for the rctl_throttle_max - we cannot
2194 * set it there due to hz not being compile time constant.
2195 */
2196 if (rctl_throttle_min < 1)
2197 rctl_throttle_min = 1;
2198 if (rctl_throttle_max < rctl_throttle_min)
2199 rctl_throttle_max = 2 * hz;
2200 if (rctl_throttle_pct < 0)
2201 rctl_throttle_pct = 100;
2202 if (rctl_throttle_pct2 < 0)
2203 rctl_throttle_pct2 = 100;
2204 }
2205
2206 #else /* !RCTL */
2207
2208 #include <sys/types.h>
2209 #include <sys/errno.h>
2210 #include <sys/sysproto.h>
2211
/*
 * Stub for kernels built without RCTL: the system call always fails with
 * ENOSYS and ignores its arguments.
 */
int
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

	return (ENOSYS);
}
2218
/*
 * Stub for kernels built without RCTL: the system call always fails with
 * ENOSYS and ignores its arguments.
 */
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

	return (ENOSYS);
}
2225
/*
 * Stub for kernels built without RCTL: the system call always fails with
 * ENOSYS and ignores its arguments.
 */
int
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

	return (ENOSYS);
}
2232
/*
 * Stub for kernels built without RCTL: the system call always fails with
 * ENOSYS and ignores its arguments.
 */
int
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

	return (ENOSYS);
}
2239
/*
 * Stub for kernels built without RCTL: the system call always fails with
 * ENOSYS and ignores its arguments.
 */
int
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

	return (ENOSYS);
}
2246
2247 #endif /* RCTL */
2248