1 /*
2 * ----------------------------------------------------------------------------
3 * "THE BEER-WARE LICENSE" (Revision 42):
4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
5 * can do whatever you want with this stuff. If we meet some day, and you think
6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
7 * ----------------------------------------------------------------------------
8 *
9 */
10 /*-
11 * Copyright (c) 2006 Victor Balada Diaz <victor@bsdes.net>
12 * All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36
37 /*
38 * $FreeBSD: src/sys/kern/kern_jail.c,v 1.6.2.3 2001/08/17 01:00:26 rwatson Exp $
39 */
40
41 #include "opt_inet6.h"
42
43 #include <sys/param.h>
44 #include <sys/types.h>
45 #include <sys/kernel.h>
46 #include <sys/systm.h>
47 #include <sys/errno.h>
48 #include <sys/sysmsg.h>
49 #include <sys/malloc.h>
50 #include <sys/nlookup.h>
51 #include <sys/namecache.h>
52 #include <sys/proc.h>
53 #include <sys/caps.h>
54 #include <sys/jail.h>
55 #include <sys/socket.h>
56 #include <sys/sysctl.h>
57 #include <sys/kern_syscall.h>
58 #include <net/if.h>
59 #include <netinet/in.h>
60 #include <netinet6/in6_var.h>
61
/* Forward declarations of file-local helpers defined further down. */
static struct prison *prison_find(int);
static void prison_ipcache_init(struct prison *);

/*
 * Capability template.  sys_jail() copies this into pr_caps of every
 * newly created prison; the jail.defaults.* sysctls below each expose one
 * PRISON_CAP_* bit of it.
 */
__read_mostly static prison_cap_t prison_default_caps;

/* Malloc type used for prisons, their IP entries and sysctl contexts. */
MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");

/* Root of the "jail" sysctl tree. */
SYSCTL_NODE(, OID_AUTO, jail, CTLFLAG_RW, 0,
    "All jails settings");

/* "jail.defaults": parent of the default-capability knobs. */
SYSCTL_NODE(_jail, OID_AUTO, defaults, CTLFLAG_RW, 0,
    "Default options for jails");

/*
 * Optional reference-count debugging: while prison_debug is positive,
 * prison_hold() and prison_free() decrement it and print a backtrace.
 */
/*#define PRISON_DEBUG*/
#ifdef PRISON_DEBUG
__read_mostly static int prison_debug;
SYSCTL_INT(_jail, OID_AUTO, debug, CTLFLAG_RW, &prison_debug, 0,
    "Debug prison refs");
#endif

/*
 * Default capability bits, one sysctl per PRISON_CAP_* flag of
 * prison_default_caps.
 */
SYSCTL_BIT64(_jail_defaults, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
    &prison_default_caps, 1, PRISON_CAP_SYS_SET_HOSTNAME,
    "Processes in jail can set their hostnames");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_NET_UNIXIPROUTE,
    "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_SYS_SYSVIPC,
    "Processes in jail can use System V IPC primitives");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, chflags_allowed, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_VFS_CHFLAGS,
    "Processes in jail can alter system file flags");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_NET_RAW_SOCKETS,
    "Process in jail can create raw sockets");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, allow_listen_override, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_NET_LISTEN_OVERRIDE,
    "Process in jail can override host wildcard listen");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_nullfs, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_NULLFS,
    "Process in jail can mount nullfs(5) filesystems");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_tmpfs, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_TMPFS,
    "Process in jail can mount tmpfs(5) filesystems");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_devfs, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_DEVFS,
    "Process in jail can mount devfs(5) filesystems");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_procfs, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_PROCFS,
    "Process in jail can mount procfs(5) filesystems");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_fusefs, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_FUSEFS,
    "Process in jail can mount fuse filesystems");

/* Most recently assigned prison id; starting point for the next search. */
static int lastprid = 0;
/* Number of prisons currently linked on allprison. */
static int prisoncount = 0;

/*
 * Global jail lock.  Taken around accesses to allprison and to the
 * per-prison IP lists; created with LK_CANRECURSE because callers that
 * already hold it call helpers that take it again.
 */
static struct lock jail_lock =
    LOCK_INITIALIZER("jail", 0, LK_CANRECURSE);

/* List of every prison known to the system. */
LIST_HEAD(prisonlist, prison);
static struct prisonlist allprison = LIST_HEAD_INITIALIZER(&allprison);
134
135 static int
kern_jail_attach(int jid)136 kern_jail_attach(int jid)
137 {
138 struct proc *p = curthread->td_proc;
139 struct prison *pr;
140 struct ucred *cr;
141 int error;
142
143 pr = prison_find(jid);
144 if (pr == NULL)
145 return(EINVAL);
146
147 error = kern_chroot(&pr->pr_root);
148 if (error)
149 return(error);
150
151 prison_hold(pr);
152 lwkt_gettoken(&p->p_token);
153 cr = cratom_proc(p);
154 cr->cr_prison = pr;
155 p->p_flags |= P_JAILED;
156 caps_set_locked(p, SYSCAP_RESTRICTEDROOT, __SYSCAP_ALL);
157 lwkt_reltoken(&p->p_token);
158
159 return(0);
160 }
161
162 static int
assign_prison_id(struct prison * pr)163 assign_prison_id(struct prison *pr)
164 {
165 int tryprid;
166 struct prison *tpr;
167
168 tryprid = lastprid + 1;
169 if (tryprid == JAIL_MAX)
170 tryprid = 1;
171
172 lockmgr(&jail_lock, LK_EXCLUSIVE);
173 next:
174 LIST_FOREACH(tpr, &allprison, pr_list) {
175 if (tpr->pr_id != tryprid)
176 continue;
177 tryprid++;
178 if (tryprid == JAIL_MAX) {
179 lockmgr(&jail_lock, LK_RELEASE);
180 return (ERANGE);
181 }
182 goto next;
183 }
184 pr->pr_id = lastprid = tryprid;
185 lockmgr(&jail_lock, LK_RELEASE);
186
187 return (0);
188 }
189
190 static int
kern_jail(struct prison * pr,struct jail * j)191 kern_jail(struct prison *pr, struct jail *j)
192 {
193 int error;
194 struct nlookupdata nd;
195
196 error = nlookup_init(&nd, j->path, UIO_USERSPACE, NLC_FOLLOW);
197 if (error) {
198 nlookup_done(&nd);
199 return (error);
200 }
201 error = nlookup(&nd);
202 if (error) {
203 nlookup_done(&nd);
204 return (error);
205 }
206 cache_copy(&nd.nl_nch, &pr->pr_root);
207
208 varsymset_init(&pr->pr_varsymset, NULL);
209 prison_ipcache_init(pr);
210
211 error = assign_prison_id(pr);
212 if (error) {
213 varsymset_clean(&pr->pr_varsymset);
214 nlookup_done(&nd);
215 return (error);
216 }
217
218 lockmgr(&jail_lock, LK_EXCLUSIVE);
219 LIST_INSERT_HEAD(&allprison, pr, pr_list);
220 ++prisoncount;
221 lockmgr(&jail_lock, LK_RELEASE);
222
223 error = prison_sysctl_create(pr);
224 if (error)
225 goto out;
226
227 error = kern_jail_attach(pr->pr_id);
228 if (error)
229 goto out2;
230
231 nlookup_done(&nd);
232 return 0;
233
234 out2:
235 prison_sysctl_done(pr);
236
237 out:
238 lockmgr(&jail_lock, LK_EXCLUSIVE);
239 LIST_REMOVE(pr, pr_list);
240 --prisoncount;
241 lockmgr(&jail_lock, LK_RELEASE);
242 varsymset_clean(&pr->pr_varsymset);
243 nlookup_done(&nd);
244 return (error);
245 }
246
247 /*
248 * jail()
249 *
250 * jail_args(syscallarg(struct jail *) jail)
251 *
252 * MPALMOSTSAFE
253 */
254 int
sys_jail(struct sysmsg * sysmsg,const struct jail_args * uap)255 sys_jail(struct sysmsg *sysmsg, const struct jail_args *uap)
256 {
257 struct prison *pr;
258 struct jail_ip_storage *jip;
259 struct jail j;
260 int error;
261 uint32_t jversion;
262
263 sysmsg->sysmsg_result = -1;
264
265 error = caps_priv_check_self(SYSCAP_NOJAIL_CREATE);
266 if (error)
267 return (error);
268
269 error = copyin(uap->jail, &jversion, sizeof(jversion));
270 if (error)
271 return (error);
272
273 pr = kmalloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
274 SLIST_INIT(&pr->pr_ips);
275 lockmgr(&jail_lock, LK_EXCLUSIVE);
276
277 switch (jversion) {
278 case 0:
279 /* Single IPv4 jails. */
280 {
281 struct jail_v0 jv0;
282 struct sockaddr_in ip4addr;
283
284 error = copyin(uap->jail, &jv0, sizeof(jv0));
285 if (error)
286 goto out;
287
288 j.path = jv0.path;
289 j.hostname = jv0.hostname;
290
291 jip = kmalloc(sizeof(*jip), M_PRISON, M_WAITOK | M_ZERO);
292 ip4addr.sin_family = AF_INET;
293 ip4addr.sin_addr.s_addr = htonl(jv0.ip_number);
294 memcpy(&jip->ip, &ip4addr, sizeof(ip4addr));
295 SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries);
296 break;
297 }
298
299 case 1:
300 /*
301 * DragonFly multi noIP/IPv4/IPv6 jails
302 *
303 * NOTE: This version is unsupported by FreeBSD
304 * (which uses version 2 instead).
305 */
306
307 error = copyin(uap->jail, &j, sizeof(j));
308 if (error)
309 goto out;
310
311 for (int i = 0; i < j.n_ips; i++) {
312 jip = kmalloc(sizeof(*jip), M_PRISON,
313 M_WAITOK | M_ZERO);
314 SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries);
315 error = copyin(&j.ips[i], &jip->ip,
316 sizeof(struct sockaddr_storage));
317 if (error)
318 goto out;
319 }
320 break;
321 default:
322 error = EINVAL;
323 goto out;
324 }
325
326 error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
327 if (error)
328 goto out;
329
330 /* Use default capabilities as a template */
331 pr->pr_caps = prison_default_caps;
332
333 error = kern_jail(pr, &j);
334 if (error)
335 goto out;
336
337 sysmsg->sysmsg_result = pr->pr_id;
338 lockmgr(&jail_lock, LK_RELEASE);
339
340 return (0);
341
342 out:
343 /* Delete all ips */
344 while (!SLIST_EMPTY(&pr->pr_ips)) {
345 jip = SLIST_FIRST(&pr->pr_ips);
346 SLIST_REMOVE_HEAD(&pr->pr_ips, entries);
347 kfree(jip, M_PRISON);
348 }
349 lockmgr(&jail_lock, LK_RELEASE);
350 kfree(pr, M_PRISON);
351
352 return (error);
353 }
354
355 /*
356 * int jail_attach(int jid);
357 *
358 * MPALMOSTSAFE
359 */
360 int
sys_jail_attach(struct sysmsg * sysmsg,const struct jail_attach_args * uap)361 sys_jail_attach(struct sysmsg *sysmsg, const struct jail_attach_args *uap)
362 {
363 int error;
364
365 error = caps_priv_check_self(SYSCAP_NOJAIL_ATTACH);
366 if (error)
367 return(error);
368 lockmgr(&jail_lock, LK_EXCLUSIVE);
369 error = kern_jail_attach(uap->jid);
370 lockmgr(&jail_lock, LK_RELEASE);
371 return (error);
372 }
373
374 static void
prison_ipcache_init(struct prison * pr)375 prison_ipcache_init(struct prison *pr)
376 {
377 struct jail_ip_storage *jis;
378 struct sockaddr_in *ip4;
379 struct sockaddr_in6 *ip6;
380
381 lockmgr(&jail_lock, LK_EXCLUSIVE);
382 SLIST_FOREACH(jis, &pr->pr_ips, entries) {
383 switch (jis->ip.ss_family) {
384 case AF_INET:
385 ip4 = (struct sockaddr_in *)&jis->ip;
386 if ((ntohl(ip4->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) ==
387 IN_LOOPBACKNET) {
388 /* loopback address */
389 if (pr->local_ip4 == NULL)
390 pr->local_ip4 = ip4;
391 } else {
392 /* public address */
393 if (pr->nonlocal_ip4 == NULL)
394 pr->nonlocal_ip4 = ip4;
395 }
396 break;
397
398 case AF_INET6:
399 ip6 = (struct sockaddr_in6 *)&jis->ip;
400 if (IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr)) {
401 /* loopback address */
402 if (pr->local_ip6 == NULL)
403 pr->local_ip6 = ip6;
404 } else {
405 /* public address */
406 if (pr->nonlocal_ip6 == NULL)
407 pr->nonlocal_ip6 = ip6;
408 }
409 break;
410 }
411 }
412 lockmgr(&jail_lock, LK_RELEASE);
413 }
414
415 /*
416 * Changes INADDR_LOOPBACK for a valid jail address.
417 * ip is in network byte order.
418 * Returns 1 if the ip is among jail valid ips.
419 * Returns 0 if is not among jail valid ips or
420 * if couldn't replace INADDR_LOOPBACK for a valid
421 * IP.
422 */
423 int
prison_replace_wildcards(struct thread * td,struct sockaddr * ip)424 prison_replace_wildcards(struct thread *td, struct sockaddr *ip)
425 {
426 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
427 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
428 struct prison *pr;
429
430 if (td->td_proc == NULL || td->td_ucred == NULL)
431 return (1);
432 if ((pr = td->td_ucred->cr_prison) == NULL)
433 return (1);
434
435 if ((ip->sa_family == AF_INET &&
436 ip4->sin_addr.s_addr == htonl(INADDR_ANY)) ||
437 (ip->sa_family == AF_INET6 &&
438 IN6_IS_ADDR_UNSPECIFIED(&ip6->sin6_addr)))
439 return (1);
440 if ((ip->sa_family == AF_INET &&
441 ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) ||
442 (ip->sa_family == AF_INET6 &&
443 IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) {
444 if (!prison_get_local(pr, ip->sa_family, ip) &&
445 !prison_get_nonlocal(pr, ip->sa_family, ip))
446 return(0);
447 else
448 return(1);
449 }
450 if (jailed_ip(pr, ip))
451 return(1);
452 return(0);
453 }
454
455 /*
456 * Convert the localhost IP to the actual jail IP
457 */
458 int
prison_remote_ip(struct thread * td,struct sockaddr * ip)459 prison_remote_ip(struct thread *td, struct sockaddr *ip)
460 {
461 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
462 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
463 struct prison *pr;
464
465 if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL)
466 return(1);
467 if ((pr = td->td_ucred->cr_prison) == NULL)
468 return(1);
469 if ((ip->sa_family == AF_INET &&
470 ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) ||
471 (ip->sa_family == AF_INET6 &&
472 IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) {
473 if (!prison_get_local(pr, ip->sa_family, ip) &&
474 !prison_get_nonlocal(pr, ip->sa_family, ip))
475 return(0);
476 else
477 return(1);
478 }
479 return(1);
480 }
481
482 /*
483 * Convert the jail IP back to localhost
484 *
485 * Used by getsockname() and getpeername() to convert the in-jail loopback
486 * address back to LOCALHOST. For example, 127.0.0.2 -> 127.0.0.1. The
487 * idea is that programs running inside the jail should be unaware that they
488 * are using a different loopback IP than the host.
489 */
490 __read_mostly static struct in6_addr sin6_localhost = IN6ADDR_LOOPBACK_INIT;
491
492 int
prison_local_ip(struct thread * td,struct sockaddr * ip)493 prison_local_ip(struct thread *td, struct sockaddr *ip)
494 {
495 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
496 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
497 struct prison *pr;
498
499 if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL)
500 return(1);
501 if ((pr = td->td_ucred->cr_prison) == NULL)
502 return(1);
503 if (ip->sa_family == AF_INET && pr->local_ip4 &&
504 pr->local_ip4->sin_addr.s_addr == ip4->sin_addr.s_addr &&
505 pr->local_ip4->sin_addr.s_addr != htonl(INADDR_LOOPBACK)) {
506 ip4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
507 return(0);
508 }
509 if (ip->sa_family == AF_INET6 && pr->local_ip6 &&
510 bcmp(&pr->local_ip6->sin6_addr, &ip6->sin6_addr,
511 sizeof(ip6->sin6_addr)) == 0) {
512 bcopy(&sin6_localhost, &ip6->sin6_addr, sizeof(ip6->sin6_addr));
513 return(0);
514 }
515 return(1);
516 }
517
518 /*
519 * Prison get non loopback ip:
520 * - af is the address family of the ip we want (AF_INET|AF_INET6).
521 * - If ip != NULL, put the first IP address that is not a loopback address
522 * into *ip.
523 *
524 * ip is in network by order and we don't touch it unless we find a valid ip.
525 * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
526 * or NULL. This struct may not be modified.
527 */
528 struct sockaddr *
prison_get_nonlocal(struct prison * pr,sa_family_t af,struct sockaddr * ip)529 prison_get_nonlocal(struct prison *pr, sa_family_t af, struct sockaddr *ip)
530 {
531 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
532 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
533
534 /* Check if it is cached */
535 switch(af) {
536 case AF_INET:
537 if (ip4 != NULL && pr->nonlocal_ip4 != NULL)
538 ip4->sin_addr.s_addr = pr->nonlocal_ip4->sin_addr.s_addr;
539 return (struct sockaddr *)pr->nonlocal_ip4;
540
541 case AF_INET6:
542 if (ip6 != NULL && pr->nonlocal_ip6 != NULL)
543 ip6->sin6_addr = pr->nonlocal_ip6->sin6_addr;
544 return (struct sockaddr *)pr->nonlocal_ip6;
545 }
546
547 /* NOTREACHED */
548 return NULL;
549 }
550
551 /*
552 * Prison get loopback ip.
553 * - af is the address family of the ip we want (AF_INET|AF_INET6).
554 * - If ip != NULL, put the first IP address that is not a loopback address
555 * into *ip.
556 *
557 * ip is in network by order and we don't touch it unless we find a valid ip.
558 * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
559 * or NULL. This struct may not be modified.
560 */
561 struct sockaddr *
prison_get_local(struct prison * pr,sa_family_t af,struct sockaddr * ip)562 prison_get_local(struct prison *pr, sa_family_t af, struct sockaddr *ip)
563 {
564 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
565 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
566
567 /* Check if it is cached */
568 switch(af) {
569 case AF_INET:
570 if (ip4 != NULL && pr->local_ip4 != NULL)
571 ip4->sin_addr.s_addr = pr->local_ip4->sin_addr.s_addr;
572 return (struct sockaddr *)pr->local_ip4;
573
574 case AF_INET6:
575 if (ip6 != NULL && pr->local_ip6 != NULL)
576 ip6->sin6_addr = pr->local_ip6->sin6_addr;
577 return (struct sockaddr *)pr->local_ip6;
578 }
579
580 /* NOTREACHED */
581 return NULL;
582 }
583
584 /* Check if the IP is among ours, if it is return 1, else 0 */
585 int
jailed_ip(struct prison * pr,const struct sockaddr * ip)586 jailed_ip(struct prison *pr, const struct sockaddr *ip)
587 {
588 const struct jail_ip_storage *jis;
589 const struct sockaddr_in *jip4, *ip4;
590 const struct sockaddr_in6 *jip6, *ip6;
591
592 if (pr == NULL)
593 return(0);
594 ip4 = (const struct sockaddr_in *)ip;
595 ip6 = (const struct sockaddr_in6 *)ip;
596
597 lockmgr(&jail_lock, LK_EXCLUSIVE);
598 SLIST_FOREACH(jis, &pr->pr_ips, entries) {
599 switch (ip->sa_family) {
600 case AF_INET:
601 jip4 = (const struct sockaddr_in *) &jis->ip;
602 if (jip4->sin_family == AF_INET &&
603 ip4->sin_addr.s_addr == jip4->sin_addr.s_addr) {
604 lockmgr(&jail_lock, LK_RELEASE);
605 return(1);
606 }
607 break;
608 case AF_INET6:
609 jip6 = (const struct sockaddr_in6 *) &jis->ip;
610 if (jip6->sin6_family == AF_INET6 &&
611 IN6_ARE_ADDR_EQUAL(&ip6->sin6_addr,
612 &jip6->sin6_addr)) {
613 lockmgr(&jail_lock, LK_RELEASE);
614 return(1);
615 }
616 break;
617 }
618 }
619 lockmgr(&jail_lock, LK_RELEASE);
620 /* Ip not in list */
621 return(0);
622 }
623
624 int
prison_if(struct ucred * cred,struct sockaddr * sa)625 prison_if(struct ucred *cred, struct sockaddr *sa)
626 {
627 struct prison *pr;
628 struct sockaddr_in *sai = (struct sockaddr_in*) sa;
629
630 pr = cred->cr_prison;
631
632 if (((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6))
633 && PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_NET_UNIXIPROUTE))
634 return(1);
635 else if ((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6))
636 return(0);
637 else if (jailed_ip(pr, sa))
638 return(0);
639 return(1);
640 }
641
642 /*
643 * Returns a prison instance, or NULL on failure.
644 */
645 static struct prison *
prison_find(int prid)646 prison_find(int prid)
647 {
648 struct prison *pr;
649
650 lockmgr(&jail_lock, LK_EXCLUSIVE);
651 LIST_FOREACH(pr, &allprison, pr_list) {
652 if (pr->pr_id == prid)
653 break;
654 }
655 lockmgr(&jail_lock, LK_RELEASE);
656
657 return(pr);
658 }
659
660 static int
sysctl_jail_list(SYSCTL_HANDLER_ARGS)661 sysctl_jail_list(SYSCTL_HANDLER_ARGS)
662 {
663 struct thread *td = curthread;
664 struct jail_ip_storage *jip;
665 #ifdef INET6
666 struct sockaddr_in6 *jsin6;
667 #endif
668 struct sockaddr_in *jsin;
669 struct lwp *lp;
670 struct prison *pr;
671 unsigned int jlssize, jlsused;
672 int count, error;
673 char *jls; /* Jail list */
674 char *oip; /* Output ip */
675 char *fullpath, *freepath;
676
677 jlsused = 0;
678
679 if (jailed(td->td_ucred))
680 return (0);
681 lp = td->td_lwp;
682 retry:
683 count = prisoncount;
684
685 if (count == 0)
686 return(0);
687
688 jlssize = (count * 1024);
689 jls = kmalloc(jlssize + 1, M_TEMP, M_WAITOK | M_ZERO);
690 if (count < prisoncount) {
691 kfree(jls, M_TEMP);
692 goto retry;
693 }
694 count = prisoncount;
695
696 lockmgr(&jail_lock, LK_EXCLUSIVE);
697 LIST_FOREACH(pr, &allprison, pr_list) {
698 error = cache_fullpath(lp->lwp_proc, &pr->pr_root, NULL,
699 &fullpath, &freepath, 0);
700 if (error)
701 continue;
702 if (jlsused && jlsused < jlssize)
703 jls[jlsused++] = '\n';
704 count = ksnprintf(jls + jlsused, (jlssize - jlsused),
705 "%d %s %s",
706 pr->pr_id, pr->pr_host, fullpath);
707 kfree(freepath, M_TEMP);
708 if (count < 0)
709 goto end;
710 jlsused += count;
711
712 /* Copy the IPS */
713 SLIST_FOREACH(jip, &pr->pr_ips, entries) {
714 char buf[INET_ADDRSTRLEN];
715
716 jsin = (struct sockaddr_in *)&jip->ip;
717
718 switch(jsin->sin_family) {
719 case AF_INET:
720 oip = kinet_ntoa(jsin->sin_addr, buf);
721 break;
722 #ifdef INET6
723 case AF_INET6:
724 jsin6 = (struct sockaddr_in6 *)&jip->ip;
725 oip = ip6_sprintf(&jsin6->sin6_addr);
726 break;
727 #endif
728 default:
729 oip = "?family?";
730 break;
731 }
732
733 if ((jlssize - jlsused) < (strlen(oip) + 1)) {
734 error = ERANGE;
735 goto end;
736 }
737 count = ksnprintf(jls + jlsused, (jlssize - jlsused),
738 " %s", oip);
739 if (count < 0)
740 goto end;
741 jlsused += count;
742 }
743 }
744
745 /*
746 * The format is:
747 * pr_id <SPC> hostname1 <SPC> PATH1 <SPC> IP1 <SPC> IP2\npr_id...
748 */
749 error = SYSCTL_OUT(req, jls, jlsused);
750 end:
751 lockmgr(&jail_lock, LK_RELEASE);
752 kfree(jls, M_TEMP);
753
754 return(error);
755 }
756
757 SYSCTL_OID(_jail, OID_AUTO, list, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
758 sysctl_jail_list, "A", "List of active jails");
759
760 static int
sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)761 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
762 {
763 int error, injail;
764
765 injail = jailed(req->td->td_ucred);
766 error = SYSCTL_OUT(req, &injail, sizeof(injail));
767
768 return (error);
769 }
770
771 SYSCTL_PROC(_jail, OID_AUTO, jailed,
772 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NOLOCK, NULL, 0,
773 sysctl_jail_jailed, "I", "Process in jail?");
774
775 /*
776 * MPSAFE
777 */
778 void
prison_hold(struct prison * pr)779 prison_hold(struct prison *pr)
780 {
781 atomic_add_int(&pr->pr_ref, 1);
782 #ifdef PRISON_DEBUG
783 if (prison_debug > 0) {
784 --prison_debug;
785 print_backtrace(-1);
786 }
787 #endif
788 }
789
790 /*
791 * MPALMOSTSAFE
792 */
793 void
prison_free(struct prison * pr)794 prison_free(struct prison *pr)
795 {
796 struct jail_ip_storage *jls;
797
798 #ifdef PRISON_DEBUG
799 if (prison_debug > 0) {
800 --prison_debug;
801 print_backtrace(-1);
802 }
803 #endif
804 KKASSERT(pr->pr_ref > 0);
805 if (atomic_fetchadd_int(&pr->pr_ref, -1) != 1)
806 return;
807
808 /*
809 * The global jail lock is needed on the last ref to adjust
810 * the list.
811 */
812 lockmgr(&jail_lock, LK_EXCLUSIVE);
813 if (pr->pr_ref) {
814 lockmgr(&jail_lock, LK_RELEASE);
815 return;
816 }
817 LIST_REMOVE(pr, pr_list);
818 --prisoncount;
819
820 /*
821 * Clean up
822 */
823 while (!SLIST_EMPTY(&pr->pr_ips)) {
824 jls = SLIST_FIRST(&pr->pr_ips);
825 SLIST_REMOVE_HEAD(&pr->pr_ips, entries);
826 kfree(jls, M_PRISON);
827 }
828 lockmgr(&jail_lock, LK_RELEASE);
829
830 if (pr->pr_linux != NULL)
831 kfree(pr->pr_linux, M_PRISON);
832 varsymset_clean(&pr->pr_varsymset);
833
834 /* Release the sysctl tree */
835 prison_sysctl_done(pr);
836
837 cache_drop(&pr->pr_root);
838 kfree(pr, M_PRISON);
839 }
840
841 /*
842 * Check if permisson for a specific privilege is granted within jail.
843 *
844 * MPSAFE
845 */
846 int
prison_priv_check(struct ucred * cred,int cap)847 prison_priv_check(struct ucred *cred, int cap)
848 {
849 struct prison *pr = cred->cr_prison;
850
851 if (!jailed(cred))
852 return (0);
853
854 switch (cap & ~__SYSCAP_XFLAGS) {
855 case SYSCAP_RESTRICTEDROOT: /* meta group 1 */
856 /* RESTRICTEDROOT fallbacks disallowed in jails */
857 return EPERM;
858 case SYSCAP_SENSITIVEROOT: /* meta group 2 */
859 case SYSCAP_NOEXEC: /* meta group 3 */
860 case SYSCAP_NOCRED: /* meta group 4 */
861 return 0;
862 case SYSCAP_NOJAIL: /* meta group 5 */
863 /* all jail ops disallowed in jails */
864 return EPERM;
865 case SYSCAP_NONET: /* meta group 6 */
866 return 0;
867 case SYSCAP_NONET_SENSITIVE: /* meta group 7 */
868 /* all sensitive network ops disallowed in jails */
869 return EPERM;
870 case SYSCAP_NOVFS: /* meta group 8 */
871 case SYSCAP_NOVFS_SENSITIVE: /* meta group 9 */
872 case SYSCAP_NOMOUNT: /* meta group 10 */
873 case SYSCAP_NO11: /* meta group 11 */
874 case SYSCAP_NO12: /* meta group 12 */
875 case SYSCAP_NO13: /* meta group 13 */
876 case SYSCAP_NO14: /* meta group 14 */
877 case SYSCAP_NO15: /* meta group 15 */
878 return (0);
879
880 /* ----- */ /* group 1 - disallowed */
881
882 case SYSCAP_NOPROC_TRESPASS: /* group 2 allowed */
883 case SYSCAP_NOPROC_SETLOGIN:
884 case SYSCAP_NOPROC_SETRLIMIT:
885 case SYSCAP_NOSYSCTL_WR:
886 case SYSCAP_NOVARSYM_SYS:
887 case SYSCAP_NOSETHOSTNAME:
888 case SYSCAP_NOQUOTA_WR:
889 case SYSCAP_NODEBUG_UNPRIV:
890 case SYSCAP_NOSCHED:
891 case SYSCAP_NOSCHED_CPUSET:
892 case SYSCAP_NOSETTIME:
893 return (0);
894
895 case SYSCAP_NOEXEC_SUID: /* group 3 allowed */
896 case SYSCAP_NOEXEC_SGID:
897 return (0);
898
899 case SYSCAP_NOCRED_SETUID: /* group 4 allowed */
900 case SYSCAP_NOCRED_SETGID:
901 case SYSCAP_NOCRED_SETEUID:
902 case SYSCAP_NOCRED_SETEGID:
903 case SYSCAP_NOCRED_SETREUID:
904 case SYSCAP_NOCRED_SETREGID:
905 case SYSCAP_NOCRED_SETRESUID:
906 case SYSCAP_NOCRED_SETRESGID:
907 case SYSCAP_NOCRED_SETGROUPS:
908 return (0);
909
910 case SYSCAP_NOJAIL_CREATE: /* group 5 disallowed */
911 case SYSCAP_NOJAIL_ATTACH:
912 return EPERM;
913
914 case SYSCAP_NONET_RESPORT: /* group 6 mostly allowed */
915 /*
916 * Allow reserved ports
917 */
918 return 0;
919 case SYSCAP_NONET_RAW:
920 /*
921 * Conditionally allow creating raw sockets in jail.
922 */
923 if (PRISON_CAP_ISSET(pr->pr_caps,
924 PRISON_CAP_NET_RAW_SOCKETS))
925 return (0);
926 else
927 return (EPERM);
928
929 /* ----- */ /* group 7 - disallowed */
930
931 case SYSCAP_NOVFS_SYSFLAGS: /* group 8 - allowed */
932 case SYSCAP_NOVFS_CHOWN:
933 case SYSCAP_NOVFS_CHMOD:
934 case SYSCAP_NOVFS_LINK:
935 case SYSCAP_NOVFS_CHFLAGS_DEV:
936 case SYSCAP_NOVFS_SETATTR:
937 case SYSCAP_NOVFS_SETGID:
938 case SYSCAP_NOVFS_GENERATION:
939 case SYSCAP_NOVFS_RETAINSUGID:
940 return (0);
941
942 case SYSCAP_NOVFS_MKNOD_BAD: /* group 9 - allowed */
943 case SYSCAP_NOVFS_MKNOD_WHT:
944 case SYSCAP_NOVFS_MKNOD_DIR:
945 case SYSCAP_NOVFS_MKNOD_DEV:
946 case SYSCAP_NOVFS_IOCTL:
947 case SYSCAP_NOVFS_CHROOT:
948 case SYSCAP_NOVFS_REVOKE:
949 return (0);
950
951 case SYSCAP_NOMOUNT_NULLFS: /* group 10 - conditional */
952 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_NULLFS))
953 return (0);
954 else
955 return (EPERM);
956 case SYSCAP_NOMOUNT_DEVFS:
957 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_DEVFS))
958 return (0);
959 else
960 return (EPERM);
961 case SYSCAP_NOMOUNT_TMPFS:
962 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_TMPFS))
963 return (0);
964 else
965 return (EPERM);
966 case SYSCAP_NOMOUNT_PROCFS:
967 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_PROCFS))
968 return (0);
969 else
970 return (EPERM);
971 case SYSCAP_NOMOUNT_FUSE:
972 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_FUSEFS))
973 return (0);
974 else
975 return (EPERM);
976 case SYSCAP_NOMOUNT_UMOUNT:
977 return (0);
978
979 default:
980 /* otherwise disallow */
981 return (EPERM);
982 }
983 }
984
985
986 /*
987 * Create a per-jail sysctl tree to control the prison
988 */
989 int
prison_sysctl_create(struct prison * pr)990 prison_sysctl_create(struct prison *pr)
991 {
992 char id_str[7];
993
994 ksnprintf(id_str, 6, "%d", pr->pr_id);
995
996 pr->pr_sysctl_ctx = (struct sysctl_ctx_list *) kmalloc(
997 sizeof(struct sysctl_ctx_list), M_PRISON, M_WAITOK | M_ZERO);
998
999 sysctl_ctx_init(pr->pr_sysctl_ctx);
1000
1001 /* Main jail node */
1002 pr->pr_sysctl_tree = SYSCTL_ADD_NODE(pr->pr_sysctl_ctx,
1003 SYSCTL_STATIC_CHILDREN(_jail),
1004 OID_AUTO, id_str, CTLFLAG_RD, 0,
1005 "Jail specific settings");
1006
1007 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1008 OID_AUTO, "sys_set_hostname", CTLFLAG_RW,
1009 &pr->pr_caps, 0, PRISON_CAP_SYS_SET_HOSTNAME,
1010 "Processes in jail can set their hostnames");
1011
1012 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1013 OID_AUTO, "sys_sysvipc", CTLFLAG_RW,
1014 &pr->pr_caps, 0, PRISON_CAP_SYS_SYSVIPC,
1015 "Processes in jail can use System V IPC primitives");
1016
1017 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1018 OID_AUTO, "net_unixiproute", CTLFLAG_RW,
1019 &pr->pr_caps, 0, PRISON_CAP_NET_UNIXIPROUTE,
1020 "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");
1021
1022 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1023 OID_AUTO, "net_raw_sockets", CTLFLAG_RW,
1024 &pr->pr_caps, 0, PRISON_CAP_NET_RAW_SOCKETS,
1025 "Process in jail can create raw sockets");
1026
1027 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1028 OID_AUTO, "allow_listen_override", CTLFLAG_RW,
1029 &pr->pr_caps, 0, PRISON_CAP_NET_LISTEN_OVERRIDE,
1030 "Process in jail can create raw sockets");
1031
1032 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1033 OID_AUTO, "vfs_chflags", CTLFLAG_RW,
1034 &pr->pr_caps, 0, PRISON_CAP_VFS_CHFLAGS,
1035 "Process in jail can override host wildcard listen");
1036
1037 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1038 OID_AUTO, "vfs_mount_nullfs", CTLFLAG_RW,
1039 &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_NULLFS,
1040 "Processes in jail can mount nullfs(5) filesystems");
1041
1042 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1043 OID_AUTO, "vfs_mount_tmpfs", CTLFLAG_RW,
1044 &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_TMPFS,
1045 "Processes in jail can mount tmpfs(5) filesystems");
1046
1047 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1048 OID_AUTO, "vfs_mount_devfs", CTLFLAG_RW,
1049 &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_DEVFS,
1050 "Processes in jail can mount devfs(5) filesystems");
1051
1052 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1053 OID_AUTO, "vfs_mount_procfs", CTLFLAG_RW,
1054 &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_PROCFS,
1055 "Processes in jail can mount procfs(5) filesystems");
1056
1057 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
1058 OID_AUTO, "vfs_mount_fusefs", CTLFLAG_RW,
1059 &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_FUSEFS,
1060 "Processes in jail can mount fuse filesystems");
1061
1062 return 0;
1063 }
1064
1065 int
prison_sysctl_done(struct prison * pr)1066 prison_sysctl_done(struct prison *pr)
1067 {
1068 if (pr->pr_sysctl_tree) {
1069 sysctl_ctx_free(pr->pr_sysctl_ctx);
1070 kfree(pr->pr_sysctl_ctx, M_PRISON);
1071 pr->pr_sysctl_tree = NULL;
1072 }
1073
1074 return 0;
1075 }
1076