xref: /dragonfly/sys/kern/kern_jail.c (revision 32efd857)
1 /*
2  * ----------------------------------------------------------------------------
3  * "THE BEER-WARE LICENSE" (Revision 42):
4  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
5  * can do whatever you want with this stuff. If we meet some day, and you think
6  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
7  * ----------------------------------------------------------------------------
8  *
9  */
10 /*-
11  * Copyright (c) 2006 Victor Balada Diaz <victor@bsdes.net>
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 
37 /*
38  * $FreeBSD: src/sys/kern/kern_jail.c,v 1.6.2.3 2001/08/17 01:00:26 rwatson Exp $
39  */
40 
41 #include "opt_inet6.h"
42 
43 #include <sys/param.h>
44 #include <sys/types.h>
45 #include <sys/kernel.h>
46 #include <sys/systm.h>
47 #include <sys/errno.h>
48 #include <sys/sysmsg.h>
49 #include <sys/malloc.h>
50 #include <sys/nlookup.h>
51 #include <sys/namecache.h>
52 #include <sys/proc.h>
53 #include <sys/priv.h>
54 #include <sys/jail.h>
55 #include <sys/socket.h>
56 #include <sys/sysctl.h>
57 #include <sys/kern_syscall.h>
58 #include <net/if.h>
59 #include <netinet/in.h>
60 #include <netinet6/in6_var.h>
61 
/* Forward declarations for helpers defined later in this file. */
static struct prison	*prison_find(int);
static void		prison_ipcache_init(struct prison *);

/*
 * Capability bits copied into pr_caps of every newly created prison
 * (see sys_jail()).
 */
__read_mostly static prison_cap_t	prison_default_caps;

/* Malloc type used for prison structures and their address entries. */
MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");

/* Root of the jail sysctl tree. */
SYSCTL_NODE(, OID_AUTO, jail, CTLFLAG_RW, 0,
    "All jails settings");

/* Sub-tree holding the default capability switches. */
SYSCTL_NODE(_jail, OID_AUTO, defaults, CTLFLAG_RW, 0,
    "Default options for jails");

/*
 * Define PRISON_DEBUG to get a sysctl-controlled counter; while it is
 * positive, prison_hold() and prison_free() print a backtrace and
 * decrement it.
 */
/*#define PRISON_DEBUG*/
#ifdef PRISON_DEBUG
__read_mostly static int prison_debug;
SYSCTL_INT(_jail, OID_AUTO, debug, CTLFLAG_RW, &prison_debug, 0,
    "Debug prison refs");
#endif
81 
/*
 * One sysctl per capability bit in prison_default_caps.  These only
 * describe the template; each prison gets its own copy of the bits
 * when it is created.
 */
SYSCTL_BIT64(_jail_defaults, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
    &prison_default_caps, 1, PRISON_CAP_SYS_SET_HOSTNAME,
    "Processes in jail can set their hostnames");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_NET_UNIXIPROUTE,
    "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_SYS_SYSVIPC,
    "Processes in jail can use System V IPC primitives");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, chflags_allowed, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_VFS_CHFLAGS,
    "Processes in jail can alter system file flags");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_NET_RAW_SOCKETS,
    "Process in jail can create raw sockets");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, allow_listen_override, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_NET_LISTEN_OVERRIDE,
    "Process in jail can override host wildcard listen");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_nullfs, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_NULLFS,
    "Process in jail can mount nullfs(5) filesystems");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_tmpfs, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_TMPFS,
    "Process in jail can mount tmpfs(5) filesystems");
113 
/* Most recently assigned jail id; the next search starts just after it. */
static int	lastprid = 0;
/* Number of prisons currently linked on allprison. */
static int	prisoncount = 0;

/*
 * Lock taken around accesses to the prison list and the per-prison
 * address lists.  It is recursive: several helpers re-acquire it while
 * their caller already holds it.
 */
static struct lock jail_lock =
       LOCK_INITIALIZER("jail", 0, LK_CANRECURSE);

/* List of all prisons, linked through pr_list. */
LIST_HEAD(prisonlist, prison);
static struct prisonlist allprison = LIST_HEAD_INITIALIZER(&allprison);
122 
123 static int
124 kern_jail_attach(int jid)
125 {
126 	struct proc *p = curthread->td_proc;
127 	struct prison *pr;
128 	struct ucred *cr;
129 	int error;
130 
131 	pr = prison_find(jid);
132 	if (pr == NULL)
133 		return(EINVAL);
134 
135 	error = kern_chroot(&pr->pr_root);
136 	if (error)
137 		return(error);
138 
139 	prison_hold(pr);
140 	lwkt_gettoken(&p->p_token);
141 	cr = cratom_proc(p);
142 	cr->cr_prison = pr;
143 	p->p_flags |= P_JAILED;
144 	lwkt_reltoken(&p->p_token);
145 
146 	return(0);
147 }
148 
149 static int
150 assign_prison_id(struct prison *pr)
151 {
152 	int tryprid;
153 	struct prison *tpr;
154 
155 	tryprid = lastprid + 1;
156 	if (tryprid == JAIL_MAX)
157 		tryprid = 1;
158 
159 	lockmgr(&jail_lock, LK_EXCLUSIVE);
160 next:
161 	LIST_FOREACH(tpr, &allprison, pr_list) {
162 		if (tpr->pr_id != tryprid)
163 			continue;
164 		tryprid++;
165 		if (tryprid == JAIL_MAX) {
166 			lockmgr(&jail_lock, LK_RELEASE);
167 			return (ERANGE);
168 		}
169 		goto next;
170 	}
171 	pr->pr_id = lastprid = tryprid;
172 	lockmgr(&jail_lock, LK_RELEASE);
173 
174 	return (0);
175 }
176 
177 static int
178 kern_jail(struct prison *pr, struct jail *j)
179 {
180 	int error;
181 	struct nlookupdata nd;
182 
183 	error = nlookup_init(&nd, j->path, UIO_USERSPACE, NLC_FOLLOW);
184 	if (error) {
185 		nlookup_done(&nd);
186 		return (error);
187 	}
188 	error = nlookup(&nd);
189 	if (error) {
190 		nlookup_done(&nd);
191 		return (error);
192 	}
193 	cache_copy(&nd.nl_nch, &pr->pr_root);
194 
195 	varsymset_init(&pr->pr_varsymset, NULL);
196 	prison_ipcache_init(pr);
197 
198 	error = assign_prison_id(pr);
199 	if (error) {
200 		varsymset_clean(&pr->pr_varsymset);
201 		nlookup_done(&nd);
202 		return (error);
203 	}
204 
205 	lockmgr(&jail_lock, LK_EXCLUSIVE);
206 	LIST_INSERT_HEAD(&allprison, pr, pr_list);
207 	++prisoncount;
208 	lockmgr(&jail_lock, LK_RELEASE);
209 
210 	error = prison_sysctl_create(pr);
211 	if (error)
212 		goto out;
213 
214 	error = kern_jail_attach(pr->pr_id);
215 	if (error)
216 		goto out2;
217 
218 	nlookup_done(&nd);
219 	return 0;
220 
221 out2:
222 	prison_sysctl_done(pr);
223 
224 out:
225 	lockmgr(&jail_lock, LK_EXCLUSIVE);
226 	LIST_REMOVE(pr, pr_list);
227 	--prisoncount;
228 	lockmgr(&jail_lock, LK_RELEASE);
229 	varsymset_clean(&pr->pr_varsymset);
230 	nlookup_done(&nd);
231 	return (error);
232 }
233 
234 /*
235  * jail()
236  *
237  * jail_args(syscallarg(struct jail *) jail)
238  *
239  * MPALMOSTSAFE
240  */
241 int
242 sys_jail(struct sysmsg *sysmsg, const struct jail_args *uap)
243 {
244 	struct thread *td = curthread;
245 	struct prison *pr;
246 	struct jail_ip_storage *jip;
247 	struct jail j;
248 	int error;
249 	uint32_t jversion;
250 
251 	sysmsg->sysmsg_result = -1;
252 
253 	error = priv_check(td, PRIV_JAIL_CREATE);
254 	if (error)
255 		return (error);
256 
257 	error = copyin(uap->jail, &jversion, sizeof(jversion));
258 	if (error)
259 		return (error);
260 
261 	pr = kmalloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
262 	SLIST_INIT(&pr->pr_ips);
263 	lockmgr(&jail_lock, LK_EXCLUSIVE);
264 
265 	switch (jversion) {
266 	case 0:
267 		/* Single IPv4 jails. */
268 		{
269 		struct jail_v0 jv0;
270 		struct sockaddr_in ip4addr;
271 
272 		error = copyin(uap->jail, &jv0, sizeof(jv0));
273 		if (error)
274 			goto out;
275 
276 		j.path = jv0.path;
277 		j.hostname = jv0.hostname;
278 
279 		jip = kmalloc(sizeof(*jip),  M_PRISON, M_WAITOK | M_ZERO);
280 		ip4addr.sin_family = AF_INET;
281 		ip4addr.sin_addr.s_addr = htonl(jv0.ip_number);
282 		memcpy(&jip->ip, &ip4addr, sizeof(ip4addr));
283 		SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries);
284 		break;
285 		}
286 
287 	case 1:
288 		/*
289 		 * DragonFly multi noIP/IPv4/IPv6 jails
290 		 *
291 		 * NOTE: This version is unsupported by FreeBSD
292 		 * (which uses version 2 instead).
293 		 */
294 
295 		error = copyin(uap->jail, &j, sizeof(j));
296 		if (error)
297 			goto out;
298 
299 		for (int i = 0; i < j.n_ips; i++) {
300 			jip = kmalloc(sizeof(*jip), M_PRISON,
301 				      M_WAITOK | M_ZERO);
302 			SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries);
303 			error = copyin(&j.ips[i], &jip->ip,
304 					sizeof(struct sockaddr_storage));
305 			if (error)
306 				goto out;
307 		}
308 		break;
309 	default:
310 		error = EINVAL;
311 		goto out;
312 	}
313 
314 	error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
315 	if (error)
316 		goto out;
317 
318 	/* Use default capabilities as a template */
319 	pr->pr_caps = prison_default_caps;
320 
321 	error = kern_jail(pr, &j);
322 	if (error)
323 		goto out;
324 
325 	sysmsg->sysmsg_result = pr->pr_id;
326 	lockmgr(&jail_lock, LK_RELEASE);
327 
328 	return (0);
329 
330 out:
331 	/* Delete all ips */
332 	while (!SLIST_EMPTY(&pr->pr_ips)) {
333 		jip = SLIST_FIRST(&pr->pr_ips);
334 		SLIST_REMOVE_HEAD(&pr->pr_ips, entries);
335 		kfree(jip, M_PRISON);
336 	}
337 	lockmgr(&jail_lock, LK_RELEASE);
338 	kfree(pr, M_PRISON);
339 
340 	return (error);
341 }
342 
343 /*
344  * int jail_attach(int jid);
345  *
346  * MPALMOSTSAFE
347  */
348 int
349 sys_jail_attach(struct sysmsg *sysmsg, const struct jail_attach_args *uap)
350 {
351 	struct thread *td = curthread;
352 	int error;
353 
354 	error = priv_check(td, PRIV_JAIL_ATTACH);
355 	if (error)
356 		return(error);
357 	lockmgr(&jail_lock, LK_EXCLUSIVE);
358 	error = kern_jail_attach(uap->jid);
359 	lockmgr(&jail_lock, LK_RELEASE);
360 	return (error);
361 }
362 
363 static void
364 prison_ipcache_init(struct prison *pr)
365 {
366 	struct jail_ip_storage *jis;
367 	struct sockaddr_in *ip4;
368 	struct sockaddr_in6 *ip6;
369 
370 	lockmgr(&jail_lock, LK_EXCLUSIVE);
371 	SLIST_FOREACH(jis, &pr->pr_ips, entries) {
372 		switch (jis->ip.ss_family) {
373 		case AF_INET:
374 			ip4 = (struct sockaddr_in *)&jis->ip;
375 			if ((ntohl(ip4->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) ==
376 			    IN_LOOPBACKNET) {
377 				/* loopback address */
378 				if (pr->local_ip4 == NULL)
379 					pr->local_ip4 = ip4;
380 			} else {
381 				/* public address */
382 				if (pr->nonlocal_ip4 == NULL)
383 					pr->nonlocal_ip4 = ip4;
384 			}
385 			break;
386 
387 		case AF_INET6:
388 			ip6 = (struct sockaddr_in6 *)&jis->ip;
389 			if (IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr)) {
390 				/* loopback address */
391 				if (pr->local_ip6 == NULL)
392 					pr->local_ip6 = ip6;
393 			} else {
394 				/* public address */
395 				if (pr->nonlocal_ip6 == NULL)
396 					pr->nonlocal_ip6 = ip6;
397 			}
398 			break;
399 		}
400 	}
401 	lockmgr(&jail_lock, LK_RELEASE);
402 }
403 
404 /*
405  * Changes INADDR_LOOPBACK for a valid jail address.
406  * ip is in network byte order.
407  * Returns 1 if the ip is among jail valid ips.
408  * Returns 0 if is not among jail valid ips or
409  * if couldn't replace INADDR_LOOPBACK for a valid
410  * IP.
411  */
412 int
413 prison_replace_wildcards(struct thread *td, struct sockaddr *ip)
414 {
415 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
416 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
417 	struct prison *pr;
418 
419 	if (td->td_proc == NULL || td->td_ucred == NULL)
420 		return (1);
421 	if ((pr = td->td_ucred->cr_prison) == NULL)
422 		return (1);
423 
424 	if ((ip->sa_family == AF_INET &&
425 	    ip4->sin_addr.s_addr == htonl(INADDR_ANY)) ||
426 	    (ip->sa_family == AF_INET6 &&
427 	    IN6_IS_ADDR_UNSPECIFIED(&ip6->sin6_addr)))
428 		return (1);
429 	if ((ip->sa_family == AF_INET &&
430 	    ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) ||
431 	    (ip->sa_family == AF_INET6 &&
432 	    IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) {
433 		if (!prison_get_local(pr, ip->sa_family, ip) &&
434 		    !prison_get_nonlocal(pr, ip->sa_family, ip))
435 			return(0);
436 		else
437 			return(1);
438 	}
439 	if (jailed_ip(pr, ip))
440 		return(1);
441 	return(0);
442 }
443 
444 /*
445  * Convert the localhost IP to the actual jail IP
446  */
447 int
448 prison_remote_ip(struct thread *td, struct sockaddr *ip)
449 {
450 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
451 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
452 	struct prison *pr;
453 
454 	if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL)
455 		return(1);
456 	if ((pr = td->td_ucred->cr_prison) == NULL)
457 		return(1);
458 	if ((ip->sa_family == AF_INET &&
459 	    ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) ||
460 	    (ip->sa_family == AF_INET6 &&
461 	    IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) {
462 		if (!prison_get_local(pr, ip->sa_family, ip) &&
463 		    !prison_get_nonlocal(pr, ip->sa_family, ip))
464 			return(0);
465 		else
466 			return(1);
467 	}
468 	return(1);
469 }
470 
471 /*
472  * Convert the jail IP back to localhost
473  *
474  * Used by getsockname() and getpeername() to convert the in-jail loopback
475  * address back to LOCALHOST.  For example, 127.0.0.2 -> 127.0.0.1.  The
476  * idea is that programs running inside the jail should be unaware that they
477  * are using a different loopback IP than the host.
478  */
479 __read_mostly static struct in6_addr sin6_localhost = IN6ADDR_LOOPBACK_INIT;
480 
481 int
482 prison_local_ip(struct thread *td, struct sockaddr *ip)
483 {
484 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
485 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
486 	struct prison *pr;
487 
488 	if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL)
489 		return(1);
490 	if ((pr = td->td_ucred->cr_prison) == NULL)
491 		return(1);
492 	if (ip->sa_family == AF_INET && pr->local_ip4 &&
493 	    pr->local_ip4->sin_addr.s_addr == ip4->sin_addr.s_addr &&
494 	    pr->local_ip4->sin_addr.s_addr != htonl(INADDR_LOOPBACK)) {
495 		ip4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
496 		return(0);
497 	}
498 	if (ip->sa_family == AF_INET6 && pr->local_ip6 &&
499 	    bcmp(&pr->local_ip6->sin6_addr, &ip6->sin6_addr,
500 		 sizeof(ip6->sin6_addr)) == 0) {
501 		bcopy(&sin6_localhost, &ip6->sin6_addr, sizeof(ip6->sin6_addr));
502 		return(0);
503 	}
504 	return(1);
505 }
506 
507 /*
508  * Prison get non loopback ip:
509  * - af is the address family of the ip we want (AF_INET|AF_INET6).
510  * - If ip != NULL, put the first IP address that is not a loopback address
511  *   into *ip.
512  *
513  * ip is in network by order and we don't touch it unless we find a valid ip.
514  * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
515  * or NULL.  This struct may not be modified.
516  */
517 struct sockaddr *
518 prison_get_nonlocal(struct prison *pr, sa_family_t af, struct sockaddr *ip)
519 {
520 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
521 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
522 
523 	/* Check if it is cached */
524 	switch(af) {
525 	case AF_INET:
526 		if (ip4 != NULL && pr->nonlocal_ip4 != NULL)
527 			ip4->sin_addr.s_addr = pr->nonlocal_ip4->sin_addr.s_addr;
528 		return (struct sockaddr *)pr->nonlocal_ip4;
529 
530 	case AF_INET6:
531 		if (ip6 != NULL && pr->nonlocal_ip6 != NULL)
532 			ip6->sin6_addr = pr->nonlocal_ip6->sin6_addr;
533 		return (struct sockaddr *)pr->nonlocal_ip6;
534 	}
535 
536 	/* NOTREACHED */
537 	return NULL;
538 }
539 
540 /*
541  * Prison get loopback ip.
542  * - af is the address family of the ip we want (AF_INET|AF_INET6).
543  * - If ip != NULL, put the first IP address that is not a loopback address
544  *   into *ip.
545  *
546  * ip is in network by order and we don't touch it unless we find a valid ip.
547  * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
548  * or NULL.  This struct may not be modified.
549  */
550 struct sockaddr *
551 prison_get_local(struct prison *pr, sa_family_t af, struct sockaddr *ip)
552 {
553 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
554 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
555 
556 	/* Check if it is cached */
557 	switch(af) {
558 	case AF_INET:
559 		if (ip4 != NULL && pr->local_ip4 != NULL)
560 			ip4->sin_addr.s_addr = pr->local_ip4->sin_addr.s_addr;
561 		return (struct sockaddr *)pr->local_ip4;
562 
563 	case AF_INET6:
564 		if (ip6 != NULL && pr->local_ip6 != NULL)
565 			ip6->sin6_addr = pr->local_ip6->sin6_addr;
566 		return (struct sockaddr *)pr->local_ip6;
567 	}
568 
569 	/* NOTREACHED */
570 	return NULL;
571 }
572 
573 /* Check if the IP is among ours, if it is return 1, else 0 */
574 int
575 jailed_ip(struct prison *pr, const struct sockaddr *ip)
576 {
577 	const struct jail_ip_storage *jis;
578 	const struct sockaddr_in *jip4, *ip4;
579 	const struct sockaddr_in6 *jip6, *ip6;
580 
581 	if (pr == NULL)
582 		return(0);
583 	ip4 = (const struct sockaddr_in *)ip;
584 	ip6 = (const struct sockaddr_in6 *)ip;
585 
586 	lockmgr(&jail_lock, LK_EXCLUSIVE);
587 	SLIST_FOREACH(jis, &pr->pr_ips, entries) {
588 		switch (ip->sa_family) {
589 		case AF_INET:
590 			jip4 = (const struct sockaddr_in *) &jis->ip;
591 			if (jip4->sin_family == AF_INET &&
592 			    ip4->sin_addr.s_addr == jip4->sin_addr.s_addr) {
593 				lockmgr(&jail_lock, LK_RELEASE);
594 				return(1);
595 			}
596 			break;
597 		case AF_INET6:
598 			jip6 = (const struct sockaddr_in6 *) &jis->ip;
599 			if (jip6->sin6_family == AF_INET6 &&
600 			    IN6_ARE_ADDR_EQUAL(&ip6->sin6_addr,
601 				&jip6->sin6_addr)) {
602 				lockmgr(&jail_lock, LK_RELEASE);
603 				return(1);
604 			}
605 			break;
606 		}
607 	}
608 	lockmgr(&jail_lock, LK_RELEASE);
609 	/* Ip not in list */
610 	return(0);
611 }
612 
613 int
614 prison_if(struct ucred *cred, struct sockaddr *sa)
615 {
616 	struct prison *pr;
617 	struct sockaddr_in *sai = (struct sockaddr_in*) sa;
618 
619 	pr = cred->cr_prison;
620 
621 	if (((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6))
622 	    && PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_NET_UNIXIPROUTE))
623 		return(1);
624 	else if ((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6))
625 		return(0);
626 	else if (jailed_ip(pr, sa))
627 		return(0);
628 	return(1);
629 }
630 
631 /*
632  * Returns a prison instance, or NULL on failure.
633  */
634 static struct prison *
635 prison_find(int prid)
636 {
637 	struct prison *pr;
638 
639 	lockmgr(&jail_lock, LK_EXCLUSIVE);
640 	LIST_FOREACH(pr, &allprison, pr_list) {
641 		if (pr->pr_id == prid)
642 			break;
643 	}
644 	lockmgr(&jail_lock, LK_RELEASE);
645 
646 	return(pr);
647 }
648 
649 static int
650 sysctl_jail_list(SYSCTL_HANDLER_ARGS)
651 {
652 	struct thread *td = curthread;
653 	struct jail_ip_storage *jip;
654 #ifdef INET6
655 	struct sockaddr_in6 *jsin6;
656 #endif
657 	struct sockaddr_in *jsin;
658 	struct lwp *lp;
659 	struct prison *pr;
660 	unsigned int jlssize, jlsused;
661 	int count, error;
662 	char *jls; /* Jail list */
663 	char *oip; /* Output ip */
664 	char *fullpath, *freepath;
665 
666 	jlsused = 0;
667 
668 	if (jailed(td->td_ucred))
669 		return (0);
670 	lp = td->td_lwp;
671 retry:
672 	count = prisoncount;
673 
674 	if (count == 0)
675 		return(0);
676 
677 	jlssize = (count * 1024);
678 	jls = kmalloc(jlssize + 1, M_TEMP, M_WAITOK | M_ZERO);
679 	if (count < prisoncount) {
680 		kfree(jls, M_TEMP);
681 		goto retry;
682 	}
683 	count = prisoncount;
684 
685 	lockmgr(&jail_lock, LK_EXCLUSIVE);
686 	LIST_FOREACH(pr, &allprison, pr_list) {
687 		error = cache_fullpath(lp->lwp_proc, &pr->pr_root, NULL,
688 					&fullpath, &freepath, 0);
689 		if (error)
690 			continue;
691 		if (jlsused && jlsused < jlssize)
692 			jls[jlsused++] = '\n';
693 		count = ksnprintf(jls + jlsused, (jlssize - jlsused),
694 				 "%d %s %s",
695 				 pr->pr_id, pr->pr_host, fullpath);
696 		kfree(freepath, M_TEMP);
697 		if (count < 0)
698 			goto end;
699 		jlsused += count;
700 
701 		/* Copy the IPS */
702 		SLIST_FOREACH(jip, &pr->pr_ips, entries) {
703 			char buf[INET_ADDRSTRLEN];
704 
705 			jsin = (struct sockaddr_in *)&jip->ip;
706 
707 			switch(jsin->sin_family) {
708 			case AF_INET:
709 				oip = kinet_ntoa(jsin->sin_addr, buf);
710 				break;
711 #ifdef INET6
712 			case AF_INET6:
713 				jsin6 = (struct sockaddr_in6 *)&jip->ip;
714 				oip = ip6_sprintf(&jsin6->sin6_addr);
715 				break;
716 #endif
717 			default:
718 				oip = "?family?";
719 				break;
720 			}
721 
722 			if ((jlssize - jlsused) < (strlen(oip) + 1)) {
723 				error = ERANGE;
724 				goto end;
725 			}
726 			count = ksnprintf(jls + jlsused, (jlssize - jlsused),
727 					  " %s", oip);
728 			if (count < 0)
729 				goto end;
730 			jlsused += count;
731 		}
732 	}
733 
734 	/*
735 	 * The format is:
736 	 * pr_id <SPC> hostname1 <SPC> PATH1 <SPC> IP1 <SPC> IP2\npr_id...
737 	 */
738 	error = SYSCTL_OUT(req, jls, jlsused);
739 end:
740 	lockmgr(&jail_lock, LK_RELEASE);
741 	kfree(jls, M_TEMP);
742 
743 	return(error);
744 }
745 
746 SYSCTL_OID(_jail, OID_AUTO, list, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
747 	   sysctl_jail_list, "A", "List of active jails");
748 
749 static int
750 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
751 {
752 	int error, injail;
753 
754 	injail = jailed(req->td->td_ucred);
755 	error = SYSCTL_OUT(req, &injail, sizeof(injail));
756 
757 	return (error);
758 }
759 
760 SYSCTL_PROC(_jail, OID_AUTO, jailed,
761 	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NOLOCK, NULL, 0,
762 	    sysctl_jail_jailed, "I", "Process in jail?");
763 
764 /*
765  * MPSAFE
766  */
767 void
768 prison_hold(struct prison *pr)
769 {
770 	atomic_add_int(&pr->pr_ref, 1);
771 #ifdef PRISON_DEBUG
772 	if (prison_debug > 0) {
773 		--prison_debug;
774 		print_backtrace(-1);
775 	}
776 #endif
777 }
778 
779 /*
780  * MPALMOSTSAFE
781  */
782 void
783 prison_free(struct prison *pr)
784 {
785 	struct jail_ip_storage *jls;
786 
787 #ifdef PRISON_DEBUG
788 	if (prison_debug > 0) {
789 		--prison_debug;
790 		print_backtrace(-1);
791 	}
792 #endif
793 	KKASSERT(pr->pr_ref > 0);
794 	if (atomic_fetchadd_int(&pr->pr_ref, -1) != 1)
795 		return;
796 
797 	/*
798 	 * The global jail lock is needed on the last ref to adjust
799 	 * the list.
800 	 */
801 	lockmgr(&jail_lock, LK_EXCLUSIVE);
802 	if (pr->pr_ref) {
803 		lockmgr(&jail_lock, LK_RELEASE);
804 		return;
805 	}
806 	LIST_REMOVE(pr, pr_list);
807 	--prisoncount;
808 
809 	/*
810 	 * Clean up
811 	 */
812 	while (!SLIST_EMPTY(&pr->pr_ips)) {
813 		jls = SLIST_FIRST(&pr->pr_ips);
814 		SLIST_REMOVE_HEAD(&pr->pr_ips, entries);
815 		kfree(jls, M_PRISON);
816 	}
817 	lockmgr(&jail_lock, LK_RELEASE);
818 
819 	if (pr->pr_linux != NULL)
820 		kfree(pr->pr_linux, M_PRISON);
821 	varsymset_clean(&pr->pr_varsymset);
822 
823 	/* Release the sysctl tree */
824 	prison_sysctl_done(pr);
825 
826 	cache_drop(&pr->pr_root);
827 	kfree(pr, M_PRISON);
828 }
829 
830 /*
831  * Check if permisson for a specific privilege is granted within jail.
832  *
833  * MPSAFE
834  */
835 int
836 prison_priv_check(struct ucred *cred, int priv)
837 {
838 	struct prison *pr = cred->cr_prison;
839 
840 	if (!jailed(cred))
841 		return (0);
842 
843 	switch (priv) {
844 	case PRIV_CRED_SETUID:
845 	case PRIV_CRED_SETEUID:
846 	case PRIV_CRED_SETGID:
847 	case PRIV_CRED_SETEGID:
848 	case PRIV_CRED_SETGROUPS:
849 	case PRIV_CRED_SETREUID:
850 	case PRIV_CRED_SETREGID:
851 	case PRIV_CRED_SETRESUID:
852 	case PRIV_CRED_SETRESGID:
853 
854 	case PRIV_VFS_SYSFLAGS:
855 	case PRIV_VFS_CHOWN:
856 	case PRIV_VFS_CHMOD:
857 	case PRIV_VFS_CHROOT:
858 	case PRIV_VFS_LINK:
859 	case PRIV_VFS_CHFLAGS_DEV:
860 	case PRIV_VFS_REVOKE:
861 	case PRIV_VFS_MKNOD_BAD:
862 	case PRIV_VFS_MKNOD_WHT:
863 	case PRIV_VFS_MKNOD_DIR:
864 		return (0);
865 
866 	case PRIV_VFS_MOUNT_NULLFS:
867 		if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_NULLFS))
868 			return (0);
869 		else
870 			return (EPERM);
871 	case PRIV_VFS_MOUNT_DEVFS:
872 		return (EPERM);
873 	case PRIV_VFS_MOUNT_TMPFS:
874 		if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_TMPFS))
875 			return (0);
876 		else
877 			return (EPERM);
878 
879 	case PRIV_VFS_SETATTR:
880 	case PRIV_VFS_SETGID:
881 
882 	case PRIV_PROC_SETRLIMIT:
883 	case PRIV_PROC_SETLOGIN:
884 
885 	case PRIV_SYSCTL_WRITEJAIL:
886 
887 	case PRIV_VARSYM_SYS:
888 
889 	case PRIV_SETHOSTNAME:
890 
891 	case PRIV_PROC_TRESPASS:
892 
893 		return (0);
894 
895 	case PRIV_UFS_QUOTAON:
896 	case PRIV_UFS_QUOTAOFF:
897 	case PRIV_VFS_SETQUOTA:
898 	case PRIV_UFS_SETUSE:
899 	case PRIV_VFS_GETQUOTA:
900 		return (0);
901 
902 
903 	case PRIV_DEBUG_UNPRIV:
904 		return (0);
905 
906 
907 		/*
908 		 * Allow jailed root to bind reserved ports.
909 		 */
910 	case PRIV_NETINET_RESERVEDPORT:
911 		return (0);
912 
913 
914 		/*
915 		 * Conditionally allow creating raw sockets in jail.
916 		 */
917 	case PRIV_NETINET_RAW:
918 		if (PRISON_CAP_ISSET(pr->pr_caps,
919 			PRISON_CAP_NET_RAW_SOCKETS))
920 			return (0);
921 		else
922 			return (EPERM);
923 
924 	case PRIV_HAMMER_IOCTL:
925 		return (0);
926 
927 	default:
928 
929 		return (EPERM);
930 	}
931 }
932 
933 
934 /*
935  * Create a per-jail sysctl tree to control the prison
936  */
937 int
938 prison_sysctl_create(struct prison *pr)
939 {
940 	char id_str[7];
941 
942 	ksnprintf(id_str, 6, "%d", pr->pr_id);
943 
944 	pr->pr_sysctl_ctx = (struct sysctl_ctx_list *) kmalloc(
945 		sizeof(struct sysctl_ctx_list), M_PRISON, M_WAITOK | M_ZERO);
946 
947 	sysctl_ctx_init(pr->pr_sysctl_ctx);
948 
949 	/* Main jail node */
950 	pr->pr_sysctl_tree = SYSCTL_ADD_NODE(pr->pr_sysctl_ctx,
951 	    SYSCTL_STATIC_CHILDREN(_jail),
952 	    OID_AUTO, id_str, CTLFLAG_RD, 0,
953 	    "Jail specific settings");
954 
955 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
956 	    OID_AUTO, "sys_set_hostname", CTLFLAG_RW,
957 	    &pr->pr_caps, 0, PRISON_CAP_SYS_SET_HOSTNAME,
958 	    "Processes in jail can set their hostnames");
959 
960 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
961 	    OID_AUTO, "sys_sysvipc", CTLFLAG_RW,
962 	    &pr->pr_caps, 0, PRISON_CAP_SYS_SYSVIPC,
963 	    "Processes in jail can use System V IPC primitives");
964 
965 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
966 	    OID_AUTO, "net_unixiproute", CTLFLAG_RW,
967 	    &pr->pr_caps, 0, PRISON_CAP_NET_UNIXIPROUTE,
968 	    "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");
969 
970 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
971 	    OID_AUTO, "net_raw_sockets", CTLFLAG_RW,
972 	    &pr->pr_caps, 0, PRISON_CAP_NET_RAW_SOCKETS,
973 	    "Process in jail can create raw sockets");
974 
975 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
976 	    OID_AUTO, "allow_listen_override", CTLFLAG_RW,
977 	    &pr->pr_caps, 0, PRISON_CAP_NET_LISTEN_OVERRIDE,
978 	    "Process in jail can create raw sockets");
979 
980 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
981 	    OID_AUTO, "vfs_chflags", CTLFLAG_RW,
982 	    &pr->pr_caps, 0, PRISON_CAP_VFS_CHFLAGS,
983 	    "Process in jail can override host wildcard listen");
984 
985 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
986 	    OID_AUTO, "vfs_mount_nullfs", CTLFLAG_RW,
987 	    &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_NULLFS,
988 	    "Processes in jail can mount nullfs(5) filesystems");
989 
990 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
991 	    OID_AUTO, "vfs_mount_tmpfs", CTLFLAG_RW,
992 	    &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_TMPFS,
993 	    "Processes in jail can mount tmpfs(5) filesystems");
994 
995 	return 0;
996 }
997 
998 int
999 prison_sysctl_done(struct prison *pr)
1000 {
1001 	if (pr->pr_sysctl_tree) {
1002 		sysctl_ctx_free(pr->pr_sysctl_ctx);
1003 		kfree(pr->pr_sysctl_ctx, M_PRISON);
1004 		pr->pr_sysctl_tree = NULL;
1005 	}
1006 
1007 	return 0;
1008 }
1009