xref: /dragonfly/sys/kern/kern_jail.c (revision 7c4f4eee)
1 /*
2  * ----------------------------------------------------------------------------
3  * "THE BEER-WARE LICENSE" (Revision 42):
4  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
5  * can do whatever you want with this stuff. If we meet some day, and you think
6  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
7  * ----------------------------------------------------------------------------
8  *
9  */
10 /*-
11  * Copyright (c) 2006 Victor Balada Diaz <victor@bsdes.net>
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 
37 /*
38  * $FreeBSD: src/sys/kern/kern_jail.c,v 1.6.2.3 2001/08/17 01:00:26 rwatson Exp $
39  * $DragonFly: src/sys/kern/kern_jail.c,v 1.19 2008/05/17 18:20:33 dillon Exp $
40  */
41 
42 #include "opt_inet6.h"
43 
44 #include <sys/param.h>
45 #include <sys/types.h>
46 #include <sys/kernel.h>
47 #include <sys/systm.h>
48 #include <sys/errno.h>
49 #include <sys/sysproto.h>
50 #include <sys/malloc.h>
51 #include <sys/nlookup.h>
52 #include <sys/namecache.h>
53 #include <sys/proc.h>
54 #include <sys/priv.h>
55 #include <sys/jail.h>
56 #include <sys/socket.h>
57 #include <sys/sysctl.h>
58 #include <sys/kern_syscall.h>
59 #include <net/if.h>
60 #include <netinet/in.h>
61 #include <netinet6/in6_var.h>
62 
63 static struct prison	*prison_find(int);
64 static void		prison_ipcache_init(struct prison *);
65 
66 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
67 
68 SYSCTL_NODE(, OID_AUTO, jail, CTLFLAG_RW, 0,
69     "All jails settings");
70 
71 SYSCTL_NODE(_jail, OID_AUTO, defaults, CTLFLAG_RW, 0,
72     "Default options for jails");
73 
74 int	jail_set_hostname_allowed = 1;
75 SYSCTL_INT(_jail_defaults, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
76     &jail_set_hostname_allowed, 0,
77     "Processes in jail can set their hostnames");
78 
79 int	jail_socket_unixiproute_only = 1;
80 SYSCTL_INT(_jail_defaults, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
81     &jail_socket_unixiproute_only, 0,
82     "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");
83 
84 int	jail_sysvipc_allowed = 0;
85 SYSCTL_INT(_jail_defaults, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
86     &jail_sysvipc_allowed, 0,
87     "Processes in jail can use System V IPC primitives");
88 
89 int    jail_chflags_allowed = 0;
90 SYSCTL_INT(_jail_defaults, OID_AUTO, chflags_allowed, CTLFLAG_RW,
91     &jail_chflags_allowed, 0,
92     "Processes in jail can alter system file flags");
93 
94 int    jail_allow_raw_sockets = 0;
95 SYSCTL_INT(_jail_defaults, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
96     &jail_allow_raw_sockets, 0,
97     "Process in jail can create raw sockets");
98 
99 int	lastprid = 0;
100 int	prisoncount = 0;
101 
102 static struct lock jail_lock =
103        LOCK_INITIALIZER("jail", 0, LK_CANRECURSE);
104 
105 LIST_HEAD(prisonlist, prison);
106 struct	prisonlist allprison = LIST_HEAD_INITIALIZER(&allprison);
107 
108 static int
109 kern_jail_attach(int jid)
110 {
111 	struct proc *p = curthread->td_proc;
112 	struct prison *pr;
113 	struct ucred *cr;
114 	int error;
115 
116 	pr = prison_find(jid);
117 	if (pr == NULL)
118 		return(EINVAL);
119 
120 	error = kern_chroot(&pr->pr_root);
121 	if (error)
122 		return(error);
123 
124 	prison_hold(pr);
125 	lwkt_gettoken(&p->p_token);
126 	cr = cratom_proc(p);
127 	cr->cr_prison = pr;
128 	p->p_flags |= P_JAILED;
129 	lwkt_reltoken(&p->p_token);
130 
131 	return(0);
132 }
133 
134 static int
135 assign_prison_id(struct prison *pr)
136 {
137 	int tryprid;
138 	struct prison *tpr;
139 
140 	tryprid = lastprid + 1;
141 	if (tryprid == JAIL_MAX)
142 		tryprid = 1;
143 
144 	lockmgr(&jail_lock, LK_EXCLUSIVE);
145 next:
146 	LIST_FOREACH(tpr, &allprison, pr_list) {
147 		if (tpr->pr_id != tryprid)
148 			continue;
149 		tryprid++;
150 		if (tryprid == JAIL_MAX) {
151 			lockmgr(&jail_lock, LK_RELEASE);
152 			return (ERANGE);
153 		}
154 		goto next;
155 	}
156 	pr->pr_id = lastprid = tryprid;
157 	lockmgr(&jail_lock, LK_RELEASE);
158 
159 	return (0);
160 }
161 
162 static int
163 kern_jail(struct prison *pr, struct jail *j)
164 {
165 	int error;
166 	struct nlookupdata nd;
167 
168 	error = nlookup_init(&nd, j->path, UIO_USERSPACE, NLC_FOLLOW);
169 	if (error) {
170 		nlookup_done(&nd);
171 		return (error);
172 	}
173 	error = nlookup(&nd);
174 	if (error) {
175 		nlookup_done(&nd);
176 		return (error);
177 	}
178 	cache_copy(&nd.nl_nch, &pr->pr_root);
179 
180 	varsymset_init(&pr->pr_varsymset, NULL);
181 	prison_ipcache_init(pr);
182 
183 	error = assign_prison_id(pr);
184 	if (error) {
185 		varsymset_clean(&pr->pr_varsymset);
186 		nlookup_done(&nd);
187 		return (error);
188 	}
189 
190 	lockmgr(&jail_lock, LK_EXCLUSIVE);
191 	LIST_INSERT_HEAD(&allprison, pr, pr_list);
192 	++prisoncount;
193 	lockmgr(&jail_lock, LK_RELEASE);
194 
195 	error = prison_sysctl_create(pr);
196 	if (error)
197 		goto out;
198 
199 	error = kern_jail_attach(pr->pr_id);
200 	if (error)
201 		goto out2;
202 
203 	nlookup_done(&nd);
204 	return 0;
205 
206 out2:
207 	prison_sysctl_done(pr);
208 
209 out:
210 	lockmgr(&jail_lock, LK_EXCLUSIVE);
211 	LIST_REMOVE(pr, pr_list);
212 	--prisoncount;
213 	lockmgr(&jail_lock, LK_RELEASE);
214 	varsymset_clean(&pr->pr_varsymset);
215 	nlookup_done(&nd);
216 	return (error);
217 }
218 
219 /*
220  * jail()
221  *
222  * jail_args(syscallarg(struct jail *) jail)
223  *
224  * MPALMOSTSAFE
225  */
226 int
227 sys_jail(struct jail_args *uap)
228 {
229 	struct thread *td = curthread;
230 	struct prison *pr;
231 	struct jail_ip_storage *jip;
232 	struct jail j;
233 	int error;
234 	uint32_t jversion;
235 
236 	uap->sysmsg_result = -1;
237 
238 	error = priv_check(td, PRIV_JAIL_CREATE);
239 	if (error)
240 		return (error);
241 
242 	error = copyin(uap->jail, &jversion, sizeof(jversion));
243 	if (error)
244 		return (error);
245 
246 	pr = kmalloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
247 	SLIST_INIT(&pr->pr_ips);
248 	lockmgr(&jail_lock, LK_EXCLUSIVE);
249 
250 	switch (jversion) {
251 	case 0:
252 		/* Single IPv4 jails. */
253 		{
254 		struct jail_v0 jv0;
255 		struct sockaddr_in ip4addr;
256 
257 		error = copyin(uap->jail, &jv0, sizeof(jv0));
258 		if (error)
259 			goto out;
260 
261 		j.path = jv0.path;
262 		j.hostname = jv0.hostname;
263 
264 		jip = kmalloc(sizeof(*jip),  M_PRISON, M_WAITOK | M_ZERO);
265 		ip4addr.sin_family = AF_INET;
266 		ip4addr.sin_addr.s_addr = htonl(jv0.ip_number);
267 		memcpy(&jip->ip, &ip4addr, sizeof(ip4addr));
268 		SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries);
269 		break;
270 		}
271 
272 	case 1:
273 		/*
274 		 * DragonFly multi noIP/IPv4/IPv6 jails
275 		 *
276 		 * NOTE: This version is unsupported by FreeBSD
277 		 * (which uses version 2 instead).
278 		 */
279 
280 		error = copyin(uap->jail, &j, sizeof(j));
281 		if (error)
282 			goto out;
283 
284 		for (int i = 0; i < j.n_ips; i++) {
285 			jip = kmalloc(sizeof(*jip), M_PRISON,
286 				      M_WAITOK | M_ZERO);
287 			SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries);
288 			error = copyin(&j.ips[i], &jip->ip,
289 					sizeof(struct sockaddr_storage));
290 			if (error)
291 				goto out;
292 		}
293 		break;
294 	default:
295 		error = EINVAL;
296 		goto out;
297 	}
298 
299 	error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
300 	if (error)
301 		goto out;
302 
303 	/* Global settings are the default for all jails */
304 	pr->pr_set_hostname_allowed = jail_set_hostname_allowed;
305 	pr->pr_socket_unixiproute_only = jail_socket_unixiproute_only;
306 	pr->pr_sysvipc_allowed = jail_sysvipc_allowed;
307 	pr->pr_chflags_allowed = jail_chflags_allowed;
308 	pr->pr_allow_raw_sockets = jail_allow_raw_sockets;
309 
310 	error = kern_jail(pr, &j);
311 	if (error)
312 		goto out;
313 
314 	uap->sysmsg_result = pr->pr_id;
315 	lockmgr(&jail_lock, LK_RELEASE);
316 
317 	return (0);
318 
319 out:
320 	/* Delete all ips */
321 	while (!SLIST_EMPTY(&pr->pr_ips)) {
322 		jip = SLIST_FIRST(&pr->pr_ips);
323 		SLIST_REMOVE_HEAD(&pr->pr_ips, entries);
324 		kfree(jip, M_PRISON);
325 	}
326 	lockmgr(&jail_lock, LK_RELEASE);
327 	kfree(pr, M_PRISON);
328 
329 	return (error);
330 }
331 
332 /*
333  * int jail_attach(int jid);
334  *
335  * MPALMOSTSAFE
336  */
337 int
338 sys_jail_attach(struct jail_attach_args *uap)
339 {
340 	struct thread *td = curthread;
341 	int error;
342 
343 	error = priv_check(td, PRIV_JAIL_ATTACH);
344 	if (error)
345 		return(error);
346 	lockmgr(&jail_lock, LK_EXCLUSIVE);
347 	error = kern_jail_attach(uap->jid);
348 	lockmgr(&jail_lock, LK_RELEASE);
349 	return (error);
350 }
351 
352 static void
353 prison_ipcache_init(struct prison *pr)
354 {
355 	struct jail_ip_storage *jis;
356 	struct sockaddr_in *ip4;
357 	struct sockaddr_in6 *ip6;
358 
359 	lockmgr(&jail_lock, LK_EXCLUSIVE);
360 	SLIST_FOREACH(jis, &pr->pr_ips, entries) {
361 		switch (jis->ip.ss_family) {
362 		case AF_INET:
363 			ip4 = (struct sockaddr_in *)&jis->ip;
364 			if ((ntohl(ip4->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) ==
365 			    IN_LOOPBACKNET) {
366 				/* loopback address */
367 				if (pr->local_ip4 == NULL)
368 					pr->local_ip4 = ip4;
369 			} else {
370 				/* public address */
371 				if (pr->nonlocal_ip4 == NULL)
372 					pr->nonlocal_ip4 = ip4;
373 			}
374 			break;
375 
376 		case AF_INET6:
377 			ip6 = (struct sockaddr_in6 *)&jis->ip;
378 			if (IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr)) {
379 				/* loopback address */
380 				if (pr->local_ip6 == NULL)
381 					pr->local_ip6 = ip6;
382 			} else {
383 				/* public address */
384 				if (pr->nonlocal_ip6 == NULL)
385 					pr->nonlocal_ip6 = ip6;
386 			}
387 			break;
388 		}
389 	}
390 	lockmgr(&jail_lock, LK_RELEASE);
391 }
392 
393 /*
394  * Changes INADDR_LOOPBACK for a valid jail address.
395  * ip is in network byte order.
396  * Returns 1 if the ip is among jail valid ips.
397  * Returns 0 if is not among jail valid ips or
398  * if couldn't replace INADDR_LOOPBACK for a valid
399  * IP.
400  */
401 int
402 prison_replace_wildcards(struct thread *td, struct sockaddr *ip)
403 {
404 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
405 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
406 	struct prison *pr;
407 
408 	if (td->td_proc == NULL || td->td_ucred == NULL)
409 		return (1);
410 	if ((pr = td->td_ucred->cr_prison) == NULL)
411 		return (1);
412 
413 	if ((ip->sa_family == AF_INET &&
414 	    ip4->sin_addr.s_addr == htonl(INADDR_ANY)) ||
415 	    (ip->sa_family == AF_INET6 &&
416 	    IN6_IS_ADDR_UNSPECIFIED(&ip6->sin6_addr)))
417 		return (1);
418 	if ((ip->sa_family == AF_INET &&
419 	    ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) ||
420 	    (ip->sa_family == AF_INET6 &&
421 	    IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) {
422 		if (!prison_get_local(pr, ip->sa_family, ip) &&
423 		    !prison_get_nonlocal(pr, ip->sa_family, ip))
424 			return(0);
425 		else
426 			return(1);
427 	}
428 	if (jailed_ip(pr, ip))
429 		return(1);
430 	return(0);
431 }
432 
433 int
434 prison_remote_ip(struct thread *td, struct sockaddr *ip)
435 {
436 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
437 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
438 	struct prison *pr;
439 
440 	if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL)
441 		return(1);
442 	if ((pr = td->td_ucred->cr_prison) == NULL)
443 		return(1);
444 	if ((ip->sa_family == AF_INET &&
445 	    ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) ||
446 	    (ip->sa_family == AF_INET6 &&
447 	    IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) {
448 		if (!prison_get_local(pr, ip->sa_family, ip) &&
449 		    !prison_get_nonlocal(pr, ip->sa_family, ip))
450 			return(0);
451 		else
452 			return(1);
453 	}
454 	return(1);
455 }
456 
457 /*
458  * Prison get non loopback ip:
459  * - af is the address family of the ip we want (AF_INET|AF_INET6).
460  * - If ip != NULL, put the first IP address that is not a loopback address
461  *   into *ip.
462  *
463  * ip is in network by order and we don't touch it unless we find a valid ip.
464  * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
465  * or NULL.  This struct may not be modified.
466  */
467 struct sockaddr *
468 prison_get_nonlocal(struct prison *pr, sa_family_t af, struct sockaddr *ip)
469 {
470 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
471 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
472 
473 	/* Check if it is cached */
474 	switch(af) {
475 	case AF_INET:
476 		if (ip4 != NULL && pr->nonlocal_ip4 != NULL)
477 			ip4->sin_addr.s_addr = pr->nonlocal_ip4->sin_addr.s_addr;
478 		return (struct sockaddr *)pr->nonlocal_ip4;
479 
480 	case AF_INET6:
481 		if (ip6 != NULL && pr->nonlocal_ip6 != NULL)
482 			ip6->sin6_addr = pr->nonlocal_ip6->sin6_addr;
483 		return (struct sockaddr *)pr->nonlocal_ip6;
484 	}
485 
486 	/* NOTREACHED */
487 	return NULL;
488 }
489 
490 /*
491  * Prison get loopback ip.
492  * - af is the address family of the ip we want (AF_INET|AF_INET6).
493  * - If ip != NULL, put the first IP address that is not a loopback address
494  *   into *ip.
495  *
496  * ip is in network by order and we don't touch it unless we find a valid ip.
497  * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
498  * or NULL.  This struct may not be modified.
499  */
500 struct sockaddr *
501 prison_get_local(struct prison *pr, sa_family_t af, struct sockaddr *ip)
502 {
503 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
504 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
505 
506 	/* Check if it is cached */
507 	switch(af) {
508 	case AF_INET:
509 		if (ip4 != NULL && pr->local_ip4 != NULL)
510 			ip4->sin_addr.s_addr = pr->local_ip4->sin_addr.s_addr;
511 		return (struct sockaddr *)pr->local_ip4;
512 
513 	case AF_INET6:
514 		if (ip6 != NULL && pr->local_ip6 != NULL)
515 			ip6->sin6_addr = pr->local_ip6->sin6_addr;
516 		return (struct sockaddr *)pr->local_ip6;
517 	}
518 
519 	/* NOTREACHED */
520 	return NULL;
521 }
522 
523 /* Check if the IP is among ours, if it is return 1, else 0 */
524 int
525 jailed_ip(struct prison *pr, struct sockaddr *ip)
526 {
527 	struct jail_ip_storage *jis;
528 	struct sockaddr_in *jip4, *ip4;
529 	struct sockaddr_in6 *jip6, *ip6;
530 
531 	if (pr == NULL)
532 		return(0);
533 	ip4 = (struct sockaddr_in *)ip;
534 	ip6 = (struct sockaddr_in6 *)ip;
535 
536 	lockmgr(&jail_lock, LK_EXCLUSIVE);
537 	SLIST_FOREACH(jis, &pr->pr_ips, entries) {
538 		switch (ip->sa_family) {
539 		case AF_INET:
540 			jip4 = (struct sockaddr_in *) &jis->ip;
541 			if (jip4->sin_family == AF_INET &&
542 			    ip4->sin_addr.s_addr == jip4->sin_addr.s_addr) {
543 				lockmgr(&jail_lock, LK_RELEASE);
544 				return(1);
545 			}
546 			break;
547 		case AF_INET6:
548 			jip6 = (struct sockaddr_in6 *) &jis->ip;
549 			if (jip6->sin6_family == AF_INET6 &&
550 			    IN6_ARE_ADDR_EQUAL(&ip6->sin6_addr,
551 				&jip6->sin6_addr)) {
552 				lockmgr(&jail_lock, LK_RELEASE);
553 				return(1);
554 			}
555 			break;
556 		}
557 	}
558 	lockmgr(&jail_lock, LK_RELEASE);
559 	/* Ip not in list */
560 	return(0);
561 }
562 
563 int
564 prison_if(struct ucred *cred, struct sockaddr *sa)
565 {
566 	struct prison *pr;
567 	struct sockaddr_in *sai = (struct sockaddr_in*) sa;
568 
569 	pr = cred->cr_prison;
570 
571 	if (((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6))
572 	    && jail_socket_unixiproute_only)
573 		return(1);
574 	else if ((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6))
575 		return(0);
576 	else if (jailed_ip(pr, sa))
577 		return(0);
578 	return(1);
579 }
580 
581 /*
582  * Returns a prison instance, or NULL on failure.
583  */
584 static struct prison *
585 prison_find(int prid)
586 {
587 	struct prison *pr;
588 
589 	lockmgr(&jail_lock, LK_EXCLUSIVE);
590 	LIST_FOREACH(pr, &allprison, pr_list) {
591 		if (pr->pr_id == prid)
592 			break;
593 	}
594 	lockmgr(&jail_lock, LK_RELEASE);
595 
596 	return(pr);
597 }
598 
599 static int
600 sysctl_jail_list(SYSCTL_HANDLER_ARGS)
601 {
602 	struct thread *td = curthread;
603 	struct jail_ip_storage *jip;
604 #ifdef INET6
605 	struct sockaddr_in6 *jsin6;
606 #endif
607 	struct sockaddr_in *jsin;
608 	struct lwp *lp;
609 	struct prison *pr;
610 	unsigned int jlssize, jlsused;
611 	int count, error;
612 	char *jls; /* Jail list */
613 	char *oip; /* Output ip */
614 	char *fullpath, *freepath;
615 
616 	jlsused = 0;
617 
618 	if (jailed(td->td_ucred))
619 		return (0);
620 	lp = td->td_lwp;
621 retry:
622 	count = prisoncount;
623 
624 	if (count == 0)
625 		return(0);
626 
627 	jlssize = (count * 1024);
628 	jls = kmalloc(jlssize + 1, M_TEMP, M_WAITOK | M_ZERO);
629 	if (count < prisoncount) {
630 		kfree(jls, M_TEMP);
631 		goto retry;
632 	}
633 	count = prisoncount;
634 
635 	lockmgr(&jail_lock, LK_EXCLUSIVE);
636 	LIST_FOREACH(pr, &allprison, pr_list) {
637 		error = cache_fullpath(lp->lwp_proc, &pr->pr_root, NULL,
638 					&fullpath, &freepath, 0);
639 		if (error)
640 			continue;
641 		if (jlsused && jlsused < jlssize)
642 			jls[jlsused++] = '\n';
643 		count = ksnprintf(jls + jlsused, (jlssize - jlsused),
644 				 "%d %s %s",
645 				 pr->pr_id, pr->pr_host, fullpath);
646 		kfree(freepath, M_TEMP);
647 		if (count < 0)
648 			goto end;
649 		jlsused += count;
650 
651 		/* Copy the IPS */
652 		SLIST_FOREACH(jip, &pr->pr_ips, entries) {
653 			char buf[INET_ADDRSTRLEN];
654 
655 			jsin = (struct sockaddr_in *)&jip->ip;
656 
657 			switch(jsin->sin_family) {
658 			case AF_INET:
659 				oip = kinet_ntoa(jsin->sin_addr, buf);
660 				break;
661 #ifdef INET6
662 			case AF_INET6:
663 				jsin6 = (struct sockaddr_in6 *)&jip->ip;
664 				oip = ip6_sprintf(&jsin6->sin6_addr);
665 				break;
666 #endif
667 			default:
668 				oip = "?family?";
669 				break;
670 			}
671 
672 			if ((jlssize - jlsused) < (strlen(oip) + 1)) {
673 				error = ERANGE;
674 				goto end;
675 			}
676 			count = ksnprintf(jls + jlsused, (jlssize - jlsused),
677 					  " %s", oip);
678 			if (count < 0)
679 				goto end;
680 			jlsused += count;
681 		}
682 	}
683 
684 	/*
685 	 * The format is:
686 	 * pr_id <SPC> hostname1 <SPC> PATH1 <SPC> IP1 <SPC> IP2\npr_id...
687 	 */
688 	error = SYSCTL_OUT(req, jls, jlsused);
689 end:
690 	lockmgr(&jail_lock, LK_RELEASE);
691 	kfree(jls, M_TEMP);
692 
693 	return(error);
694 }
695 
696 SYSCTL_OID(_jail, OID_AUTO, list, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
697 	   sysctl_jail_list, "A", "List of active jails");
698 
699 static int
700 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
701 {
702 	int error, injail;
703 
704 	injail = jailed(req->td->td_ucred);
705 	error = SYSCTL_OUT(req, &injail, sizeof(injail));
706 
707 	return (error);
708 }
709 
710 SYSCTL_PROC(_jail, OID_AUTO, jailed,
711 	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NOLOCK, NULL, 0,
712 	    sysctl_jail_jailed, "I", "Process in jail?");
713 
714 /*
715  * MPSAFE
716  */
717 void
718 prison_hold(struct prison *pr)
719 {
720 	atomic_add_int(&pr->pr_ref, 1);
721 }
722 
723 /*
724  * MPALMOSTSAFE
725  */
726 void
727 prison_free(struct prison *pr)
728 {
729 	struct jail_ip_storage *jls;
730 
731 	KKASSERT(pr->pr_ref > 0);
732 	if (atomic_fetchadd_int(&pr->pr_ref, -1) != 1)
733 		return;
734 
735 	/*
736 	 * The MP lock is needed on the last ref to adjust
737 	 * the list.
738 	 */
739 	lockmgr(&jail_lock, LK_EXCLUSIVE);
740 	if (pr->pr_ref) {
741 		lockmgr(&jail_lock, LK_RELEASE);
742 		return;
743 	}
744 	LIST_REMOVE(pr, pr_list);
745 	--prisoncount;
746 
747 	/*
748 	 * Clean up
749 	 */
750 	while (!SLIST_EMPTY(&pr->pr_ips)) {
751 		jls = SLIST_FIRST(&pr->pr_ips);
752 		SLIST_REMOVE_HEAD(&pr->pr_ips, entries);
753 		kfree(jls, M_PRISON);
754 	}
755 	lockmgr(&jail_lock, LK_RELEASE);
756 
757 	if (pr->pr_linux != NULL)
758 		kfree(pr->pr_linux, M_PRISON);
759 	varsymset_clean(&pr->pr_varsymset);
760 
761 	/* Release the sysctl tree */
762 	prison_sysctl_done(pr);
763 
764 	cache_drop(&pr->pr_root);
765 	kfree(pr, M_PRISON);
766 }
767 
768 /*
769  * Check if permisson for a specific privilege is granted within jail.
770  *
771  * MPSAFE
772  */
773 int
774 prison_priv_check(struct ucred *cred, int priv)
775 {
776 	struct prison *pr = cred->cr_prison;
777 
778 	if (!jailed(cred))
779 		return (0);
780 
781 	switch (priv) {
782 	case PRIV_CRED_SETUID:
783 	case PRIV_CRED_SETEUID:
784 	case PRIV_CRED_SETGID:
785 	case PRIV_CRED_SETEGID:
786 	case PRIV_CRED_SETGROUPS:
787 	case PRIV_CRED_SETREUID:
788 	case PRIV_CRED_SETREGID:
789 	case PRIV_CRED_SETRESUID:
790 	case PRIV_CRED_SETRESGID:
791 
792 	case PRIV_VFS_SYSFLAGS:
793 	case PRIV_VFS_CHOWN:
794 	case PRIV_VFS_CHMOD:
795 	case PRIV_VFS_CHROOT:
796 	case PRIV_VFS_LINK:
797 	case PRIV_VFS_CHFLAGS_DEV:
798 	case PRIV_VFS_REVOKE:
799 	case PRIV_VFS_MKNOD_BAD:
800 	case PRIV_VFS_MKNOD_WHT:
801 	case PRIV_VFS_MKNOD_DIR:
802 	case PRIV_VFS_SETATTR:
803 	case PRIV_VFS_SETGID:
804 
805 	case PRIV_PROC_SETRLIMIT:
806 	case PRIV_PROC_SETLOGIN:
807 
808 	case PRIV_SYSCTL_WRITEJAIL:
809 
810 	case PRIV_VARSYM_SYS:
811 
812 	case PRIV_SETHOSTNAME:
813 
814 	case PRIV_PROC_TRESPASS:
815 
816 		return (0);
817 
818 	case PRIV_UFS_QUOTAON:
819 	case PRIV_UFS_QUOTAOFF:
820 	case PRIV_VFS_SETQUOTA:
821 	case PRIV_UFS_SETUSE:
822 	case PRIV_VFS_GETQUOTA:
823 		return (0);
824 
825 
826 	case PRIV_DEBUG_UNPRIV:
827 		return (0);
828 
829 
830 		/*
831 		 * Allow jailed root to bind reserved ports.
832 		 */
833 	case PRIV_NETINET_RESERVEDPORT:
834 		return (0);
835 
836 
837 		/*
838 		 * Conditionally allow creating raw sockets in jail.
839 		 */
840 	case PRIV_NETINET_RAW:
841 		if (pr->pr_allow_raw_sockets)
842 			return (0);
843 		else
844 			return (EPERM);
845 
846 	case PRIV_HAMMER_IOCTL:
847 		return (0);
848 
849 	default:
850 
851 		return (EPERM);
852 	}
853 }
854 
855 static int
856 sysctl_prison_switch_int8(SYSCTL_HANDLER_ARGS, int8_t *val)
857 {
858 	int new_val;
859 	int error;
860 
861 	new_val = *val;
862 
863 	error = sysctl_handle_int(oidp, &new_val, 0, req);
864 	if (error != 0 || req->newptr == NULL)
865 		return error;
866 
867 	if (new_val != 0 && new_val != 1)
868 		return EINVAL;
869 
870 	*val = new_val;
871 
872 	return error;
873 
874 }
875 
876 static int
877 sysctl_prison_set_hostname_allowed(SYSCTL_HANDLER_ARGS)
878 {
879 	struct prison *pr;
880 	int error;
881 
882 	if (arg1 == NULL)
883 		return EINVAL;
884 	pr = arg1;
885 
886 	error = sysctl_prison_switch_int8(oidp, arg1, arg2, req,
887 	    &pr->pr_set_hostname_allowed);
888 
889 	return error;
890 }
891 
892 static int
893 sysctl_prison_socket_unixiproute_only(SYSCTL_HANDLER_ARGS)
894 {
895 	struct prison *pr;
896 	int error;
897 
898 	if (arg1 == NULL)
899 		return EINVAL;
900 	pr = arg1;
901 
902 	error = sysctl_prison_switch_int8(oidp, arg1, arg2, req,
903 	    &pr->pr_socket_unixiproute_only);
904 
905 	return error;
906 }
907 
908 static int
909 sysctl_prison_sysvipc_allowed(SYSCTL_HANDLER_ARGS)
910 {
911 	struct prison *pr;
912 	int error;
913 
914 	if (arg1 == NULL)
915 		return EINVAL;
916 	pr = arg1;
917 
918 	error = sysctl_prison_switch_int8(oidp, arg1, arg2, req,
919 	    &pr->pr_sysvipc_allowed);
920 
921 	return error;
922 }
923 
924 static int
925 sysctl_prison_chflags_allowed(SYSCTL_HANDLER_ARGS)
926 {
927 	struct prison *pr;
928 	int error;
929 
930 	if (arg1 == NULL)
931 		return EINVAL;
932 	pr = arg1;
933 
934 	error = sysctl_prison_switch_int8(oidp, arg1, arg2, req,
935 	    &pr->pr_chflags_allowed);
936 
937 	return error;
938 }
939 
940 static int
941 sysctl_prison_allow_raw_sockets(SYSCTL_HANDLER_ARGS)
942 {
943 	struct prison *pr;
944 	int error;
945 
946 	if (arg1 == NULL)
947 		return EINVAL;
948 	pr = arg1;
949 
950 	error = sysctl_prison_switch_int8(oidp, arg1, arg2, req,
951 	    &pr->pr_allow_raw_sockets);
952 
953 	return error;
954 }
955 
956 /*
957  * Create a per-jail sysctl tree to control the prison
958  */
959 int
960 prison_sysctl_create(struct prison *pr)
961 {
962 	char id_str[7];
963 
964 	ksnprintf(id_str, 6, "%d", pr->pr_id);
965 
966 	pr->pr_sysctl_ctx = (struct sysctl_ctx_list *) kmalloc(
967 		sizeof(struct sysctl_ctx_list), M_TEMP, M_WAITOK | M_ZERO);
968 
969 	sysctl_ctx_init(pr->pr_sysctl_ctx);
970 
971 	/* Main jail node */
972 	pr->pr_sysctl_tree = SYSCTL_ADD_NODE(pr->pr_sysctl_ctx,
973 	    SYSCTL_STATIC_CHILDREN(_jail),
974 	    OID_AUTO, id_str, CTLFLAG_RD, 0,
975 	    "Jail specific settings");
976 
977 	SYSCTL_ADD_PROC(pr->pr_sysctl_ctx,
978 	    SYSCTL_CHILDREN(pr->pr_sysctl_tree), OID_AUTO,
979 	    "set_hostname_allowed", CTLTYPE_INT | CTLFLAG_RW,
980 	    pr, sizeof(pr->pr_set_hostname_allowed),
981 	    sysctl_prison_set_hostname_allowed, "I",
982 	    "Processes in jail can set their hostnames");
983 	SYSCTL_ADD_PROC(pr->pr_sysctl_ctx,
984 	    SYSCTL_CHILDREN(pr->pr_sysctl_tree), OID_AUTO,
985 	    "socket_unixiproute_only", CTLTYPE_INT | CTLFLAG_RW,
986 	    pr, sizeof(pr->pr_socket_unixiproute_only),
987 	    sysctl_prison_socket_unixiproute_only, "I",
988 	    "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");
989 	SYSCTL_ADD_PROC(pr->pr_sysctl_ctx,
990 	    SYSCTL_CHILDREN(pr->pr_sysctl_tree), OID_AUTO,
991 	    "sysvipc_allowed", CTLTYPE_INT | CTLFLAG_RW,
992 	    pr, sizeof(pr->pr_sysvipc_allowed),
993 	    sysctl_prison_sysvipc_allowed, "I",
994 	    "Processes in jail can use System V IPC primitives");
995 	SYSCTL_ADD_PROC(pr->pr_sysctl_ctx,
996 	    SYSCTL_CHILDREN(pr->pr_sysctl_tree), OID_AUTO,
997 	    "chflags_allowed", CTLTYPE_INT | CTLFLAG_RW,
998 	    pr, sizeof(pr->pr_chflags_allowed),
999 	    sysctl_prison_chflags_allowed, "I",
1000 	    "Processes in jail can alter system file flags");
1001 	SYSCTL_ADD_PROC(pr->pr_sysctl_ctx,
1002 	    SYSCTL_CHILDREN(pr->pr_sysctl_tree), OID_AUTO,
1003 	    "allow_raw_sockets", CTLTYPE_INT | CTLFLAG_RW,
1004 	    pr, sizeof(pr->pr_allow_raw_sockets),
1005 	    sysctl_prison_allow_raw_sockets, "I",
1006 	    "Process in jail can create raw sockets");
1007 
1008 	return 0;
1009 }
1010 
1011 int
1012 prison_sysctl_done(struct prison *pr)
1013 {
1014 	if (pr->pr_sysctl_tree) {
1015 		sysctl_ctx_free(pr->pr_sysctl_ctx);
1016 		kfree(pr->pr_sysctl_ctx, M_TEMP);
1017 		pr->pr_sysctl_tree = NULL;
1018 	}
1019 
1020 	return 0;
1021 }
1022