1 /* 2 * ---------------------------------------------------------------------------- 3 * "THE BEER-WARE LICENSE" (Revision 42): 4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 5 * can do whatever you want with this stuff. If we meet some day, and you think 6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 7 * ---------------------------------------------------------------------------- 8 * 9 */ 10 /*- 11 * Copyright (c) 2006 Victor Balada Diaz <victor@bsdes.net> 12 * All rights reserved. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 37 /* 38 * $FreeBSD: src/sys/kern/kern_jail.c,v 1.6.2.3 2001/08/17 01:00:26 rwatson Exp $ 39 */ 40 41 #include "opt_inet6.h" 42 43 #include <sys/param.h> 44 #include <sys/types.h> 45 #include <sys/kernel.h> 46 #include <sys/systm.h> 47 #include <sys/errno.h> 48 #include <sys/sysmsg.h> 49 #include <sys/malloc.h> 50 #include <sys/nlookup.h> 51 #include <sys/namecache.h> 52 #include <sys/proc.h> 53 #include <sys/caps.h> 54 #include <sys/jail.h> 55 #include <sys/socket.h> 56 #include <sys/sysctl.h> 57 #include <sys/kern_syscall.h> 58 #include <net/if.h> 59 #include <netinet/in.h> 60 #include <netinet6/in6_var.h> 61 62 static struct prison *prison_find(int); 63 static void prison_ipcache_init(struct prison *); 64 65 __read_mostly static prison_cap_t prison_default_caps; 66 67 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); 68 69 SYSCTL_NODE(, OID_AUTO, jail, CTLFLAG_RW, 0, 70 "All jails settings"); 71 72 SYSCTL_NODE(_jail, OID_AUTO, defaults, CTLFLAG_RW, 0, 73 "Default options for jails"); 74 75 /*#define PRISON_DEBUG*/ 76 #ifdef PRISON_DEBUG 77 __read_mostly static int prison_debug; 78 SYSCTL_INT(_jail, OID_AUTO, debug, CTLFLAG_RW, &prison_debug, 0, 79 "Debug prison refs"); 80 #endif 81 82 SYSCTL_BIT64(_jail_defaults, OID_AUTO, set_hostname_allowed, CTLFLAG_RW, 83 &prison_default_caps, 1, PRISON_CAP_SYS_SET_HOSTNAME, 84 "Processes in jail can set their hostnames"); 85 86 SYSCTL_BIT64(_jail_defaults, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW, 87 &prison_default_caps, 0, PRISON_CAP_NET_UNIXIPROUTE, 88 "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only"); 89 90 SYSCTL_BIT64(_jail_defaults, OID_AUTO, sysvipc_allowed, CTLFLAG_RW, 91 &prison_default_caps, 0, PRISON_CAP_SYS_SYSVIPC, 92 "Processes in jail can use System V IPC primitives"); 93 94 SYSCTL_BIT64(_jail_defaults, OID_AUTO, chflags_allowed, CTLFLAG_RW, 95 &prison_default_caps, 0, PRISON_CAP_VFS_CHFLAGS, 96 "Processes in jail can alter system file flags"); 97 98 SYSCTL_BIT64(_jail_defaults, OID_AUTO, allow_raw_sockets, CTLFLAG_RW, 99 &prison_default_caps, 0, PRISON_CAP_NET_RAW_SOCKETS, 100 "Process in jail can create raw sockets"); 101 102 SYSCTL_BIT64(_jail_defaults, OID_AUTO, allow_listen_override, CTLFLAG_RW, 103 &prison_default_caps, 0, PRISON_CAP_NET_LISTEN_OVERRIDE, 104 "Process in jail can override host wildcard listen"); 105 106 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_nullfs, CTLFLAG_RW, 107 &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_NULLFS, 108 "Process in jail can mount nullfs(5) filesystems"); 109 110 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_tmpfs, CTLFLAG_RW, 111 &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_TMPFS, 112 "Process in jail can mount tmpfs(5) filesystems"); 113 114 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_devfs, CTLFLAG_RW, 115 &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_DEVFS, 116 "Process in jail can mount devfs(5) filesystems"); 117 118 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_procfs, CTLFLAG_RW, 119 &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_PROCFS, 120 "Process in jail can mount procfs(5) filesystems"); 121 122 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_fusefs, CTLFLAG_RW, 123 &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_FUSEFS, 124 "Process in jail can mount fuse filesystems"); 125 126 static int lastprid = 0; 127 static int prisoncount = 0; 128 129 static struct lock jail_lock = 130 LOCK_INITIALIZER("jail", 0, LK_CANRECURSE); 131 132 LIST_HEAD(prisonlist, prison); 133 static struct prisonlist allprison = LIST_HEAD_INITIALIZER(&allprison); 134 135 static int 136 kern_jail_attach(int jid) 137 { 138 struct proc *p = curthread->td_proc; 139 struct prison *pr; 140 struct ucred *cr; 141 int error; 142 143 pr = prison_find(jid); 144 if (pr == NULL) 145 return(EINVAL); 146 147 error = kern_chroot(&pr->pr_root); 148 if (error) 149 return(error); 150 151 prison_hold(pr); 152 lwkt_gettoken(&p->p_token); 153 cr = cratom_proc(p); 154 cr->cr_prison = pr; 155 p->p_flags |= P_JAILED; 156 caps_set_locked(p, SYSCAP_RESTRICTEDROOT, __SYSCAP_ALL); 157 lwkt_reltoken(&p->p_token); 158 159 return(0); 160 } 161 162 static int 163 assign_prison_id(struct prison *pr) 164 { 165 int tryprid; 166 struct prison *tpr; 167 168 tryprid = lastprid + 1; 169 if (tryprid == JAIL_MAX) 170 tryprid = 1; 171 172 lockmgr(&jail_lock, LK_EXCLUSIVE); 173 next: 174 LIST_FOREACH(tpr, &allprison, pr_list) { 175 if (tpr->pr_id != tryprid) 176 continue; 177 tryprid++; 178 if (tryprid == JAIL_MAX) { 179 lockmgr(&jail_lock, LK_RELEASE); 180 return (ERANGE); 181 } 182 goto next; 183 } 184 pr->pr_id = lastprid = tryprid; 185 lockmgr(&jail_lock, LK_RELEASE); 186 187 return (0); 188 } 189 190 static int 191 kern_jail(struct prison *pr, struct jail *j) 192 { 193 int error; 194 struct nlookupdata nd; 195 196 error = nlookup_init(&nd, j->path, UIO_USERSPACE, NLC_FOLLOW); 197 if (error) { 198 nlookup_done(&nd); 199 return (error); 200 } 201 error = nlookup(&nd); 202 if (error) { 203 nlookup_done(&nd); 204 return (error); 205 } 206 cache_copy(&nd.nl_nch, &pr->pr_root); 207 208 varsymset_init(&pr->pr_varsymset, NULL); 209 prison_ipcache_init(pr); 210 211 error = assign_prison_id(pr); 212 if (error) { 213 varsymset_clean(&pr->pr_varsymset); 214 nlookup_done(&nd); 215 return (error); 216 } 217 218 lockmgr(&jail_lock, LK_EXCLUSIVE); 219 LIST_INSERT_HEAD(&allprison, pr, pr_list); 220 ++prisoncount; 221 lockmgr(&jail_lock, LK_RELEASE); 222 223 error = prison_sysctl_create(pr); 224 if (error) 225 goto out; 226 227 error = kern_jail_attach(pr->pr_id); 228 if (error) 229 goto out2; 230 231 nlookup_done(&nd); 232 return 0; 233 234 out2: 235 prison_sysctl_done(pr); 236 237 out: 238 lockmgr(&jail_lock, LK_EXCLUSIVE); 239 LIST_REMOVE(pr, pr_list); 240 --prisoncount; 241 lockmgr(&jail_lock, LK_RELEASE); 242 varsymset_clean(&pr->pr_varsymset); 243 nlookup_done(&nd); 244 return (error); 245 } 246 247 /* 248 * jail() 249 * 250 * jail_args(syscallarg(struct jail *) jail) 251 * 252 * MPALMOSTSAFE 253 */ 254 int 255 sys_jail(struct sysmsg *sysmsg, const struct jail_args *uap) 256 { 257 struct prison *pr; 258 struct jail_ip_storage *jip; 259 struct jail j; 260 int error; 261 uint32_t jversion; 262 263 sysmsg->sysmsg_result = -1; 264 265 error = caps_priv_check_self(SYSCAP_NOJAIL_CREATE); 266 if (error) 267 return (error); 268 269 error = copyin(uap->jail, &jversion, sizeof(jversion)); 270 if (error) 271 return (error); 272 273 pr = kmalloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 274 SLIST_INIT(&pr->pr_ips); 275 lockmgr(&jail_lock, LK_EXCLUSIVE); 276 277 switch (jversion) { 278 case 0: 279 /* Single IPv4 jails. */ 280 { 281 struct jail_v0 jv0; 282 struct sockaddr_in ip4addr; 283 284 error = copyin(uap->jail, &jv0, sizeof(jv0)); 285 if (error) 286 goto out; 287 288 j.path = jv0.path; 289 j.hostname = jv0.hostname; 290 291 jip = kmalloc(sizeof(*jip), M_PRISON, M_WAITOK | M_ZERO); 292 ip4addr.sin_family = AF_INET; 293 ip4addr.sin_addr.s_addr = htonl(jv0.ip_number); 294 memcpy(&jip->ip, &ip4addr, sizeof(ip4addr)); 295 SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries); 296 break; 297 } 298 299 case 1: 300 /* 301 * DragonFly multi noIP/IPv4/IPv6 jails 302 * 303 * NOTE: This version is unsupported by FreeBSD 304 * (which uses version 2 instead). 305 */ 306 307 error = copyin(uap->jail, &j, sizeof(j)); 308 if (error) 309 goto out; 310 311 for (int i = 0; i < j.n_ips; i++) { 312 jip = kmalloc(sizeof(*jip), M_PRISON, 313 M_WAITOK | M_ZERO); 314 SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries); 315 error = copyin(&j.ips[i], &jip->ip, 316 sizeof(struct sockaddr_storage)); 317 if (error) 318 goto out; 319 } 320 break; 321 default: 322 error = EINVAL; 323 goto out; 324 } 325 326 error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); 327 if (error) 328 goto out; 329 330 /* Use default capabilities as a template */ 331 pr->pr_caps = prison_default_caps; 332 333 error = kern_jail(pr, &j); 334 if (error) 335 goto out; 336 337 sysmsg->sysmsg_result = pr->pr_id; 338 lockmgr(&jail_lock, LK_RELEASE); 339 340 return (0); 341 342 out: 343 /* Delete all ips */ 344 while (!SLIST_EMPTY(&pr->pr_ips)) { 345 jip = SLIST_FIRST(&pr->pr_ips); 346 SLIST_REMOVE_HEAD(&pr->pr_ips, entries); 347 kfree(jip, M_PRISON); 348 } 349 lockmgr(&jail_lock, LK_RELEASE); 350 kfree(pr, M_PRISON); 351 352 return (error); 353 } 354 355 /* 356 * int jail_attach(int jid); 357 * 358 * MPALMOSTSAFE 359 */ 360 int 361 sys_jail_attach(struct sysmsg *sysmsg, const struct jail_attach_args *uap) 362 { 363 int error; 364 365 error = caps_priv_check_self(SYSCAP_NOJAIL_ATTACH); 366 if (error) 367 return(error); 368 lockmgr(&jail_lock, LK_EXCLUSIVE); 369 error = kern_jail_attach(uap->jid); 370 lockmgr(&jail_lock, LK_RELEASE); 371 return (error); 372 } 373 374 static void 375 prison_ipcache_init(struct prison *pr) 376 { 377 struct jail_ip_storage *jis; 378 struct sockaddr_in *ip4; 379 struct sockaddr_in6 *ip6; 380 381 lockmgr(&jail_lock, LK_EXCLUSIVE); 382 SLIST_FOREACH(jis, &pr->pr_ips, entries) { 383 switch (jis->ip.ss_family) { 384 case AF_INET: 385 ip4 = (struct sockaddr_in *)&jis->ip; 386 if ((ntohl(ip4->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == 387 IN_LOOPBACKNET) { 388 /* loopback address */ 389 if (pr->local_ip4 == NULL) 390 pr->local_ip4 = ip4; 391 } else { 392 /* public address */ 393 if (pr->nonlocal_ip4 == NULL) 394 pr->nonlocal_ip4 = ip4; 395 } 396 break; 397 398 case AF_INET6: 399 ip6 = (struct sockaddr_in6 *)&jis->ip; 400 if (IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr)) { 401 /* loopback address */ 402 if (pr->local_ip6 == NULL) 403 pr->local_ip6 = ip6; 404 } else { 405 /* public address */ 406 if (pr->nonlocal_ip6 == NULL) 407 pr->nonlocal_ip6 = ip6; 408 } 409 break; 410 } 411 } 412 lockmgr(&jail_lock, LK_RELEASE); 413 } 414 415 /* 416 * Changes INADDR_LOOPBACK for a valid jail address. 417 * ip is in network byte order. 418 * Returns 1 if the ip is among jail valid ips. 419 * Returns 0 if is not among jail valid ips or 420 * if couldn't replace INADDR_LOOPBACK for a valid 421 * IP. 422 */ 423 int 424 prison_replace_wildcards(struct thread *td, struct sockaddr *ip) 425 { 426 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip; 427 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip; 428 struct prison *pr; 429 430 if (td->td_proc == NULL || td->td_ucred == NULL) 431 return (1); 432 if ((pr = td->td_ucred->cr_prison) == NULL) 433 return (1); 434 435 if ((ip->sa_family == AF_INET && 436 ip4->sin_addr.s_addr == htonl(INADDR_ANY)) || 437 (ip->sa_family == AF_INET6 && 438 IN6_IS_ADDR_UNSPECIFIED(&ip6->sin6_addr))) 439 return (1); 440 if ((ip->sa_family == AF_INET && 441 ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) || 442 (ip->sa_family == AF_INET6 && 443 IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) { 444 if (!prison_get_local(pr, ip->sa_family, ip) && 445 !prison_get_nonlocal(pr, ip->sa_family, ip)) 446 return(0); 447 else 448 return(1); 449 } 450 if (jailed_ip(pr, ip)) 451 return(1); 452 return(0); 453 } 454 455 /* 456 * Convert the localhost IP to the actual jail IP 457 */ 458 int 459 prison_remote_ip(struct thread *td, struct sockaddr *ip) 460 { 461 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip; 462 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip; 463 struct prison *pr; 464 465 if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL) 466 return(1); 467 if ((pr = td->td_ucred->cr_prison) == NULL) 468 return(1); 469 if ((ip->sa_family == AF_INET && 470 ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) || 471 (ip->sa_family == AF_INET6 && 472 IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) { 473 if (!prison_get_local(pr, ip->sa_family, ip) && 474 !prison_get_nonlocal(pr, ip->sa_family, ip)) 475 return(0); 476 else 477 return(1); 478 } 479 return(1); 480 } 481 482 /* 483 * Convert the jail IP back to localhost 484 * 485 * Used by getsockname() and getpeername() to convert the in-jail loopback 486 * address back to LOCALHOST. For example, 127.0.0.2 -> 127.0.0.1. The 487 * idea is that programs running inside the jail should be unaware that they 488 * are using a different loopback IP than the host. 489 */ 490 __read_mostly static struct in6_addr sin6_localhost = IN6ADDR_LOOPBACK_INIT; 491 492 int 493 prison_local_ip(struct thread *td, struct sockaddr *ip) 494 { 495 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip; 496 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip; 497 struct prison *pr; 498 499 if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL) 500 return(1); 501 if ((pr = td->td_ucred->cr_prison) == NULL) 502 return(1); 503 if (ip->sa_family == AF_INET && pr->local_ip4 && 504 pr->local_ip4->sin_addr.s_addr == ip4->sin_addr.s_addr && 505 pr->local_ip4->sin_addr.s_addr != htonl(INADDR_LOOPBACK)) { 506 ip4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 507 return(0); 508 } 509 if (ip->sa_family == AF_INET6 && pr->local_ip6 && 510 bcmp(&pr->local_ip6->sin6_addr, &ip6->sin6_addr, 511 sizeof(ip6->sin6_addr)) == 0) { 512 bcopy(&sin6_localhost, &ip6->sin6_addr, sizeof(ip6->sin6_addr)); 513 return(0); 514 } 515 return(1); 516 } 517 518 /* 519 * Prison get non loopback ip: 520 * - af is the address family of the ip we want (AF_INET|AF_INET6). 521 * - If ip != NULL, put the first IP address that is not a loopback address 522 * into *ip. 523 * 524 * ip is in network by order and we don't touch it unless we find a valid ip. 525 * No matter if ip == NULL or not, we return either a valid struct sockaddr *, 526 * or NULL. This struct may not be modified. 527 */ 528 struct sockaddr * 529 prison_get_nonlocal(struct prison *pr, sa_family_t af, struct sockaddr *ip) 530 { 531 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip; 532 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip; 533 534 /* Check if it is cached */ 535 switch(af) { 536 case AF_INET: 537 if (ip4 != NULL && pr->nonlocal_ip4 != NULL) 538 ip4->sin_addr.s_addr = pr->nonlocal_ip4->sin_addr.s_addr; 539 return (struct sockaddr *)pr->nonlocal_ip4; 540 541 case AF_INET6: 542 if (ip6 != NULL && pr->nonlocal_ip6 != NULL) 543 ip6->sin6_addr = pr->nonlocal_ip6->sin6_addr; 544 return (struct sockaddr *)pr->nonlocal_ip6; 545 } 546 547 /* NOTREACHED */ 548 return NULL; 549 } 550 551 /* 552 * Prison get loopback ip. 553 * - af is the address family of the ip we want (AF_INET|AF_INET6). 554 * - If ip != NULL, put the first IP address that is not a loopback address 555 * into *ip. 556 * 557 * ip is in network by order and we don't touch it unless we find a valid ip. 558 * No matter if ip == NULL or not, we return either a valid struct sockaddr *, 559 * or NULL. This struct may not be modified. 560 */ 561 struct sockaddr * 562 prison_get_local(struct prison *pr, sa_family_t af, struct sockaddr *ip) 563 { 564 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip; 565 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip; 566 567 /* Check if it is cached */ 568 switch(af) { 569 case AF_INET: 570 if (ip4 != NULL && pr->local_ip4 != NULL) 571 ip4->sin_addr.s_addr = pr->local_ip4->sin_addr.s_addr; 572 return (struct sockaddr *)pr->local_ip4; 573 574 case AF_INET6: 575 if (ip6 != NULL && pr->local_ip6 != NULL) 576 ip6->sin6_addr = pr->local_ip6->sin6_addr; 577 return (struct sockaddr *)pr->local_ip6; 578 } 579 580 /* NOTREACHED */ 581 return NULL; 582 } 583 584 /* Check if the IP is among ours, if it is return 1, else 0 */ 585 int 586 jailed_ip(struct prison *pr, const struct sockaddr *ip) 587 { 588 const struct jail_ip_storage *jis; 589 const struct sockaddr_in *jip4, *ip4; 590 const struct sockaddr_in6 *jip6, *ip6; 591 592 if (pr == NULL) 593 return(0); 594 ip4 = (const struct sockaddr_in *)ip; 595 ip6 = (const struct sockaddr_in6 *)ip; 596 597 lockmgr(&jail_lock, LK_EXCLUSIVE); 598 SLIST_FOREACH(jis, &pr->pr_ips, entries) { 599 switch (ip->sa_family) { 600 case AF_INET: 601 jip4 = (const struct sockaddr_in *) &jis->ip; 602 if (jip4->sin_family == AF_INET && 603 ip4->sin_addr.s_addr == jip4->sin_addr.s_addr) { 604 lockmgr(&jail_lock, LK_RELEASE); 605 return(1); 606 } 607 break; 608 case AF_INET6: 609 jip6 = (const struct sockaddr_in6 *) &jis->ip; 610 if (jip6->sin6_family == AF_INET6 && 611 IN6_ARE_ADDR_EQUAL(&ip6->sin6_addr, 612 &jip6->sin6_addr)) { 613 lockmgr(&jail_lock, LK_RELEASE); 614 return(1); 615 } 616 break; 617 } 618 } 619 lockmgr(&jail_lock, LK_RELEASE); 620 /* Ip not in list */ 621 return(0); 622 } 623 624 int 625 prison_if(struct ucred *cred, struct sockaddr *sa) 626 { 627 struct prison *pr; 628 struct sockaddr_in *sai = (struct sockaddr_in*) sa; 629 630 pr = cred->cr_prison; 631 632 if (((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6)) 633 && PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_NET_UNIXIPROUTE)) 634 return(1); 635 else if ((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6)) 636 return(0); 637 else if (jailed_ip(pr, sa)) 638 return(0); 639 return(1); 640 } 641 642 /* 643 * Returns a prison instance, or NULL on failure. 644 */ 645 static struct prison * 646 prison_find(int prid) 647 { 648 struct prison *pr; 649 650 lockmgr(&jail_lock, LK_EXCLUSIVE); 651 LIST_FOREACH(pr, &allprison, pr_list) { 652 if (pr->pr_id == prid) 653 break; 654 } 655 lockmgr(&jail_lock, LK_RELEASE); 656 657 return(pr); 658 } 659 660 static int 661 sysctl_jail_list(SYSCTL_HANDLER_ARGS) 662 { 663 struct thread *td = curthread; 664 struct jail_ip_storage *jip; 665 #ifdef INET6 666 struct sockaddr_in6 *jsin6; 667 #endif 668 struct sockaddr_in *jsin; 669 struct lwp *lp; 670 struct prison *pr; 671 unsigned int jlssize, jlsused; 672 int count, error; 673 char *jls; /* Jail list */ 674 char *oip; /* Output ip */ 675 char *fullpath, *freepath; 676 677 jlsused = 0; 678 679 if (jailed(td->td_ucred)) 680 return (0); 681 lp = td->td_lwp; 682 retry: 683 count = prisoncount; 684 685 if (count == 0) 686 return(0); 687 688 jlssize = (count * 1024); 689 jls = kmalloc(jlssize + 1, M_TEMP, M_WAITOK | M_ZERO); 690 if (count < prisoncount) { 691 kfree(jls, M_TEMP); 692 goto retry; 693 } 694 count = prisoncount; 695 696 lockmgr(&jail_lock, LK_EXCLUSIVE); 697 LIST_FOREACH(pr, &allprison, pr_list) { 698 error = cache_fullpath(lp->lwp_proc, &pr->pr_root, NULL, 699 &fullpath, &freepath, 0); 700 if (error) 701 continue; 702 if (jlsused && jlsused < jlssize) 703 jls[jlsused++] = '\n'; 704 count = ksnprintf(jls + jlsused, (jlssize - jlsused), 705 "%d %s %s", 706 pr->pr_id, pr->pr_host, fullpath); 707 kfree(freepath, M_TEMP); 708 if (count < 0) 709 goto end; 710 jlsused += count; 711 712 /* Copy the IPS */ 713 SLIST_FOREACH(jip, &pr->pr_ips, entries) { 714 char buf[INET_ADDRSTRLEN]; 715 716 jsin = (struct sockaddr_in *)&jip->ip; 717 718 switch(jsin->sin_family) { 719 case AF_INET: 720 oip = kinet_ntoa(jsin->sin_addr, buf); 721 break; 722 #ifdef INET6 723 case AF_INET6: 724 jsin6 = (struct sockaddr_in6 *)&jip->ip; 725 oip = ip6_sprintf(&jsin6->sin6_addr); 726 break; 727 #endif 728 default: 729 oip = "?family?"; 730 break; 731 } 732 733 if ((jlssize - jlsused) < (strlen(oip) + 1)) { 734 error = ERANGE; 735 goto end; 736 } 737 count = ksnprintf(jls + jlsused, (jlssize - jlsused), 738 " %s", oip); 739 if (count < 0) 740 goto end; 741 jlsused += count; 742 } 743 } 744 745 /* 746 * The format is: 747 * pr_id <SPC> hostname1 <SPC> PATH1 <SPC> IP1 <SPC> IP2\npr_id... 748 */ 749 error = SYSCTL_OUT(req, jls, jlsused); 750 end: 751 lockmgr(&jail_lock, LK_RELEASE); 752 kfree(jls, M_TEMP); 753 754 return(error); 755 } 756 757 SYSCTL_OID(_jail, OID_AUTO, list, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, 758 sysctl_jail_list, "A", "List of active jails"); 759 760 static int 761 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 762 { 763 int error, injail; 764 765 injail = jailed(req->td->td_ucred); 766 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 767 768 return (error); 769 } 770 771 SYSCTL_PROC(_jail, OID_AUTO, jailed, 772 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NOLOCK, NULL, 0, 773 sysctl_jail_jailed, "I", "Process in jail?"); 774 775 /* 776 * MPSAFE 777 */ 778 void 779 prison_hold(struct prison *pr) 780 { 781 atomic_add_int(&pr->pr_ref, 1); 782 #ifdef PRISON_DEBUG 783 if (prison_debug > 0) { 784 --prison_debug; 785 print_backtrace(-1); 786 } 787 #endif 788 } 789 790 /* 791 * MPALMOSTSAFE 792 */ 793 void 794 prison_free(struct prison *pr) 795 { 796 struct jail_ip_storage *jls; 797 798 #ifdef PRISON_DEBUG 799 if (prison_debug > 0) { 800 --prison_debug; 801 print_backtrace(-1); 802 } 803 #endif 804 KKASSERT(pr->pr_ref > 0); 805 if (atomic_fetchadd_int(&pr->pr_ref, -1) != 1) 806 return; 807 808 /* 809 * The global jail lock is needed on the last ref to adjust 810 * the list. 811 */ 812 lockmgr(&jail_lock, LK_EXCLUSIVE); 813 if (pr->pr_ref) { 814 lockmgr(&jail_lock, LK_RELEASE); 815 return; 816 } 817 LIST_REMOVE(pr, pr_list); 818 --prisoncount; 819 820 /* 821 * Clean up 822 */ 823 while (!SLIST_EMPTY(&pr->pr_ips)) { 824 jls = SLIST_FIRST(&pr->pr_ips); 825 SLIST_REMOVE_HEAD(&pr->pr_ips, entries); 826 kfree(jls, M_PRISON); 827 } 828 lockmgr(&jail_lock, LK_RELEASE); 829 830 if (pr->pr_linux != NULL) 831 kfree(pr->pr_linux, M_PRISON); 832 varsymset_clean(&pr->pr_varsymset); 833 834 /* Release the sysctl tree */ 835 prison_sysctl_done(pr); 836 837 cache_drop(&pr->pr_root); 838 kfree(pr, M_PRISON); 839 } 840 841 /* 842 * Check if permisson for a specific privilege is granted within jail. 843 * 844 * MPSAFE 845 */ 846 int 847 prison_priv_check(struct ucred *cred, int cap) 848 { 849 struct prison *pr = cred->cr_prison; 850 851 if (!jailed(cred)) 852 return (0); 853 854 switch (cap & ~__SYSCAP_XFLAGS) { 855 case SYSCAP_RESTRICTEDROOT: /* meta group 1 */ 856 /* RESTRICTEDROOT fallbacks disallowed in jails */ 857 return EPERM; 858 case SYSCAP_SENSITIVEROOT: /* meta group 2 */ 859 case SYSCAP_NOEXEC: /* meta group 3 */ 860 case SYSCAP_NOCRED: /* meta group 4 */ 861 return 0; 862 case SYSCAP_NOJAIL: /* meta group 5 */ 863 /* all jail ops disallowed in jails */ 864 return EPERM; 865 case SYSCAP_NONET: /* meta group 6 */ 866 return 0; 867 case SYSCAP_NONET_SENSITIVE: /* meta group 7 */ 868 /* all sensitive network ops disallowed in jails */ 869 return EPERM; 870 case SYSCAP_NOVFS: /* meta group 8 */ 871 case SYSCAP_NOVFS_SENSITIVE: /* meta group 9 */ 872 case SYSCAP_NOMOUNT: /* meta group 10 */ 873 case SYSCAP_NO11: /* meta group 11 */ 874 case SYSCAP_NO12: /* meta group 12 */ 875 case SYSCAP_NO13: /* meta group 13 */ 876 case SYSCAP_NO14: /* meta group 14 */ 877 case SYSCAP_NO15: /* meta group 15 */ 878 return (0); 879 880 /* ----- */ /* group 1 - disallowed */ 881 882 case SYSCAP_NOPROC_TRESPASS: /* group 2 allowed */ 883 case SYSCAP_NOPROC_SETLOGIN: 884 case SYSCAP_NOPROC_SETRLIMIT: 885 case SYSCAP_NOSYSCTL_WR: 886 case SYSCAP_NOVARSYM_SYS: 887 case SYSCAP_NOSETHOSTNAME: 888 case SYSCAP_NOQUOTA_WR: 889 case SYSCAP_NODEBUG_UNPRIV: 890 case SYSCAP_NOSCHED: 891 case SYSCAP_NOSCHED_CPUSET: 892 case SYSCAP_NOSETTIME: 893 return (0); 894 895 case SYSCAP_NOEXEC_SUID: /* group 3 allowed */ 896 case SYSCAP_NOEXEC_SGID: 897 return (0); 898 899 case SYSCAP_NOCRED_SETUID: /* group 4 allowed */ 900 case SYSCAP_NOCRED_SETGID: 901 case SYSCAP_NOCRED_SETEUID: 902 case SYSCAP_NOCRED_SETEGID: 903 case SYSCAP_NOCRED_SETREUID: 904 case SYSCAP_NOCRED_SETREGID: 905 case SYSCAP_NOCRED_SETRESUID: 906 case SYSCAP_NOCRED_SETRESGID: 907 case SYSCAP_NOCRED_SETGROUPS: 908 return (0); 909 910 case SYSCAP_NOJAIL_CREATE: /* group 5 disallowed */ 911 case SYSCAP_NOJAIL_ATTACH: 912 return EPERM; 913 914 case SYSCAP_NONET_RESPORT: /* group 6 mostly allowed */ 915 /* 916 * Allow reserved ports 917 */ 918 return 0; 919 case SYSCAP_NONET_RAW: 920 /* 921 * Conditionally allow creating raw sockets in jail. 922 */ 923 if (PRISON_CAP_ISSET(pr->pr_caps, 924 PRISON_CAP_NET_RAW_SOCKETS)) 925 return (0); 926 else 927 return (EPERM); 928 929 /* ----- */ /* group 7 - disallowed */ 930 931 case SYSCAP_NOVFS_SYSFLAGS: /* group 8 - allowed */ 932 case SYSCAP_NOVFS_CHOWN: 933 case SYSCAP_NOVFS_CHMOD: 934 case SYSCAP_NOVFS_LINK: 935 case SYSCAP_NOVFS_CHFLAGS_DEV: 936 case SYSCAP_NOVFS_SETATTR: 937 case SYSCAP_NOVFS_SETGID: 938 case SYSCAP_NOVFS_GENERATION: 939 case SYSCAP_NOVFS_RETAINSUGID: 940 return (0); 941 942 case SYSCAP_NOVFS_MKNOD_BAD: /* group 9 - allowed */ 943 case SYSCAP_NOVFS_MKNOD_WHT: 944 case SYSCAP_NOVFS_MKNOD_DIR: 945 case SYSCAP_NOVFS_MKNOD_DEV: 946 case SYSCAP_NOVFS_IOCTL: 947 case SYSCAP_NOVFS_CHROOT: 948 case SYSCAP_NOVFS_REVOKE: 949 return (0); 950 951 case SYSCAP_NOMOUNT_NULLFS: /* group 10 - conditional */ 952 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_NULLFS)) 953 return (0); 954 else 955 return (EPERM); 956 case SYSCAP_NOMOUNT_DEVFS: 957 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_DEVFS)) 958 return (0); 959 else 960 return (EPERM); 961 case SYSCAP_NOMOUNT_TMPFS: 962 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_TMPFS)) 963 return (0); 964 else 965 return (EPERM); 966 case SYSCAP_NOMOUNT_PROCFS: 967 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_PROCFS)) 968 return (0); 969 else 970 return (EPERM); 971 case SYSCAP_NOMOUNT_FUSE: 972 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_FUSEFS)) 973 return (0); 974 else 975 return (EPERM); 976 case SYSCAP_NOMOUNT_UMOUNT: 977 return (0); 978 979 default: 980 /* otherwise disallow */ 981 return (EPERM); 982 } 983 } 984 985 986 /* 987 * Create a per-jail sysctl tree to control the prison 988 */ 989 int 990 prison_sysctl_create(struct prison *pr) 991 { 992 char id_str[7]; 993 994 ksnprintf(id_str, 6, "%d", pr->pr_id); 995 996 pr->pr_sysctl_ctx = (struct sysctl_ctx_list *) kmalloc( 997 sizeof(struct sysctl_ctx_list), M_PRISON, M_WAITOK | M_ZERO); 998 999 sysctl_ctx_init(pr->pr_sysctl_ctx); 1000 1001 /* Main jail node */ 1002 pr->pr_sysctl_tree = SYSCTL_ADD_NODE(pr->pr_sysctl_ctx, 1003 SYSCTL_STATIC_CHILDREN(_jail), 1004 OID_AUTO, id_str, CTLFLAG_RD, 0, 1005 "Jail specific settings"); 1006 1007 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 1008 OID_AUTO, "sys_set_hostname", CTLFLAG_RW, 1009 &pr->pr_caps, 0, PRISON_CAP_SYS_SET_HOSTNAME, 1010 "Processes in jail can set their hostnames"); 1011 1012 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 1013 OID_AUTO, "sys_sysvipc", CTLFLAG_RW, 1014 &pr->pr_caps, 0, PRISON_CAP_SYS_SYSVIPC, 1015 "Processes in jail can use System V IPC primitives"); 1016 1017 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 1018 OID_AUTO, "net_unixiproute", CTLFLAG_RW, 1019 &pr->pr_caps, 0, PRISON_CAP_NET_UNIXIPROUTE, 1020 "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only"); 1021 1022 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 1023 OID_AUTO, "net_raw_sockets", CTLFLAG_RW, 1024 &pr->pr_caps, 0, PRISON_CAP_NET_RAW_SOCKETS, 1025 "Process in jail can create raw sockets"); 1026 1027 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 1028 OID_AUTO, "allow_listen_override", CTLFLAG_RW, 1029 &pr->pr_caps, 0, PRISON_CAP_NET_LISTEN_OVERRIDE, 1030 "Process in jail can create raw sockets"); 1031 1032 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 1033 OID_AUTO, "vfs_chflags", CTLFLAG_RW, 1034 &pr->pr_caps, 0, PRISON_CAP_VFS_CHFLAGS, 1035 "Process in jail can override host wildcard listen"); 1036 1037 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 1038 OID_AUTO, "vfs_mount_nullfs", CTLFLAG_RW, 1039 &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_NULLFS, 1040 "Processes in jail can mount nullfs(5) filesystems"); 1041 1042 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 1043 OID_AUTO, "vfs_mount_tmpfs", CTLFLAG_RW, 1044 &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_TMPFS, 1045 "Processes in jail can mount tmpfs(5) filesystems"); 1046 1047 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 1048 OID_AUTO, "vfs_mount_devfs", CTLFLAG_RW, 1049 &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_DEVFS, 1050 "Processes in jail can mount devfs(5) filesystems"); 1051 1052 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 1053 OID_AUTO, "vfs_mount_procfs", CTLFLAG_RW, 1054 &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_PROCFS, 1055 "Processes in jail can mount procfs(5) filesystems"); 1056 1057 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 1058 OID_AUTO, "vfs_mount_fusefs", CTLFLAG_RW, 1059 &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_FUSEFS, 1060 "Processes in jail can mount fuse filesystems"); 1061 1062 return 0; 1063 } 1064 1065 int 1066 prison_sysctl_done(struct prison *pr) 1067 { 1068 if (pr->pr_sysctl_tree) { 1069 sysctl_ctx_free(pr->pr_sysctl_ctx); 1070 kfree(pr->pr_sysctl_ctx, M_PRISON); 1071 pr->pr_sysctl_tree = NULL; 1072 } 1073 1074 return 0; 1075 } 1076