1 /* 2 * ---------------------------------------------------------------------------- 3 * "THE BEER-WARE LICENSE" (Revision 42): 4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 5 * can do whatever you want with this stuff. If we meet some day, and you think 6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 7 * ---------------------------------------------------------------------------- 8 * 9 */ 10 /*- 11 * Copyright (c) 2006 Victor Balada Diaz <victor@bsdes.net> 12 * All rights reserved. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 37 /* 38 * $FreeBSD: src/sys/kern/kern_jail.c,v 1.6.2.3 2001/08/17 01:00:26 rwatson Exp $ 39 */ 40 41 #include "opt_inet6.h" 42 43 #include <sys/param.h> 44 #include <sys/types.h> 45 #include <sys/kernel.h> 46 #include <sys/systm.h> 47 #include <sys/errno.h> 48 #include <sys/sysmsg.h> 49 #include <sys/malloc.h> 50 #include <sys/nlookup.h> 51 #include <sys/namecache.h> 52 #include <sys/proc.h> 53 #include <sys/priv.h> 54 #include <sys/jail.h> 55 #include <sys/socket.h> 56 #include <sys/sysctl.h> 57 #include <sys/kern_syscall.h> 58 #include <net/if.h> 59 #include <netinet/in.h> 60 #include <netinet6/in6_var.h> 61 62 static struct prison *prison_find(int); 63 static void prison_ipcache_init(struct prison *); 64 65 __read_mostly static prison_cap_t prison_default_caps; 66 67 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); 68 69 SYSCTL_NODE(, OID_AUTO, jail, CTLFLAG_RW, 0, 70 "All jails settings"); 71 72 SYSCTL_NODE(_jail, OID_AUTO, defaults, CTLFLAG_RW, 0, 73 "Default options for jails"); 74 75 /*#define PRISON_DEBUG*/ 76 #ifdef PRISON_DEBUG 77 __read_mostly static int prison_debug; 78 SYSCTL_INT(_jail, OID_AUTO, debug, CTLFLAG_RW, &prison_debug, 0, 79 "Debug prison refs"); 80 #endif 81 82 SYSCTL_BIT64(_jail_defaults, OID_AUTO, set_hostname_allowed, CTLFLAG_RW, 83 &prison_default_caps, 1, PRISON_CAP_SYS_SET_HOSTNAME, 84 "Processes in jail can set their hostnames"); 85 86 SYSCTL_BIT64(_jail_defaults, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW, 87 &prison_default_caps, 0, PRISON_CAP_NET_UNIXIPROUTE, 88 "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only"); 89 90 SYSCTL_BIT64(_jail_defaults, OID_AUTO, sysvipc_allowed, CTLFLAG_RW, 91 &prison_default_caps, 0, PRISON_CAP_SYS_SYSVIPC, 92 "Processes in jail can use System V IPC primitives"); 93 94 SYSCTL_BIT64(_jail_defaults, OID_AUTO, chflags_allowed, CTLFLAG_RW, 95 &prison_default_caps, 0, PRISON_CAP_VFS_CHFLAGS, 96 "Processes in jail can alter system file flags"); 97 98 SYSCTL_BIT64(_jail_defaults, OID_AUTO, allow_raw_sockets, CTLFLAG_RW, 99 &prison_default_caps, 0, PRISON_CAP_NET_RAW_SOCKETS, 100 "Process in jail can create raw sockets"); 101 102 SYSCTL_BIT64(_jail_defaults, OID_AUTO, allow_listen_override, CTLFLAG_RW, 103 &prison_default_caps, 0, PRISON_CAP_NET_LISTEN_OVERRIDE, 104 "Process in jail can override host wildcard listen"); 105 106 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_nullfs, CTLFLAG_RW, 107 &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_NULLFS, 108 "Process in jail can mount nullfs(5) filesystems"); 109 110 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_tmpfs, CTLFLAG_RW, 111 &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_TMPFS, 112 "Process in jail can mount tmpfs(5) filesystems"); 113 114 int lastprid = 0; 115 int prisoncount = 0; 116 117 static struct lock jail_lock = 118 LOCK_INITIALIZER("jail", 0, LK_CANRECURSE); 119 120 LIST_HEAD(prisonlist, prison); 121 struct prisonlist allprison = LIST_HEAD_INITIALIZER(&allprison); 122 123 static int 124 kern_jail_attach(int jid) 125 { 126 struct proc *p = curthread->td_proc; 127 struct prison *pr; 128 struct ucred *cr; 129 int error; 130 131 pr = prison_find(jid); 132 if (pr == NULL) 133 return(EINVAL); 134 135 error = kern_chroot(&pr->pr_root); 136 if (error) 137 return(error); 138 139 prison_hold(pr); 140 lwkt_gettoken(&p->p_token); 141 cr = cratom_proc(p); 142 cr->cr_prison = pr; 143 p->p_flags |= P_JAILED; 144 lwkt_reltoken(&p->p_token); 145 146 return(0); 147 } 148 149 static int 150 assign_prison_id(struct prison *pr) 151 { 152 int tryprid; 153 struct prison *tpr; 154 155 tryprid = lastprid + 1; 156 if (tryprid == JAIL_MAX) 157 tryprid = 1; 158 159 lockmgr(&jail_lock, LK_EXCLUSIVE); 160 next: 161 LIST_FOREACH(tpr, &allprison, pr_list) { 162 if (tpr->pr_id != tryprid) 163 continue; 164 tryprid++; 165 if (tryprid == JAIL_MAX) { 166 lockmgr(&jail_lock, LK_RELEASE); 167 return (ERANGE); 168 } 169 goto next; 170 } 171 pr->pr_id = lastprid = tryprid; 172 lockmgr(&jail_lock, LK_RELEASE); 173 174 return (0); 175 } 176 177 static int 178 kern_jail(struct prison *pr, struct jail *j) 179 { 180 int error; 181 struct nlookupdata nd; 182 183 error = nlookup_init(&nd, j->path, UIO_USERSPACE, NLC_FOLLOW); 184 if (error) { 185 nlookup_done(&nd); 186 return (error); 187 } 188 error = nlookup(&nd); 189 if (error) { 190 nlookup_done(&nd); 191 return (error); 192 } 193 cache_copy(&nd.nl_nch, &pr->pr_root); 194 195 varsymset_init(&pr->pr_varsymset, NULL); 196 prison_ipcache_init(pr); 197 198 error = assign_prison_id(pr); 199 if (error) { 200 varsymset_clean(&pr->pr_varsymset); 201 nlookup_done(&nd); 202 return (error); 203 } 204 205 lockmgr(&jail_lock, LK_EXCLUSIVE); 206 LIST_INSERT_HEAD(&allprison, pr, pr_list); 207 ++prisoncount; 208 lockmgr(&jail_lock, LK_RELEASE); 209 210 error = prison_sysctl_create(pr); 211 if (error) 212 goto out; 213 214 error = kern_jail_attach(pr->pr_id); 215 if (error) 216 goto out2; 217 218 nlookup_done(&nd); 219 return 0; 220 221 out2: 222 prison_sysctl_done(pr); 223 224 out: 225 lockmgr(&jail_lock, LK_EXCLUSIVE); 226 LIST_REMOVE(pr, pr_list); 227 --prisoncount; 228 lockmgr(&jail_lock, LK_RELEASE); 229 varsymset_clean(&pr->pr_varsymset); 230 nlookup_done(&nd); 231 return (error); 232 } 233 234 /* 235 * jail() 236 * 237 * jail_args(syscallarg(struct jail *) jail) 238 * 239 * MPALMOSTSAFE 240 */ 241 int 242 sys_jail(struct sysmsg *sysmsg, const struct jail_args *uap) 243 { 244 struct thread *td = curthread; 245 struct prison *pr; 246 struct jail_ip_storage *jip; 247 struct jail j; 248 int error; 249 uint32_t jversion; 250 251 sysmsg->sysmsg_result = -1; 252 253 error = priv_check(td, PRIV_JAIL_CREATE); 254 if (error) 255 return (error); 256 257 error = copyin(uap->jail, &jversion, sizeof(jversion)); 258 if (error) 259 return (error); 260 261 pr = kmalloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 262 SLIST_INIT(&pr->pr_ips); 263 lockmgr(&jail_lock, LK_EXCLUSIVE); 264 265 switch (jversion) { 266 case 0: 267 /* Single IPv4 jails. */ 268 { 269 struct jail_v0 jv0; 270 struct sockaddr_in ip4addr; 271 272 error = copyin(uap->jail, &jv0, sizeof(jv0)); 273 if (error) 274 goto out; 275 276 j.path = jv0.path; 277 j.hostname = jv0.hostname; 278 279 jip = kmalloc(sizeof(*jip), M_PRISON, M_WAITOK | M_ZERO); 280 ip4addr.sin_family = AF_INET; 281 ip4addr.sin_addr.s_addr = htonl(jv0.ip_number); 282 memcpy(&jip->ip, &ip4addr, sizeof(ip4addr)); 283 SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries); 284 break; 285 } 286 287 case 1: 288 /* 289 * DragonFly multi noIP/IPv4/IPv6 jails 290 * 291 * NOTE: This version is unsupported by FreeBSD 292 * (which uses version 2 instead). 293 */ 294 295 error = copyin(uap->jail, &j, sizeof(j)); 296 if (error) 297 goto out; 298 299 for (int i = 0; i < j.n_ips; i++) { 300 jip = kmalloc(sizeof(*jip), M_PRISON, 301 M_WAITOK | M_ZERO); 302 SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries); 303 error = copyin(&j.ips[i], &jip->ip, 304 sizeof(struct sockaddr_storage)); 305 if (error) 306 goto out; 307 } 308 break; 309 default: 310 error = EINVAL; 311 goto out; 312 } 313 314 error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); 315 if (error) 316 goto out; 317 318 /* Use default capabilities as a template */ 319 pr->pr_caps = prison_default_caps; 320 321 error = kern_jail(pr, &j); 322 if (error) 323 goto out; 324 325 sysmsg->sysmsg_result = pr->pr_id; 326 lockmgr(&jail_lock, LK_RELEASE); 327 328 return (0); 329 330 out: 331 /* Delete all ips */ 332 while (!SLIST_EMPTY(&pr->pr_ips)) { 333 jip = SLIST_FIRST(&pr->pr_ips); 334 SLIST_REMOVE_HEAD(&pr->pr_ips, entries); 335 kfree(jip, M_PRISON); 336 } 337 lockmgr(&jail_lock, LK_RELEASE); 338 kfree(pr, M_PRISON); 339 340 return (error); 341 } 342 343 /* 344 * int jail_attach(int jid); 345 * 346 * MPALMOSTSAFE 347 */ 348 int 349 sys_jail_attach(struct sysmsg *sysmsg, const struct jail_attach_args *uap) 350 { 351 struct thread *td = curthread; 352 int error; 353 354 error = priv_check(td, PRIV_JAIL_ATTACH); 355 if (error) 356 return(error); 357 lockmgr(&jail_lock, LK_EXCLUSIVE); 358 error = kern_jail_attach(uap->jid); 359 lockmgr(&jail_lock, LK_RELEASE); 360 return (error); 361 } 362 363 static void 364 prison_ipcache_init(struct prison *pr) 365 { 366 struct jail_ip_storage *jis; 367 struct sockaddr_in *ip4; 368 struct sockaddr_in6 *ip6; 369 370 lockmgr(&jail_lock, LK_EXCLUSIVE); 371 SLIST_FOREACH(jis, &pr->pr_ips, entries) { 372 switch (jis->ip.ss_family) { 373 case AF_INET: 374 ip4 = (struct sockaddr_in *)&jis->ip; 375 if ((ntohl(ip4->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == 376 IN_LOOPBACKNET) { 377 /* loopback address */ 378 if (pr->local_ip4 == NULL) 379 pr->local_ip4 = ip4; 380 } else { 381 /* public address */ 382 if (pr->nonlocal_ip4 == NULL) 383 pr->nonlocal_ip4 = ip4; 384 } 385 break; 386 387 case AF_INET6: 388 ip6 = (struct sockaddr_in6 *)&jis->ip; 389 if (IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr)) { 390 /* loopback address */ 391 if (pr->local_ip6 == NULL) 392 pr->local_ip6 = ip6; 393 } else { 394 /* public address */ 395 if (pr->nonlocal_ip6 == NULL) 396 pr->nonlocal_ip6 = ip6; 397 } 398 break; 399 } 400 } 401 lockmgr(&jail_lock, LK_RELEASE); 402 } 403 404 /* 405 * Changes INADDR_LOOPBACK for a valid jail address. 406 * ip is in network byte order. 407 * Returns 1 if the ip is among jail valid ips. 408 * Returns 0 if is not among jail valid ips or 409 * if couldn't replace INADDR_LOOPBACK for a valid 410 * IP. 411 */ 412 int 413 prison_replace_wildcards(struct thread *td, struct sockaddr *ip) 414 { 415 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip; 416 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip; 417 struct prison *pr; 418 419 if (td->td_proc == NULL || td->td_ucred == NULL) 420 return (1); 421 if ((pr = td->td_ucred->cr_prison) == NULL) 422 return (1); 423 424 if ((ip->sa_family == AF_INET && 425 ip4->sin_addr.s_addr == htonl(INADDR_ANY)) || 426 (ip->sa_family == AF_INET6 && 427 IN6_IS_ADDR_UNSPECIFIED(&ip6->sin6_addr))) 428 return (1); 429 if ((ip->sa_family == AF_INET && 430 ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) || 431 (ip->sa_family == AF_INET6 && 432 IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) { 433 if (!prison_get_local(pr, ip->sa_family, ip) && 434 !prison_get_nonlocal(pr, ip->sa_family, ip)) 435 return(0); 436 else 437 return(1); 438 } 439 if (jailed_ip(pr, ip)) 440 return(1); 441 return(0); 442 } 443 444 /* 445 * Convert the localhost IP to the actual jail IP 446 */ 447 int 448 prison_remote_ip(struct thread *td, struct sockaddr *ip) 449 { 450 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip; 451 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip; 452 struct prison *pr; 453 454 if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL) 455 return(1); 456 if ((pr = td->td_ucred->cr_prison) == NULL) 457 return(1); 458 if ((ip->sa_family == AF_INET && 459 ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) || 460 (ip->sa_family == AF_INET6 && 461 IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) { 462 if (!prison_get_local(pr, ip->sa_family, ip) && 463 !prison_get_nonlocal(pr, ip->sa_family, ip)) 464 return(0); 465 else 466 return(1); 467 } 468 return(1); 469 } 470 471 /* 472 * Convert the jail IP back to localhost 473 * 474 * Used by getsockname() and getpeername() to convert the in-jail loopback 475 * address back to LOCALHOST. For example, 127.0.0.2 -> 127.0.0.1. The 476 * idea is that programs running inside the jail should be unaware that they 477 * are using a different loopback IP than the host. 478 */ 479 __read_mostly static struct in6_addr sin6_localhost = IN6ADDR_LOOPBACK_INIT; 480 481 int 482 prison_local_ip(struct thread *td, struct sockaddr *ip) 483 { 484 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip; 485 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip; 486 struct prison *pr; 487 488 if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL) 489 return(1); 490 if ((pr = td->td_ucred->cr_prison) == NULL) 491 return(1); 492 if (ip->sa_family == AF_INET && pr->local_ip4 && 493 pr->local_ip4->sin_addr.s_addr == ip4->sin_addr.s_addr && 494 pr->local_ip4->sin_addr.s_addr != htonl(INADDR_LOOPBACK)) { 495 ip4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 496 return(0); 497 } 498 if (ip->sa_family == AF_INET6 && pr->local_ip6 && 499 bcmp(&pr->local_ip6->sin6_addr, &ip6->sin6_addr, 500 sizeof(ip6->sin6_addr)) == 0) { 501 bcopy(&sin6_localhost, &ip6->sin6_addr, sizeof(ip6->sin6_addr)); 502 return(0); 503 } 504 return(1); 505 } 506 507 /* 508 * Prison get non loopback ip: 509 * - af is the address family of the ip we want (AF_INET|AF_INET6). 510 * - If ip != NULL, put the first IP address that is not a loopback address 511 * into *ip. 512 * 513 * ip is in network by order and we don't touch it unless we find a valid ip. 514 * No matter if ip == NULL or not, we return either a valid struct sockaddr *, 515 * or NULL. This struct may not be modified. 516 */ 517 struct sockaddr * 518 prison_get_nonlocal(struct prison *pr, sa_family_t af, struct sockaddr *ip) 519 { 520 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip; 521 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip; 522 523 /* Check if it is cached */ 524 switch(af) { 525 case AF_INET: 526 if (ip4 != NULL && pr->nonlocal_ip4 != NULL) 527 ip4->sin_addr.s_addr = pr->nonlocal_ip4->sin_addr.s_addr; 528 return (struct sockaddr *)pr->nonlocal_ip4; 529 530 case AF_INET6: 531 if (ip6 != NULL && pr->nonlocal_ip6 != NULL) 532 ip6->sin6_addr = pr->nonlocal_ip6->sin6_addr; 533 return (struct sockaddr *)pr->nonlocal_ip6; 534 } 535 536 /* NOTREACHED */ 537 return NULL; 538 } 539 540 /* 541 * Prison get loopback ip. 542 * - af is the address family of the ip we want (AF_INET|AF_INET6). 543 * - If ip != NULL, put the first IP address that is not a loopback address 544 * into *ip. 545 * 546 * ip is in network by order and we don't touch it unless we find a valid ip. 547 * No matter if ip == NULL or not, we return either a valid struct sockaddr *, 548 * or NULL. This struct may not be modified. 549 */ 550 struct sockaddr * 551 prison_get_local(struct prison *pr, sa_family_t af, struct sockaddr *ip) 552 { 553 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip; 554 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip; 555 556 /* Check if it is cached */ 557 switch(af) { 558 case AF_INET: 559 if (ip4 != NULL && pr->local_ip4 != NULL) 560 ip4->sin_addr.s_addr = pr->local_ip4->sin_addr.s_addr; 561 return (struct sockaddr *)pr->local_ip4; 562 563 case AF_INET6: 564 if (ip6 != NULL && pr->local_ip6 != NULL) 565 ip6->sin6_addr = pr->local_ip6->sin6_addr; 566 return (struct sockaddr *)pr->local_ip6; 567 } 568 569 /* NOTREACHED */ 570 return NULL; 571 } 572 573 /* Check if the IP is among ours, if it is return 1, else 0 */ 574 int 575 jailed_ip(struct prison *pr, const struct sockaddr *ip) 576 { 577 const struct jail_ip_storage *jis; 578 const struct sockaddr_in *jip4, *ip4; 579 const struct sockaddr_in6 *jip6, *ip6; 580 581 if (pr == NULL) 582 return(0); 583 ip4 = (const struct sockaddr_in *)ip; 584 ip6 = (const struct sockaddr_in6 *)ip; 585 586 lockmgr(&jail_lock, LK_EXCLUSIVE); 587 SLIST_FOREACH(jis, &pr->pr_ips, entries) { 588 switch (ip->sa_family) { 589 case AF_INET: 590 jip4 = (const struct sockaddr_in *) &jis->ip; 591 if (jip4->sin_family == AF_INET && 592 ip4->sin_addr.s_addr == jip4->sin_addr.s_addr) { 593 lockmgr(&jail_lock, LK_RELEASE); 594 return(1); 595 } 596 break; 597 case AF_INET6: 598 jip6 = (const struct sockaddr_in6 *) &jis->ip; 599 if (jip6->sin6_family == AF_INET6 && 600 IN6_ARE_ADDR_EQUAL(&ip6->sin6_addr, 601 &jip6->sin6_addr)) { 602 lockmgr(&jail_lock, LK_RELEASE); 603 return(1); 604 } 605 break; 606 } 607 } 608 lockmgr(&jail_lock, LK_RELEASE); 609 /* Ip not in list */ 610 return(0); 611 } 612 613 int 614 prison_if(struct ucred *cred, struct sockaddr *sa) 615 { 616 struct prison *pr; 617 struct sockaddr_in *sai = (struct sockaddr_in*) sa; 618 619 pr = cred->cr_prison; 620 621 if (((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6)) 622 && PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_NET_UNIXIPROUTE)) 623 return(1); 624 else if ((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6)) 625 return(0); 626 else if (jailed_ip(pr, sa)) 627 return(0); 628 return(1); 629 } 630 631 /* 632 * Returns a prison instance, or NULL on failure. 633 */ 634 static struct prison * 635 prison_find(int prid) 636 { 637 struct prison *pr; 638 639 lockmgr(&jail_lock, LK_EXCLUSIVE); 640 LIST_FOREACH(pr, &allprison, pr_list) { 641 if (pr->pr_id == prid) 642 break; 643 } 644 lockmgr(&jail_lock, LK_RELEASE); 645 646 return(pr); 647 } 648 649 static int 650 sysctl_jail_list(SYSCTL_HANDLER_ARGS) 651 { 652 struct thread *td = curthread; 653 struct jail_ip_storage *jip; 654 #ifdef INET6 655 struct sockaddr_in6 *jsin6; 656 #endif 657 struct sockaddr_in *jsin; 658 struct lwp *lp; 659 struct prison *pr; 660 unsigned int jlssize, jlsused; 661 int count, error; 662 char *jls; /* Jail list */ 663 char *oip; /* Output ip */ 664 char *fullpath, *freepath; 665 666 jlsused = 0; 667 668 if (jailed(td->td_ucred)) 669 return (0); 670 lp = td->td_lwp; 671 retry: 672 count = prisoncount; 673 674 if (count == 0) 675 return(0); 676 677 jlssize = (count * 1024); 678 jls = kmalloc(jlssize + 1, M_TEMP, M_WAITOK | M_ZERO); 679 if (count < prisoncount) { 680 kfree(jls, M_TEMP); 681 goto retry; 682 } 683 count = prisoncount; 684 685 lockmgr(&jail_lock, LK_EXCLUSIVE); 686 LIST_FOREACH(pr, &allprison, pr_list) { 687 error = cache_fullpath(lp->lwp_proc, &pr->pr_root, NULL, 688 &fullpath, &freepath, 0); 689 if (error) 690 continue; 691 if (jlsused && jlsused < jlssize) 692 jls[jlsused++] = '\n'; 693 count = ksnprintf(jls + jlsused, (jlssize - jlsused), 694 "%d %s %s", 695 pr->pr_id, pr->pr_host, fullpath); 696 kfree(freepath, M_TEMP); 697 if (count < 0) 698 goto end; 699 jlsused += count; 700 701 /* Copy the IPS */ 702 SLIST_FOREACH(jip, &pr->pr_ips, entries) { 703 char buf[INET_ADDRSTRLEN]; 704 705 jsin = (struct sockaddr_in *)&jip->ip; 706 707 switch(jsin->sin_family) { 708 case AF_INET: 709 oip = kinet_ntoa(jsin->sin_addr, buf); 710 break; 711 #ifdef INET6 712 case AF_INET6: 713 jsin6 = (struct sockaddr_in6 *)&jip->ip; 714 oip = ip6_sprintf(&jsin6->sin6_addr); 715 break; 716 #endif 717 default: 718 oip = "?family?"; 719 break; 720 } 721 722 if ((jlssize - jlsused) < (strlen(oip) + 1)) { 723 error = ERANGE; 724 goto end; 725 } 726 count = ksnprintf(jls + jlsused, (jlssize - jlsused), 727 " %s", oip); 728 if (count < 0) 729 goto end; 730 jlsused += count; 731 } 732 } 733 734 /* 735 * The format is: 736 * pr_id <SPC> hostname1 <SPC> PATH1 <SPC> IP1 <SPC> IP2\npr_id... 737 */ 738 error = SYSCTL_OUT(req, jls, jlsused); 739 end: 740 lockmgr(&jail_lock, LK_RELEASE); 741 kfree(jls, M_TEMP); 742 743 return(error); 744 } 745 746 SYSCTL_OID(_jail, OID_AUTO, list, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, 747 sysctl_jail_list, "A", "List of active jails"); 748 749 static int 750 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 751 { 752 int error, injail; 753 754 injail = jailed(req->td->td_ucred); 755 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 756 757 return (error); 758 } 759 760 SYSCTL_PROC(_jail, OID_AUTO, jailed, 761 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NOLOCK, NULL, 0, 762 sysctl_jail_jailed, "I", "Process in jail?"); 763 764 /* 765 * MPSAFE 766 */ 767 void 768 prison_hold(struct prison *pr) 769 { 770 atomic_add_int(&pr->pr_ref, 1); 771 #ifdef PRISON_DEBUG 772 if (prison_debug > 0) { 773 --prison_debug; 774 print_backtrace(-1); 775 } 776 #endif 777 } 778 779 /* 780 * MPALMOSTSAFE 781 */ 782 void 783 prison_free(struct prison *pr) 784 { 785 struct jail_ip_storage *jls; 786 787 #ifdef PRISON_DEBUG 788 if (prison_debug > 0) { 789 --prison_debug; 790 print_backtrace(-1); 791 } 792 #endif 793 KKASSERT(pr->pr_ref > 0); 794 if (atomic_fetchadd_int(&pr->pr_ref, -1) != 1) 795 return; 796 797 /* 798 * The global jail lock is needed on the last ref to adjust 799 * the list. 800 */ 801 lockmgr(&jail_lock, LK_EXCLUSIVE); 802 if (pr->pr_ref) { 803 lockmgr(&jail_lock, LK_RELEASE); 804 return; 805 } 806 LIST_REMOVE(pr, pr_list); 807 --prisoncount; 808 809 /* 810 * Clean up 811 */ 812 while (!SLIST_EMPTY(&pr->pr_ips)) { 813 jls = SLIST_FIRST(&pr->pr_ips); 814 SLIST_REMOVE_HEAD(&pr->pr_ips, entries); 815 kfree(jls, M_PRISON); 816 } 817 lockmgr(&jail_lock, LK_RELEASE); 818 819 if (pr->pr_linux != NULL) 820 kfree(pr->pr_linux, M_PRISON); 821 varsymset_clean(&pr->pr_varsymset); 822 823 /* Release the sysctl tree */ 824 prison_sysctl_done(pr); 825 826 cache_drop(&pr->pr_root); 827 kfree(pr, M_PRISON); 828 } 829 830 /* 831 * Check if permisson for a specific privilege is granted within jail. 832 * 833 * MPSAFE 834 */ 835 int 836 prison_priv_check(struct ucred *cred, int priv) 837 { 838 struct prison *pr = cred->cr_prison; 839 840 if (!jailed(cred)) 841 return (0); 842 843 switch (priv) { 844 case PRIV_CRED_SETUID: 845 case PRIV_CRED_SETEUID: 846 case PRIV_CRED_SETGID: 847 case PRIV_CRED_SETEGID: 848 case PRIV_CRED_SETGROUPS: 849 case PRIV_CRED_SETREUID: 850 case PRIV_CRED_SETREGID: 851 case PRIV_CRED_SETRESUID: 852 case PRIV_CRED_SETRESGID: 853 854 case PRIV_VFS_SYSFLAGS: 855 case PRIV_VFS_CHOWN: 856 case PRIV_VFS_CHMOD: 857 case PRIV_VFS_CHROOT: 858 case PRIV_VFS_LINK: 859 case PRIV_VFS_CHFLAGS_DEV: 860 case PRIV_VFS_REVOKE: 861 case PRIV_VFS_MKNOD_BAD: 862 case PRIV_VFS_MKNOD_WHT: 863 case PRIV_VFS_MKNOD_DIR: 864 return (0); 865 866 case PRIV_VFS_MOUNT_NULLFS: 867 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_NULLFS)) 868 return (0); 869 else 870 return (EPERM); 871 case PRIV_VFS_MOUNT_DEVFS: 872 return (EPERM); 873 case PRIV_VFS_MOUNT_TMPFS: 874 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_TMPFS)) 875 return (0); 876 else 877 return (EPERM); 878 879 case PRIV_VFS_SETATTR: 880 case PRIV_VFS_SETGID: 881 882 case PRIV_PROC_SETRLIMIT: 883 case PRIV_PROC_SETLOGIN: 884 885 case PRIV_SYSCTL_WRITEJAIL: 886 887 case PRIV_VARSYM_SYS: 888 889 case PRIV_SETHOSTNAME: 890 891 case PRIV_PROC_TRESPASS: 892 893 return (0); 894 895 case PRIV_UFS_QUOTAON: 896 case PRIV_UFS_QUOTAOFF: 897 case PRIV_VFS_SETQUOTA: 898 case PRIV_UFS_SETUSE: 899 case PRIV_VFS_GETQUOTA: 900 return (0); 901 902 903 case PRIV_DEBUG_UNPRIV: 904 return (0); 905 906 907 /* 908 * Allow jailed root to bind reserved ports. 909 */ 910 case PRIV_NETINET_RESERVEDPORT: 911 return (0); 912 913 914 /* 915 * Conditionally allow creating raw sockets in jail. 916 */ 917 case PRIV_NETINET_RAW: 918 if (PRISON_CAP_ISSET(pr->pr_caps, 919 PRISON_CAP_NET_RAW_SOCKETS)) 920 return (0); 921 else 922 return (EPERM); 923 924 case PRIV_HAMMER_IOCTL: 925 return (0); 926 927 default: 928 929 return (EPERM); 930 } 931 } 932 933 934 /* 935 * Create a per-jail sysctl tree to control the prison 936 */ 937 int 938 prison_sysctl_create(struct prison *pr) 939 { 940 char id_str[7]; 941 942 ksnprintf(id_str, 6, "%d", pr->pr_id); 943 944 pr->pr_sysctl_ctx = (struct sysctl_ctx_list *) kmalloc( 945 sizeof(struct sysctl_ctx_list), M_PRISON, M_WAITOK | M_ZERO); 946 947 sysctl_ctx_init(pr->pr_sysctl_ctx); 948 949 /* Main jail node */ 950 pr->pr_sysctl_tree = SYSCTL_ADD_NODE(pr->pr_sysctl_ctx, 951 SYSCTL_STATIC_CHILDREN(_jail), 952 OID_AUTO, id_str, CTLFLAG_RD, 0, 953 "Jail specific settings"); 954 955 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 956 OID_AUTO, "sys_set_hostname", CTLFLAG_RW, 957 &pr->pr_caps, 0, PRISON_CAP_SYS_SET_HOSTNAME, 958 "Processes in jail can set their hostnames"); 959 960 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 961 OID_AUTO, "sys_sysvipc", CTLFLAG_RW, 962 &pr->pr_caps, 0, PRISON_CAP_SYS_SYSVIPC, 963 "Processes in jail can use System V IPC primitives"); 964 965 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 966 OID_AUTO, "net_unixiproute", CTLFLAG_RW, 967 &pr->pr_caps, 0, PRISON_CAP_NET_UNIXIPROUTE, 968 "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only"); 969 970 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 971 OID_AUTO, "net_raw_sockets", CTLFLAG_RW, 972 &pr->pr_caps, 0, PRISON_CAP_NET_RAW_SOCKETS, 973 "Process in jail can create raw sockets"); 974 975 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 976 OID_AUTO, "allow_listen_override", CTLFLAG_RW, 977 &pr->pr_caps, 0, PRISON_CAP_NET_LISTEN_OVERRIDE, 978 "Process in jail can create raw sockets"); 979 980 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 981 OID_AUTO, "vfs_chflags", CTLFLAG_RW, 982 &pr->pr_caps, 0, PRISON_CAP_VFS_CHFLAGS, 983 "Process in jail can override host wildcard listen"); 984 985 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 986 OID_AUTO, "vfs_mount_nullfs", CTLFLAG_RW, 987 &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_NULLFS, 988 "Processes in jail can mount nullfs(5) filesystems"); 989 990 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 991 OID_AUTO, "vfs_mount_tmpfs", CTLFLAG_RW, 992 &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_TMPFS, 993 "Processes in jail can mount tmpfs(5) filesystems"); 994 995 return 0; 996 } 997 998 int 999 prison_sysctl_done(struct prison *pr) 1000 { 1001 if (pr->pr_sysctl_tree) { 1002 sysctl_ctx_free(pr->pr_sysctl_ctx); 1003 kfree(pr->pr_sysctl_ctx, M_PRISON); 1004 pr->pr_sysctl_tree = NULL; 1005 } 1006 1007 return 0; 1008 } 1009