1 /* $NetBSD: hijack.c,v 1.59 2011/02/20 23:47:04 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28 #include <sys/cdefs.h> 29 __RCSID("$NetBSD: hijack.c,v 1.59 2011/02/20 23:47:04 pooka Exp $"); 30 31 #define __ssp_weak_name(fun) _hijack_ ## fun 32 33 #include <sys/param.h> 34 #include <sys/types.h> 35 #include <sys/event.h> 36 #include <sys/ioctl.h> 37 #include <sys/mount.h> 38 #include <sys/poll.h> 39 #include <sys/socket.h> 40 #include <sys/statvfs.h> 41 42 #include <rump/rumpclient.h> 43 #include <rump/rump_syscalls.h> 44 45 #include <assert.h> 46 #include <dlfcn.h> 47 #include <err.h> 48 #include <errno.h> 49 #include <fcntl.h> 50 #include <poll.h> 51 #include <pthread.h> 52 #include <signal.h> 53 #include <stdarg.h> 54 #include <stdbool.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <time.h> 59 #include <unistd.h> 60 61 enum dualcall { 62 DUALCALL_WRITE, DUALCALL_WRITEV, 63 DUALCALL_IOCTL, DUALCALL_FCNTL, 64 DUALCALL_SOCKET, DUALCALL_ACCEPT, DUALCALL_BIND, DUALCALL_CONNECT, 65 DUALCALL_GETPEERNAME, DUALCALL_GETSOCKNAME, DUALCALL_LISTEN, 66 DUALCALL_RECVFROM, DUALCALL_RECVMSG, 67 DUALCALL_SENDTO, DUALCALL_SENDMSG, 68 DUALCALL_GETSOCKOPT, DUALCALL_SETSOCKOPT, 69 DUALCALL_SHUTDOWN, 70 DUALCALL_READ, DUALCALL_READV, 71 DUALCALL_DUP2, 72 DUALCALL_CLOSE, 73 DUALCALL_POLLTS, 74 DUALCALL_KEVENT, 75 DUALCALL_STAT, DUALCALL_LSTAT, DUALCALL_FSTAT, 76 DUALCALL_CHMOD, DUALCALL_LCHMOD, DUALCALL_FCHMOD, 77 DUALCALL_CHOWN, DUALCALL_LCHOWN, DUALCALL_FCHOWN, 78 DUALCALL_OPEN, 79 DUALCALL_STATVFS1, DUALCALL_FSTATVFS1, 80 DUALCALL_CHDIR, DUALCALL_FCHDIR, 81 DUALCALL_LSEEK, 82 DUALCALL_GETDENTS, 83 DUALCALL_UNLINK, DUALCALL_SYMLINK, DUALCALL_READLINK, 84 DUALCALL_RENAME, 85 DUALCALL_MKDIR, DUALCALL_RMDIR, 86 DUALCALL_UTIMES, DUALCALL_LUTIMES, DUALCALL_FUTIMES, 87 DUALCALL_TRUNCATE, DUALCALL_FTRUNCATE, 88 DUALCALL_FSYNC, DUALCALL_FSYNC_RANGE, 89 DUALCALL_MOUNT, DUALCALL_UNMOUNT, 90 DUALCALL___GETCWD, 91 DUALCALL__NUM 92 }; 93 94 #define RSYS_STRING(a) __STRING(a) 95 #define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a)) 96 
97 /* 98 * Would be nice to get this automatically in sync with libc. 99 * Also, this does not work for compat-using binaries! 100 */ 101 #if !__NetBSD_Prereq__(5,99,7) 102 #define REALSELECT select 103 #define REALPOLLTS pollts 104 #define REALKEVENT kevent 105 #define REALSTAT __stat30 106 #define REALLSTAT __lstat30 107 #define REALFSTAT __fstat30 108 #define REALUTIMES utimes 109 #define REALLUTIMES lutimes 110 #define REALFUTIMES futimes 111 #else 112 #define REALSELECT _sys___select50 113 #define REALPOLLTS _sys___pollts50 114 #define REALKEVENT _sys___kevent50 115 #define REALSTAT __stat50 116 #define REALLSTAT __lstat50 117 #define REALFSTAT __fstat50 118 #define REALUTIMES __utimes50 119 #define REALLUTIMES __lutimes50 120 #define REALFUTIMES __futimes50 121 #endif 122 #define REALREAD _sys_read 123 #define REALGETDENTS __getdents30 124 #define REALMOUNT __mount50 125 #define REALLSEEK _lseek 126 127 int REALSELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *); 128 int REALPOLLTS(struct pollfd *, nfds_t, 129 const struct timespec *, const sigset_t *); 130 int REALKEVENT(int, const struct kevent *, size_t, struct kevent *, size_t, 131 const struct timespec *); 132 ssize_t REALREAD(int, void *, size_t); 133 int REALSTAT(const char *, struct stat *); 134 int REALLSTAT(const char *, struct stat *); 135 int REALFSTAT(int, struct stat *); 136 int REALGETDENTS(int, char *, size_t); 137 int REALUTIMES(const char *, const struct timeval [2]); 138 int REALLUTIMES(const char *, const struct timeval [2]); 139 int REALFUTIMES(int, const struct timeval [2]); 140 int REALMOUNT(const char *, const char *, int, void *, size_t); 141 off_t REALLSEEK(int, off_t, int); 142 int __getcwd(char *, size_t); 143 144 #define S(a) __STRING(a) 145 struct sysnames { 146 enum dualcall scm_callnum; 147 const char *scm_hostname; 148 const char *scm_rumpname; 149 } syscnames[] = { 150 { DUALCALL_SOCKET, "__socket30", RSYS_NAME(SOCKET) }, 151 { DUALCALL_ACCEPT, "accept", 
RSYS_NAME(ACCEPT) }, 152 { DUALCALL_BIND, "bind", RSYS_NAME(BIND) }, 153 { DUALCALL_CONNECT, "connect", RSYS_NAME(CONNECT) }, 154 { DUALCALL_GETPEERNAME, "getpeername", RSYS_NAME(GETPEERNAME) }, 155 { DUALCALL_GETSOCKNAME, "getsockname", RSYS_NAME(GETSOCKNAME) }, 156 { DUALCALL_LISTEN, "listen", RSYS_NAME(LISTEN) }, 157 { DUALCALL_RECVFROM, "recvfrom", RSYS_NAME(RECVFROM) }, 158 { DUALCALL_RECVMSG, "recvmsg", RSYS_NAME(RECVMSG) }, 159 { DUALCALL_SENDTO, "sendto", RSYS_NAME(SENDTO) }, 160 { DUALCALL_SENDMSG, "sendmsg", RSYS_NAME(SENDMSG) }, 161 { DUALCALL_GETSOCKOPT, "getsockopt", RSYS_NAME(GETSOCKOPT) }, 162 { DUALCALL_SETSOCKOPT, "setsockopt", RSYS_NAME(SETSOCKOPT) }, 163 { DUALCALL_SHUTDOWN, "shutdown", RSYS_NAME(SHUTDOWN) }, 164 { DUALCALL_READ, S(REALREAD), RSYS_NAME(READ) }, 165 { DUALCALL_READV, "readv", RSYS_NAME(READV) }, 166 { DUALCALL_WRITE, "write", RSYS_NAME(WRITE) }, 167 { DUALCALL_WRITEV, "writev", RSYS_NAME(WRITEV) }, 168 { DUALCALL_IOCTL, "ioctl", RSYS_NAME(IOCTL) }, 169 { DUALCALL_FCNTL, "fcntl", RSYS_NAME(FCNTL) }, 170 { DUALCALL_DUP2, "dup2", RSYS_NAME(DUP2) }, 171 { DUALCALL_CLOSE, "close", RSYS_NAME(CLOSE) }, 172 { DUALCALL_POLLTS, S(REALPOLLTS), RSYS_NAME(POLLTS) }, 173 { DUALCALL_KEVENT, S(REALKEVENT), RSYS_NAME(KEVENT) }, 174 { DUALCALL_STAT, S(REALSTAT), RSYS_NAME(STAT) }, 175 { DUALCALL_LSTAT, S(REALLSTAT), RSYS_NAME(LSTAT) }, 176 { DUALCALL_FSTAT, S(REALFSTAT), RSYS_NAME(FSTAT) }, 177 { DUALCALL_CHOWN, "chown", RSYS_NAME(CHOWN) }, 178 { DUALCALL_LCHOWN, "lchown", RSYS_NAME(LCHOWN) }, 179 { DUALCALL_FCHOWN, "fchown", RSYS_NAME(FCHOWN) }, 180 { DUALCALL_CHMOD, "chmod", RSYS_NAME(CHMOD) }, 181 { DUALCALL_LCHMOD, "lchmod", RSYS_NAME(LCHMOD) }, 182 { DUALCALL_FCHMOD, "fchmod", RSYS_NAME(FCHMOD) }, 183 { DUALCALL_UTIMES, S(REALUTIMES), RSYS_NAME(UTIMES) }, 184 { DUALCALL_LUTIMES, S(REALLUTIMES), RSYS_NAME(LUTIMES) }, 185 { DUALCALL_FUTIMES, S(REALFUTIMES), RSYS_NAME(FUTIMES) }, 186 { DUALCALL_OPEN, "open", RSYS_NAME(OPEN) }, 187 { 
DUALCALL_STATVFS1, "statvfs1", RSYS_NAME(STATVFS1) }, 188 { DUALCALL_FSTATVFS1, "fstatvfs1", RSYS_NAME(FSTATVFS1) }, 189 { DUALCALL_CHDIR, "chdir", RSYS_NAME(CHDIR) }, 190 { DUALCALL_FCHDIR, "fchdir", RSYS_NAME(FCHDIR) }, 191 { DUALCALL_LSEEK, S(REALLSEEK), RSYS_NAME(LSEEK) }, 192 { DUALCALL_GETDENTS, "__getdents30", RSYS_NAME(GETDENTS) }, 193 { DUALCALL_UNLINK, "unlink", RSYS_NAME(UNLINK) }, 194 { DUALCALL_SYMLINK, "symlink", RSYS_NAME(SYMLINK) }, 195 { DUALCALL_READLINK, "readlink", RSYS_NAME(READLINK) }, 196 { DUALCALL_RENAME, "rename", RSYS_NAME(RENAME) }, 197 { DUALCALL_MKDIR, "mkdir", RSYS_NAME(MKDIR) }, 198 { DUALCALL_RMDIR, "rmdir", RSYS_NAME(RMDIR) }, 199 { DUALCALL_TRUNCATE, "truncate", RSYS_NAME(TRUNCATE) }, 200 { DUALCALL_FTRUNCATE, "ftruncate", RSYS_NAME(FTRUNCATE) }, 201 { DUALCALL_FSYNC, "fsync", RSYS_NAME(FSYNC) }, 202 { DUALCALL_FSYNC_RANGE, "fsync_range", RSYS_NAME(FSYNC_RANGE) }, 203 { DUALCALL_MOUNT, S(REALMOUNT), RSYS_NAME(MOUNT) }, 204 { DUALCALL_UNMOUNT, "unmount", RSYS_NAME(UNMOUNT) }, 205 { DUALCALL___GETCWD, "__getcwd", RSYS_NAME(__GETCWD) }, 206 }; 207 #undef S 208 209 struct bothsys { 210 void *bs_host; 211 void *bs_rump; 212 } syscalls[DUALCALL__NUM]; 213 #define GETSYSCALL(which, name) syscalls[DUALCALL_##name].bs_##which 214 215 pid_t (*host_fork)(void); 216 int (*host_daemon)(int, int); 217 int (*host_execve)(const char *, char *const[], char *const[]); 218 219 /* ok, we need *two* bits per dup2'd fd to track fd+HIJACKOFF aliases */ 220 static uint32_t dup2mask; 221 #define ISDUP2D(fd) (((fd) < 16) && (1<<(fd) & dup2mask)) 222 #define SETDUP2(fd) \ 223 do { if ((fd) < 16) dup2mask |= (1<<(fd)); } while (/*CONSTCOND*/0) 224 #define CLRDUP2(fd) \ 225 do { if ((fd) < 16) dup2mask &= ~(1<<(fd)); } while (/*CONSTCOND*/0) 226 #define ISDUP2ALIAS(fd) (((fd) < 16) && (1<<((fd)+16) & dup2mask)) 227 #define SETDUP2ALIAS(fd) \ 228 do { if ((fd) < 16) dup2mask |= (1<<((fd)+16)); } while (/*CONSTCOND*/0) 229 #define CLRDUP2ALIAS(fd) \ 230 do { if 
((fd) < 16) dup2mask &= ~(1<<((fd)+16)); } while (/*CONSTCOND*/0) 231 232 //#define DEBUGJACK 233 #ifdef DEBUGJACK 234 #define DPRINTF(x) mydprintf x 235 static void 236 mydprintf(const char *fmt, ...) 237 { 238 va_list ap; 239 240 if (ISDUP2D(STDERR_FILENO)) 241 return; 242 243 va_start(ap, fmt); 244 vfprintf(stderr, fmt, ap); 245 va_end(ap); 246 } 247 248 #else 249 #define DPRINTF(x) 250 #endif 251 252 #define FDCALL(type, name, rcname, args, proto, vars) \ 253 type name args \ 254 { \ 255 type (*fun) proto; \ 256 \ 257 DPRINTF(("%s -> %d\n", __STRING(name), fd)); \ 258 if (fd_isrump(fd)) { \ 259 fun = syscalls[rcname].bs_rump; \ 260 fd = fd_host2rump(fd); \ 261 } else { \ 262 fun = syscalls[rcname].bs_host; \ 263 } \ 264 \ 265 return fun vars; \ 266 } 267 268 #define PATHCALL(type, name, rcname, args, proto, vars) \ 269 type name args \ 270 { \ 271 type (*fun) proto; \ 272 \ 273 DPRINTF(("%s -> %s\n", __STRING(name), path)); \ 274 if (path_isrump(path)) { \ 275 fun = syscalls[rcname].bs_rump; \ 276 path = path_host2rump(path); \ 277 } else { \ 278 fun = syscalls[rcname].bs_host; \ 279 } \ 280 \ 281 return fun vars; \ 282 } 283 284 /* 285 * This is called from librumpclient in case of LD_PRELOAD. 286 * It ensures correct RTLD_NEXT. 287 * 288 * ... except, it's apparently extremely difficult to force 289 * at least gcc to generate an actual stack frame here. So 290 * sprinkle some volatile foobar and baz to throw the optimizer 291 * off the scent and generate a variable assignment with the 292 * return value. The posterboy for this meltdown is amd64 293 * with -O2. At least with gcc 4.1.3 i386 works regardless of 294 * optimization. 
295 */ 296 volatile int rumphijack_unrope; /* there, unhang yourself */ 297 static void * 298 hijackdlsym(void *handle, const char *symbol) 299 { 300 void *rv; 301 302 rv = dlsym(handle, symbol); 303 rumphijack_unrope = *(volatile int *)rv; 304 305 return (void *)rv; 306 } 307 308 /* 309 * This tracks if our process is in a subdirectory of /rump. 310 * It's preserved over exec. 311 */ 312 static bool pwdinrump = false; 313 314 /* 315 * These variables are set from the RUMPHIJACK string and control 316 * which operations can product rump kernel file descriptors. 317 * This should be easily extendable for future needs. 318 */ 319 #define RUMPHIJACK_DEFAULT "path=/rump,socket=all:nolocal" 320 static bool rumpsockets[PF_MAX]; 321 static const char *rumpprefix; 322 static size_t rumpprefixlen; 323 324 static struct { 325 int pf; 326 const char *name; 327 } socketmap[] = { 328 { PF_LOCAL, "local" }, 329 { PF_INET, "inet" }, 330 { PF_LINK, "link" }, 331 #ifdef PF_OROUTE 332 { PF_OROUTE, "oroute" }, 333 #endif 334 { PF_ROUTE, "route" }, 335 { PF_INET6, "inet6" }, 336 #ifdef PF_MPLS 337 { PF_MPLS, "mpls" }, 338 #endif 339 { -1, NULL } 340 }; 341 342 static void 343 sockparser(char *buf) 344 { 345 char *p, *l; 346 bool value; 347 int i; 348 349 /* if "all" is present, it must be specified first */ 350 if (strncmp(buf, "all", strlen("all")) == 0) { 351 for (i = 0; i < (int)__arraycount(rumpsockets); i++) { 352 rumpsockets[i] = true; 353 } 354 buf += strlen("all"); 355 if (*buf == ':') 356 buf++; 357 } 358 359 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) { 360 value = true; 361 if (strncmp(p, "no", strlen("no")) == 0) { 362 value = false; 363 p += strlen("no"); 364 } 365 366 for (i = 0; socketmap[i].name; i++) { 367 if (strcmp(p, socketmap[i].name) == 0) { 368 rumpsockets[socketmap[i].pf] = value; 369 break; 370 } 371 } 372 if (socketmap[i].name == NULL) { 373 warnx("invalid socket specifier %s", p); 374 } 375 } 376 } 377 378 static void 379 pathparser(char 
*buf) 380 { 381 382 /* sanity-check */ 383 if (*buf != '/') 384 errx(1, "hijack path specifier must begin with ``/''"); 385 rumpprefixlen = strlen(buf); 386 if (rumpprefixlen < 2) 387 errx(1, "invalid hijack prefix: %s", buf); 388 if (buf[rumpprefixlen-1] == '/' && strspn(buf, "/") != rumpprefixlen) 389 errx(1, "hijack prefix may end in slash only if pure " 390 "slash, gave %s", buf); 391 392 if ((rumpprefix = strdup(buf)) == NULL) 393 err(1, "strdup"); 394 rumpprefixlen = strlen(rumpprefix); 395 } 396 397 static struct { 398 void (*parsefn)(char *); 399 const char *name; 400 } hijackparse[] = { 401 { sockparser, "socket" }, 402 { pathparser, "path" }, 403 { NULL, NULL }, 404 }; 405 406 static void 407 parsehijack(char *hijack) 408 { 409 char *p, *p2, *l; 410 const char *hijackcopy; 411 int i; 412 413 if ((hijackcopy = strdup(hijack)) == NULL) 414 err(1, "strdup"); 415 416 /* disable everything explicitly */ 417 for (i = 0; i < PF_MAX; i++) 418 rumpsockets[i] = false; 419 420 for (p = strtok_r(hijack, ",", &l); p; p = strtok_r(NULL, ",", &l)) { 421 p2 = strchr(p, '='); 422 if (!p2) 423 errx(1, "invalid hijack specifier: %s", hijackcopy); 424 425 for (i = 0; hijackparse[i].parsefn; i++) { 426 if (strncmp(hijackparse[i].name, p, 427 (size_t)(p2-p)) == 0) { 428 hijackparse[i].parsefn(p2+1); 429 break; 430 } 431 } 432 } 433 434 } 435 436 static void __attribute__((constructor)) 437 rcinit(void) 438 { 439 char buf[1024]; 440 extern void *(*rumpclient_dlsym)(void *, const char *); 441 unsigned i, j; 442 443 rumpclient_dlsym = hijackdlsym; 444 host_fork = dlsym(RTLD_NEXT, "fork"); 445 host_daemon = dlsym(RTLD_NEXT, "daemon"); 446 host_execve = dlsym(RTLD_NEXT, "execve"); 447 448 /* 449 * In theory cannot print anything during lookups because 450 * we might not have the call vector set up. so, the errx() 451 * is a bit of a strech, but it might work. 
452 */ 453 454 for (i = 0; i < DUALCALL__NUM; i++) { 455 /* build runtime O(1) access */ 456 for (j = 0; j < __arraycount(syscnames); j++) { 457 if (syscnames[j].scm_callnum == i) 458 break; 459 } 460 461 if (j == __arraycount(syscnames)) 462 errx(1, "rumphijack error: syscall pos %d missing", i); 463 464 syscalls[i].bs_host = dlsym(RTLD_NEXT, 465 syscnames[j].scm_hostname); 466 if (syscalls[i].bs_host == NULL) 467 errx(1, "hostcall %s not found missing", 468 syscnames[j].scm_hostname); 469 470 syscalls[i].bs_rump = dlsym(RTLD_NEXT, 471 syscnames[j].scm_rumpname); 472 if (syscalls[i].bs_rump == NULL) 473 errx(1, "rumpcall %s not found missing", 474 syscnames[j].scm_rumpname); 475 } 476 477 if (rumpclient_init() == -1) 478 err(1, "rumpclient init"); 479 480 /* check which syscalls we're supposed to hijack */ 481 if (getenv_r("RUMPHIJACK", buf, sizeof(buf)) == -1) { 482 strcpy(buf, RUMPHIJACK_DEFAULT); 483 } 484 parsehijack(buf); 485 486 /* set client persistence level */ 487 if (getenv_r("RUMPHIJACK_RETRYCONNECT", buf, sizeof(buf)) != -1) { 488 if (strcmp(buf, "die") == 0) 489 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_DIE); 490 else if (strcmp(buf, "inftime") == 0) 491 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME); 492 else if (strcmp(buf, "once") == 0) 493 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_ONCE); 494 else { 495 time_t timeout; 496 char *ep; 497 498 timeout = (time_t)strtoll(buf, &ep, 10); 499 if (timeout <= 0 || ep != buf + strlen(buf)) 500 errx(1, "RUMPHIJACK_RETRYCONNECT must be " 501 "keyword or integer, got: %s", buf); 502 503 rumpclient_setconnretry(timeout); 504 } 505 } 506 507 if (getenv_r("RUMPHIJACK__DUP2MASK", buf, sizeof(buf)) == 0) { 508 dup2mask = strtoul(buf, NULL, 10); 509 unsetenv("RUMPHIJACK__DUP2MASK"); 510 } 511 if (getenv_r("RUMPHIJACK__PWDINRUMP", buf, sizeof(buf)) == 0) { 512 pwdinrump = true; 513 unsetenv("RUMPHIJACK__PWDINRUMP"); 514 } 515 } 516 517 /* XXX: need runtime selection. 
low for now due to FD_SETSIZE */ 518 #define HIJACK_FDOFF 128 519 static int 520 fd_rump2host(int fd) 521 { 522 523 if (fd == -1) 524 return fd; 525 526 if (!ISDUP2D(fd)) 527 fd += HIJACK_FDOFF; 528 529 return fd; 530 } 531 532 static int 533 fd_host2rump(int fd) 534 { 535 536 if (!ISDUP2D(fd)) 537 fd -= HIJACK_FDOFF; 538 return fd; 539 } 540 541 static bool 542 fd_isrump(int fd) 543 { 544 545 return ISDUP2D(fd) || fd >= HIJACK_FDOFF; 546 } 547 548 #define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_FDOFF) 549 550 static bool 551 path_isrump(const char *path) 552 { 553 554 if (rumpprefix == NULL) 555 return false; 556 557 if (*path == '/') { 558 if (strncmp(path, rumpprefix, rumpprefixlen) == 0) 559 return true; 560 return false; 561 } else { 562 return pwdinrump; 563 } 564 } 565 566 static const char *rootpath = "/"; 567 static const char * 568 path_host2rump(const char *path) 569 { 570 const char *rv; 571 572 if (*path == '/') { 573 rv = path + rumpprefixlen; 574 if (*rv == '\0') 575 rv = rootpath; 576 } else { 577 rv = path; 578 } 579 580 return rv; 581 } 582 583 static int 584 dodup(int oldd, int minfd) 585 { 586 int (*op_fcntl)(int, int, ...); 587 int newd; 588 int isrump; 589 590 DPRINTF(("dup -> %d (minfd %d)\n", oldd, minfd)); 591 if (fd_isrump(oldd)) { 592 op_fcntl = GETSYSCALL(rump, FCNTL); 593 oldd = fd_host2rump(oldd); 594 isrump = 1; 595 } else { 596 op_fcntl = GETSYSCALL(host, FCNTL); 597 isrump = 0; 598 } 599 600 newd = op_fcntl(oldd, F_DUPFD, minfd); 601 602 if (isrump) 603 newd = fd_rump2host(newd); 604 DPRINTF(("dup <- %d\n", newd)); 605 606 return newd; 607 } 608 609 /* 610 * dup a host file descriptor so that it doesn't collide with the dup2mask 611 */ 612 static int 613 fd_dupgood(int fd) 614 { 615 int (*op_fcntl)(int, int, ...) 
= GETSYSCALL(host, FCNTL); 616 int (*op_close)(int) = GETSYSCALL(host, CLOSE); 617 int ofd, i; 618 619 for (i = 1; ISDUP2D(fd); i++) { 620 ofd = fd; 621 fd = op_fcntl(ofd, F_DUPFD, i); 622 op_close(ofd); 623 } 624 625 return fd; 626 } 627 628 int 629 open(const char *path, int flags, ...) 630 { 631 int (*op_open)(const char *, int, ...); 632 bool isrump; 633 va_list ap; 634 int fd; 635 636 if (path_isrump(path)) { 637 path = path_host2rump(path); 638 op_open = GETSYSCALL(rump, OPEN); 639 isrump = true; 640 } else { 641 op_open = GETSYSCALL(host, OPEN); 642 isrump = false; 643 } 644 645 va_start(ap, flags); 646 fd = op_open(path, flags, va_arg(ap, mode_t)); 647 va_end(ap); 648 649 if (isrump) 650 fd = fd_rump2host(fd); 651 else 652 fd = fd_dupgood(fd); 653 return fd; 654 } 655 656 int 657 chdir(const char *path) 658 { 659 int (*op_chdir)(const char *); 660 bool isrump; 661 int rv; 662 663 if (path_isrump(path)) { 664 op_chdir = GETSYSCALL(rump, CHDIR); 665 isrump = true; 666 path = path_host2rump(path); 667 } else { 668 op_chdir = GETSYSCALL(host, CHDIR); 669 isrump = false; 670 } 671 672 rv = op_chdir(path); 673 if (rv == 0) { 674 if (isrump) 675 pwdinrump = true; 676 else 677 pwdinrump = false; 678 } 679 680 return rv; 681 } 682 683 int 684 fchdir(int fd) 685 { 686 int (*op_fchdir)(int); 687 bool isrump; 688 int rv; 689 690 if (fd_isrump(fd)) { 691 op_fchdir = GETSYSCALL(rump, FCHDIR); 692 isrump = true; 693 fd = fd_host2rump(fd); 694 } else { 695 op_fchdir = GETSYSCALL(host, FCHDIR); 696 isrump = false; 697 } 698 699 rv = op_fchdir(fd); 700 if (rv == 0) { 701 if (isrump) 702 pwdinrump = true; 703 else 704 pwdinrump = false; 705 } 706 707 return rv; 708 } 709 710 int 711 __getcwd(char *bufp, size_t len) 712 { 713 int (*op___getcwd)(char *, size_t); 714 int rv; 715 716 if (pwdinrump) { 717 size_t prefixgap; 718 bool iamslash; 719 720 if (rumpprefix[rumpprefixlen-1] == '/') 721 iamslash = true; 722 else 723 iamslash = false; 724 725 if (iamslash) 726 prefixgap = 
rumpprefixlen - 1; /* ``//+path'' */ 727 else 728 prefixgap = rumpprefixlen; /* ``/pfx+/path'' */ 729 if (len <= prefixgap) { 730 return ERANGE; 731 } 732 733 op___getcwd = GETSYSCALL(rump, __GETCWD); 734 rv = op___getcwd(bufp + prefixgap, len - prefixgap); 735 if (rv == -1) 736 return rv; 737 738 /* augment the "/" part only for a non-root path */ 739 memcpy(bufp, rumpprefix, rumpprefixlen); 740 741 /* append / only to non-root cwd */ 742 if (rv != 2) 743 bufp[prefixgap] = '/'; 744 745 /* don't append extra slash in the purely-slash case */ 746 if (rv == 2 && !iamslash) 747 bufp[rumpprefixlen] = '\0'; 748 749 return rv; 750 } else { 751 op___getcwd = GETSYSCALL(host, __GETCWD); 752 return op___getcwd(bufp, len); 753 } 754 } 755 756 int 757 rename(const char *from, const char *to) 758 { 759 int (*op_rename)(const char *, const char *); 760 761 if (path_isrump(from)) { 762 if (!path_isrump(to)) 763 return EXDEV; 764 765 from = path_host2rump(from); 766 to = path_host2rump(to); 767 op_rename = GETSYSCALL(rump, RENAME); 768 } else { 769 if (path_isrump(to)) 770 return EXDEV; 771 772 op_rename = GETSYSCALL(host, RENAME); 773 } 774 775 return op_rename(from, to); 776 } 777 778 int __socket30(int, int, int); 779 int 780 __socket30(int domain, int type, int protocol) 781 { 782 int (*op_socket)(int, int, int); 783 int fd; 784 bool isrump; 785 786 isrump = domain < PF_MAX && rumpsockets[domain]; 787 788 if (isrump) 789 op_socket = GETSYSCALL(rump, SOCKET); 790 else 791 op_socket = GETSYSCALL(host, SOCKET); 792 fd = op_socket(domain, type, protocol); 793 794 if (isrump) 795 fd = fd_rump2host(fd); 796 else 797 fd = fd_dupgood(fd); 798 DPRINTF(("socket <- %d\n", fd)); 799 800 return fd; 801 } 802 803 int 804 accept(int s, struct sockaddr *addr, socklen_t *addrlen) 805 { 806 int (*op_accept)(int, struct sockaddr *, socklen_t *); 807 int fd; 808 bool isrump; 809 810 isrump = fd_isrump(s); 811 812 DPRINTF(("accept -> %d", s)); 813 if (isrump) { 814 op_accept = GETSYSCALL(rump, 
ACCEPT); 815 s = fd_host2rump(s); 816 } else { 817 op_accept = GETSYSCALL(host, ACCEPT); 818 } 819 fd = op_accept(s, addr, addrlen); 820 if (fd != -1 && isrump) 821 fd = fd_rump2host(fd); 822 else 823 fd = fd_dupgood(fd); 824 825 DPRINTF((" <- %d\n", fd)); 826 827 return fd; 828 } 829 830 /* 831 * ioctl and fcntl are varargs calls and need special treatment 832 */ 833 int 834 ioctl(int fd, unsigned long cmd, ...) 835 { 836 int (*op_ioctl)(int, unsigned long cmd, ...); 837 va_list ap; 838 int rv; 839 840 DPRINTF(("ioctl -> %d\n", fd)); 841 if (fd_isrump(fd)) { 842 fd = fd_host2rump(fd); 843 op_ioctl = GETSYSCALL(rump, IOCTL); 844 } else { 845 op_ioctl = GETSYSCALL(host, IOCTL); 846 } 847 848 va_start(ap, cmd); 849 rv = op_ioctl(fd, cmd, va_arg(ap, void *)); 850 va_end(ap); 851 return rv; 852 } 853 854 #include <syslog.h> 855 int 856 fcntl(int fd, int cmd, ...) 857 { 858 int (*op_fcntl)(int, int, ...); 859 va_list ap; 860 int rv, minfd, i; 861 862 DPRINTF(("fcntl -> %d (cmd %d)\n", fd, cmd)); 863 864 switch (cmd) { 865 case F_DUPFD: 866 va_start(ap, cmd); 867 minfd = va_arg(ap, int); 868 va_end(ap); 869 return dodup(fd, minfd); 870 871 case F_CLOSEM: 872 /* 873 * So, if fd < HIJACKOFF, we want to do a host closem. 874 */ 875 876 if (fd < HIJACK_FDOFF) { 877 int closemfd = fd; 878 879 if (rumpclient__closenotify(&closemfd, 880 RUMPCLIENT_CLOSE_FCLOSEM) == -1) 881 return -1; 882 op_fcntl = GETSYSCALL(host, FCNTL); 883 rv = op_fcntl(closemfd, cmd); 884 if (rv) 885 return rv; 886 } 887 888 /* 889 * Additionally, we want to do a rump closem, but only 890 * for the file descriptors not within the dup2mask. 891 */ 892 893 /* why don't we offer fls()? */ 894 for (i = 15; i >= 0; i--) { 895 if (ISDUP2D(i)) 896 break; 897 } 898 899 if (fd >= HIJACK_FDOFF) 900 fd -= HIJACK_FDOFF; 901 else 902 fd = 0; 903 fd = MAX(i+1, fd); 904 905 /* hmm, maybe we should close rump fd's not within dup2mask? 
*/ 906 907 return rump_sys_fcntl(fd, F_CLOSEM); 908 909 case F_MAXFD: 910 /* 911 * For maxfd, if there's a rump kernel fd, return 912 * it hostified. Otherwise, return host's MAXFD 913 * return value. 914 */ 915 if ((rv = rump_sys_fcntl(fd, F_MAXFD)) != -1) { 916 /* 917 * This might go a little wrong in case 918 * of dup2 to [012], but I'm not sure if 919 * there's a justification for tracking 920 * that info. Consider e.g. 921 * dup2(rumpfd, 2) followed by rump_sys_open() 922 * returning 1. We should return 1+HIJACKOFF, 923 * not 2+HIJACKOFF. However, if [01] is not 924 * open, the correct return value is 2. 925 */ 926 return fd_rump2host(fd); 927 } else { 928 op_fcntl = GETSYSCALL(host, FCNTL); 929 return op_fcntl(fd, F_MAXFD); 930 } 931 /*NOTREACHED*/ 932 933 default: 934 if (fd_isrump(fd)) { 935 fd = fd_host2rump(fd); 936 op_fcntl = GETSYSCALL(rump, FCNTL); 937 } else { 938 op_fcntl = GETSYSCALL(host, FCNTL); 939 } 940 941 va_start(ap, cmd); 942 rv = op_fcntl(fd, cmd, va_arg(ap, void *)); 943 va_end(ap); 944 return rv; 945 } 946 /*NOTREACHED*/ 947 } 948 949 int 950 close(int fd) 951 { 952 int (*op_close)(int); 953 int rv; 954 955 DPRINTF(("close -> %d\n", fd)); 956 if (fd_isrump(fd)) { 957 int undup2 = 0; 958 959 fd = fd_host2rump(fd); 960 if (ISDUP2ALIAS(fd)) { 961 _DIAGASSERT(ISDUP2D(fd)); 962 CLRDUP2ALIAS(fd); 963 return 0; 964 } 965 966 if (ISDUP2D(fd)) 967 undup2 = 1; 968 op_close = GETSYSCALL(rump, CLOSE); 969 rv = op_close(fd); 970 if (rv == 0 && undup2) 971 CLRDUP2(fd); 972 } else { 973 if (rumpclient__closenotify(&fd, RUMPCLIENT_CLOSE_CLOSE) == -1) 974 return -1; 975 op_close = GETSYSCALL(host, CLOSE); 976 rv = op_close(fd); 977 } 978 979 return rv; 980 } 981 982 /* 983 * write cannot issue a standard debug printf due to recursion 984 */ 985 ssize_t 986 write(int fd, const void *buf, size_t blen) 987 { 988 ssize_t (*op_write)(int, const void *, size_t); 989 990 if (fd_isrump(fd)) { 991 fd = fd_host2rump(fd); 992 op_write = GETSYSCALL(rump, WRITE); 993 
} else { 994 op_write = GETSYSCALL(host, WRITE); 995 } 996 997 return op_write(fd, buf, blen); 998 } 999 1000 /* 1001 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since 1002 * many programs do that. dup2 of a rump kernel fd to another value 1003 * not >= fdoff is an error. 1004 * 1005 * Note: cannot rump2host newd, because it is often hardcoded. 1006 */ 1007 int 1008 dup2(int oldd, int newd) 1009 { 1010 int (*host_dup2)(int, int); 1011 int rv; 1012 1013 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd)); 1014 1015 if (fd_isrump(oldd)) { 1016 if (!(newd >= 0 && newd <= 2)) 1017 return EBADF; 1018 oldd = fd_host2rump(oldd); 1019 if (oldd == newd) { 1020 SETDUP2(newd); 1021 SETDUP2ALIAS(newd); 1022 return newd; 1023 } 1024 rv = rump_sys_dup2(oldd, newd); 1025 if (rv != -1) 1026 SETDUP2(newd); 1027 } else { 1028 host_dup2 = syscalls[DUALCALL_DUP2].bs_host; 1029 if (rumpclient__closenotify(&newd, RUMPCLIENT_CLOSE_DUP2) == -1) 1030 return -1; 1031 rv = host_dup2(oldd, newd); 1032 } 1033 1034 return rv; 1035 } 1036 1037 int 1038 dup(int oldd) 1039 { 1040 1041 return dodup(oldd, 0); 1042 } 1043 1044 pid_t 1045 fork() 1046 { 1047 pid_t rv; 1048 1049 DPRINTF(("fork\n")); 1050 1051 rv = rumpclient__dofork(host_fork); 1052 1053 DPRINTF(("fork returns %d\n", rv)); 1054 return rv; 1055 } 1056 /* we do not have the luxury of not requiring a stackframe */ 1057 __strong_alias(__vfork14,fork); 1058 1059 int 1060 daemon(int nochdir, int noclose) 1061 { 1062 struct rumpclient_fork *rf; 1063 1064 if ((rf = rumpclient_prefork()) == NULL) 1065 return -1; 1066 1067 if (host_daemon(nochdir, noclose) == -1) 1068 return -1; 1069 1070 if (rumpclient_fork_init(rf) == -1) 1071 return -1; 1072 1073 return 0; 1074 } 1075 1076 int 1077 execve(const char *path, char *const argv[], char *const envp[]) 1078 { 1079 char buf[128]; 1080 char *dup2str; 1081 const char *pwdinrumpstr; 1082 char **newenv; 1083 size_t nelem; 1084 int rv, sverrno; 1085 int bonus = 1, i = 0; 1086 1087 if 
(dup2mask) { 1088 snprintf(buf, sizeof(buf), "RUMPHIJACK__DUP2MASK=%u", dup2mask); 1089 dup2str = malloc(strlen(buf)+1); 1090 if (dup2str == NULL) 1091 return ENOMEM; 1092 strcpy(dup2str, buf); 1093 bonus++; 1094 } else { 1095 dup2str = NULL; 1096 } 1097 1098 if (pwdinrump) { 1099 pwdinrumpstr = "RUMPHIJACK__PWDINRUMP=true"; 1100 bonus++; 1101 } else { 1102 pwdinrumpstr = NULL; 1103 } 1104 1105 for (nelem = 0; envp && envp[nelem]; nelem++) 1106 continue; 1107 newenv = malloc(sizeof(*newenv) * nelem+bonus); 1108 if (newenv == NULL) { 1109 free(dup2str); 1110 return ENOMEM; 1111 } 1112 memcpy(newenv, envp, nelem*sizeof(*newenv)); 1113 if (dup2str) { 1114 newenv[nelem+i] = dup2str; 1115 i++; 1116 } 1117 if (pwdinrumpstr) { 1118 newenv[nelem+i] = __UNCONST(pwdinrumpstr); 1119 i++; 1120 } 1121 newenv[nelem+i] = NULL; 1122 _DIAGASSERT(i < bonus); 1123 1124 rv = rumpclient_exec(path, argv, newenv); 1125 1126 _DIAGASSERT(rv != 0); 1127 sverrno = errno; 1128 free(newenv); 1129 free(dup2str); 1130 errno = sverrno; 1131 return rv; 1132 } 1133 1134 /* 1135 * select is done by calling poll. 1136 */ 1137 int 1138 REALSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 1139 struct timeval *timeout) 1140 { 1141 struct pollfd *pfds; 1142 struct timespec ts, *tsp = NULL; 1143 nfds_t realnfds; 1144 int i, j; 1145 int rv, incr; 1146 1147 DPRINTF(("select\n")); 1148 1149 /* 1150 * Well, first we must scan the fds to figure out how many 1151 * fds there really are. This is because up to and including 1152 * nb5 poll() silently refuses nfds > process_maxopen_fds. 1153 * Seems to be fixed in current, thank the maker. 1154 * god damn cluster...bomb. 
1155 */ 1156 1157 for (i = 0, realnfds = 0; i < nfds; i++) { 1158 if (readfds && FD_ISSET(i, readfds)) { 1159 realnfds++; 1160 continue; 1161 } 1162 if (writefds && FD_ISSET(i, writefds)) { 1163 realnfds++; 1164 continue; 1165 } 1166 if (exceptfds && FD_ISSET(i, exceptfds)) { 1167 realnfds++; 1168 continue; 1169 } 1170 } 1171 1172 if (realnfds) { 1173 pfds = calloc(realnfds, sizeof(*pfds)); 1174 if (!pfds) 1175 return -1; 1176 } else { 1177 pfds = NULL; 1178 } 1179 1180 for (i = 0, j = 0; i < nfds; i++) { 1181 incr = 0; 1182 if (readfds && FD_ISSET(i, readfds)) { 1183 pfds[j].fd = i; 1184 pfds[j].events |= POLLIN; 1185 incr=1; 1186 } 1187 if (writefds && FD_ISSET(i, writefds)) { 1188 pfds[j].fd = i; 1189 pfds[j].events |= POLLOUT; 1190 incr=1; 1191 } 1192 if (exceptfds && FD_ISSET(i, exceptfds)) { 1193 pfds[j].fd = i; 1194 pfds[j].events |= POLLHUP|POLLERR; 1195 incr=1; 1196 } 1197 if (incr) 1198 j++; 1199 } 1200 assert(j == (int)realnfds); 1201 1202 if (timeout) { 1203 TIMEVAL_TO_TIMESPEC(timeout, &ts); 1204 tsp = &ts; 1205 } 1206 rv = REALPOLLTS(pfds, realnfds, tsp, NULL); 1207 /* 1208 * "If select() returns with an error the descriptor sets 1209 * will be unmodified" 1210 */ 1211 if (rv < 0) 1212 goto out; 1213 1214 /* 1215 * zero out results (can't use FD_ZERO for the 1216 * obvious select-me-not reason). whee. 1217 * 1218 * We do this here since some software ignores the return 1219 * value of select, and hence if the timeout expires, it may 1220 * assume all input descriptors have activity. 1221 */ 1222 for (i = 0; i < nfds; i++) { 1223 if (readfds) 1224 FD_CLR(i, readfds); 1225 if (writefds) 1226 FD_CLR(i, writefds); 1227 if (exceptfds) 1228 FD_CLR(i, exceptfds); 1229 } 1230 if (rv == 0) 1231 goto out; 1232 1233 /* 1234 * We have >0 fds with activity. Harvest the results. 
1235 */ 1236 for (i = 0; i < (int)realnfds; i++) { 1237 if (readfds) { 1238 if (pfds[i].revents & POLLIN) { 1239 FD_SET(pfds[i].fd, readfds); 1240 } 1241 } 1242 if (writefds) { 1243 if (pfds[i].revents & POLLOUT) { 1244 FD_SET(pfds[i].fd, writefds); 1245 } 1246 } 1247 if (exceptfds) { 1248 if (pfds[i].revents & (POLLHUP|POLLERR)) { 1249 FD_SET(pfds[i].fd, exceptfds); 1250 } 1251 } 1252 } 1253 1254 out: 1255 free(pfds); 1256 return rv; 1257 } 1258 1259 static void 1260 checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall) 1261 { 1262 nfds_t i; 1263 1264 for (i = 0; i < nfds; i++) { 1265 if (fds[i].fd == -1) 1266 continue; 1267 1268 if (fd_isrump(fds[i].fd)) 1269 (*rumpcall)++; 1270 else 1271 (*hostcall)++; 1272 } 1273 } 1274 1275 static void 1276 adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int)) 1277 { 1278 nfds_t i; 1279 1280 for (i = 0; i < nfds; i++) { 1281 fds[i].fd = fdadj(fds[i].fd); 1282 } 1283 } 1284 1285 /* 1286 * poll is easy as long as the call comes in the fds only in one 1287 * kernel. otherwise its quite tricky... 
1288 */ 1289 struct pollarg { 1290 struct pollfd *pfds; 1291 nfds_t nfds; 1292 const struct timespec *ts; 1293 const sigset_t *sigmask; 1294 int pipefd; 1295 int errnum; 1296 }; 1297 1298 static void * 1299 hostpoll(void *arg) 1300 { 1301 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *, 1302 const sigset_t *); 1303 struct pollarg *parg = arg; 1304 intptr_t rv; 1305 1306 op_pollts = GETSYSCALL(host, POLLTS); 1307 rv = op_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask); 1308 if (rv == -1) 1309 parg->errnum = errno; 1310 rump_sys_write(parg->pipefd, &rv, sizeof(rv)); 1311 1312 return (void *)(intptr_t)rv; 1313 } 1314 1315 int 1316 REALPOLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts, 1317 const sigset_t *sigmask) 1318 { 1319 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *, 1320 const sigset_t *); 1321 int (*host_close)(int); 1322 int hostcall = 0, rumpcall = 0; 1323 pthread_t pt; 1324 nfds_t i; 1325 int rv; 1326 1327 DPRINTF(("poll\n")); 1328 checkpoll(fds, nfds, &hostcall, &rumpcall); 1329 1330 if (hostcall && rumpcall) { 1331 struct pollfd *pfd_host = NULL, *pfd_rump = NULL; 1332 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1}; 1333 struct pollarg parg; 1334 uintptr_t lrv; 1335 int sverrno = 0, trv; 1336 1337 /* 1338 * ok, this is where it gets tricky. We must support 1339 * this since it's a very common operation in certain 1340 * types of software (telnet, netcat, etc). We allocate 1341 * two vectors and run two poll commands in separate 1342 * threads. Whichever returns first "wins" and the 1343 * other kernel's fds won't show activity. 1344 */ 1345 rv = -1; 1346 1347 /* allocate full vector for O(n) joining after call */ 1348 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1)); 1349 if (!pfd_host) 1350 goto out; 1351 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1)); 1352 if (!pfd_rump) { 1353 goto out; 1354 } 1355 1356 /* 1357 * then, open two pipes, one for notifications 1358 * to each kernel. 
1359 */ 1360 if ((rv = rump_sys_pipe(rpipe)) == -1) { 1361 sverrno = errno; 1362 } 1363 if (rv == 0 && (rv = pipe(hpipe)) == -1) { 1364 sverrno = errno; 1365 } 1366 1367 /* split vectors (or signal errors) */ 1368 for (i = 0; i < nfds; i++) { 1369 int fd; 1370 1371 fds[i].revents = 0; 1372 if (fds[i].fd == -1) { 1373 pfd_host[i].fd = -1; 1374 pfd_rump[i].fd = -1; 1375 } else if (fd_isrump(fds[i].fd)) { 1376 pfd_host[i].fd = -1; 1377 fd = fd_host2rump(fds[i].fd); 1378 if (fd == rpipe[0] || fd == rpipe[1]) { 1379 fds[i].revents = POLLNVAL; 1380 if (rv != -1) 1381 rv++; 1382 } 1383 pfd_rump[i].fd = fd; 1384 pfd_rump[i].events = fds[i].events; 1385 } else { 1386 pfd_rump[i].fd = -1; 1387 fd = fds[i].fd; 1388 if (fd == hpipe[0] || fd == hpipe[1]) { 1389 fds[i].revents = POLLNVAL; 1390 if (rv != -1) 1391 rv++; 1392 } 1393 pfd_host[i].fd = fd; 1394 pfd_host[i].events = fds[i].events; 1395 } 1396 pfd_rump[i].revents = pfd_host[i].revents = 0; 1397 } 1398 if (rv) { 1399 goto out; 1400 } 1401 1402 pfd_host[nfds].fd = hpipe[0]; 1403 pfd_host[nfds].events = POLLIN; 1404 pfd_rump[nfds].fd = rpipe[0]; 1405 pfd_rump[nfds].events = POLLIN; 1406 1407 /* 1408 * then, create a thread to do host part and meanwhile 1409 * do rump kernel part right here 1410 */ 1411 1412 parg.pfds = pfd_host; 1413 parg.nfds = nfds+1; 1414 parg.ts = ts; 1415 parg.sigmask = sigmask; 1416 parg.pipefd = rpipe[1]; 1417 pthread_create(&pt, NULL, hostpoll, &parg); 1418 1419 op_pollts = GETSYSCALL(rump, POLLTS); 1420 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL); 1421 sverrno = errno; 1422 write(hpipe[1], &rv, sizeof(rv)); 1423 pthread_join(pt, (void *)&trv); 1424 1425 /* check who "won" and merge results */ 1426 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) { 1427 rv = trv; 1428 1429 for (i = 0; i < nfds; i++) { 1430 if (pfd_rump[i].fd != -1) 1431 fds[i].revents = pfd_rump[i].revents; 1432 } 1433 sverrno = parg.errnum; 1434 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) { 1435 rv = trv; 1436 1437 for 
(i = 0; i < nfds; i++) { 1438 if (pfd_host[i].fd != -1) 1439 fds[i].revents = pfd_host[i].revents; 1440 } 1441 } else { 1442 rv = 0; 1443 } 1444 1445 out: 1446 host_close = GETSYSCALL(host, CLOSE); 1447 if (rpipe[0] != -1) 1448 rump_sys_close(rpipe[0]); 1449 if (rpipe[1] != -1) 1450 rump_sys_close(rpipe[1]); 1451 if (hpipe[0] != -1) 1452 host_close(hpipe[0]); 1453 if (hpipe[1] != -1) 1454 host_close(hpipe[1]); 1455 free(pfd_host); 1456 free(pfd_rump); 1457 errno = sverrno; 1458 } else { 1459 if (hostcall) { 1460 op_pollts = GETSYSCALL(host, POLLTS); 1461 } else { 1462 op_pollts = GETSYSCALL(rump, POLLTS); 1463 adjustpoll(fds, nfds, fd_host2rump); 1464 } 1465 1466 rv = op_pollts(fds, nfds, ts, sigmask); 1467 if (rumpcall) 1468 adjustpoll(fds, nfds, fd_rump2host); 1469 } 1470 1471 return rv; 1472 } 1473 1474 int 1475 poll(struct pollfd *fds, nfds_t nfds, int timeout) 1476 { 1477 struct timespec ts; 1478 struct timespec *tsp = NULL; 1479 1480 if (timeout != INFTIM) { 1481 ts.tv_sec = timeout / 1000; 1482 ts.tv_nsec = (timeout % 1000) * 1000*1000; 1483 1484 tsp = &ts; 1485 } 1486 1487 return REALPOLLTS(fds, nfds, tsp, NULL); 1488 } 1489 1490 int 1491 REALKEVENT(int kq, const struct kevent *changelist, size_t nchanges, 1492 struct kevent *eventlist, size_t nevents, 1493 const struct timespec *timeout) 1494 { 1495 int (*op_kevent)(int, const struct kevent *, size_t, 1496 struct kevent *, size_t, const struct timespec *); 1497 const struct kevent *ev; 1498 size_t i; 1499 1500 /* 1501 * Check that we don't attempt to kevent rump kernel fd's. 1502 * That needs similar treatment to select/poll, but is slightly 1503 * trickier since we need to manage to different kq descriptors. 1504 * (TODO, in case you're wondering). 
1505 */ 1506 for (i = 0; i < nchanges; i++) { 1507 ev = &changelist[i]; 1508 if (ev->filter == EVFILT_READ || ev->filter == EVFILT_WRITE || 1509 ev->filter == EVFILT_VNODE) { 1510 if (fd_isrump((int)ev->ident)) 1511 return ENOTSUP; 1512 } 1513 } 1514 1515 op_kevent = GETSYSCALL(host, KEVENT); 1516 return op_kevent(kq, changelist, nchanges, eventlist, nevents, timeout); 1517 } 1518 1519 /* 1520 * Rest are std type calls. 1521 */ 1522 1523 FDCALL(int, bind, DUALCALL_BIND, \ 1524 (int fd, const struct sockaddr *name, socklen_t namelen), \ 1525 (int, const struct sockaddr *, socklen_t), \ 1526 (fd, name, namelen)) 1527 1528 FDCALL(int, connect, DUALCALL_CONNECT, \ 1529 (int fd, const struct sockaddr *name, socklen_t namelen), \ 1530 (int, const struct sockaddr *, socklen_t), \ 1531 (fd, name, namelen)) 1532 1533 FDCALL(int, getpeername, DUALCALL_GETPEERNAME, \ 1534 (int fd, struct sockaddr *name, socklen_t *namelen), \ 1535 (int, struct sockaddr *, socklen_t *), \ 1536 (fd, name, namelen)) 1537 1538 FDCALL(int, getsockname, DUALCALL_GETSOCKNAME, \ 1539 (int fd, struct sockaddr *name, socklen_t *namelen), \ 1540 (int, struct sockaddr *, socklen_t *), \ 1541 (fd, name, namelen)) 1542 1543 FDCALL(int, listen, DUALCALL_LISTEN, \ 1544 (int fd, int backlog), \ 1545 (int, int), \ 1546 (fd, backlog)) 1547 1548 FDCALL(ssize_t, recvfrom, DUALCALL_RECVFROM, \ 1549 (int fd, void *buf, size_t len, int flags, \ 1550 struct sockaddr *from, socklen_t *fromlen), \ 1551 (int, void *, size_t, int, struct sockaddr *, socklen_t *), \ 1552 (fd, buf, len, flags, from, fromlen)) 1553 1554 FDCALL(ssize_t, sendto, DUALCALL_SENDTO, \ 1555 (int fd, const void *buf, size_t len, int flags, \ 1556 const struct sockaddr *to, socklen_t tolen), \ 1557 (int, const void *, size_t, int, \ 1558 const struct sockaddr *, socklen_t), \ 1559 (fd, buf, len, flags, to, tolen)) 1560 1561 FDCALL(ssize_t, recvmsg, DUALCALL_RECVMSG, \ 1562 (int fd, struct msghdr *msg, int flags), \ 1563 (int, struct msghdr *, int), 
\ 1564 (fd, msg, flags)) 1565 1566 FDCALL(ssize_t, sendmsg, DUALCALL_SENDMSG, \ 1567 (int fd, const struct msghdr *msg, int flags), \ 1568 (int, const struct msghdr *, int), \ 1569 (fd, msg, flags)) 1570 1571 FDCALL(int, getsockopt, DUALCALL_GETSOCKOPT, \ 1572 (int fd, int level, int optn, void *optval, socklen_t *optlen), \ 1573 (int, int, int, void *, socklen_t *), \ 1574 (fd, level, optn, optval, optlen)) 1575 1576 FDCALL(int, setsockopt, DUALCALL_SETSOCKOPT, \ 1577 (int fd, int level, int optn, \ 1578 const void *optval, socklen_t optlen), \ 1579 (int, int, int, const void *, socklen_t), \ 1580 (fd, level, optn, optval, optlen)) 1581 1582 FDCALL(int, shutdown, DUALCALL_SHUTDOWN, \ 1583 (int fd, int how), \ 1584 (int, int), \ 1585 (fd, how)) 1586 1587 #if _FORTIFY_SOURCE > 0 1588 #define STUB(fun) __ssp_weak_name(fun) 1589 ssize_t _sys_readlink(const char * __restrict, char * __restrict, size_t); 1590 ssize_t 1591 STUB(readlink)(const char * __restrict path, char * __restrict buf, 1592 size_t bufsiz) 1593 { 1594 return _sys_readlink(path, buf, bufsiz); 1595 } 1596 1597 char *_sys_getcwd(char *, size_t); 1598 char * 1599 STUB(getcwd)(char *buf, size_t size) 1600 { 1601 return _sys_getcwd(buf, size); 1602 } 1603 #else 1604 #define STUB(fun) fun 1605 #endif 1606 1607 FDCALL(ssize_t, REALREAD, DUALCALL_READ, \ 1608 (int fd, void *buf, size_t buflen), \ 1609 (int, void *, size_t), \ 1610 (fd, buf, buflen)) 1611 1612 FDCALL(ssize_t, readv, DUALCALL_READV, \ 1613 (int fd, const struct iovec *iov, int iovcnt), \ 1614 (int, const struct iovec *, int), \ 1615 (fd, iov, iovcnt)) 1616 1617 FDCALL(ssize_t, writev, DUALCALL_WRITEV, \ 1618 (int fd, const struct iovec *iov, int iovcnt), \ 1619 (int, const struct iovec *, int), \ 1620 (fd, iov, iovcnt)) 1621 1622 FDCALL(int, REALFSTAT, DUALCALL_FSTAT, \ 1623 (int fd, struct stat *sb), \ 1624 (int, struct stat *), \ 1625 (fd, sb)) 1626 1627 FDCALL(int, fstatvfs1, DUALCALL_FSTATVFS1, \ 1628 (int fd, struct statvfs *buf, int 
flags), \ 1629 (int, struct statvfs *, int), \ 1630 (fd, buf, flags)) 1631 1632 FDCALL(off_t, REALLSEEK, DUALCALL_LSEEK, \ 1633 (int fd, off_t offset, int whence), \ 1634 (int, off_t, int), \ 1635 (fd, offset, whence)) 1636 1637 FDCALL(int, REALGETDENTS, DUALCALL_GETDENTS, \ 1638 (int fd, char *buf, size_t nbytes), \ 1639 (int, char *, size_t), \ 1640 (fd, buf, nbytes)) 1641 1642 FDCALL(int, fchown, DUALCALL_FCHOWN, \ 1643 (int fd, uid_t owner, gid_t group), \ 1644 (int, uid_t, gid_t), \ 1645 (fd, owner, group)) 1646 1647 FDCALL(int, fchmod, DUALCALL_FCHMOD, \ 1648 (int fd, mode_t mode), \ 1649 (int, mode_t), \ 1650 (fd, mode)) 1651 1652 FDCALL(int, ftruncate, DUALCALL_FTRUNCATE, \ 1653 (int fd, off_t length), \ 1654 (int, off_t), \ 1655 (fd, length)) 1656 1657 FDCALL(int, fsync, DUALCALL_FSYNC, \ 1658 (int fd), \ 1659 (int), \ 1660 (fd)) 1661 1662 FDCALL(int, fsync_range, DUALCALL_FSYNC_RANGE, \ 1663 (int fd, int how, off_t start, off_t length), \ 1664 (int, int, off_t, off_t), \ 1665 (fd, how, start, length)) 1666 1667 FDCALL(int, futimes, DUALCALL_FUTIMES, \ 1668 (int fd, const struct timeval *tv), \ 1669 (int, const struct timeval *), \ 1670 (fd, tv)) 1671 1672 /* 1673 * path-based selectors 1674 */ 1675 1676 PATHCALL(int, REALSTAT, DUALCALL_STAT, \ 1677 (const char *path, struct stat *sb), \ 1678 (const char *, struct stat *), \ 1679 (path, sb)) 1680 1681 PATHCALL(int, REALLSTAT, DUALCALL_LSTAT, \ 1682 (const char *path, struct stat *sb), \ 1683 (const char *, struct stat *), \ 1684 (path, sb)) 1685 1686 PATHCALL(int, chown, DUALCALL_CHOWN, \ 1687 (const char *path, uid_t owner, gid_t group), \ 1688 (const char *, uid_t, gid_t), \ 1689 (path, owner, group)) 1690 1691 PATHCALL(int, lchown, DUALCALL_LCHOWN, \ 1692 (const char *path, uid_t owner, gid_t group), \ 1693 (const char *, uid_t, gid_t), \ 1694 (path, owner, group)) 1695 1696 PATHCALL(int, chmod, DUALCALL_CHMOD, \ 1697 (const char *path, mode_t mode), \ 1698 (const char *, mode_t), \ 1699 (path, mode)) 

/*
 * Remaining path-based calls.  Each PATHCALL invocation lists: return
 * type, function name, DUALCALL index, full parameter list, parameter
 * types, call arguments.  (The PATHCALL macro itself is defined
 * outside this excerpt.)
 */

PATHCALL(int, lchmod, DUALCALL_LCHMOD,					\
	(const char *path, mode_t mode),				\
	(const char *, mode_t),						\
	(path, mode))

PATHCALL(int, statvfs1, DUALCALL_STATVFS1,				\
	(const char *path, struct statvfs *buf, int flags),		\
	(const char *, struct statvfs *, int),				\
	(path, buf, flags))

PATHCALL(int, unlink, DUALCALL_UNLINK,					\
	(const char *path),						\
	(const char *),							\
	(path))

/* NOTE(review): first parameter is the link target, second is "path" */
PATHCALL(int, symlink, DUALCALL_SYMLINK,				\
	(const char *target, const char *path),				\
	(const char *, const char *),					\
	(target, path))

PATHCALL(ssize_t, readlink, DUALCALL_READLINK,				\
	(const char *path, char *buf, size_t bufsiz),			\
	(const char *, char *, size_t),					\
	(path, buf, bufsiz))

PATHCALL(int, mkdir, DUALCALL_MKDIR,					\
	(const char *path, mode_t mode),				\
	(const char *, mode_t),						\
	(path, mode))

PATHCALL(int, rmdir, DUALCALL_RMDIR,					\
	(const char *path),						\
	(const char *),							\
	(path))

PATHCALL(int, utimes, DUALCALL_UTIMES,					\
	(const char *path, const struct timeval *tv),			\
	(const char *, const struct timeval *),				\
	(path, tv))

PATHCALL(int, lutimes, DUALCALL_LUTIMES,				\
	(const char *path, const struct timeval *tv),			\
	(const char *, const struct timeval *),				\
	(path, tv))

PATHCALL(int, truncate, DUALCALL_TRUNCATE,				\
	(const char *path, off_t length),				\
	(const char *, off_t),						\
	(path, length))

/*
 * Note: with mount the decisive parameter is the mount
 * destination directory.  This is because we don't really know
 * about the "source" directory in a generic call (and besides,
 * it might not even exist, cf. nfs).
 */
PATHCALL(int, REALMOUNT, DUALCALL_MOUNT,				\
	(const char *type, const char *path, int flags,			\
	    void *data, size_t dlen),					\
	(const char *, const char *, int, void *, size_t),		\
	(type, path, flags, data, dlen))

PATHCALL(int, unmount, DUALCALL_UNMOUNT,				\
	(const char *path, int flags),					\
	(const char *, int),						\
	(path, flags))