1 /* $NetBSD: hijack.c,v 1.82 2011/03/09 23:26:19 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __RCSID("$NetBSD: hijack.c,v 1.82 2011/03/09 23:26:19 pooka Exp $"); 30 31 #define __ssp_weak_name(fun) _hijack_ ## fun 32 33 #include <sys/param.h> 34 #include <sys/types.h> 35 #include <sys/event.h> 36 #include <sys/ioctl.h> 37 #include <sys/mman.h> 38 #include <sys/mount.h> 39 #include <sys/poll.h> 40 #include <sys/socket.h> 41 #include <sys/statvfs.h> 42 43 #include <rump/rumpclient.h> 44 #include <rump/rump_syscalls.h> 45 46 #include <assert.h> 47 #include <dlfcn.h> 48 #include <err.h> 49 #include <errno.h> 50 #include <fcntl.h> 51 #include <poll.h> 52 #include <pthread.h> 53 #include <signal.h> 54 #include <stdarg.h> 55 #include <stdbool.h> 56 #include <stdio.h> 57 #include <stdlib.h> 58 #include <string.h> 59 #include <time.h> 60 #include <unistd.h> 61 62 #include "hijack.h" 63 64 enum dualcall { 65 DUALCALL_WRITE, DUALCALL_WRITEV, DUALCALL_PWRITE, DUALCALL_PWRITEV, 66 DUALCALL_IOCTL, DUALCALL_FCNTL, 67 DUALCALL_SOCKET, DUALCALL_ACCEPT, DUALCALL_BIND, DUALCALL_CONNECT, 68 DUALCALL_GETPEERNAME, DUALCALL_GETSOCKNAME, DUALCALL_LISTEN, 69 DUALCALL_RECVFROM, DUALCALL_RECVMSG, 70 DUALCALL_SENDTO, DUALCALL_SENDMSG, 71 DUALCALL_GETSOCKOPT, DUALCALL_SETSOCKOPT, 72 DUALCALL_SHUTDOWN, 73 DUALCALL_READ, DUALCALL_READV, DUALCALL_PREAD, DUALCALL_PREADV, 74 DUALCALL_DUP2, 75 DUALCALL_CLOSE, 76 DUALCALL_POLLTS, 77 DUALCALL_KEVENT, 78 DUALCALL_STAT, DUALCALL_LSTAT, DUALCALL_FSTAT, 79 DUALCALL_CHMOD, DUALCALL_LCHMOD, DUALCALL_FCHMOD, 80 DUALCALL_CHOWN, DUALCALL_LCHOWN, DUALCALL_FCHOWN, 81 DUALCALL_OPEN, 82 DUALCALL_STATVFS1, DUALCALL_FSTATVFS1, 83 DUALCALL_CHDIR, DUALCALL_FCHDIR, 84 DUALCALL_LSEEK, 85 DUALCALL_GETDENTS, 86 DUALCALL_UNLINK, DUALCALL_SYMLINK, DUALCALL_READLINK, 87 DUALCALL_RENAME, 88 DUALCALL_MKDIR, DUALCALL_RMDIR, 89 DUALCALL_UTIMES, DUALCALL_LUTIMES, DUALCALL_FUTIMES, 90 DUALCALL_TRUNCATE, DUALCALL_FTRUNCATE, 91 DUALCALL_FSYNC, DUALCALL_FSYNC_RANGE, 92 DUALCALL_MOUNT, DUALCALL_UNMOUNT, 93 DUALCALL___GETCWD, 94 DUALCALL_CHFLAGS, DUALCALL_LCHFLAGS, DUALCALL_FCHFLAGS, 95 DUALCALL_ACCESS, 96 DUALCALL_MKNOD, 97 DUALCALL___SYSCTL, 98 DUALCALL_GETVFSSTAT, DUALCALL_NFSSVC, 99 DUALCALL_GETFH, DUALCALL_FHOPEN, DUALCALL_FHSTAT, DUALCALL_FHSTATVFS1, 100 DUALCALL_QUOTACTL, 101 DUALCALL__NUM 102 }; 103 104 #define RSYS_STRING(a) __STRING(a) 105 #define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a)) 106 107 /* 108 * Would be nice to get this automatically in sync with libc. 109 * Also, this does not work for compat-using binaries! 110 */ 111 #if !__NetBSD_Prereq__(5,99,7) 112 #define REALSELECT select 113 #define REALPOLLTS pollts 114 #define REALKEVENT kevent 115 #define REALSTAT __stat30 116 #define REALLSTAT __lstat30 117 #define REALFSTAT __fstat30 118 #define REALUTIMES utimes 119 #define REALLUTIMES lutimes 120 #define REALFUTIMES futimes 121 #define REALMKNOD mknod 122 #define REALFHSTAT __fhstat40 123 #else 124 #define REALSELECT _sys___select50 125 #define REALPOLLTS _sys___pollts50 126 #define REALKEVENT _sys___kevent50 127 #define REALSTAT __stat50 128 #define REALLSTAT __lstat50 129 #define REALFSTAT __fstat50 130 #define REALUTIMES __utimes50 131 #define REALLUTIMES __lutimes50 132 #define REALFUTIMES __futimes50 133 #define REALMKNOD __mknod50 134 #define REALFHSTAT __fhstat50 135 #endif 136 137 #define REALREAD _sys_read 138 #define REALPREAD _sys_pread 139 #define REALPWRITE _sys_pwrite 140 #define REALGETDENTS __getdents30 141 #define REALMOUNT __mount50 142 #define REALGETFH __getfh30 143 #define REALFHOPEN __fhopen40 144 #define REALFHSTATVFS1 __fhstatvfs140 145 #define REALQUOTACTL __quotactl50 146 147 int REALSELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *); 148 int REALPOLLTS(struct pollfd *, nfds_t, 149 const struct timespec *, const sigset_t *); 150 int REALKEVENT(int, const struct kevent *, size_t, struct kevent *, size_t, 151 const struct timespec *); 152 ssize_t REALREAD(int, void *, size_t); 153 ssize_t REALPREAD(int, void *, size_t, off_t); 154 ssize_t REALPWRITE(int, const void *, size_t, off_t); 155 int REALSTAT(const char *, struct stat *); 156 int REALLSTAT(const char *, struct stat *); 157 int REALFSTAT(int, struct stat *); 158 int REALGETDENTS(int, char *, size_t); 159 int REALUTIMES(const char *, const struct timeval [2]); 160 int REALLUTIMES(const char *, const struct timeval [2]); 161 int REALFUTIMES(int, const struct timeval [2]); 162 int REALMOUNT(const char *, const char *, int, void *, size_t); 163 int __getcwd(char *, size_t); 164 int REALMKNOD(const char *, mode_t, dev_t); 165 int REALGETFH(const char *, void *, size_t *); 166 int REALFHOPEN(const void *, size_t, int); 167 int REALFHSTAT(const void *, size_t, struct stat *); 168 int REALFHSTATVFS1(const void *, size_t, struct statvfs *, int); 169 int REALQUOTACTL(const char *, struct plistref *); 170 171 #define S(a) __STRING(a) 172 struct sysnames { 173 enum dualcall scm_callnum; 174 const char *scm_hostname; 175 const char *scm_rumpname; 176 }; 177 178 struct sysnames sys_mandatory[] = { 179 { DUALCALL_SOCKET, "__socket30", RSYS_NAME(SOCKET) }, 180 { DUALCALL_ACCEPT, "accept", RSYS_NAME(ACCEPT) }, 181 { DUALCALL_BIND, "bind", RSYS_NAME(BIND) }, 182 { DUALCALL_CONNECT, "connect", RSYS_NAME(CONNECT) }, 183 { DUALCALL_GETPEERNAME, "getpeername", RSYS_NAME(GETPEERNAME) }, 184 { DUALCALL_GETSOCKNAME, "getsockname", RSYS_NAME(GETSOCKNAME) }, 185 { DUALCALL_LISTEN, "listen", RSYS_NAME(LISTEN) }, 186 { DUALCALL_RECVFROM, "recvfrom", RSYS_NAME(RECVFROM) }, 187 { DUALCALL_RECVMSG, "recvmsg", RSYS_NAME(RECVMSG) }, 188 { DUALCALL_SENDTO, "sendto", RSYS_NAME(SENDTO) }, 189 { DUALCALL_SENDMSG, "sendmsg", RSYS_NAME(SENDMSG) }, 190 { DUALCALL_GETSOCKOPT, "getsockopt", RSYS_NAME(GETSOCKOPT) }, 191 { DUALCALL_SETSOCKOPT, "setsockopt", RSYS_NAME(SETSOCKOPT) }, 192 { DUALCALL_SHUTDOWN, "shutdown", RSYS_NAME(SHUTDOWN) }, 193 { DUALCALL_READ, S(REALREAD), RSYS_NAME(READ) }, 194 { DUALCALL_READV, "readv", RSYS_NAME(READV) }, 195 { DUALCALL_PREAD, S(REALPREAD), RSYS_NAME(PREAD) }, 196 { DUALCALL_PREADV, "preadv", RSYS_NAME(PREADV) }, 197 { DUALCALL_WRITE, "write", RSYS_NAME(WRITE) }, 198 { DUALCALL_WRITEV, "writev", RSYS_NAME(WRITEV) }, 199 { DUALCALL_PWRITE, S(REALPWRITE), RSYS_NAME(PWRITE) }, 200 { DUALCALL_PWRITEV, "pwritev", RSYS_NAME(PWRITEV) }, 201 { DUALCALL_IOCTL, "ioctl", RSYS_NAME(IOCTL) }, 202 { DUALCALL_FCNTL, "fcntl", RSYS_NAME(FCNTL) }, 203 { DUALCALL_DUP2, "dup2", RSYS_NAME(DUP2) }, 204 { DUALCALL_CLOSE, "close", RSYS_NAME(CLOSE) }, 205 { DUALCALL_POLLTS, S(REALPOLLTS), RSYS_NAME(POLLTS) }, 206 { DUALCALL_KEVENT, S(REALKEVENT), RSYS_NAME(KEVENT) }, 207 { DUALCALL_STAT, S(REALSTAT), RSYS_NAME(STAT) }, 208 { DUALCALL_LSTAT, S(REALLSTAT), RSYS_NAME(LSTAT) }, 209 { DUALCALL_FSTAT, S(REALFSTAT), RSYS_NAME(FSTAT) }, 210 { DUALCALL_CHOWN, "chown", RSYS_NAME(CHOWN) }, 211 { DUALCALL_LCHOWN, "lchown", RSYS_NAME(LCHOWN) }, 212 { DUALCALL_FCHOWN, "fchown", RSYS_NAME(FCHOWN) }, 213 { DUALCALL_CHMOD, "chmod", RSYS_NAME(CHMOD) }, 214 { DUALCALL_LCHMOD, "lchmod", RSYS_NAME(LCHMOD) }, 215 { DUALCALL_FCHMOD, "fchmod", RSYS_NAME(FCHMOD) }, 216 { DUALCALL_UTIMES, S(REALUTIMES), RSYS_NAME(UTIMES) }, 217 { DUALCALL_LUTIMES, S(REALLUTIMES), RSYS_NAME(LUTIMES) }, 218 { DUALCALL_FUTIMES, S(REALFUTIMES), RSYS_NAME(FUTIMES) }, 219 { DUALCALL_OPEN, "open", RSYS_NAME(OPEN) }, 220 { DUALCALL_STATVFS1, "statvfs1", RSYS_NAME(STATVFS1) }, 221 { DUALCALL_FSTATVFS1, "fstatvfs1", RSYS_NAME(FSTATVFS1) }, 222 { DUALCALL_CHDIR, "chdir", RSYS_NAME(CHDIR) }, 223 { DUALCALL_FCHDIR, "fchdir", RSYS_NAME(FCHDIR) }, 224 { DUALCALL_LSEEK, "lseek", RSYS_NAME(LSEEK) }, 225 { DUALCALL_GETDENTS, "__getdents30", RSYS_NAME(GETDENTS) }, 226 { DUALCALL_UNLINK, "unlink", RSYS_NAME(UNLINK) }, 227 { DUALCALL_SYMLINK, "symlink", RSYS_NAME(SYMLINK) }, 228 { DUALCALL_READLINK, "readlink", RSYS_NAME(READLINK) }, 229 { DUALCALL_RENAME, "rename", RSYS_NAME(RENAME) }, 230 { DUALCALL_MKDIR, "mkdir", RSYS_NAME(MKDIR) }, 231 { DUALCALL_RMDIR, "rmdir", RSYS_NAME(RMDIR) }, 232 { DUALCALL_TRUNCATE, "truncate", RSYS_NAME(TRUNCATE) }, 233 { DUALCALL_FTRUNCATE, "ftruncate", RSYS_NAME(FTRUNCATE) }, 234 { DUALCALL_FSYNC, "fsync", RSYS_NAME(FSYNC) }, 235 { DUALCALL_FSYNC_RANGE, "fsync_range", RSYS_NAME(FSYNC_RANGE) }, 236 { DUALCALL_MOUNT, S(REALMOUNT), RSYS_NAME(MOUNT) }, 237 { DUALCALL_UNMOUNT, "unmount", RSYS_NAME(UNMOUNT) }, 238 { DUALCALL___GETCWD, "__getcwd", RSYS_NAME(__GETCWD) }, 239 { DUALCALL_CHFLAGS, "chflags", RSYS_NAME(CHFLAGS) }, 240 { DUALCALL_LCHFLAGS, "lchflags", RSYS_NAME(LCHFLAGS) }, 241 { DUALCALL_FCHFLAGS, "fchflags", RSYS_NAME(FCHFLAGS) }, 242 { DUALCALL_ACCESS, "access", RSYS_NAME(ACCESS) }, 243 { DUALCALL_MKNOD, S(REALMKNOD), RSYS_NAME(MKNOD) }, 244 { DUALCALL___SYSCTL, "__sysctl", RSYS_NAME(__SYSCTL) }, 245 { DUALCALL_GETVFSSTAT, "getvfsstat", RSYS_NAME(GETVFSSTAT) }, 246 { DUALCALL_NFSSVC, "nfssvc", RSYS_NAME(NFSSVC) }, 247 { DUALCALL_GETFH, S(REALGETFH), RSYS_NAME(GETFH) }, 248 { DUALCALL_FHOPEN, S(REALFHOPEN), RSYS_NAME(FHOPEN) }, 249 { DUALCALL_FHSTAT, S(REALFHSTAT), RSYS_NAME(FHSTAT) }, 250 { DUALCALL_FHSTATVFS1, S(REALFHSTATVFS1),RSYS_NAME(FHSTATVFS1) }, 251 }; 252 253 struct sysnames sys_optional[] = { 254 { DUALCALL_QUOTACTL, S(REALQUOTACTL),RSYS_NAME(QUOTACTL) }, 255 }; 256 #undef S 257 258 static int 259 nolibcstub(void) 260 { 261 262 return ENOSYS; 263 } 264 265 struct bothsys { 266 void *bs_host; 267 void *bs_rump; 268 } syscalls[DUALCALL__NUM]; 269 #define GETSYSCALL(which, name) syscalls[DUALCALL_##name].bs_##which 270 271 static pid_t (*host_fork)(void); 272 static int (*host_daemon)(int, int); 273 static void * (*host_mmap)(void *, size_t, int, int, int, off_t); 274 275 /* 276 * This tracks if our process is in a subdirectory of /rump. 277 * It's preserved over exec. 278 */ 279 static bool pwdinrump; 280 281 enum pathtype { PATH_HOST, PATH_RUMP, PATH_RUMPBLANKET }; 282 283 static bool fd_isrump(int); 284 static enum pathtype path_isrump(const char *); 285 286 /* 287 * Maintain a mapping table for the usual dup2 suspects. 288 * Could use atomic ops to operate on dup2vec, but an application 289 * racing there is not well-defined, so don't bother. 290 */ 291 /* note: you cannot change this without editing the env-passing code */ 292 #define DUP2HIGH 2 293 static uint32_t dup2vec[DUP2HIGH+1]; 294 #define DUP2BIT (1<<31) 295 #define DUP2ALIAS (1<<30) 296 #define DUP2FDMASK ((1<<30)-1) 297 298 static bool 299 isdup2d(int fd) 300 { 301 302 return fd <= DUP2HIGH && fd >= 0 && dup2vec[fd] & DUP2BIT; 303 } 304 305 static int 306 mapdup2(int hostfd) 307 { 308 309 _DIAGASSERT(isdup2d(hostfd)); 310 return dup2vec[hostfd] & DUP2FDMASK; 311 } 312 313 static int 314 unmapdup2(int rumpfd) 315 { 316 int i; 317 318 for (i = 0; i <= DUP2HIGH; i++) { 319 if (dup2vec[i] & DUP2BIT && 320 (dup2vec[i] & DUP2FDMASK) == (unsigned)rumpfd) 321 return i; 322 } 323 return -1; 324 } 325 326 static void 327 setdup2(int hostfd, int rumpfd) 328 { 329 330 if (hostfd > DUP2HIGH) { 331 _DIAGASSERT(0); 332 return; 333 } 334 335 dup2vec[hostfd] = DUP2BIT | DUP2ALIAS | rumpfd; 336 } 337 338 static void 339 clrdup2(int hostfd) 340 { 341 342 if (hostfd > DUP2HIGH) { 343 _DIAGASSERT(0); 344 return; 345 } 346 347 dup2vec[hostfd] = 0; 348 } 349 350 static bool 351 killdup2alias(int rumpfd) 352 { 353 int hostfd; 354 355 if ((hostfd = unmapdup2(rumpfd)) == -1) 356 return false; 357 358 if (dup2vec[hostfd] & DUP2ALIAS) { 359 dup2vec[hostfd] &= ~DUP2ALIAS; 360 return true; 361 } 362 return false; 363 } 364 365 //#define DEBUGJACK 366 #ifdef DEBUGJACK 367 #define DPRINTF(x) mydprintf x 368 static void 369 mydprintf(const char *fmt, ...) 370 { 371 va_list ap; 372 373 if (isdup2d(STDERR_FILENO)) 374 return; 375 376 va_start(ap, fmt); 377 vfprintf(stderr, fmt, ap); 378 va_end(ap); 379 } 380 381 static const char * 382 whichfd(int fd) 383 { 384 385 if (fd == -1) 386 return "-1"; 387 else if (fd_isrump(fd)) 388 return "rump"; 389 else 390 return "host"; 391 } 392 393 static const char * 394 whichpath(const char *path) 395 { 396 397 if (path_isrump(path)) 398 return "rump"; 399 else 400 return "host"; 401 } 402 403 #else 404 #define DPRINTF(x) 405 #endif 406 407 #define FDCALL(type, name, rcname, args, proto, vars) \ 408 type name args \ 409 { \ 410 type (*fun) proto; \ 411 \ 412 DPRINTF(("%s -> %d (%s)\n", __STRING(name), fd, whichfd(fd))); \ 413 if (fd_isrump(fd)) { \ 414 fun = syscalls[rcname].bs_rump; \ 415 fd = fd_host2rump(fd); \ 416 } else { \ 417 fun = syscalls[rcname].bs_host; \ 418 } \ 419 \ 420 return fun vars; \ 421 } 422 423 #define PATHCALL(type, name, rcname, args, proto, vars) \ 424 type name args \ 425 { \ 426 type (*fun) proto; \ 427 enum pathtype pt; \ 428 \ 429 DPRINTF(("%s -> %s (%s)\n", __STRING(name), path, \ 430 whichpath(path))); \ 431 if ((pt = path_isrump(path)) != PATH_HOST) { \ 432 fun = syscalls[rcname].bs_rump; \ 433 if (pt == PATH_RUMP) \ 434 path = path_host2rump(path); \ 435 } else { \ 436 fun = syscalls[rcname].bs_host; \ 437 } \ 438 \ 439 return fun vars; \ 440 } 441 442 #define VFSCALL(bit, type, name, rcname, args, proto, vars) \ 443 type name args \ 444 { \ 445 type (*fun) proto; \ 446 \ 447 DPRINTF(("%s (0x%x, 0x%x)\n", __STRING(name), bit, vfsbits)); \ 448 if (vfsbits & bit) { \ 449 fun = syscalls[rcname].bs_rump; \ 450 } else { \ 451 fun = syscalls[rcname].bs_host; \ 452 } \ 453 \ 454 return fun vars; \ 455 } 456 457 /* 458 * These variables are set from the RUMPHIJACK string and control 459 * which operations can product rump kernel file descriptors. 460 * This should be easily extendable for future needs. 461 */ 462 #define RUMPHIJACK_DEFAULT "path=/rump,socket=all:nolocal" 463 static bool rumpsockets[PF_MAX]; 464 static const char *rumpprefix; 465 static size_t rumpprefixlen; 466 467 static struct { 468 int pf; 469 const char *name; 470 } socketmap[] = { 471 { PF_LOCAL, "local" }, 472 { PF_INET, "inet" }, 473 { PF_LINK, "link" }, 474 #ifdef PF_OROUTE 475 { PF_OROUTE, "oroute" }, 476 #endif 477 { PF_ROUTE, "route" }, 478 { PF_INET6, "inet6" }, 479 #ifdef PF_MPLS 480 { PF_MPLS, "mpls" }, 481 #endif 482 { -1, NULL } 483 }; 484 485 static void 486 sockparser(char *buf) 487 { 488 char *p, *l; 489 bool value; 490 int i; 491 492 /* if "all" is present, it must be specified first */ 493 if (strncmp(buf, "all", strlen("all")) == 0) { 494 for (i = 0; i < (int)__arraycount(rumpsockets); i++) { 495 rumpsockets[i] = true; 496 } 497 buf += strlen("all"); 498 if (*buf == ':') 499 buf++; 500 } 501 502 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) { 503 value = true; 504 if (strncmp(p, "no", strlen("no")) == 0) { 505 value = false; 506 p += strlen("no"); 507 } 508 509 for (i = 0; socketmap[i].name; i++) { 510 if (strcmp(p, socketmap[i].name) == 0) { 511 rumpsockets[socketmap[i].pf] = value; 512 break; 513 } 514 } 515 if (socketmap[i].name == NULL) { 516 errx(1, "invalid socket specifier %s", p); 517 } 518 } 519 } 520 521 static void 522 pathparser(char *buf) 523 { 524 525 /* sanity-check */ 526 if (*buf != '/') 527 errx(1, "hijack path specifier must begin with ``/''"); 528 rumpprefixlen = strlen(buf); 529 if (rumpprefixlen < 2) 530 errx(1, "invalid hijack prefix: %s", buf); 531 if (buf[rumpprefixlen-1] == '/' && strspn(buf, "/") != rumpprefixlen) 532 errx(1, "hijack prefix may end in slash only if pure " 533 "slash, gave %s", buf); 534 535 if ((rumpprefix = strdup(buf)) == NULL) 536 err(1, "strdup"); 537 rumpprefixlen = strlen(rumpprefix); 538 } 539 540 static struct blanket { 541 const char *pfx; 542 size_t len; 543 } *blanket; 544 static int nblanket; 545 546 static void 547 blanketparser(char *buf) 548 { 549 char *p, *l; 550 int i; 551 552 for (nblanket = 0, p = buf; p; p = strchr(p+1, ':'), nblanket++) 553 continue; 554 555 blanket = malloc(nblanket * sizeof(*blanket)); 556 if (blanket == NULL) 557 err(1, "alloc blanket %d", nblanket); 558 559 for (p = strtok_r(buf, ":", &l), i = 0; p; 560 p = strtok_r(NULL, ":", &l), i++) { 561 blanket[i].pfx = strdup(p); 562 if (blanket[i].pfx == NULL) 563 err(1, "strdup blanket"); 564 blanket[i].len = strlen(p); 565 566 if (blanket[i].len == 0 || *blanket[i].pfx != '/') 567 errx(1, "invalid blanket specifier %s", p); 568 if (*(blanket[i].pfx + blanket[i].len-1) == '/') 569 errx(1, "invalid blanket specifier %s", p); 570 } 571 } 572 573 #define VFSBIT_NFSSVC 0x01 574 #define VFSBIT_GETVFSSTAT 0x02 575 #define VFSBIT_FHCALLS 0x04 576 static unsigned vfsbits; 577 578 static struct { 579 int bit; 580 const char *name; 581 } vfscalls[] = { 582 { VFSBIT_NFSSVC, "nfssvc" }, 583 { VFSBIT_GETVFSSTAT, "getvfsstat" }, 584 { VFSBIT_FHCALLS, "fhcalls" }, 585 { -1, NULL } 586 }; 587 588 static void 589 vfsparser(char *buf) 590 { 591 char *p, *l; 592 bool turnon; 593 unsigned int fullmask; 594 int i; 595 596 /* build the full mask and sanity-check while we're at it */ 597 fullmask = 0; 598 for (i = 0; vfscalls[i].name != NULL; i++) { 599 if (fullmask & vfscalls[i].bit) 600 errx(1, "problem exists between vi and chair"); 601 fullmask |= vfscalls[i].bit; 602 } 603 604 605 /* if "all" is present, it must be specified first */ 606 if (strncmp(buf, "all", strlen("all")) == 0) { 607 vfsbits = fullmask; 608 buf += strlen("all"); 609 if (*buf == ':') 610 buf++; 611 } 612 613 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) { 614 turnon = true; 615 if (strncmp(p, "no", strlen("no")) == 0) { 616 turnon = false; 617 p += strlen("no"); 618 } 619 620 for (i = 0; vfscalls[i].name; i++) { 621 if (strcmp(p, vfscalls[i].name) == 0) { 622 if (turnon) 623 vfsbits |= vfscalls[i].bit; 624 else 625 vfsbits &= ~vfscalls[i].bit; 626 break; 627 } 628 } 629 if (vfscalls[i].name == NULL) { 630 errx(1, "invalid vfscall specifier %s", p); 631 } 632 } 633 } 634 635 static bool rumpsysctl = false; 636 637 static void 638 sysctlparser(char *buf) 639 { 640 641 if (buf == NULL) { 642 rumpsysctl = true; 643 return; 644 } 645 646 if (strcasecmp(buf, "y") == 0 || strcasecmp(buf, "yes") == 0 || 647 strcasecmp(buf, "yep") == 0 || strcasecmp(buf, "tottakai") == 0) { 648 rumpsysctl = true; 649 return; 650 } 651 if (strcasecmp(buf, "n") == 0 || strcasecmp(buf, "no") == 0) { 652 rumpsysctl = false; 653 return; 654 } 655 656 errx(1, "sysctl value should be y(es)/n(o), gave: %s", buf); 657 } 658 659 static struct { 660 void (*parsefn)(char *); 661 const char *name; 662 bool needvalues; 663 } hijackparse[] = { 664 { sockparser, "socket", true }, 665 { pathparser, "path", true }, 666 { blanketparser, "blanket", true }, 667 { vfsparser, "vfs", true }, 668 { sysctlparser, "sysctl", false }, 669 { NULL, NULL, false }, 670 }; 671 672 static void 673 parsehijack(char *hijack) 674 { 675 char *p, *p2, *l; 676 const char *hijackcopy; 677 bool nop2; 678 int i; 679 680 if ((hijackcopy = strdup(hijack)) == NULL) 681 err(1, "strdup"); 682 683 /* disable everything explicitly */ 684 for (i = 0; i < PF_MAX; i++) 685 rumpsockets[i] = false; 686 687 for (p = strtok_r(hijack, ",", &l); p; p = strtok_r(NULL, ",", &l)) { 688 nop2 = false; 689 p2 = strchr(p, '='); 690 if (!p2) { 691 nop2 = true; 692 p2 = p + strlen(p); 693 } 694 695 for (i = 0; hijackparse[i].parsefn; i++) { 696 if (strncmp(hijackparse[i].name, p, 697 (size_t)(p2-p)) == 0) { 698 if (nop2 && hijackparse[i].needvalues) 699 errx(1, "invalid hijack specifier: %s", 700 hijackcopy); 701 hijackparse[i].parsefn(nop2 ? NULL : p2+1); 702 break; 703 } 704 } 705 706 if (hijackparse[i].parsefn == NULL) 707 errx(1, "invalid hijack specifier name in %s", p); 708 } 709 710 } 711 712 static void __attribute__((constructor)) 713 rcinit(void) 714 { 715 char buf[1024]; 716 struct sysnames *sysvec; 717 size_t totalsys; 718 unsigned i, j; 719 720 host_fork = dlsym(RTLD_NEXT, "fork"); 721 host_daemon = dlsym(RTLD_NEXT, "daemon"); 722 host_mmap = dlsym(RTLD_NEXT, "mmap"); 723 724 /* 725 * In theory cannot print anything during lookups because 726 * we might not have the call vector set up. so, the errx() 727 * is a bit of a strech, but it might work. 728 */ 729 730 totalsys = __arraycount(sys_mandatory) + __arraycount(sys_optional); 731 for (i = 0; i < DUALCALL__NUM; i++) { 732 /* build runtime O(1) access */ 733 734 sysvec = sys_mandatory; 735 for (j = 0; j < __arraycount(sys_mandatory); j++) { 736 if (sys_mandatory[j].scm_callnum == i) 737 goto found; 738 } 739 sysvec = sys_optional; 740 for (j = 0; j < __arraycount(sys_optional); j++, j++) { 741 if (sys_optional[j].scm_callnum == i) 742 goto found; 743 } 744 errx(1, "rumphijack error: syscall pos %d missing", i); 745 746 found: 747 syscalls[i].bs_host = dlsym(RTLD_NEXT, 748 sysvec[j].scm_hostname); 749 if (syscalls[i].bs_host == NULL) { 750 if (sysvec == sys_optional) 751 syscalls[i].bs_host = nolibcstub; 752 else 753 errx(1, "hostcall %s not found!", 754 sysvec[j].scm_hostname); 755 } 756 757 syscalls[i].bs_rump = dlsym(RTLD_NEXT, 758 sysvec[j].scm_rumpname); 759 if (syscalls[i].bs_rump == NULL) 760 errx(1, "rumpcall %s not found!", 761 sysvec[j].scm_rumpname); 762 } 763 764 if (rumpclient_init() == -1) 765 err(1, "rumpclient init"); 766 767 /* check which syscalls we're supposed to hijack */ 768 if (getenv_r("RUMPHIJACK", buf, sizeof(buf)) == -1) { 769 strcpy(buf, RUMPHIJACK_DEFAULT); 770 } 771 parsehijack(buf); 772 773 /* set client persistence level */ 774 if (getenv_r("RUMPHIJACK_RETRYCONNECT", buf, sizeof(buf)) != -1) { 775 if (strcmp(buf, "die") == 0) 776 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_DIE); 777 else if (strcmp(buf, "inftime") == 0) 778 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME); 779 else if (strcmp(buf, "once") == 0) 780 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_ONCE); 781 else { 782 time_t timeout; 783 char *ep; 784 785 timeout = (time_t)strtoll(buf, &ep, 10); 786 if (timeout <= 0 || ep != buf + strlen(buf)) 787 errx(1, "RUMPHIJACK_RETRYCONNECT must be " 788 "keyword or integer, got: %s", buf); 789 790 rumpclient_setconnretry(timeout); 791 } 792 } 793 794 if (getenv_r("RUMPHIJACK__DUP2INFO", buf, sizeof(buf)) == 0) { 795 if (sscanf(buf, "%u,%u,%u", 796 &dup2vec[0], &dup2vec[1], &dup2vec[2]) != 3) { 797 warnx("invalid dup2mask: %s", buf); 798 memset(dup2vec, 0, sizeof(dup2vec)); 799 } 800 unsetenv("RUMPHIJACK__DUP2INFO"); 801 } 802 if (getenv_r("RUMPHIJACK__PWDINRUMP", buf, sizeof(buf)) == 0) { 803 pwdinrump = true; 804 unsetenv("RUMPHIJACK__PWDINRUMP"); 805 } 806 } 807 808 /* Need runtime selection. low for now due to FD_SETSIZE */ 809 #define HIJACK_FDOFF 128 810 811 static int 812 fd_rump2host(int fd) 813 { 814 815 if (fd == -1) 816 return fd; 817 return fd + HIJACK_FDOFF; 818 } 819 820 static int 821 fd_rump2host_withdup(int fd) 822 { 823 int hfd; 824 825 _DIAGASSERT(fd != -1); 826 hfd = unmapdup2(fd); 827 if (hfd != -1) { 828 _DIAGASSERT(hfd <= DUP2HIGH); 829 return hfd; 830 } 831 return fd_rump2host(fd); 832 } 833 834 static int 835 fd_host2rump(int fd) 836 { 837 838 if (!isdup2d(fd)) 839 return fd - HIJACK_FDOFF; 840 else 841 return mapdup2(fd); 842 } 843 844 static bool 845 fd_isrump(int fd) 846 { 847 848 return isdup2d(fd) || fd >= HIJACK_FDOFF; 849 } 850 851 #define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_FDOFF) 852 853 static enum pathtype 854 path_isrump(const char *path) 855 { 856 size_t plen; 857 int i; 858 859 if (rumpprefix == NULL && nblanket == 0) 860 return PATH_HOST; 861 862 if (*path == '/') { 863 plen = strlen(path); 864 if (rumpprefix && plen >= rumpprefixlen) { 865 if (strncmp(path, rumpprefix, rumpprefixlen) == 0 866 && (plen == rumpprefixlen 867 || *(path + rumpprefixlen) == '/')) { 868 return PATH_RUMP; 869 } 870 } 871 for (i = 0; i < nblanket; i++) { 872 if (strncmp(path, blanket[i].pfx, blanket[i].len) == 0) 873 return PATH_RUMPBLANKET; 874 } 875 876 return PATH_HOST; 877 } else { 878 return pwdinrump ? PATH_RUMP : PATH_HOST; 879 } 880 } 881 882 static const char *rootpath = "/"; 883 static const char * 884 path_host2rump(const char *path) 885 { 886 const char *rv; 887 888 if (*path == '/') { 889 rv = path + rumpprefixlen; 890 if (*rv == '\0') 891 rv = rootpath; 892 } else { 893 rv = path; 894 } 895 896 return rv; 897 } 898 899 static int 900 dodup(int oldd, int minfd) 901 { 902 int (*op_fcntl)(int, int, ...); 903 int newd; 904 int isrump; 905 906 DPRINTF(("dup -> %d (minfd %d)\n", oldd, minfd)); 907 if (fd_isrump(oldd)) { 908 op_fcntl = GETSYSCALL(rump, FCNTL); 909 oldd = fd_host2rump(oldd); 910 if (minfd >= HIJACK_FDOFF) 911 minfd -= HIJACK_FDOFF; 912 isrump = 1; 913 } else { 914 op_fcntl = GETSYSCALL(host, FCNTL); 915 isrump = 0; 916 } 917 918 newd = op_fcntl(oldd, F_DUPFD, minfd); 919 920 if (isrump) 921 newd = fd_rump2host(newd); 922 DPRINTF(("dup <- %d\n", newd)); 923 924 return newd; 925 } 926 927 /* 928 * dup a host file descriptor so that it doesn't collide with the dup2mask 929 */ 930 static int 931 fd_dupgood(int fd) 932 { 933 int (*op_fcntl)(int, int, ...) = GETSYSCALL(host, FCNTL); 934 int (*op_close)(int) = GETSYSCALL(host, CLOSE); 935 int ofd, i; 936 937 for (i = 1; isdup2d(fd); i++) { 938 ofd = fd; 939 fd = op_fcntl(ofd, F_DUPFD, i); 940 op_close(ofd); 941 } 942 943 return fd; 944 } 945 946 int 947 open(const char *path, int flags, ...) 948 { 949 int (*op_open)(const char *, int, ...); 950 bool isrump; 951 va_list ap; 952 enum pathtype pt; 953 int fd; 954 955 DPRINTF(("open -> %s (%s)\n", path, whichpath(path))); 956 957 if ((pt = path_isrump(path)) != PATH_HOST) { 958 if (pt == PATH_RUMP) 959 path = path_host2rump(path); 960 op_open = GETSYSCALL(rump, OPEN); 961 isrump = true; 962 } else { 963 op_open = GETSYSCALL(host, OPEN); 964 isrump = false; 965 } 966 967 va_start(ap, flags); 968 fd = op_open(path, flags, va_arg(ap, mode_t)); 969 va_end(ap); 970 971 if (isrump) 972 fd = fd_rump2host(fd); 973 else 974 fd = fd_dupgood(fd); 975 976 DPRINTF(("open <- %d (%s)\n", fd, whichfd(fd))); 977 return fd; 978 } 979 980 int 981 chdir(const char *path) 982 { 983 int (*op_chdir)(const char *); 984 enum pathtype pt; 985 int rv; 986 987 if ((pt = path_isrump(path)) != PATH_HOST) { 988 op_chdir = GETSYSCALL(rump, CHDIR); 989 if (pt == PATH_RUMP) 990 path = path_host2rump(path); 991 } else { 992 op_chdir = GETSYSCALL(host, CHDIR); 993 } 994 995 rv = op_chdir(path); 996 if (rv == 0) 997 pwdinrump = pt != PATH_HOST; 998 999 return rv; 1000 } 1001 1002 int 1003 fchdir(int fd) 1004 { 1005 int (*op_fchdir)(int); 1006 bool isrump; 1007 int rv; 1008 1009 if (fd_isrump(fd)) { 1010 op_fchdir = GETSYSCALL(rump, FCHDIR); 1011 isrump = true; 1012 fd = fd_host2rump(fd); 1013 } else { 1014 op_fchdir = GETSYSCALL(host, FCHDIR); 1015 isrump = false; 1016 } 1017 1018 rv = op_fchdir(fd); 1019 if (rv == 0) { 1020 pwdinrump = isrump; 1021 } 1022 1023 return rv; 1024 } 1025 1026 int 1027 __getcwd(char *bufp, size_t len) 1028 { 1029 int (*op___getcwd)(char *, size_t); 1030 size_t prefixgap; 1031 bool iamslash; 1032 int rv; 1033 1034 if (pwdinrump && rumpprefix) { 1035 if (rumpprefix[rumpprefixlen-1] == '/') 1036 iamslash = true; 1037 else 1038 iamslash = false; 1039 1040 if (iamslash) 1041 prefixgap = rumpprefixlen - 1; /* ``//+path'' */ 1042 else 1043 prefixgap = rumpprefixlen; /* ``/pfx+/path'' */ 1044 if (len <= prefixgap) { 1045 errno = ERANGE; 1046 return -1; 1047 } 1048 1049 op___getcwd = GETSYSCALL(rump, __GETCWD); 1050 rv = op___getcwd(bufp + prefixgap, len - prefixgap); 1051 if (rv == -1) 1052 return rv; 1053 1054 /* augment the "/" part only for a non-root path */ 1055 memcpy(bufp, rumpprefix, rumpprefixlen); 1056 1057 /* append / only to non-root cwd */ 1058 if (rv != 2) 1059 bufp[prefixgap] = '/'; 1060 1061 /* don't append extra slash in the purely-slash case */ 1062 if (rv == 2 && !iamslash) 1063 bufp[rumpprefixlen] = '\0'; 1064 } else if (pwdinrump) { 1065 /* assume blanket. we can't provide a prefix here */ 1066 op___getcwd = GETSYSCALL(rump, __GETCWD); 1067 rv = op___getcwd(bufp, len); 1068 } else { 1069 op___getcwd = GETSYSCALL(host, __GETCWD); 1070 rv = op___getcwd(bufp, len); 1071 } 1072 1073 return rv; 1074 } 1075 1076 int 1077 rename(const char *from, const char *to) 1078 { 1079 int (*op_rename)(const char *, const char *); 1080 enum pathtype ptf, ptt; 1081 1082 if ((ptf = path_isrump(from)) != PATH_HOST) { 1083 if ((ptt = path_isrump(to)) == PATH_HOST) { 1084 errno = EXDEV; 1085 return -1; 1086 } 1087 1088 if (ptf == PATH_RUMP) 1089 from = path_host2rump(from); 1090 if (ptt == PATH_RUMP) 1091 to = path_host2rump(to); 1092 op_rename = GETSYSCALL(rump, RENAME); 1093 } else { 1094 if (path_isrump(to) != PATH_HOST) { 1095 errno = EXDEV; 1096 return -1; 1097 } 1098 1099 op_rename = GETSYSCALL(host, RENAME); 1100 } 1101 1102 return op_rename(from, to); 1103 } 1104 1105 int __socket30(int, int, int); 1106 int 1107 __socket30(int domain, int type, int protocol) 1108 { 1109 int (*op_socket)(int, int, int); 1110 int fd; 1111 bool isrump; 1112 1113 isrump = domain < PF_MAX && rumpsockets[domain]; 1114 1115 if (isrump) 1116 op_socket = GETSYSCALL(rump, SOCKET); 1117 else 1118 op_socket = GETSYSCALL(host, SOCKET); 1119 fd = op_socket(domain, type, protocol); 1120 1121 if (isrump) 1122 fd = fd_rump2host(fd); 1123 else 1124 fd = fd_dupgood(fd); 1125 DPRINTF(("socket <- %d\n", fd)); 1126 1127 return fd; 1128 } 1129 1130 int 1131 accept(int s, struct sockaddr *addr, socklen_t *addrlen) 1132 { 1133 int (*op_accept)(int, struct sockaddr *, socklen_t *); 1134 int fd; 1135 bool isrump; 1136 1137 isrump = fd_isrump(s); 1138 1139 DPRINTF(("accept -> %d", s)); 1140 if (isrump) { 1141 op_accept = GETSYSCALL(rump, ACCEPT); 1142 s = fd_host2rump(s); 1143 } else { 1144 op_accept = GETSYSCALL(host, ACCEPT); 1145 } 1146 fd = op_accept(s, addr, addrlen); 1147 if (fd != -1 && isrump) 1148 fd = fd_rump2host(fd); 1149 else 1150 fd = fd_dupgood(fd); 1151 1152 DPRINTF((" <- %d\n", fd)); 1153 1154 return fd; 1155 } 1156 1157 /* 1158 * ioctl and fcntl are varargs calls and need special treatment 1159 */ 1160 int 1161 ioctl(int fd, unsigned long cmd, ...) 1162 { 1163 int (*op_ioctl)(int, unsigned long cmd, ...); 1164 va_list ap; 1165 int rv; 1166 1167 DPRINTF(("ioctl -> %d\n", fd)); 1168 if (fd_isrump(fd)) { 1169 fd = fd_host2rump(fd); 1170 op_ioctl = GETSYSCALL(rump, IOCTL); 1171 } else { 1172 op_ioctl = GETSYSCALL(host, IOCTL); 1173 } 1174 1175 va_start(ap, cmd); 1176 rv = op_ioctl(fd, cmd, va_arg(ap, void *)); 1177 va_end(ap); 1178 return rv; 1179 } 1180 1181 int 1182 fcntl(int fd, int cmd, ...) 1183 { 1184 int (*op_fcntl)(int, int, ...); 1185 va_list ap; 1186 int rv, minfd, i, maxdup2; 1187 1188 DPRINTF(("fcntl -> %d (cmd %d)\n", fd, cmd)); 1189 1190 switch (cmd) { 1191 case F_DUPFD: 1192 va_start(ap, cmd); 1193 minfd = va_arg(ap, int); 1194 va_end(ap); 1195 return dodup(fd, minfd); 1196 1197 case F_CLOSEM: 1198 /* 1199 * So, if fd < HIJACKOFF, we want to do a host closem. 1200 */ 1201 1202 if (fd < HIJACK_FDOFF) { 1203 int closemfd = fd; 1204 1205 if (rumpclient__closenotify(&closemfd, 1206 RUMPCLIENT_CLOSE_FCLOSEM) == -1) 1207 return -1; 1208 op_fcntl = GETSYSCALL(host, FCNTL); 1209 rv = op_fcntl(closemfd, cmd); 1210 if (rv) 1211 return rv; 1212 } 1213 1214 /* 1215 * Additionally, we want to do a rump closem, but only 1216 * for the file descriptors not dup2'd. 1217 */ 1218 1219 for (i = 0, maxdup2 = 0; i <= DUP2HIGH; i++) { 1220 if (dup2vec[i] & DUP2BIT) { 1221 int val; 1222 1223 val = dup2vec[i] & DUP2FDMASK; 1224 maxdup2 = MAX(val, maxdup2); 1225 } 1226 } 1227 1228 if (fd >= HIJACK_FDOFF) 1229 fd -= HIJACK_FDOFF; 1230 else 1231 fd = 0; 1232 fd = MAX(maxdup2+1, fd); 1233 1234 /* hmm, maybe we should close rump fd's not within dup2mask? */ 1235 return rump_sys_fcntl(fd, F_CLOSEM); 1236 1237 case F_MAXFD: 1238 /* 1239 * For maxfd, if there's a rump kernel fd, return 1240 * it hostified. Otherwise, return host's MAXFD 1241 * return value. 1242 */ 1243 if ((rv = rump_sys_fcntl(fd, F_MAXFD)) != -1) { 1244 /* 1245 * This might go a little wrong in case 1246 * of dup2 to [012], but I'm not sure if 1247 * there's a justification for tracking 1248 * that info. Consider e.g. 1249 * dup2(rumpfd, 2) followed by rump_sys_open() 1250 * returning 1. We should return 1+HIJACKOFF, 1251 * not 2+HIJACKOFF. However, if [01] is not 1252 * open, the correct return value is 2. 1253 */ 1254 return fd_rump2host(fd); 1255 } else { 1256 op_fcntl = GETSYSCALL(host, FCNTL); 1257 return op_fcntl(fd, F_MAXFD); 1258 } 1259 /*NOTREACHED*/ 1260 1261 default: 1262 if (fd_isrump(fd)) { 1263 fd = fd_host2rump(fd); 1264 op_fcntl = GETSYSCALL(rump, FCNTL); 1265 } else { 1266 op_fcntl = GETSYSCALL(host, FCNTL); 1267 } 1268 1269 va_start(ap, cmd); 1270 rv = op_fcntl(fd, cmd, va_arg(ap, void *)); 1271 va_end(ap); 1272 return rv; 1273 } 1274 /*NOTREACHED*/ 1275 } 1276 1277 int 1278 close(int fd) 1279 { 1280 int (*op_close)(int); 1281 int rv; 1282 1283 DPRINTF(("close -> %d\n", fd)); 1284 if (fd_isrump(fd)) { 1285 bool undup2 = false; 1286 int ofd; 1287 1288 if (isdup2d(ofd = fd)) { 1289 undup2 = true; 1290 } 1291 1292 fd = fd_host2rump(fd); 1293 if (!undup2 && killdup2alias(fd)) { 1294 return 0; 1295 } 1296 1297 op_close = GETSYSCALL(rump, CLOSE); 1298 rv = op_close(fd); 1299 if (rv == 0 && undup2) { 1300 clrdup2(ofd); 1301 } 1302 } else { 1303 if (rumpclient__closenotify(&fd, RUMPCLIENT_CLOSE_CLOSE) == -1) 1304 return -1; 1305 op_close = GETSYSCALL(host, CLOSE); 1306 rv = op_close(fd); 1307 } 1308 1309 return rv; 1310 } 1311 1312 /* 1313 * write cannot issue a standard debug printf due to recursion 1314 */ 1315 ssize_t 1316 write(int fd, const void *buf, size_t blen) 1317 { 1318 ssize_t (*op_write)(int, const void *, size_t); 1319 1320 if (fd_isrump(fd)) { 1321 fd = fd_host2rump(fd); 1322 op_write = GETSYSCALL(rump, WRITE); 1323 } else { 1324 op_write = GETSYSCALL(host, WRITE); 1325 } 1326 1327 return op_write(fd, buf, blen); 1328 } 1329 1330 /* 1331 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since 1332 * many programs do that. dup2 of a rump kernel fd to another value 1333 * not >= fdoff is an error. 1334 * 1335 * Note: cannot rump2host newd, because it is often hardcoded. 1336 */ 1337 int 1338 dup2(int oldd, int newd) 1339 { 1340 int (*host_dup2)(int, int); 1341 int rv; 1342 1343 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd)); 1344 1345 if (fd_isrump(oldd)) { 1346 int (*op_close)(int) = GETSYSCALL(host, CLOSE); 1347 1348 /* only allow fd 0-2 for cross-kernel dup */ 1349 if (!(newd >= 0 && newd <= 2 && !fd_isrump(newd))) { 1350 errno = EBADF; 1351 return -1; 1352 } 1353 1354 /* regular dup2? */ 1355 if (fd_isrump(newd)) { 1356 newd = fd_host2rump(newd); 1357 rv = rump_sys_dup2(oldd, newd); 1358 return fd_rump2host(rv); 1359 } 1360 1361 /* 1362 * dup2 rump => host? just establish an 1363 * entry in the mapping table. 1364 */ 1365 op_close(newd); 1366 setdup2(newd, fd_host2rump(oldd)); 1367 rv = 0; 1368 } else { 1369 host_dup2 = syscalls[DUALCALL_DUP2].bs_host; 1370 if (rumpclient__closenotify(&newd, RUMPCLIENT_CLOSE_DUP2) == -1) 1371 return -1; 1372 rv = host_dup2(oldd, newd); 1373 } 1374 1375 return rv; 1376 } 1377 1378 int 1379 dup(int oldd) 1380 { 1381 1382 return dodup(oldd, 0); 1383 } 1384 1385 pid_t 1386 fork() 1387 { 1388 pid_t rv; 1389 1390 DPRINTF(("fork\n")); 1391 1392 rv = rumpclient__dofork(host_fork); 1393 1394 DPRINTF(("fork returns %d\n", rv)); 1395 return rv; 1396 } 1397 /* we do not have the luxury of not requiring a stackframe */ 1398 __strong_alias(__vfork14,fork); 1399 1400 int 1401 daemon(int nochdir, int noclose) 1402 { 1403 struct rumpclient_fork *rf; 1404 1405 if ((rf = rumpclient_prefork()) == NULL) 1406 return -1; 1407 1408 if (host_daemon(nochdir, noclose) == -1) 1409 return -1; 1410 1411 if (rumpclient_fork_init(rf) == -1) 1412 return -1; 1413 1414 return 0; 1415 } 1416 1417 int 1418 execve(const char *path, char *const argv[], char *const envp[]) 1419 { 1420 char buf[128]; 1421 char *dup2str; 1422 const char *pwdinrumpstr; 1423 char **newenv; 1424 size_t nelem; 1425 int rv, sverrno; 1426 int bonus = 2, i = 0; 1427 1428 snprintf(buf, sizeof(buf), "RUMPHIJACK__DUP2INFO=%u,%u,%u", 1429 dup2vec[0], dup2vec[1], dup2vec[2]); 1430 dup2str = strdup(buf); 1431 if (dup2str == NULL) { 1432 errno = ENOMEM; 1433 return -1; 1434 } 1435 1436 if (pwdinrump) { 1437 pwdinrumpstr = "RUMPHIJACK__PWDINRUMP=true"; 1438 bonus++; 1439 } else { 1440 pwdinrumpstr = NULL; 1441 } 1442 1443 for (nelem = 0; envp && envp[nelem]; nelem++) 1444 continue; 1445 newenv = malloc(sizeof(*newenv) * (nelem+bonus)); 1446 if (newenv == NULL) { 1447 free(dup2str); 1448 errno = ENOMEM; 1449 return -1; 1450 } 1451 memcpy(newenv, envp, nelem*sizeof(*newenv)); 1452 newenv[nelem+i] = dup2str; 1453 i++; 1454 1455 if (pwdinrumpstr) { 1456 newenv[nelem+i] = __UNCONST(pwdinrumpstr); 1457 i++; 1458 } 1459 newenv[nelem+i] = NULL; 1460 _DIAGASSERT(i < bonus); 1461 1462 rv = rumpclient_exec(path, argv, newenv); 1463 1464 _DIAGASSERT(rv != 0); 1465 sverrno = errno; 1466 free(newenv); 1467 free(dup2str); 1468 errno = sverrno; 1469 return rv; 1470 } 1471 1472 /* 1473 * select is done by calling poll. 1474 */ 1475 int 1476 REALSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 1477 struct timeval *timeout) 1478 { 1479 struct pollfd *pfds; 1480 struct timespec ts, *tsp = NULL; 1481 nfds_t realnfds; 1482 int i, j; 1483 int rv, incr; 1484 1485 DPRINTF(("select\n")); 1486 1487 /* 1488 * Well, first we must scan the fds to figure out how many 1489 * fds there really are. This is because up to and including 1490 * nb5 poll() silently refuses nfds > process_maxopen_fds. 1491 * Seems to be fixed in current, thank the maker. 1492 * god damn cluster...bomb. 1493 */ 1494 1495 for (i = 0, realnfds = 0; i < nfds; i++) { 1496 if (readfds && FD_ISSET(i, readfds)) { 1497 realnfds++; 1498 continue; 1499 } 1500 if (writefds && FD_ISSET(i, writefds)) { 1501 realnfds++; 1502 continue; 1503 } 1504 if (exceptfds && FD_ISSET(i, exceptfds)) { 1505 realnfds++; 1506 continue; 1507 } 1508 } 1509 1510 if (realnfds) { 1511 pfds = calloc(realnfds, sizeof(*pfds)); 1512 if (!pfds) 1513 return -1; 1514 } else { 1515 pfds = NULL; 1516 } 1517 1518 for (i = 0, j = 0; i < nfds; i++) { 1519 incr = 0; 1520 if (readfds && FD_ISSET(i, readfds)) { 1521 pfds[j].fd = i; 1522 pfds[j].events |= POLLIN; 1523 incr=1; 1524 } 1525 if (writefds && FD_ISSET(i, writefds)) { 1526 pfds[j].fd = i; 1527 pfds[j].events |= POLLOUT; 1528 incr=1; 1529 } 1530 if (exceptfds && FD_ISSET(i, exceptfds)) { 1531 pfds[j].fd = i; 1532 pfds[j].events |= POLLHUP|POLLERR; 1533 incr=1; 1534 } 1535 if (incr) 1536 j++; 1537 } 1538 assert(j == (int)realnfds); 1539 1540 if (timeout) { 1541 TIMEVAL_TO_TIMESPEC(timeout, &ts); 1542 tsp = &ts; 1543 } 1544 rv = REALPOLLTS(pfds, realnfds, tsp, NULL); 1545 /* 1546 * "If select() returns with an error the descriptor sets 1547 * will be unmodified" 1548 */ 1549 if (rv < 0) 1550 goto out; 1551 1552 /* 1553 * zero out results (can't use FD_ZERO for the 1554 * obvious select-me-not reason). whee. 1555 * 1556 * We do this here since some software ignores the return 1557 * value of select, and hence if the timeout expires, it may 1558 * assume all input descriptors have activity. 1559 */ 1560 for (i = 0; i < nfds; i++) { 1561 if (readfds) 1562 FD_CLR(i, readfds); 1563 if (writefds) 1564 FD_CLR(i, writefds); 1565 if (exceptfds) 1566 FD_CLR(i, exceptfds); 1567 } 1568 if (rv == 0) 1569 goto out; 1570 1571 /* 1572 * We have >0 fds with activity. Harvest the results. 1573 */ 1574 for (i = 0; i < (int)realnfds; i++) { 1575 if (readfds) { 1576 if (pfds[i].revents & POLLIN) { 1577 FD_SET(pfds[i].fd, readfds); 1578 } 1579 } 1580 if (writefds) { 1581 if (pfds[i].revents & POLLOUT) { 1582 FD_SET(pfds[i].fd, writefds); 1583 } 1584 } 1585 if (exceptfds) { 1586 if (pfds[i].revents & (POLLHUP|POLLERR)) { 1587 FD_SET(pfds[i].fd, exceptfds); 1588 } 1589 } 1590 } 1591 1592 out: 1593 free(pfds); 1594 return rv; 1595 } 1596 1597 static void 1598 checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall) 1599 { 1600 nfds_t i; 1601 1602 for (i = 0; i < nfds; i++) { 1603 if (fds[i].fd == -1) 1604 continue; 1605 1606 if (fd_isrump(fds[i].fd)) 1607 (*rumpcall)++; 1608 else 1609 (*hostcall)++; 1610 } 1611 } 1612 1613 static void 1614 adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int)) 1615 { 1616 nfds_t i; 1617 1618 for (i = 0; i < nfds; i++) { 1619 fds[i].fd = fdadj(fds[i].fd); 1620 } 1621 } 1622 1623 /* 1624 * poll is easy as long as the call comes in the fds only in one 1625 * kernel. otherwise its quite tricky... 1626 */ 1627 struct pollarg { 1628 struct pollfd *pfds; 1629 nfds_t nfds; 1630 const struct timespec *ts; 1631 const sigset_t *sigmask; 1632 int pipefd; 1633 int errnum; 1634 }; 1635 1636 static void * 1637 hostpoll(void *arg) 1638 { 1639 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *, 1640 const sigset_t *); 1641 struct pollarg *parg = arg; 1642 intptr_t rv; 1643 1644 op_pollts = GETSYSCALL(host, POLLTS); 1645 rv = op_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask); 1646 if (rv == -1) 1647 parg->errnum = errno; 1648 rump_sys_write(parg->pipefd, &rv, sizeof(rv)); 1649 1650 return (void *)(intptr_t)rv; 1651 } 1652 1653 int 1654 REALPOLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts, 1655 const sigset_t *sigmask) 1656 { 1657 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *, 1658 const sigset_t *); 1659 int (*host_close)(int); 1660 int hostcall = 0, rumpcall = 0; 1661 pthread_t pt; 1662 nfds_t i; 1663 int rv; 1664 1665 DPRINTF(("poll\n")); 1666 checkpoll(fds, nfds, &hostcall, &rumpcall); 1667 1668 if (hostcall && rumpcall) { 1669 struct pollfd *pfd_host = NULL, *pfd_rump = NULL; 1670 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1}; 1671 struct pollarg parg; 1672 uintptr_t lrv; 1673 int sverrno = 0, trv; 1674 1675 /* 1676 * ok, this is where it gets tricky. We must support 1677 * this since it's a very common operation in certain 1678 * types of software (telnet, netcat, etc). We allocate 1679 * two vectors and run two poll commands in separate 1680 * threads. Whichever returns first "wins" and the 1681 * other kernel's fds won't show activity. 1682 */ 1683 rv = -1; 1684 1685 /* allocate full vector for O(n) joining after call */ 1686 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1)); 1687 if (!pfd_host) 1688 goto out; 1689 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1)); 1690 if (!pfd_rump) { 1691 goto out; 1692 } 1693 1694 /* 1695 * then, open two pipes, one for notifications 1696 * to each kernel. 1697 * 1698 * At least the rump pipe should probably be 1699 * cached, along with the helper threads. This 1700 * should give a microbenchmark improvement (haven't 1701 * experienced a macro-level problem yet, though). 1702 */ 1703 if ((rv = rump_sys_pipe(rpipe)) == -1) { 1704 sverrno = errno; 1705 } 1706 if (rv == 0 && (rv = pipe(hpipe)) == -1) { 1707 sverrno = errno; 1708 } 1709 1710 /* split vectors (or signal errors) */ 1711 for (i = 0; i < nfds; i++) { 1712 int fd; 1713 1714 fds[i].revents = 0; 1715 if (fds[i].fd == -1) { 1716 pfd_host[i].fd = -1; 1717 pfd_rump[i].fd = -1; 1718 } else if (fd_isrump(fds[i].fd)) { 1719 pfd_host[i].fd = -1; 1720 fd = fd_host2rump(fds[i].fd); 1721 if (fd == rpipe[0] || fd == rpipe[1]) { 1722 fds[i].revents = POLLNVAL; 1723 if (rv != -1) 1724 rv++; 1725 } 1726 pfd_rump[i].fd = fd; 1727 pfd_rump[i].events = fds[i].events; 1728 } else { 1729 pfd_rump[i].fd = -1; 1730 fd = fds[i].fd; 1731 if (fd == hpipe[0] || fd == hpipe[1]) { 1732 fds[i].revents = POLLNVAL; 1733 if (rv != -1) 1734 rv++; 1735 } 1736 pfd_host[i].fd = fd; 1737 pfd_host[i].events = fds[i].events; 1738 } 1739 pfd_rump[i].revents = pfd_host[i].revents = 0; 1740 } 1741 if (rv) { 1742 goto out; 1743 } 1744 1745 pfd_host[nfds].fd = hpipe[0]; 1746 pfd_host[nfds].events = POLLIN; 1747 pfd_rump[nfds].fd = rpipe[0]; 1748 pfd_rump[nfds].events = POLLIN; 1749 1750 /* 1751 * then, create a thread to do host part and meanwhile 1752 * do rump kernel part right here 1753 */ 1754 1755 parg.pfds = pfd_host; 1756 parg.nfds = nfds+1; 1757 parg.ts = ts; 1758 parg.sigmask = sigmask; 1759 parg.pipefd = rpipe[1]; 1760 pthread_create(&pt, NULL, hostpoll, &parg); 1761 1762 op_pollts = GETSYSCALL(rump, POLLTS); 1763 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL); 1764 sverrno = errno; 1765 write(hpipe[1], &rv, sizeof(rv)); 1766 pthread_join(pt, (void *)&trv); 1767 1768 /* check who "won" and merge results */ 1769 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) { 1770 rv = trv; 1771 1772 for (i = 0; i < nfds; i++) { 1773 if (pfd_rump[i].fd != -1) 1774 fds[i].revents = pfd_rump[i].revents; 1775 } 1776 sverrno = parg.errnum; 1777 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) { 1778 rv = trv; 1779 1780 for (i = 0; i < nfds; i++) { 1781 if (pfd_host[i].fd != -1) 1782 fds[i].revents = pfd_host[i].revents; 1783 } 1784 } else { 1785 rv = 0; 1786 } 1787 1788 out: 1789 host_close = GETSYSCALL(host, CLOSE); 1790 if (rpipe[0] != -1) 1791 rump_sys_close(rpipe[0]); 1792 if (rpipe[1] != -1) 1793 rump_sys_close(rpipe[1]); 1794 if (hpipe[0] != -1) 1795 host_close(hpipe[0]); 1796 if (hpipe[1] != -1) 1797 host_close(hpipe[1]); 1798 free(pfd_host); 1799 free(pfd_rump); 1800 errno = sverrno; 1801 } else { 1802 if (hostcall) { 1803 op_pollts = GETSYSCALL(host, POLLTS); 1804 } else { 1805 op_pollts = GETSYSCALL(rump, POLLTS); 1806 adjustpoll(fds, nfds, fd_host2rump); 1807 } 1808 1809 rv = op_pollts(fds, nfds, ts, sigmask); 1810 if (rumpcall) 1811 adjustpoll(fds, nfds, fd_rump2host_withdup); 1812 } 1813 1814 return rv; 1815 } 1816 1817 int 1818 poll(struct pollfd *fds, nfds_t nfds, int timeout) 1819 { 1820 struct timespec ts; 1821 struct timespec *tsp = NULL; 1822 1823 if (timeout != INFTIM) { 1824 ts.tv_sec = timeout / 1000; 1825 ts.tv_nsec = (timeout % 1000) * 1000*1000; 1826 1827 tsp = &ts; 1828 } 1829 1830 return REALPOLLTS(fds, nfds, tsp, NULL); 1831 } 1832 1833 int 1834 REALKEVENT(int kq, const struct kevent *changelist, size_t nchanges, 1835 struct kevent *eventlist, size_t nevents, 1836 const struct timespec *timeout) 1837 { 1838 int (*op_kevent)(int, const struct kevent *, size_t, 1839 struct kevent *, size_t, const struct timespec *); 1840 const struct kevent *ev; 1841 size_t i; 1842 1843 /* 1844 * Check that we don't attempt to kevent rump kernel fd's. 1845 * That needs similar treatment to select/poll, but is slightly 1846 * trickier since we need to manage to different kq descriptors. 1847 * (TODO, in case you're wondering). 1848 */ 1849 for (i = 0; i < nchanges; i++) { 1850 ev = &changelist[i]; 1851 if (ev->filter == EVFILT_READ || ev->filter == EVFILT_WRITE || 1852 ev->filter == EVFILT_VNODE) { 1853 if (fd_isrump((int)ev->ident)) { 1854 errno = ENOTSUP; 1855 return -1; 1856 } 1857 } 1858 } 1859 1860 op_kevent = GETSYSCALL(host, KEVENT); 1861 return op_kevent(kq, changelist, nchanges, eventlist, nevents, timeout); 1862 } 1863 1864 /* 1865 * mmapping from a rump kernel is not supported, so disallow it. 1866 */ 1867 void * 1868 mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) 1869 { 1870 1871 if (flags & MAP_FILE && fd_isrump(fd)) { 1872 errno = ENOSYS; 1873 return MAP_FAILED; 1874 } 1875 return host_mmap(addr, len, prot, flags, fd, offset); 1876 } 1877 1878 /* 1879 * these go to one or the other on a per-process configuration 1880 */ 1881 int __sysctl(const int *, unsigned int, void *, size_t *, const void *, size_t); 1882 int 1883 __sysctl(const int *name, unsigned int namelen, void *old, size_t *oldlenp, 1884 const void *new, size_t newlen) 1885 { 1886 int (*op___sysctl)(const int *, unsigned int, void *, size_t *, 1887 const void *, size_t); 1888 1889 if (rumpsysctl) { 1890 op___sysctl = GETSYSCALL(rump, __SYSCTL); 1891 } else { 1892 op___sysctl = GETSYSCALL(host, __SYSCTL); 1893 /* we haven't inited yet */ 1894 if (__predict_false(op___sysctl == NULL)) { 1895 op___sysctl = dlsym(RTLD_NEXT, "__sysctl"); 1896 } 1897 } 1898 1899 return op___sysctl(name, namelen, old, oldlenp, new, newlen); 1900 } 1901 1902 /* 1903 * Rest are std type calls. 1904 */ 1905 1906 FDCALL(int, bind, DUALCALL_BIND, \ 1907 (int fd, const struct sockaddr *name, socklen_t namelen), \ 1908 (int, const struct sockaddr *, socklen_t), \ 1909 (fd, name, namelen)) 1910 1911 FDCALL(int, connect, DUALCALL_CONNECT, \ 1912 (int fd, const struct sockaddr *name, socklen_t namelen), \ 1913 (int, const struct sockaddr *, socklen_t), \ 1914 (fd, name, namelen)) 1915 1916 FDCALL(int, getpeername, DUALCALL_GETPEERNAME, \ 1917 (int fd, struct sockaddr *name, socklen_t *namelen), \ 1918 (int, struct sockaddr *, socklen_t *), \ 1919 (fd, name, namelen)) 1920 1921 FDCALL(int, getsockname, DUALCALL_GETSOCKNAME, \ 1922 (int fd, struct sockaddr *name, socklen_t *namelen), \ 1923 (int, struct sockaddr *, socklen_t *), \ 1924 (fd, name, namelen)) 1925 1926 FDCALL(int, listen, DUALCALL_LISTEN, \ 1927 (int fd, int backlog), \ 1928 (int, int), \ 1929 (fd, backlog)) 1930 1931 FDCALL(ssize_t, recvfrom, DUALCALL_RECVFROM, \ 1932 (int fd, void *buf, size_t len, int flags, \ 1933 struct sockaddr *from, socklen_t *fromlen), \ 1934 (int, void *, size_t, int, struct sockaddr *, socklen_t *), \ 1935 (fd, buf, len, flags, from, fromlen)) 1936 1937 FDCALL(ssize_t, sendto, DUALCALL_SENDTO, \ 1938 (int fd, const void *buf, size_t len, int flags, \ 1939 const struct sockaddr *to, socklen_t tolen), \ 1940 (int, const void *, size_t, int, \ 1941 const struct sockaddr *, socklen_t), \ 1942 (fd, buf, len, flags, to, tolen)) 1943 1944 FDCALL(ssize_t, recvmsg, DUALCALL_RECVMSG, \ 1945 (int fd, struct msghdr *msg, int flags), \ 1946 (int, struct msghdr *, int), \ 1947 (fd, msg, flags)) 1948 1949 FDCALL(ssize_t, sendmsg, DUALCALL_SENDMSG, \ 1950 (int fd, const struct msghdr *msg, int flags), \ 1951 (int, const struct msghdr *, int), \ 1952 (fd, msg, flags)) 1953 1954 FDCALL(int, getsockopt, DUALCALL_GETSOCKOPT, \ 1955 (int fd, int level, int optn, void *optval, socklen_t *optlen), \ 1956 (int, int, int, void *, socklen_t *), \ 1957 (fd, level, optn, optval, optlen)) 1958 1959 FDCALL(int, setsockopt, DUALCALL_SETSOCKOPT, \ 1960 (int fd, int level, int optn, \ 1961 const void *optval, socklen_t optlen), \ 1962 (int, int, int, const void *, socklen_t), \ 1963 (fd, level, optn, optval, optlen)) 1964 1965 FDCALL(int, shutdown, DUALCALL_SHUTDOWN, \ 1966 (int fd, int how), \ 1967 (int, int), \ 1968 (fd, how)) 1969 1970 #if _FORTIFY_SOURCE > 0 1971 #define STUB(fun) __ssp_weak_name(fun) 1972 ssize_t _sys_readlink(const char * __restrict, char * __restrict, size_t); 1973 ssize_t 1974 STUB(readlink)(const char * __restrict path, char * __restrict buf, 1975 size_t bufsiz) 1976 { 1977 return _sys_readlink(path, buf, bufsiz); 1978 } 1979 1980 char *_sys_getcwd(char *, size_t); 1981 char * 1982 STUB(getcwd)(char *buf, size_t size) 1983 { 1984 return _sys_getcwd(buf, size); 1985 } 1986 #else 1987 #define STUB(fun) fun 1988 #endif 1989 1990 FDCALL(ssize_t, REALREAD, DUALCALL_READ, \ 1991 (int fd, void *buf, size_t buflen), \ 1992 (int, void *, size_t), \ 1993 (fd, buf, buflen)) 1994 1995 FDCALL(ssize_t, readv, DUALCALL_READV, \ 1996 (int fd, const struct iovec *iov, int iovcnt), \ 1997 (int, const struct iovec *, int), \ 1998 (fd, iov, iovcnt)) 1999 2000 FDCALL(ssize_t, REALPREAD, DUALCALL_PREAD, \ 2001 (int fd, void *buf, size_t nbytes, off_t offset), \ 2002 (int, void *, size_t, off_t), \ 2003 (fd, buf, nbytes, offset)) 2004 2005 FDCALL(ssize_t, preadv, DUALCALL_PREADV, \ 2006 (int fd, const struct iovec *iov, int iovcnt, off_t offset), \ 2007 (int, const struct iovec *, int, off_t), \ 2008 (fd, iov, iovcnt, offset)) 2009 2010 FDCALL(ssize_t, writev, DUALCALL_WRITEV, \ 2011 (int fd, const struct iovec *iov, int iovcnt), \ 2012 (int, const struct iovec *, int), \ 2013 (fd, iov, iovcnt)) 2014 2015 FDCALL(ssize_t, REALPWRITE, DUALCALL_PWRITE, \ 2016 (int fd, const void *buf, size_t nbytes, off_t offset), \ 2017 (int, const void *, size_t, off_t), \ 2018 (fd, buf, nbytes, offset)) 2019 2020 FDCALL(ssize_t, pwritev, DUALCALL_PWRITEV, \ 2021 (int fd, const struct iovec *iov, int iovcnt, off_t offset), \ 2022 (int, const struct iovec *, int, off_t), \ 2023 (fd, iov, iovcnt, offset)) 2024 2025 FDCALL(int, REALFSTAT, DUALCALL_FSTAT, \ 2026 (int fd, struct stat *sb), \ 2027 (int, struct stat *), \ 2028 (fd, sb)) 2029 2030 FDCALL(int, fstatvfs1, DUALCALL_FSTATVFS1, \ 2031 (int fd, struct statvfs *buf, int flags), \ 2032 (int, struct statvfs *, int), \ 2033 (fd, buf, flags)) 2034 2035 FDCALL(off_t, lseek, DUALCALL_LSEEK, \ 2036 (int fd, off_t offset, int whence), \ 2037 (int, off_t, int), \ 2038 (fd, offset, whence)) 2039 __strong_alias(_lseek,lseek); 2040 2041 FDCALL(int, REALGETDENTS, DUALCALL_GETDENTS, \ 2042 (int fd, char *buf, size_t nbytes), \ 2043 (int, char *, size_t), \ 2044 (fd, buf, nbytes)) 2045 2046 FDCALL(int, fchown, DUALCALL_FCHOWN, \ 2047 (int fd, uid_t owner, gid_t group), \ 2048 (int, uid_t, gid_t), \ 2049 (fd, owner, group)) 2050 2051 FDCALL(int, fchmod, DUALCALL_FCHMOD, \ 2052 (int fd, mode_t mode), \ 2053 (int, mode_t), \ 2054 (fd, mode)) 2055 2056 FDCALL(int, ftruncate, DUALCALL_FTRUNCATE, \ 2057 (int fd, off_t length), \ 2058 (int, off_t), \ 2059 (fd, length)) 2060 2061 FDCALL(int, fsync, DUALCALL_FSYNC, \ 2062 (int fd), \ 2063 (int), \ 2064 (fd)) 2065 2066 FDCALL(int, fsync_range, DUALCALL_FSYNC_RANGE, \ 2067 (int fd, int how, off_t start, off_t length), \ 2068 (int, int, off_t, off_t), \ 2069 (fd, how, start, length)) 2070 2071 FDCALL(int, futimes, DUALCALL_FUTIMES, \ 2072 (int fd, const struct timeval *tv), \ 2073 (int, const struct timeval *), \ 2074 (fd, tv)) 2075 2076 FDCALL(int, fchflags, DUALCALL_FCHFLAGS, \ 2077 (int fd, u_long flags), \ 2078 (int, u_long), \ 2079 (fd, flags)) 2080 2081 /* 2082 * path-based selectors 2083 */ 2084 2085 PATHCALL(int, REALSTAT, DUALCALL_STAT, \ 2086 (const char *path, struct stat *sb), \ 2087 (const char *, struct stat *), \ 2088 (path, sb)) 2089 2090 PATHCALL(int, REALLSTAT, DUALCALL_LSTAT, \ 2091 (const char *path, struct stat *sb), \ 2092 (const char *, struct stat *), \ 2093 (path, sb)) 2094 2095 PATHCALL(int, chown, DUALCALL_CHOWN, \ 2096 (const char *path, uid_t owner, gid_t group), \ 2097 (const char *, uid_t, gid_t), \ 2098 (path, owner, group)) 2099 2100 PATHCALL(int, lchown, DUALCALL_LCHOWN, \ 2101 (const char *path, uid_t owner, gid_t group), \ 2102 (const char *, uid_t, gid_t), \ 2103 (path, owner, group)) 2104 2105 PATHCALL(int, chmod, DUALCALL_CHMOD, \ 2106 (const char *path, mode_t mode), \ 2107 (const char *, mode_t), \ 2108 (path, mode)) 2109 2110 PATHCALL(int, lchmod, DUALCALL_LCHMOD, \ 2111 (const char *path, mode_t mode), \ 2112 (const char *, mode_t), \ 2113 (path, mode)) 2114 2115 PATHCALL(int, statvfs1, DUALCALL_STATVFS1, \ 2116 (const char *path, struct statvfs *buf, int flags), \ 2117 (const char *, struct statvfs *, int), \ 2118 (path, buf, flags)) 2119 2120 PATHCALL(int, unlink, DUALCALL_UNLINK, \ 2121 (const char *path), \ 2122 (const char *), \ 2123 (path)) 2124 2125 PATHCALL(int, symlink, DUALCALL_SYMLINK, \ 2126 (const char *target, const char *path), \ 2127 (const char *, const char *), \ 2128 (target, path)) 2129 2130 PATHCALL(ssize_t, readlink, DUALCALL_READLINK, \ 2131 (const char *path, char *buf, size_t bufsiz), \ 2132 (const char *, char *, size_t), \ 2133 (path, buf, bufsiz)) 2134 2135 PATHCALL(int, mkdir, DUALCALL_MKDIR, \ 2136 (const char *path, mode_t mode), \ 2137 (const char *, mode_t), \ 2138 (path, mode)) 2139 2140 PATHCALL(int, rmdir, DUALCALL_RMDIR, \ 2141 (const char *path), \ 2142 (const char *), \ 2143 (path)) 2144 2145 PATHCALL(int, utimes, DUALCALL_UTIMES, \ 2146 (const char *path, const struct timeval *tv), \ 2147 (const char *, const struct timeval *), \ 2148 (path, tv)) 2149 2150 PATHCALL(int, lutimes, DUALCALL_LUTIMES, \ 2151 (const char *path, const struct timeval *tv), \ 2152 (const char *, const struct timeval *), \ 2153 (path, tv)) 2154 2155 PATHCALL(int, chflags, DUALCALL_CHFLAGS, \ 2156 (const char *path, u_long flags), \ 2157 (const char *, u_long), \ 2158 (path, flags)) 2159 2160 PATHCALL(int, lchflags, DUALCALL_LCHFLAGS, \ 2161 (const char *path, u_long flags), \ 2162 (const char *, u_long), \ 2163 (path, flags)) 2164 2165 PATHCALL(int, truncate, DUALCALL_TRUNCATE, \ 2166 (const char *path, off_t length), \ 2167 (const char *, off_t), \ 2168 (path, length)) 2169 2170 PATHCALL(int, access, DUALCALL_ACCESS, \ 2171 (const char *path, int mode), \ 2172 (const char *, int), \ 2173 (path, mode)) 2174 2175 PATHCALL(int, REALMKNOD, DUALCALL_MKNOD, \ 2176 (const char *path, mode_t mode, dev_t dev), \ 2177 (const char *, mode_t, dev_t), \ 2178 (path, mode, dev)) 2179 2180 /* 2181 * Note: with mount the decisive parameter is the mount 2182 * destination directory. This is because we don't really know 2183 * about the "source" directory in a generic call (and besides, 2184 * it might not even exist, cf. nfs). 2185 */ 2186 PATHCALL(int, REALMOUNT, DUALCALL_MOUNT, \ 2187 (const char *type, const char *path, int flags, \ 2188 void *data, size_t dlen), \ 2189 (const char *, const char *, int, void *, size_t), \ 2190 (type, path, flags, data, dlen)) 2191 2192 PATHCALL(int, unmount, DUALCALL_UNMOUNT, \ 2193 (const char *path, int flags), \ 2194 (const char *, int), \ 2195 (path, flags)) 2196 2197 PATHCALL(int, REALQUOTACTL, DUALCALL_QUOTACTL, \ 2198 (const char *path, struct plistref *p), \ 2199 (const char *, struct plistref *), \ 2200 (path, p)) 2201 2202 PATHCALL(int, REALGETFH, DUALCALL_GETFH, \ 2203 (const char *path, void *fhp, size_t *fh_size), \ 2204 (const char *, void *, size_t *), \ 2205 (path, fhp, fh_size)) 2206 2207 /* 2208 * These act different on a per-process vfs configuration 2209 */ 2210 2211 VFSCALL(VFSBIT_GETVFSSTAT, int, getvfsstat, DUALCALL_GETVFSSTAT, \ 2212 (struct statvfs *buf, size_t buflen, int flags), \ 2213 (struct statvfs *, size_t, int), \ 2214 (buf, buflen, flags)) 2215 2216 VFSCALL(VFSBIT_FHCALLS, int, REALFHOPEN, DUALCALL_FHOPEN, \ 2217 (const void *fhp, size_t fh_size, int flags), \ 2218 (const char *, size_t, int), \ 2219 (fhp, fh_size, flags)) 2220 2221 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTAT, DUALCALL_FHSTAT, \ 2222 (const void *fhp, size_t fh_size, struct stat *sb), \ 2223 (const char *, size_t, struct stat *), \ 2224 (fhp, fh_size, sb)) 2225 2226 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTATVFS1, DUALCALL_FHSTATVFS1, \ 2227 (const void *fhp, size_t fh_size, struct statvfs *sb, int flgs),\ 2228 (const char *, size_t, struct statvfs *, int), \ 2229 (fhp, fh_size, sb, flgs)) 2230 2231 /* finally, put nfssvc here. "keep the namespace clean" */ 2232 2233 #include <nfs/rpcv2.h> 2234 #include <nfs/nfs.h> 2235 2236 int 2237 nfssvc(int flags, void *argstructp) 2238 { 2239 int (*op_nfssvc)(int, void *); 2240 2241 if (vfsbits & VFSBIT_NFSSVC){ 2242 struct nfsd_args *nfsdargs; 2243 2244 /* massage the socket descriptor if necessary */ 2245 if (flags == NFSSVC_ADDSOCK) { 2246 nfsdargs = argstructp; 2247 nfsdargs->sock = fd_host2rump(nfsdargs->sock); 2248 } 2249 op_nfssvc = GETSYSCALL(rump, NFSSVC); 2250 } else 2251 op_nfssvc = GETSYSCALL(host, NFSSVC); 2252 2253 return op_nfssvc(flags, argstructp); 2254 } 2255