1 /* $NetBSD: hijack.c,v 1.126 2018/12/16 14:03:37 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * XXX: rumphijack sort of works on glibc Linux. But it's not 30 * the same quality working as on NetBSD. 31 * autoconf HAVE_FOO vs. __NetBSD__ / __linux__ could be further 32 * improved. 33 */ 34 #include <rump/rumpuser_port.h> 35 36 #if !defined(lint) 37 __RCSID("$NetBSD: hijack.c,v 1.126 2018/12/16 14:03:37 hannken Exp $"); 38 #endif 39 40 #include <sys/param.h> 41 #include <sys/types.h> 42 #include <sys/ioctl.h> 43 #include <sys/mman.h> 44 #include <sys/mount.h> 45 #include <sys/socket.h> 46 #include <sys/stat.h> 47 #include <sys/time.h> 48 #include <sys/uio.h> 49 50 #ifdef __NetBSD__ 51 #include <sys/statvfs.h> 52 #endif 53 54 #ifdef HAVE_KQUEUE 55 #include <sys/event.h> 56 #endif 57 58 #ifdef __NetBSD__ 59 #include <sys/quotactl.h> 60 #endif 61 62 #include <assert.h> 63 #include <dlfcn.h> 64 #include <err.h> 65 #include <errno.h> 66 #include <fcntl.h> 67 #include <poll.h> 68 #include <pthread.h> 69 #include <signal.h> 70 #include <stdarg.h> 71 #include <stdbool.h> 72 #include <stdint.h> 73 #include <stdio.h> 74 #include <stdlib.h> 75 #include <string.h> 76 #include <time.h> 77 #include <unistd.h> 78 79 #include <rump/rumpclient.h> 80 #include <rump/rump_syscalls.h> 81 82 #include "hijack.h" 83 84 /* 85 * XXX: Consider autogenerating this, syscnames[] and syscalls[] with 86 * a DSL where the tool also checks the symbols exported by this library 87 * to make sure all relevant calls are accounted for. 88 */ 89 enum dualcall { 90 DUALCALL_WRITE, DUALCALL_WRITEV, DUALCALL_PWRITE, DUALCALL_PWRITEV, 91 DUALCALL_IOCTL, DUALCALL_FCNTL, 92 DUALCALL_SOCKET, DUALCALL_ACCEPT, DUALCALL_PACCEPT, 93 DUALCALL_BIND, DUALCALL_CONNECT, 94 DUALCALL_GETPEERNAME, DUALCALL_GETSOCKNAME, DUALCALL_LISTEN, 95 DUALCALL_RECVFROM, DUALCALL_RECVMSG, 96 DUALCALL_SENDTO, DUALCALL_SENDMSG, 97 DUALCALL_GETSOCKOPT, DUALCALL_SETSOCKOPT, 98 DUALCALL_SHUTDOWN, 99 DUALCALL_READ, DUALCALL_READV, DUALCALL_PREAD, DUALCALL_PREADV, 100 DUALCALL_DUP2, 101 DUALCALL_CLOSE, 102 DUALCALL_POLLTS, 103 104 #ifndef __linux__ 105 DUALCALL_STAT, DUALCALL_LSTAT, DUALCALL_FSTAT, 106 #endif 107 108 DUALCALL_CHMOD, DUALCALL_LCHMOD, DUALCALL_FCHMOD, 109 DUALCALL_CHOWN, DUALCALL_LCHOWN, DUALCALL_FCHOWN, 110 DUALCALL_OPEN, 111 DUALCALL_CHDIR, DUALCALL_FCHDIR, 112 DUALCALL_LSEEK, 113 DUALCALL_UNLINK, DUALCALL_SYMLINK, DUALCALL_READLINK, 114 DUALCALL_LINK, DUALCALL_RENAME, 115 DUALCALL_MKDIR, DUALCALL_RMDIR, 116 DUALCALL_UTIMES, DUALCALL_LUTIMES, DUALCALL_FUTIMES, 117 DUALCALL_UTIMENSAT, DUALCALL_FUTIMENS, 118 DUALCALL_TRUNCATE, DUALCALL_FTRUNCATE, 119 DUALCALL_FSYNC, 120 DUALCALL_ACCESS, 121 122 #ifndef __linux__ 123 DUALCALL___GETCWD, 124 DUALCALL_GETDENTS, 125 #endif 126 127 #ifndef __linux__ 128 DUALCALL_MKNOD, 129 #endif 130 131 #ifdef __NetBSD__ 132 DUALCALL_GETFH, DUALCALL_FHOPEN, DUALCALL_FHSTAT, DUALCALL_FHSTATVFS1, 133 #endif 134 135 #ifdef HAVE_KQUEUE 136 DUALCALL_KEVENT, 137 #endif 138 139 #ifdef __NetBSD__ 140 DUALCALL___SYSCTL, 141 DUALCALL_MODCTL, 142 #endif 143 144 #ifdef __NetBSD__ 145 DUALCALL_NFSSVC, 146 #endif 147 148 #ifdef __NetBSD__ 149 DUALCALL_STATVFS1, DUALCALL_FSTATVFS1, DUALCALL_GETVFSSTAT, 150 #endif 151 152 #ifdef __NetBSD__ 153 DUALCALL_MOUNT, DUALCALL_UNMOUNT, 154 #endif 155 156 #ifdef HAVE_FSYNC_RANGE 157 DUALCALL_FSYNC_RANGE, 158 #endif 159 160 #ifdef HAVE_CHFLAGS 161 DUALCALL_CHFLAGS, DUALCALL_LCHFLAGS, DUALCALL_FCHFLAGS, 162 #endif 163 164 #ifdef HAVE___QUOTACTL 165 DUALCALL_QUOTACTL, 166 #endif 167 #ifdef __NetBSD__ 168 DUALCALL_LINKAT, 169 #endif 170 DUALCALL__NUM 171 }; 172 173 #define RSYS_STRING(a) __STRING(a) 174 #define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a)) 175 176 /* 177 * Would be nice to get this automatically in sync with libc. 178 * Also, this does not work for compat-using binaries (we should 179 * provide all previous interfaces, not just the current ones) 180 */ 181 #if defined(__NetBSD__) 182 183 #if !__NetBSD_Prereq__(5,99,7) 184 #define REALSELECT select 185 #define REALPOLLTS pollts 186 #define REALKEVENT kevent 187 #define REALSTAT __stat30 188 #define REALLSTAT __lstat30 189 #define REALFSTAT __fstat30 190 #define REALUTIMES utimes 191 #define REALLUTIMES lutimes 192 #define REALFUTIMES futimes 193 #define REALMKNOD mknod 194 #define REALFHSTAT __fhstat40 195 #else /* >= 5.99.7 */ 196 #define REALSELECT _sys___select50 197 #define REALPOLLTS _sys___pollts50 198 #define REALKEVENT _sys___kevent50 199 #define REALSTAT __stat50 200 #define REALLSTAT __lstat50 201 #define REALFSTAT __fstat50 202 #define REALUTIMES __utimes50 203 #define REALLUTIMES __lutimes50 204 #define REALFUTIMES __futimes50 205 #define REALMKNOD __mknod50 206 #define REALFHSTAT __fhstat50 207 #endif /* < 5.99.7 */ 208 209 #define REALREAD _sys_read 210 #define REALPREAD _sys_pread 211 #define REALPWRITE _sys_pwrite 212 #define REALGETDENTS __getdents30 213 #define REALMOUNT __mount50 214 #define REALGETFH __getfh30 215 #define REALFHOPEN __fhopen40 216 #define REALFHSTATVFS1 __fhstatvfs140 217 #define REALSOCKET __socket30 218 219 #define LSEEK_ALIAS _lseek 220 #define VFORK __vfork14 221 222 int REALSTAT(const char *, struct stat *); 223 int REALLSTAT(const char *, struct stat *); 224 int REALFSTAT(int, struct stat *); 225 int REALMKNOD(const char *, mode_t, dev_t); 226 int REALGETDENTS(int, char *, size_t); 227 228 int __getcwd(char *, size_t); 229 230 #elif defined(__linux__) /* glibc, really */ 231 232 #define REALREAD read 233 #define REALPREAD pread 234 #define REALPWRITE pwrite 235 #define REALSELECT select 236 #define REALPOLLTS ppoll 237 #define REALUTIMES utimes 238 #define REALLUTIMES lutimes 239 #define REALFUTIMES futimes 240 #define REALFHSTAT fhstat 241 #define REALSOCKET socket 242 243 #else /* !NetBSD && !linux */ 244 245 #error platform not supported 246 247 #endif /* platform */ 248 249 int REALSELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *); 250 int REALPOLLTS(struct pollfd *, nfds_t, 251 const struct timespec *, const sigset_t *); 252 int REALKEVENT(int, const struct kevent *, size_t, struct kevent *, size_t, 253 const struct timespec *); 254 ssize_t REALREAD(int, void *, size_t); 255 ssize_t REALPREAD(int, void *, size_t, off_t); 256 ssize_t REALPWRITE(int, const void *, size_t, off_t); 257 int REALUTIMES(const char *, const struct timeval [2]); 258 int REALLUTIMES(const char *, const struct timeval [2]); 259 int REALFUTIMES(int, const struct timeval [2]); 260 int REALMOUNT(const char *, const char *, int, void *, size_t); 261 int REALGETFH(const char *, void *, size_t *); 262 int REALFHOPEN(const void *, size_t, int); 263 int REALFHSTAT(const void *, size_t, struct stat *); 264 int REALFHSTATVFS1(const void *, size_t, struct statvfs *, int); 265 int REALSOCKET(int, int, int); 266 267 #define S(a) __STRING(a) 268 struct sysnames { 269 enum dualcall scm_callnum; 270 const char *scm_hostname; 271 const char *scm_rumpname; 272 } syscnames[] = { 273 { DUALCALL_SOCKET, S(REALSOCKET), RSYS_NAME(SOCKET) }, 274 { DUALCALL_ACCEPT, "accept", RSYS_NAME(ACCEPT) }, 275 { DUALCALL_PACCEPT, "paccept", RSYS_NAME(PACCEPT) }, 276 { DUALCALL_BIND, "bind", RSYS_NAME(BIND) }, 277 { DUALCALL_CONNECT, "connect", RSYS_NAME(CONNECT) }, 278 { DUALCALL_GETPEERNAME, "getpeername", RSYS_NAME(GETPEERNAME) }, 279 { DUALCALL_GETSOCKNAME, "getsockname", RSYS_NAME(GETSOCKNAME) }, 280 { DUALCALL_LISTEN, "listen", RSYS_NAME(LISTEN) }, 281 { DUALCALL_RECVFROM, "recvfrom", RSYS_NAME(RECVFROM) }, 282 { DUALCALL_RECVMSG, "recvmsg", RSYS_NAME(RECVMSG) }, 283 { DUALCALL_SENDTO, "sendto", RSYS_NAME(SENDTO) }, 284 { DUALCALL_SENDMSG, "sendmsg", RSYS_NAME(SENDMSG) }, 285 { DUALCALL_GETSOCKOPT, "getsockopt", RSYS_NAME(GETSOCKOPT) }, 286 { DUALCALL_SETSOCKOPT, "setsockopt", RSYS_NAME(SETSOCKOPT) }, 287 { DUALCALL_SHUTDOWN, "shutdown", RSYS_NAME(SHUTDOWN) }, 288 { DUALCALL_READ, S(REALREAD), RSYS_NAME(READ) }, 289 { DUALCALL_READV, "readv", RSYS_NAME(READV) }, 290 { DUALCALL_PREAD, S(REALPREAD), RSYS_NAME(PREAD) }, 291 { DUALCALL_PREADV, "preadv", RSYS_NAME(PREADV) }, 292 { DUALCALL_WRITE, "write", RSYS_NAME(WRITE) }, 293 { DUALCALL_WRITEV, "writev", RSYS_NAME(WRITEV) }, 294 { DUALCALL_PWRITE, S(REALPWRITE), RSYS_NAME(PWRITE) }, 295 { DUALCALL_PWRITEV, "pwritev", RSYS_NAME(PWRITEV) }, 296 { DUALCALL_IOCTL, "ioctl", RSYS_NAME(IOCTL) }, 297 { DUALCALL_FCNTL, "fcntl", RSYS_NAME(FCNTL) }, 298 { DUALCALL_DUP2, "dup2", RSYS_NAME(DUP2) }, 299 { DUALCALL_CLOSE, "close", RSYS_NAME(CLOSE) }, 300 { DUALCALL_POLLTS, S(REALPOLLTS), RSYS_NAME(POLLTS) }, 301 #ifndef __linux__ 302 { DUALCALL_STAT, S(REALSTAT), RSYS_NAME(STAT) }, 303 { DUALCALL_LSTAT, S(REALLSTAT), RSYS_NAME(LSTAT) }, 304 { DUALCALL_FSTAT, S(REALFSTAT), RSYS_NAME(FSTAT) }, 305 #endif 306 { DUALCALL_CHOWN, "chown", RSYS_NAME(CHOWN) }, 307 { DUALCALL_LCHOWN, "lchown", RSYS_NAME(LCHOWN) }, 308 { DUALCALL_FCHOWN, "fchown", RSYS_NAME(FCHOWN) }, 309 { DUALCALL_CHMOD, "chmod", RSYS_NAME(CHMOD) }, 310 { DUALCALL_LCHMOD, "lchmod", RSYS_NAME(LCHMOD) }, 311 { DUALCALL_FCHMOD, "fchmod", RSYS_NAME(FCHMOD) }, 312 { DUALCALL_UTIMES, S(REALUTIMES), RSYS_NAME(UTIMES) }, 313 { DUALCALL_LUTIMES, S(REALLUTIMES), RSYS_NAME(LUTIMES) }, 314 { DUALCALL_FUTIMES, S(REALFUTIMES), RSYS_NAME(FUTIMES) }, 315 { DUALCALL_UTIMENSAT, "utimensat", RSYS_NAME(UTIMENSAT) }, 316 { DUALCALL_FUTIMENS, "futimens", RSYS_NAME(FUTIMENS) }, 317 { DUALCALL_OPEN, "open", RSYS_NAME(OPEN) }, 318 { DUALCALL_CHDIR, "chdir", RSYS_NAME(CHDIR) }, 319 { DUALCALL_FCHDIR, "fchdir", RSYS_NAME(FCHDIR) }, 320 { DUALCALL_LSEEK, "lseek", RSYS_NAME(LSEEK) }, 321 { DUALCALL_UNLINK, "unlink", RSYS_NAME(UNLINK) }, 322 { DUALCALL_SYMLINK, "symlink", RSYS_NAME(SYMLINK) }, 323 { DUALCALL_READLINK, "readlink", RSYS_NAME(READLINK) }, 324 { DUALCALL_LINK, "link", RSYS_NAME(LINK) }, 325 { DUALCALL_RENAME, "rename", RSYS_NAME(RENAME) }, 326 { DUALCALL_MKDIR, "mkdir", RSYS_NAME(MKDIR) }, 327 { DUALCALL_RMDIR, "rmdir", RSYS_NAME(RMDIR) }, 328 { DUALCALL_TRUNCATE, "truncate", RSYS_NAME(TRUNCATE) }, 329 { DUALCALL_FTRUNCATE, "ftruncate", RSYS_NAME(FTRUNCATE) }, 330 { DUALCALL_FSYNC, "fsync", RSYS_NAME(FSYNC) }, 331 { DUALCALL_ACCESS, "access", RSYS_NAME(ACCESS) }, 332 333 #ifndef __linux__ 334 { DUALCALL___GETCWD, "__getcwd", RSYS_NAME(__GETCWD) }, 335 { DUALCALL_GETDENTS, S(REALGETDENTS),RSYS_NAME(GETDENTS) }, 336 #endif 337 338 #ifndef __linux__ 339 { DUALCALL_MKNOD, S(REALMKNOD), RSYS_NAME(MKNOD) }, 340 #endif 341 342 #ifdef __NetBSD__ 343 { DUALCALL_GETFH, S(REALGETFH), RSYS_NAME(GETFH) }, 344 { DUALCALL_FHOPEN, S(REALFHOPEN), RSYS_NAME(FHOPEN) }, 345 { DUALCALL_FHSTAT, S(REALFHSTAT), RSYS_NAME(FHSTAT) }, 346 { DUALCALL_FHSTATVFS1, S(REALFHSTATVFS1),RSYS_NAME(FHSTATVFS1) }, 347 #endif 348 349 #ifdef HAVE_KQUEUE 350 { DUALCALL_KEVENT, S(REALKEVENT), RSYS_NAME(KEVENT) }, 351 #endif 352 353 #ifdef __NetBSD__ 354 { DUALCALL___SYSCTL, "__sysctl", RSYS_NAME(__SYSCTL) }, 355 { DUALCALL_MODCTL, "modctl", RSYS_NAME(MODCTL) }, 356 #endif 357 358 #ifdef __NetBSD__ 359 { DUALCALL_NFSSVC, "nfssvc", RSYS_NAME(NFSSVC) }, 360 #endif 361 362 #ifdef __NetBSD__ 363 { DUALCALL_STATVFS1, "statvfs1", RSYS_NAME(STATVFS1) }, 364 { DUALCALL_FSTATVFS1, "fstatvfs1", RSYS_NAME(FSTATVFS1) }, 365 { DUALCALL_GETVFSSTAT, "getvfsstat", RSYS_NAME(GETVFSSTAT) }, 366 #endif 367 368 #ifdef __NetBSD__ 369 { DUALCALL_MOUNT, S(REALMOUNT), RSYS_NAME(MOUNT) }, 370 { DUALCALL_UNMOUNT, "unmount", RSYS_NAME(UNMOUNT) }, 371 #endif 372 373 #ifdef HAVE_FSYNC_RANGE 374 { DUALCALL_FSYNC_RANGE, "fsync_range", RSYS_NAME(FSYNC_RANGE) }, 375 #endif 376 377 #ifdef HAVE_CHFLAGS 378 { DUALCALL_CHFLAGS, "chflags", RSYS_NAME(CHFLAGS) }, 379 { DUALCALL_LCHFLAGS, "lchflags", RSYS_NAME(LCHFLAGS) }, 380 { DUALCALL_FCHFLAGS, "fchflags", RSYS_NAME(FCHFLAGS) }, 381 #endif /* HAVE_CHFLAGS */ 382 383 #ifdef HAVE___QUOTACTL 384 { DUALCALL_QUOTACTL, "__quotactl", RSYS_NAME(__QUOTACTL) }, 385 #endif /* HAVE___QUOTACTL */ 386 387 #ifdef __NetBSD__ 388 { DUALCALL_LINKAT, "linkat", RSYS_NAME(LINKAT) }, 389 #endif 390 }; 391 #undef S 392 393 struct bothsys { 394 void *bs_host; 395 void *bs_rump; 396 } syscalls[DUALCALL__NUM]; 397 #define GETSYSCALL(which, name) syscalls[DUALCALL_##name].bs_##which 398 399 static pid_t (*host_fork)(void); 400 static int (*host_daemon)(int, int); 401 static void * (*host_mmap)(void *, size_t, int, int, int, off_t); 402 403 /* 404 * This tracks if our process is in a subdirectory of /rump. 405 * It's preserved over exec. 406 */ 407 static bool pwdinrump; 408 409 enum pathtype { PATH_HOST, PATH_RUMP, PATH_RUMPBLANKET }; 410 411 static bool fd_isrump(int); 412 static enum pathtype path_isrump(const char *); 413 414 /* default FD_SETSIZE is 256 ==> default fdoff is 128 */ 415 static int hijack_fdoff = FD_SETSIZE/2; 416 417 /* 418 * Maintain a mapping table for the usual dup2 suspects. 419 * Could use atomic ops to operate on dup2vec, but an application 420 * racing there is not well-defined, so don't bother. 421 */ 422 /* note: you cannot change this without editing the env-passing code */ 423 #define DUP2HIGH 2 424 static uint32_t dup2vec[DUP2HIGH+1]; 425 #define DUP2BIT (1<<31) 426 #define DUP2ALIAS (1<<30) 427 #define DUP2FDMASK ((1<<30)-1) 428 429 static bool 430 isdup2d(int fd) 431 { 432 433 return fd <= DUP2HIGH && fd >= 0 && dup2vec[fd] & DUP2BIT; 434 } 435 436 static int 437 mapdup2(int hostfd) 438 { 439 440 _DIAGASSERT(isdup2d(hostfd)); 441 return dup2vec[hostfd] & DUP2FDMASK; 442 } 443 444 static int 445 unmapdup2(int rumpfd) 446 { 447 int i; 448 449 for (i = 0; i <= DUP2HIGH; i++) { 450 if (dup2vec[i] & DUP2BIT && 451 (dup2vec[i] & DUP2FDMASK) == (unsigned)rumpfd) 452 return i; 453 } 454 return -1; 455 } 456 457 static void 458 setdup2(int hostfd, int rumpfd) 459 { 460 461 if (hostfd > DUP2HIGH) { 462 _DIAGASSERT(0); 463 return; 464 } 465 466 dup2vec[hostfd] = DUP2BIT | DUP2ALIAS | rumpfd; 467 } 468 469 static void 470 clrdup2(int hostfd) 471 { 472 473 if (hostfd > DUP2HIGH) { 474 _DIAGASSERT(0); 475 return; 476 } 477 478 dup2vec[hostfd] = 0; 479 } 480 481 static bool 482 killdup2alias(int rumpfd) 483 { 484 int hostfd; 485 486 if ((hostfd = unmapdup2(rumpfd)) == -1) 487 return false; 488 489 if (dup2vec[hostfd] & DUP2ALIAS) { 490 dup2vec[hostfd] &= ~DUP2ALIAS; 491 return true; 492 } 493 return false; 494 } 495 496 //#define DEBUGJACK 497 #ifdef DEBUGJACK 498 #define DPRINTF(x) mydprintf x 499 static void 500 mydprintf(const char *fmt, ...) 501 { 502 va_list ap; 503 504 if (isdup2d(STDERR_FILENO)) 505 return; 506 507 va_start(ap, fmt); 508 vfprintf(stderr, fmt, ap); 509 va_end(ap); 510 } 511 512 static const char * 513 whichfd(int fd) 514 { 515 516 if (fd == -1) 517 return "-1"; 518 else if (fd_isrump(fd)) 519 return "rump"; 520 else 521 return "host"; 522 } 523 524 static const char * 525 whichpath(const char *path) 526 { 527 528 if (path_isrump(path)) 529 return "rump"; 530 else 531 return "host"; 532 } 533 534 #else 535 #define DPRINTF(x) 536 #endif 537 538 #define ATCALL(type, name, rcname, args, proto, vars) \ 539 type name args \ 540 { \ 541 type (*fun) proto; \ 542 int isrump = -1; \ 543 \ 544 if (fd == AT_FDCWD || *path == '/') { \ 545 isrump = path_isrump(path); \ 546 } else { \ 547 isrump = fd_isrump(fd); \ 548 } \ 549 \ 550 DPRINTF(("%s -> %d:%s (%s)\n", __STRING(name), \ 551 fd, path, isrump ? "rump" : "host")); \ 552 \ 553 assert(isrump != -1); \ 554 if (isrump) { \ 555 fun = syscalls[rcname].bs_rump; \ 556 if (fd != AT_FDCWD) \ 557 fd = fd_host2rump(fd); \ 558 path = path_host2rump(path); \ 559 } else { \ 560 fun = syscalls[rcname].bs_host; \ 561 } \ 562 return fun vars; \ 563 } 564 565 #define FDCALL(type, name, rcname, args, proto, vars) \ 566 type name args \ 567 { \ 568 type (*fun) proto; \ 569 \ 570 DPRINTF(("%s -> %d (%s)\n", __STRING(name), fd, whichfd(fd))); \ 571 if (fd_isrump(fd)) { \ 572 fun = syscalls[rcname].bs_rump; \ 573 fd = fd_host2rump(fd); \ 574 } else { \ 575 fun = syscalls[rcname].bs_host; \ 576 } \ 577 \ 578 return fun vars; \ 579 } 580 581 #define PATHCALL(type, name, rcname, args, proto, vars) \ 582 type name args \ 583 { \ 584 type (*fun) proto; \ 585 enum pathtype pt; \ 586 \ 587 DPRINTF(("%s -> %s (%s)\n", __STRING(name), path, \ 588 whichpath(path))); \ 589 if ((pt = path_isrump(path)) != PATH_HOST) { \ 590 fun = syscalls[rcname].bs_rump; \ 591 if (pt == PATH_RUMP) \ 592 path = path_host2rump(path); \ 593 } else { \ 594 fun = syscalls[rcname].bs_host; \ 595 } \ 596 \ 597 return fun vars; \ 598 } 599 600 #define VFSCALL(bit, type, name, rcname, args, proto, vars) \ 601 type name args \ 602 { \ 603 type (*fun) proto; \ 604 \ 605 DPRINTF(("%s (0x%x, 0x%x)\n", __STRING(name), bit, vfsbits)); \ 606 if (vfsbits & bit) { \ 607 fun = syscalls[rcname].bs_rump; \ 608 } else { \ 609 fun = syscalls[rcname].bs_host; \ 610 } \ 611 \ 612 return fun vars; \ 613 } 614 615 /* 616 * These variables are set from the RUMPHIJACK string and control 617 * which operations can product rump kernel file descriptors. 618 * This should be easily extendable for future needs. 619 */ 620 #define RUMPHIJACK_DEFAULT "path=/rump,socket=all:nolocal" 621 static bool rumpsockets[PF_MAX]; 622 static const char *rumpprefix; 623 static size_t rumpprefixlen; 624 625 static struct { 626 int pf; 627 const char *name; 628 } socketmap[] = { 629 { PF_LOCAL, "local" }, 630 { PF_INET, "inet" }, 631 #ifdef PF_LINK 632 { PF_LINK, "link" }, 633 #endif 634 #ifdef PF_OROUTE 635 { PF_OROUTE, "oroute" }, 636 #endif 637 { PF_ROUTE, "route" }, 638 { PF_INET6, "inet6" }, 639 #ifdef PF_MPLS 640 { PF_MPLS, "mpls" }, 641 #endif 642 { -1, NULL } 643 }; 644 645 static void 646 sockparser(char *buf) 647 { 648 char *p, *l = NULL; 649 bool value; 650 int i; 651 652 /* if "all" is present, it must be specified first */ 653 if (strncmp(buf, "all", strlen("all")) == 0) { 654 for (i = 0; i < (int)__arraycount(rumpsockets); i++) { 655 rumpsockets[i] = true; 656 } 657 buf += strlen("all"); 658 if (*buf == ':') 659 buf++; 660 } 661 662 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) { 663 value = true; 664 if (strncmp(p, "no", strlen("no")) == 0) { 665 value = false; 666 p += strlen("no"); 667 } 668 669 for (i = 0; socketmap[i].name; i++) { 670 if (strcmp(p, socketmap[i].name) == 0) { 671 rumpsockets[socketmap[i].pf] = value; 672 break; 673 } 674 } 675 if (socketmap[i].name == NULL) { 676 errx(1, "invalid socket specifier %s", p); 677 } 678 } 679 } 680 681 static void 682 pathparser(char *buf) 683 { 684 685 /* sanity-check */ 686 if (*buf != '/') 687 errx(1, "hijack path specifier must begin with ``/''"); 688 rumpprefixlen = strlen(buf); 689 if (rumpprefixlen < 2) 690 errx(1, "invalid hijack prefix: %s", buf); 691 if (buf[rumpprefixlen-1] == '/' && strspn(buf, "/") != rumpprefixlen) 692 errx(1, "hijack prefix may end in slash only if pure " 693 "slash, gave %s", buf); 694 695 if ((rumpprefix = strdup(buf)) == NULL) 696 err(1, "strdup"); 697 rumpprefixlen = strlen(rumpprefix); 698 } 699 700 static struct blanket { 701 const char *pfx; 702 size_t len; 703 } *blanket; 704 static int nblanket; 705 706 static void 707 blanketparser(char *buf) 708 { 709 char *p, *l = NULL; 710 int i; 711 712 for (nblanket = 0, p = buf; p; p = strchr(p+1, ':'), nblanket++) 713 continue; 714 715 blanket = malloc(nblanket * sizeof(*blanket)); 716 if (blanket == NULL) 717 err(1, "alloc blanket %d", nblanket); 718 719 for (p = strtok_r(buf, ":", &l), i = 0; p; 720 p = strtok_r(NULL, ":", &l), i++) { 721 blanket[i].pfx = strdup(p); 722 if (blanket[i].pfx == NULL) 723 err(1, "strdup blanket"); 724 blanket[i].len = strlen(p); 725 726 if (blanket[i].len == 0 || *blanket[i].pfx != '/') 727 errx(1, "invalid blanket specifier %s", p); 728 if (*(blanket[i].pfx + blanket[i].len-1) == '/') 729 errx(1, "invalid blanket specifier %s", p); 730 } 731 } 732 733 #define VFSBIT_NFSSVC 0x01 734 #define VFSBIT_GETVFSSTAT 0x02 735 #define VFSBIT_FHCALLS 0x04 736 static unsigned vfsbits; 737 738 static struct { 739 int bit; 740 const char *name; 741 } vfscalls[] = { 742 { VFSBIT_NFSSVC, "nfssvc" }, 743 { VFSBIT_GETVFSSTAT, "getvfsstat" }, 744 { VFSBIT_FHCALLS, "fhcalls" }, 745 { -1, NULL } 746 }; 747 748 static void 749 vfsparser(char *buf) 750 { 751 char *p, *l = NULL; 752 bool turnon; 753 unsigned int fullmask; 754 int i; 755 756 /* build the full mask and sanity-check while we're at it */ 757 fullmask = 0; 758 for (i = 0; vfscalls[i].name != NULL; i++) { 759 if (fullmask & vfscalls[i].bit) 760 errx(1, "problem exists between vi and chair"); 761 fullmask |= vfscalls[i].bit; 762 } 763 764 765 /* if "all" is present, it must be specified first */ 766 if (strncmp(buf, "all", strlen("all")) == 0) { 767 vfsbits = fullmask; 768 buf += strlen("all"); 769 if (*buf == ':') 770 buf++; 771 } 772 773 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) { 774 turnon = true; 775 if (strncmp(p, "no", strlen("no")) == 0) { 776 turnon = false; 777 p += strlen("no"); 778 } 779 780 for (i = 0; vfscalls[i].name; i++) { 781 if (strcmp(p, vfscalls[i].name) == 0) { 782 if (turnon) 783 vfsbits |= vfscalls[i].bit; 784 else 785 vfsbits &= ~vfscalls[i].bit; 786 break; 787 } 788 } 789 if (vfscalls[i].name == NULL) { 790 errx(1, "invalid vfscall specifier %s", p); 791 } 792 } 793 } 794 795 static bool rumpsysctl = false; 796 797 static void 798 sysctlparser(char *buf) 799 { 800 801 if (buf == NULL) { 802 rumpsysctl = true; 803 return; 804 } 805 806 if (strcasecmp(buf, "y") == 0 || strcasecmp(buf, "yes") == 0 || 807 strcasecmp(buf, "yep") == 0 || strcasecmp(buf, "tottakai") == 0) { 808 rumpsysctl = true; 809 return; 810 } 811 if (strcasecmp(buf, "n") == 0 || strcasecmp(buf, "no") == 0) { 812 rumpsysctl = false; 813 return; 814 } 815 816 errx(1, "sysctl value should be y(es)/n(o), gave: %s", buf); 817 } 818 819 static bool rumpmodctl = false; 820 821 static void 822 modctlparser(char *buf) 823 { 824 825 if (buf == NULL) { 826 rumpmodctl = true; 827 return; 828 } 829 830 if (strcasecmp(buf, "y") == 0 || strcasecmp(buf, "yes") == 0 || 831 strcasecmp(buf, "yep") == 0 || strcasecmp(buf, "tottakai") == 0) { 832 rumpmodctl = true; 833 return; 834 } 835 if (strcasecmp(buf, "n") == 0 || strcasecmp(buf, "no") == 0) { 836 rumpmodctl = false; 837 return; 838 } 839 840 errx(1, "modctl value should be y(es)/n(o), gave: %s", buf); 841 } 842 843 static void 844 fdoffparser(char *buf) 845 { 846 unsigned long fdoff; 847 char *ep; 848 849 if (*buf == '-') { 850 errx(1, "fdoff must not be negative"); 851 } 852 fdoff = strtoul(buf, &ep, 10); 853 if (*ep != '\0') 854 errx(1, "invalid fdoff specifier \"%s\"", buf); 855 if (fdoff >= INT_MAX/2 || fdoff < 3) 856 errx(1, "fdoff out of range"); 857 hijack_fdoff = fdoff; 858 } 859 860 static struct { 861 void (*parsefn)(char *); 862 const char *name; 863 bool needvalues; 864 } hijackparse[] = { 865 { sockparser, "socket", true }, 866 { pathparser, "path", true }, 867 { blanketparser, "blanket", true }, 868 { vfsparser, "vfs", true }, 869 { sysctlparser, "sysctl", false }, 870 { modctlparser, "modctl", false }, 871 { fdoffparser, "fdoff", true }, 872 { NULL, NULL, false }, 873 }; 874 875 static void 876 parsehijack(char *hijack) 877 { 878 char *p, *p2, *l; 879 const char *hijackcopy; 880 bool nop2; 881 int i; 882 883 if ((hijackcopy = strdup(hijack)) == NULL) 884 err(1, "strdup"); 885 886 /* disable everything explicitly */ 887 for (i = 0; i < PF_MAX; i++) 888 rumpsockets[i] = false; 889 890 for (p = strtok_r(hijack, ",", &l); p; p = strtok_r(NULL, ",", &l)) { 891 nop2 = false; 892 p2 = strchr(p, '='); 893 if (!p2) { 894 nop2 = true; 895 p2 = p + strlen(p); 896 } 897 898 for (i = 0; hijackparse[i].parsefn; i++) { 899 if (strncmp(hijackparse[i].name, p, 900 (size_t)(p2-p)) == 0) { 901 if (nop2 && hijackparse[i].needvalues) 902 errx(1, "invalid hijack specifier: %s", 903 hijackcopy); 904 hijackparse[i].parsefn(nop2 ? NULL : p2+1); 905 break; 906 } 907 } 908 909 if (hijackparse[i].parsefn == NULL) 910 errx(1, "invalid hijack specifier name in %s", p); 911 } 912 913 } 914 915 static void __attribute__((constructor)) 916 rcinit(void) 917 { 918 char buf[1024]; 919 unsigned i, j; 920 921 host_fork = dlsym(RTLD_NEXT, "fork"); 922 host_daemon = dlsym(RTLD_NEXT, "daemon"); 923 if (host_mmap == NULL) 924 host_mmap = dlsym(RTLD_NEXT, "mmap"); 925 926 /* 927 * In theory cannot print anything during lookups because 928 * we might not have the call vector set up. so, the errx() 929 * is a bit of a strech, but it might work. 930 */ 931 932 for (i = 0; i < DUALCALL__NUM; i++) { 933 /* build runtime O(1) access */ 934 for (j = 0; j < __arraycount(syscnames); j++) { 935 if (syscnames[j].scm_callnum == i) 936 break; 937 } 938 939 if (j == __arraycount(syscnames)) 940 errx(1, "rumphijack error: syscall pos %d missing", i); 941 942 syscalls[i].bs_host = dlsym(RTLD_NEXT, 943 syscnames[j].scm_hostname); 944 if (syscalls[i].bs_host == NULL) 945 errx(1, "hostcall %s not found!", 946 syscnames[j].scm_hostname); 947 948 syscalls[i].bs_rump = dlsym(RTLD_NEXT, 949 syscnames[j].scm_rumpname); 950 if (syscalls[i].bs_rump == NULL) 951 errx(1, "rumpcall %s not found!", 952 syscnames[j].scm_rumpname); 953 } 954 955 if (rumpclient_init() == -1) 956 err(1, "rumpclient init"); 957 958 /* check which syscalls we're supposed to hijack */ 959 if (getenv_r("RUMPHIJACK", buf, sizeof(buf)) == -1) { 960 strcpy(buf, RUMPHIJACK_DEFAULT); 961 } 962 parsehijack(buf); 963 964 /* set client persistence level */ 965 if (getenv_r("RUMPHIJACK_RETRYCONNECT", buf, sizeof(buf)) != -1) { 966 if (strcmp(buf, "die") == 0) 967 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_DIE); 968 else if (strcmp(buf, "inftime") == 0) 969 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME); 970 else if (strcmp(buf, "once") == 0) 971 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_ONCE); 972 else { 973 time_t timeout; 974 char *ep; 975 976 timeout = (time_t)strtoll(buf, &ep, 10); 977 if (timeout <= 0 || ep != buf + strlen(buf)) 978 errx(1, "RUMPHIJACK_RETRYCONNECT must be " 979 "keyword or integer, got: %s", buf); 980 981 rumpclient_setconnretry(timeout); 982 } 983 } 984 985 if (getenv_r("RUMPHIJACK__DUP2INFO", buf, sizeof(buf)) == 0) { 986 if (sscanf(buf, "%u,%u,%u", 987 &dup2vec[0], &dup2vec[1], &dup2vec[2]) != 3) { 988 warnx("invalid dup2mask: %s", buf); 989 memset(dup2vec, 0, sizeof(dup2vec)); 990 } 991 unsetenv("RUMPHIJACK__DUP2INFO"); 992 } 993 if (getenv_r("RUMPHIJACK__PWDINRUMP", buf, sizeof(buf)) == 0) { 994 pwdinrump = true; 995 unsetenv("RUMPHIJACK__PWDINRUMP"); 996 } 997 } 998 999 static int 1000 fd_rump2host(int fd) 1001 { 1002 1003 if (fd == -1) 1004 return fd; 1005 return fd + hijack_fdoff; 1006 } 1007 1008 static int 1009 fd_rump2host_withdup(int fd) 1010 { 1011 int hfd; 1012 1013 _DIAGASSERT(fd != -1); 1014 hfd = unmapdup2(fd); 1015 if (hfd != -1) { 1016 _DIAGASSERT(hfd <= DUP2HIGH); 1017 return hfd; 1018 } 1019 return fd_rump2host(fd); 1020 } 1021 1022 static int 1023 fd_host2rump(int fd) 1024 { 1025 if (!isdup2d(fd)) 1026 return fd - hijack_fdoff; 1027 else 1028 return mapdup2(fd); 1029 } 1030 1031 static bool 1032 fd_isrump(int fd) 1033 { 1034 1035 return isdup2d(fd) || fd >= hijack_fdoff; 1036 } 1037 1038 #define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= hijack_fdoff) 1039 1040 static enum pathtype 1041 path_isrump(const char *path) 1042 { 1043 size_t plen; 1044 int i; 1045 1046 if (rumpprefix == NULL && nblanket == 0) 1047 return PATH_HOST; 1048 1049 if (*path == '/') { 1050 plen = strlen(path); 1051 if (rumpprefix && plen >= rumpprefixlen) { 1052 if (strncmp(path, rumpprefix, rumpprefixlen) == 0 1053 && (plen == rumpprefixlen 1054 || *(path + rumpprefixlen) == '/')) { 1055 return PATH_RUMP; 1056 } 1057 } 1058 for (i = 0; i < nblanket; i++) { 1059 if (strncmp(path, blanket[i].pfx, blanket[i].len) == 0) 1060 return PATH_RUMPBLANKET; 1061 } 1062 1063 return PATH_HOST; 1064 } else { 1065 return pwdinrump ? PATH_RUMP : PATH_HOST; 1066 } 1067 } 1068 1069 static const char *rootpath = "/"; 1070 static const char * 1071 path_host2rump(const char *path) 1072 { 1073 const char *rv; 1074 1075 if (*path == '/') { 1076 rv = path + rumpprefixlen; 1077 if (*rv == '\0') 1078 rv = rootpath; 1079 } else { 1080 rv = path; 1081 } 1082 1083 return rv; 1084 } 1085 1086 static int 1087 dodup(int oldd, int minfd) 1088 { 1089 int (*op_fcntl)(int, int, ...); 1090 int newd; 1091 int isrump; 1092 1093 DPRINTF(("dup -> %d (minfd %d)\n", oldd, minfd)); 1094 if (fd_isrump(oldd)) { 1095 op_fcntl = GETSYSCALL(rump, FCNTL); 1096 oldd = fd_host2rump(oldd); 1097 if (minfd >= hijack_fdoff) 1098 minfd -= hijack_fdoff; 1099 isrump = 1; 1100 } else { 1101 if (minfd >= hijack_fdoff) { 1102 errno = EINVAL; 1103 return -1; 1104 } 1105 op_fcntl = GETSYSCALL(host, FCNTL); 1106 isrump = 0; 1107 } 1108 1109 newd = op_fcntl(oldd, F_DUPFD, minfd); 1110 1111 if (isrump) 1112 newd = fd_rump2host(newd); 1113 DPRINTF(("dup <- %d\n", newd)); 1114 1115 return newd; 1116 } 1117 1118 /* 1119 * Check that host fd value does not exceed fdoffset and if necessary 1120 * dup the file descriptor so that it doesn't collide with the dup2mask. 1121 */ 1122 static int 1123 fd_host2host(int fd) 1124 { 1125 int (*op_fcntl)(int, int, ...) = GETSYSCALL(host, FCNTL); 1126 int (*op_close)(int) = GETSYSCALL(host, CLOSE); 1127 int ofd, i; 1128 1129 if (fd >= hijack_fdoff) { 1130 op_close(fd); 1131 errno = ENFILE; 1132 return -1; 1133 } 1134 1135 for (i = 1; isdup2d(fd); i++) { 1136 ofd = fd; 1137 fd = op_fcntl(ofd, F_DUPFD, i); 1138 op_close(ofd); 1139 } 1140 1141 return fd; 1142 } 1143 1144 int 1145 open(const char *path, int flags, ...) 1146 { 1147 int (*op_open)(const char *, int, ...); 1148 bool isrump; 1149 va_list ap; 1150 enum pathtype pt; 1151 int fd; 1152 1153 DPRINTF(("open -> %s (%s)\n", path, whichpath(path))); 1154 1155 if ((pt = path_isrump(path)) != PATH_HOST) { 1156 if (pt == PATH_RUMP) 1157 path = path_host2rump(path); 1158 op_open = GETSYSCALL(rump, OPEN); 1159 isrump = true; 1160 } else { 1161 op_open = GETSYSCALL(host, OPEN); 1162 isrump = false; 1163 } 1164 1165 va_start(ap, flags); 1166 fd = op_open(path, flags, va_arg(ap, mode_t)); 1167 va_end(ap); 1168 1169 if (isrump) 1170 fd = fd_rump2host(fd); 1171 else 1172 fd = fd_host2host(fd); 1173 1174 DPRINTF(("open <- %d (%s)\n", fd, whichfd(fd))); 1175 return fd; 1176 } 1177 1178 int 1179 chdir(const char *path) 1180 { 1181 int (*op_chdir)(const char *); 1182 enum pathtype pt; 1183 int rv; 1184 1185 if ((pt = path_isrump(path)) != PATH_HOST) { 1186 op_chdir = GETSYSCALL(rump, CHDIR); 1187 if (pt == PATH_RUMP) 1188 path = path_host2rump(path); 1189 } else { 1190 op_chdir = GETSYSCALL(host, CHDIR); 1191 } 1192 1193 rv = op_chdir(path); 1194 if (rv == 0) 1195 pwdinrump = pt != PATH_HOST; 1196 1197 return rv; 1198 } 1199 1200 int 1201 fchdir(int fd) 1202 { 1203 int (*op_fchdir)(int); 1204 bool isrump; 1205 int rv; 1206 1207 if (fd_isrump(fd)) { 1208 op_fchdir = GETSYSCALL(rump, FCHDIR); 1209 isrump = true; 1210 fd = fd_host2rump(fd); 1211 } else { 1212 op_fchdir = GETSYSCALL(host, FCHDIR); 1213 isrump = false; 1214 } 1215 1216 rv = op_fchdir(fd); 1217 if (rv == 0) { 1218 pwdinrump = isrump; 1219 } 1220 1221 return rv; 1222 } 1223 1224 #ifndef __linux__ 1225 int 1226 __getcwd(char *bufp, size_t len) 1227 { 1228 int (*op___getcwd)(char *, size_t); 1229 size_t prefixgap; 1230 bool iamslash; 1231 int rv; 1232 1233 if (pwdinrump && rumpprefix) { 1234 if (rumpprefix[rumpprefixlen-1] == '/') 1235 iamslash = true; 1236 else 1237 iamslash = false; 1238 1239 if (iamslash) 1240 prefixgap = rumpprefixlen - 1; /* ``//+path'' */ 1241 else 1242 prefixgap = rumpprefixlen; /* ``/pfx+/path'' */ 1243 if (len <= prefixgap) { 1244 errno = ERANGE; 1245 return -1; 1246 } 1247 1248 op___getcwd = GETSYSCALL(rump, __GETCWD); 1249 rv = op___getcwd(bufp + prefixgap, len - prefixgap); 1250 if (rv == -1) 1251 return rv; 1252 1253 /* augment the "/" part only for a non-root path */ 1254 memcpy(bufp, rumpprefix, rumpprefixlen); 1255 1256 /* append / only to non-root cwd */ 1257 if (rv != 2) 1258 bufp[prefixgap] = '/'; 1259 1260 /* don't append extra slash in the purely-slash case */ 1261 if (rv == 2 && !iamslash) 1262 bufp[rumpprefixlen] = '\0'; 1263 } else if (pwdinrump) { 1264 /* assume blanket. we can't provide a prefix here */ 1265 op___getcwd = GETSYSCALL(rump, __GETCWD); 1266 rv = op___getcwd(bufp, len); 1267 } else { 1268 op___getcwd = GETSYSCALL(host, __GETCWD); 1269 rv = op___getcwd(bufp, len); 1270 } 1271 1272 return rv; 1273 } 1274 #endif 1275 1276 static int 1277 moveish(const char *from, const char *to, 1278 int (*rump_op)(const char *, const char *), 1279 int (*host_op)(const char *, const char *)) 1280 { 1281 int (*op)(const char *, const char *); 1282 enum pathtype ptf, ptt; 1283 1284 if ((ptf = path_isrump(from)) != PATH_HOST) { 1285 if ((ptt = path_isrump(to)) == PATH_HOST) { 1286 errno = EXDEV; 1287 return -1; 1288 } 1289 1290 if (ptf == PATH_RUMP) 1291 from = path_host2rump(from); 1292 if (ptt == PATH_RUMP) 1293 to = path_host2rump(to); 1294 op = rump_op; 1295 } else { 1296 if (path_isrump(to) != PATH_HOST) { 1297 errno = EXDEV; 1298 return -1; 1299 } 1300 1301 op = host_op; 1302 } 1303 1304 return op(from, to); 1305 } 1306 1307 #ifdef __NetBSD__ 1308 int 1309 linkat(int fromfd, const char *from, int tofd, const char *to, int flags) 1310 { 1311 if (fromfd != AT_FDCWD || tofd != AT_FDCWD 1312 || flags != AT_SYMLINK_FOLLOW) 1313 return ENOSYS; 1314 1315 return moveish(from, to, 1316 GETSYSCALL(rump, LINK), GETSYSCALL(host, LINK)); 1317 } 1318 #endif 1319 1320 int 1321 link(const char *from, const char *to) 1322 { 1323 return moveish(from, to, 1324 GETSYSCALL(rump, LINK), GETSYSCALL(host, LINK)); 1325 } 1326 1327 int 1328 rename(const char *from, const char *to) 1329 { 1330 return moveish(from, to, 1331 GETSYSCALL(rump, RENAME), GETSYSCALL(host, RENAME)); 1332 } 1333 1334 int 1335 REALSOCKET(int domain, int type, int protocol) 1336 { 1337 int (*op_socket)(int, int, int); 1338 int fd; 1339 bool isrump; 1340 1341 isrump = domain < PF_MAX && rumpsockets[domain]; 1342 1343 if (isrump) 1344 op_socket = GETSYSCALL(rump, SOCKET); 1345 else 1346 op_socket = GETSYSCALL(host, SOCKET); 1347 fd = op_socket(domain, type, protocol); 1348 1349 if (isrump) 1350 fd = fd_rump2host(fd); 1351 else 1352 fd = fd_host2host(fd); 1353 DPRINTF(("socket <- %d\n", fd)); 1354 1355 return fd; 1356 } 1357 1358 int 1359 accept(int s, struct sockaddr *addr, socklen_t *addrlen) 1360 { 1361 int (*op_accept)(int, struct sockaddr *, socklen_t *); 1362 int fd; 1363 bool isrump; 1364 1365 isrump = fd_isrump(s); 1366 1367 DPRINTF(("accept -> %d", s)); 1368 if (isrump) { 1369 op_accept = GETSYSCALL(rump, ACCEPT); 1370 s = fd_host2rump(s); 1371 } else { 1372 op_accept = GETSYSCALL(host, ACCEPT); 1373 } 1374 fd = op_accept(s, addr, addrlen); 1375 if (fd != -1 && isrump) 1376 fd = fd_rump2host(fd); 1377 else 1378 fd = fd_host2host(fd); 1379 1380 DPRINTF((" <- %d\n", fd)); 1381 1382 return fd; 1383 } 1384 1385 int 1386 paccept(int s, struct sockaddr *addr, socklen_t *addrlen, 1387 const sigset_t * restrict sigmask, int flags) 1388 { 1389 int (*op_paccept)(int, struct sockaddr *, socklen_t *, 1390 const sigset_t * restrict, int); 1391 int fd; 1392 bool isrump; 1393 1394 isrump = fd_isrump(s); 1395 1396 DPRINTF(("paccept -> %d", s)); 1397 if (isrump) { 1398 op_paccept = GETSYSCALL(rump, PACCEPT); 1399 s = fd_host2rump(s); 1400 } else { 1401 op_paccept = GETSYSCALL(host, PACCEPT); 1402 } 1403 fd = op_paccept(s, addr, addrlen, sigmask, flags); 1404 if (fd != -1 && isrump) 1405 fd = fd_rump2host(fd); 1406 else 1407 fd = fd_host2host(fd); 1408 1409 DPRINTF((" <- %d\n", fd)); 1410 1411 return fd; 1412 } 1413 1414 /* 1415 * ioctl() and fcntl() are varargs calls and need special treatment. 1416 */ 1417 1418 /* 1419 * Various [Linux] libc's have various signatures for ioctl so we 1420 * need to handle the discrepancies. On NetBSD, we use the 1421 * one with unsigned long cmd. 1422 */ 1423 int 1424 #ifdef HAVE_IOCTL_CMD_INT 1425 ioctl(int fd, int cmd, ...) 1426 { 1427 int (*op_ioctl)(int, int cmd, ...); 1428 #else 1429 ioctl(int fd, unsigned long cmd, ...) 1430 { 1431 int (*op_ioctl)(int, unsigned long cmd, ...); 1432 #endif 1433 va_list ap; 1434 int rv; 1435 1436 DPRINTF(("ioctl -> %d (%s)\n", fd, whichfd(fd))); 1437 if (fd_isrump(fd)) { 1438 fd = fd_host2rump(fd); 1439 op_ioctl = GETSYSCALL(rump, IOCTL); 1440 } else { 1441 op_ioctl = GETSYSCALL(host, IOCTL); 1442 } 1443 1444 va_start(ap, cmd); 1445 rv = op_ioctl(fd, cmd, va_arg(ap, void *)); 1446 va_end(ap); 1447 DPRINTF(("ioctl <- %d\n", rv)); 1448 return rv; 1449 } 1450 1451 int 1452 fcntl(int fd, int cmd, ...) 1453 { 1454 int (*op_fcntl)(int, int, ...); 1455 va_list ap; 1456 int rv, minfd; 1457 1458 DPRINTF(("fcntl -> %d (cmd %d)\n", fd, cmd)); 1459 1460 switch (cmd) { 1461 case F_DUPFD_CLOEXEC: /* Ignore CLOEXEC bit for now */ 1462 case F_DUPFD: 1463 va_start(ap, cmd); 1464 minfd = va_arg(ap, int); 1465 va_end(ap); 1466 return dodup(fd, minfd); 1467 1468 #ifdef F_CLOSEM 1469 case F_CLOSEM: { 1470 int maxdup2, i; 1471 1472 /* 1473 * So, if fd < HIJACKOFF, we want to do a host closem. 1474 */ 1475 1476 if (fd < hijack_fdoff) { 1477 int closemfd = fd; 1478 1479 if (rumpclient__closenotify(&closemfd, 1480 RUMPCLIENT_CLOSE_FCLOSEM) == -1) 1481 return -1; 1482 op_fcntl = GETSYSCALL(host, FCNTL); 1483 rv = op_fcntl(closemfd, cmd); 1484 if (rv) 1485 return rv; 1486 } 1487 1488 /* 1489 * Additionally, we want to do a rump closem, but only 1490 * for the file descriptors not dup2'd. 1491 */ 1492 1493 for (i = 0, maxdup2 = -1; i <= DUP2HIGH; i++) { 1494 if (dup2vec[i] & DUP2BIT) { 1495 int val; 1496 1497 val = dup2vec[i] & DUP2FDMASK; 1498 maxdup2 = MAX(val, maxdup2); 1499 } 1500 } 1501 1502 if (fd >= hijack_fdoff) 1503 fd -= hijack_fdoff; 1504 else 1505 fd = 0; 1506 fd = MAX(maxdup2+1, fd); 1507 1508 /* hmm, maybe we should close rump fd's not within dup2mask? */ 1509 return rump_sys_fcntl(fd, F_CLOSEM); 1510 } 1511 #endif /* F_CLOSEM */ 1512 1513 #ifdef F_MAXFD 1514 case F_MAXFD: 1515 /* 1516 * For maxfd, if there's a rump kernel fd, return 1517 * it hostified. Otherwise, return host's MAXFD 1518 * return value. 1519 */ 1520 if ((rv = rump_sys_fcntl(fd, F_MAXFD)) != -1) { 1521 /* 1522 * This might go a little wrong in case 1523 * of dup2 to [012], but I'm not sure if 1524 * there's a justification for tracking 1525 * that info. Consider e.g. 1526 * dup2(rumpfd, 2) followed by rump_sys_open() 1527 * returning 1. We should return 1+HIJACKOFF, 1528 * not 2+HIJACKOFF. However, if [01] is not 1529 * open, the correct return value is 2. 1530 */ 1531 return fd_rump2host(fd); 1532 } else { 1533 op_fcntl = GETSYSCALL(host, FCNTL); 1534 return op_fcntl(fd, F_MAXFD); 1535 } 1536 /*NOTREACHED*/ 1537 #endif /* F_MAXFD */ 1538 1539 default: 1540 if (fd_isrump(fd)) { 1541 fd = fd_host2rump(fd); 1542 op_fcntl = GETSYSCALL(rump, FCNTL); 1543 } else { 1544 op_fcntl = GETSYSCALL(host, FCNTL); 1545 } 1546 1547 va_start(ap, cmd); 1548 rv = op_fcntl(fd, cmd, va_arg(ap, void *)); 1549 va_end(ap); 1550 return rv; 1551 } 1552 /*NOTREACHED*/ 1553 } 1554 1555 int 1556 close(int fd) 1557 { 1558 int (*op_close)(int); 1559 int rv; 1560 1561 DPRINTF(("close -> %d\n", fd)); 1562 if (fd_isrump(fd)) { 1563 bool undup2 = false; 1564 int ofd; 1565 1566 if (isdup2d(ofd = fd)) { 1567 undup2 = true; 1568 } 1569 1570 fd = fd_host2rump(fd); 1571 if (!undup2 && killdup2alias(fd)) { 1572 return 0; 1573 } 1574 1575 op_close = GETSYSCALL(rump, CLOSE); 1576 rv = op_close(fd); 1577 if (rv == 0 && undup2) { 1578 clrdup2(ofd); 1579 } 1580 } else { 1581 if (rumpclient__closenotify(&fd, RUMPCLIENT_CLOSE_CLOSE) == -1) 1582 return -1; 1583 op_close = GETSYSCALL(host, CLOSE); 1584 rv = op_close(fd); 1585 } 1586 1587 return rv; 1588 } 1589 1590 /* 1591 * write cannot issue a standard debug printf due to recursion 1592 */ 1593 ssize_t 1594 write(int fd, const void *buf, size_t blen) 1595 { 1596 ssize_t (*op_write)(int, const void *, size_t); 1597 1598 if (fd_isrump(fd)) { 1599 fd = fd_host2rump(fd); 1600 op_write = GETSYSCALL(rump, WRITE); 1601 } else { 1602 op_write = GETSYSCALL(host, WRITE); 1603 } 1604 1605 return op_write(fd, buf, blen); 1606 } 1607 1608 /* 1609 * file descriptor passing 1610 * 1611 * we intercept sendmsg and recvmsg to convert file descriptors in 1612 * control messages. an attempt to send a descriptor from a different kernel 1613 * is rejected. (ENOTSUP) 1614 */ 1615 1616 static int 1617 _msg_convert_fds(struct msghdr *msg, int (*func)(int), bool dryrun) 1618 { 1619 struct cmsghdr *cmsg; 1620 1621 for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; 1622 cmsg = CMSG_NXTHDR(msg, cmsg)) { 1623 if (cmsg->cmsg_level == SOL_SOCKET && 1624 cmsg->cmsg_type == SCM_RIGHTS) { 1625 int *fdp = (void *)CMSG_DATA(cmsg); 1626 const size_t size = 1627 cmsg->cmsg_len - __CMSG_ALIGN(sizeof(*cmsg)); 1628 const int nfds = (int)(size / sizeof(int)); 1629 const int * const efdp = fdp + nfds; 1630 1631 while (fdp < efdp) { 1632 const int newval = func(*fdp); 1633 1634 if (newval < 0) { 1635 return ENOTSUP; 1636 } 1637 if (!dryrun) 1638 *fdp = newval; 1639 fdp++; 1640 } 1641 } 1642 } 1643 return 0; 1644 } 1645 1646 static int 1647 msg_convert_fds(struct msghdr *msg, int (*func)(int)) 1648 { 1649 1650 return _msg_convert_fds(msg, func, false); 1651 } 1652 1653 static int 1654 msg_check_fds(struct msghdr *msg, int (*func)(int)) 1655 { 1656 1657 return _msg_convert_fds(msg, func, true); 1658 } 1659 1660 ssize_t 1661 recvmsg(int fd, struct msghdr *msg, int flags) 1662 { 1663 ssize_t (*op_recvmsg)(int, struct msghdr *, int); 1664 ssize_t ret; 1665 const bool isrump = fd_isrump(fd); 1666 1667 if (isrump) { 1668 fd = fd_host2rump(fd); 1669 op_recvmsg = GETSYSCALL(rump, RECVMSG); 1670 } else { 1671 op_recvmsg = GETSYSCALL(host, RECVMSG); 1672 } 1673 ret = op_recvmsg(fd, msg, flags); 1674 if (ret == -1) { 1675 return ret; 1676 } 1677 /* 1678 * convert descriptors in the message. 1679 */ 1680 if (isrump) { 1681 msg_convert_fds(msg, fd_rump2host); 1682 } else { 1683 msg_convert_fds(msg, fd_host2host); 1684 } 1685 return ret; 1686 } 1687 1688 ssize_t 1689 recv(int fd, void *buf, size_t len, int flags) 1690 { 1691 1692 return recvfrom(fd, buf, len, flags, NULL, NULL); 1693 } 1694 1695 ssize_t 1696 send(int fd, const void *buf, size_t len, int flags) 1697 { 1698 1699 return sendto(fd, buf, len, flags, NULL, 0); 1700 } 1701 1702 static int 1703 fd_check_rump(int fd) 1704 { 1705 1706 return fd_isrump(fd) ? 0 : -1; 1707 } 1708 1709 static int 1710 fd_check_host(int fd) 1711 { 1712 1713 return !fd_isrump(fd) ? 0 : -1; 1714 } 1715 1716 ssize_t 1717 sendmsg(int fd, const struct msghdr *msg, int flags) 1718 { 1719 ssize_t (*op_sendmsg)(int, const struct msghdr *, int); 1720 const bool isrump = fd_isrump(fd); 1721 int error; 1722 1723 /* 1724 * reject descriptors from a different kernel. 1725 */ 1726 error = msg_check_fds(__UNCONST(msg), 1727 isrump ? fd_check_rump: fd_check_host); 1728 if (error != 0) { 1729 errno = error; 1730 return -1; 1731 } 1732 /* 1733 * convert descriptors in the message to raw values. 1734 */ 1735 if (isrump) { 1736 fd = fd_host2rump(fd); 1737 /* 1738 * XXX we directly modify the given message assuming: 1739 * - cmsg is writable (typically on caller's stack) 1740 * - caller don't care cmsg's contents after calling sendmsg. 1741 * (thus no need to restore values) 1742 * 1743 * it's safer to copy and modify instead. 1744 */ 1745 msg_convert_fds(__UNCONST(msg), fd_host2rump); 1746 op_sendmsg = GETSYSCALL(rump, SENDMSG); 1747 } else { 1748 op_sendmsg = GETSYSCALL(host, SENDMSG); 1749 } 1750 return op_sendmsg(fd, msg, flags); 1751 } 1752 1753 /* 1754 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since 1755 * many programs do that. dup2 of a rump kernel fd to another value 1756 * not >= fdoff is an error. 1757 * 1758 * Note: cannot rump2host newd, because it is often hardcoded. 1759 */ 1760 int 1761 dup2(int oldd, int newd) 1762 { 1763 int (*host_dup2)(int, int); 1764 int rv; 1765 1766 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd)); 1767 1768 if (fd_isrump(oldd)) { 1769 int (*op_close)(int) = GETSYSCALL(host, CLOSE); 1770 1771 /* only allow fd 0-2 for cross-kernel dup */ 1772 if (!(newd >= 0 && newd <= 2 && !fd_isrump(newd))) { 1773 errno = EBADF; 1774 return -1; 1775 } 1776 1777 /* regular dup2? */ 1778 if (fd_isrump(newd)) { 1779 newd = fd_host2rump(newd); 1780 rv = rump_sys_dup2(oldd, newd); 1781 return fd_rump2host(rv); 1782 } 1783 1784 /* 1785 * dup2 rump => host? just establish an 1786 * entry in the mapping table. 1787 */ 1788 op_close(newd); 1789 setdup2(newd, fd_host2rump(oldd)); 1790 rv = 0; 1791 } else { 1792 host_dup2 = syscalls[DUALCALL_DUP2].bs_host; 1793 if (rumpclient__closenotify(&newd, RUMPCLIENT_CLOSE_DUP2) == -1) 1794 return -1; 1795 rv = host_dup2(oldd, newd); 1796 } 1797 1798 return rv; 1799 } 1800 1801 int 1802 dup(int oldd) 1803 { 1804 1805 return dodup(oldd, 0); 1806 } 1807 1808 pid_t 1809 fork(void) 1810 { 1811 pid_t rv; 1812 1813 DPRINTF(("fork\n")); 1814 1815 rv = rumpclient__dofork(host_fork); 1816 1817 DPRINTF(("fork returns %d\n", rv)); 1818 return rv; 1819 } 1820 #ifdef VFORK 1821 /* we do not have the luxury of not requiring a stackframe */ 1822 #define __strong_alias_macro(m, f) __strong_alias(m, f) 1823 __strong_alias_macro(VFORK,fork); 1824 #endif 1825 1826 int 1827 daemon(int nochdir, int noclose) 1828 { 1829 struct rumpclient_fork *rf; 1830 1831 if ((rf = rumpclient_prefork()) == NULL) 1832 return -1; 1833 1834 if (host_daemon(nochdir, noclose) == -1) 1835 return -1; 1836 1837 if (rumpclient_fork_init(rf) == -1) 1838 return -1; 1839 1840 return 0; 1841 } 1842 1843 int 1844 execve(const char *path, char *const argv[], char *const envp[]) 1845 { 1846 char buf[128]; 1847 char *dup2str; 1848 const char *pwdinrumpstr; 1849 char **newenv; 1850 size_t nelem; 1851 int rv, sverrno; 1852 int bonus = 2, i = 0; 1853 1854 snprintf(buf, sizeof(buf), "RUMPHIJACK__DUP2INFO=%u,%u,%u", 1855 dup2vec[0], dup2vec[1], dup2vec[2]); 1856 dup2str = strdup(buf); 1857 if (dup2str == NULL) { 1858 errno = ENOMEM; 1859 return -1; 1860 } 1861 1862 if (pwdinrump) { 1863 pwdinrumpstr = "RUMPHIJACK__PWDINRUMP=true"; 1864 bonus++; 1865 } else { 1866 pwdinrumpstr = NULL; 1867 } 1868 1869 for (nelem = 0; envp && envp[nelem]; nelem++) 1870 continue; 1871 newenv = malloc(sizeof(*newenv) * (nelem+bonus)); 1872 if (newenv == NULL) { 1873 free(dup2str); 1874 errno = ENOMEM; 1875 return -1; 1876 } 1877 memcpy(newenv, envp, nelem*sizeof(*newenv)); 1878 newenv[nelem+i] = dup2str; 1879 i++; 1880 1881 if (pwdinrumpstr) { 1882 newenv[nelem+i] = __UNCONST(pwdinrumpstr); 1883 i++; 1884 } 1885 newenv[nelem+i] = NULL; 1886 _DIAGASSERT(i < bonus); 1887 1888 rv = rumpclient_exec(path, argv, newenv); 1889 1890 _DIAGASSERT(rv != 0); 1891 sverrno = errno; 1892 free(newenv); 1893 free(dup2str); 1894 errno = sverrno; 1895 return rv; 1896 } 1897 1898 /* 1899 * select is done by calling poll. 1900 */ 1901 int 1902 REALSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 1903 struct timeval *timeout) 1904 { 1905 struct pollfd *pfds; 1906 struct timespec ts, *tsp = NULL; 1907 nfds_t realnfds; 1908 int i, j; 1909 int rv, incr; 1910 1911 DPRINTF(("select %d %p %p %p %p\n", nfds, 1912 readfds, writefds, exceptfds, timeout)); 1913 1914 /* 1915 * Well, first we must scan the fds to figure out how many 1916 * fds there really are. This is because up to and including 1917 * nb5 poll() silently refuses nfds > process_maxopen_fds. 1918 * Seems to be fixed in current, thank the maker. 1919 * god damn cluster...bomb. 1920 */ 1921 1922 for (i = 0, realnfds = 0; i < nfds; i++) { 1923 if (readfds && FD_ISSET(i, readfds)) { 1924 realnfds++; 1925 continue; 1926 } 1927 if (writefds && FD_ISSET(i, writefds)) { 1928 realnfds++; 1929 continue; 1930 } 1931 if (exceptfds && FD_ISSET(i, exceptfds)) { 1932 realnfds++; 1933 continue; 1934 } 1935 } 1936 1937 if (realnfds) { 1938 pfds = calloc(realnfds, sizeof(*pfds)); 1939 if (!pfds) 1940 return -1; 1941 } else { 1942 pfds = NULL; 1943 } 1944 1945 for (i = 0, j = 0; i < nfds; i++) { 1946 incr = 0; 1947 if (readfds && FD_ISSET(i, readfds)) { 1948 pfds[j].fd = i; 1949 pfds[j].events |= POLLIN; 1950 incr=1; 1951 } 1952 if (writefds && FD_ISSET(i, writefds)) { 1953 pfds[j].fd = i; 1954 pfds[j].events |= POLLOUT; 1955 incr=1; 1956 } 1957 if (exceptfds && FD_ISSET(i, exceptfds)) { 1958 pfds[j].fd = i; 1959 pfds[j].events |= POLLHUP|POLLERR; 1960 incr=1; 1961 } 1962 if (incr) 1963 j++; 1964 } 1965 assert(j == (int)realnfds); 1966 1967 if (timeout) { 1968 TIMEVAL_TO_TIMESPEC(timeout, &ts); 1969 tsp = &ts; 1970 } 1971 rv = REALPOLLTS(pfds, realnfds, tsp, NULL); 1972 /* 1973 * "If select() returns with an error the descriptor sets 1974 * will be unmodified" 1975 */ 1976 if (rv < 0) 1977 goto out; 1978 1979 /* 1980 * zero out results (can't use FD_ZERO for the 1981 * obvious select-me-not reason). whee. 1982 * 1983 * We do this here since some software ignores the return 1984 * value of select, and hence if the timeout expires, it may 1985 * assume all input descriptors have activity. 1986 */ 1987 for (i = 0; i < nfds; i++) { 1988 if (readfds) 1989 FD_CLR(i, readfds); 1990 if (writefds) 1991 FD_CLR(i, writefds); 1992 if (exceptfds) 1993 FD_CLR(i, exceptfds); 1994 } 1995 if (rv == 0) 1996 goto out; 1997 1998 /* 1999 * We have >0 fds with activity. Harvest the results. 2000 */ 2001 for (i = 0; i < (int)realnfds; i++) { 2002 if (readfds) { 2003 if (pfds[i].revents & POLLIN) { 2004 FD_SET(pfds[i].fd, readfds); 2005 } 2006 } 2007 if (writefds) { 2008 if (pfds[i].revents & POLLOUT) { 2009 FD_SET(pfds[i].fd, writefds); 2010 } 2011 } 2012 if (exceptfds) { 2013 if (pfds[i].revents & (POLLHUP|POLLERR)) { 2014 FD_SET(pfds[i].fd, exceptfds); 2015 } 2016 } 2017 } 2018 2019 out: 2020 free(pfds); 2021 return rv; 2022 } 2023 2024 static void 2025 checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall) 2026 { 2027 nfds_t i; 2028 2029 for (i = 0; i < nfds; i++) { 2030 if (fds[i].fd == -1) 2031 continue; 2032 2033 if (fd_isrump(fds[i].fd)) 2034 (*rumpcall)++; 2035 else 2036 (*hostcall)++; 2037 } 2038 } 2039 2040 static void 2041 adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int)) 2042 { 2043 nfds_t i; 2044 2045 for (i = 0; i < nfds; i++) { 2046 fds[i].fd = fdadj(fds[i].fd); 2047 } 2048 } 2049 2050 /* 2051 * poll is easy as long as the call comes in the fds only in one 2052 * kernel. otherwise its quite tricky... 2053 */ 2054 struct pollarg { 2055 struct pollfd *pfds; 2056 nfds_t nfds; 2057 const struct timespec *ts; 2058 const sigset_t *sigmask; 2059 int pipefd; 2060 int errnum; 2061 }; 2062 2063 static void * 2064 hostpoll(void *arg) 2065 { 2066 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *, 2067 const sigset_t *); 2068 struct pollarg *parg = arg; 2069 intptr_t rv; 2070 2071 op_pollts = GETSYSCALL(host, POLLTS); 2072 rv = op_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask); 2073 if (rv == -1) 2074 parg->errnum = errno; 2075 rump_sys_write(parg->pipefd, &rv, sizeof(rv)); 2076 2077 return (void *)rv; 2078 } 2079 2080 int 2081 REALPOLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts, 2082 const sigset_t *sigmask) 2083 { 2084 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *, 2085 const sigset_t *); 2086 int (*host_close)(int); 2087 int hostcall = 0, rumpcall = 0; 2088 pthread_t pt; 2089 nfds_t i; 2090 int rv; 2091 2092 DPRINTF(("poll %p %d %p %p\n", fds, (int)nfds, ts, sigmask)); 2093 checkpoll(fds, nfds, &hostcall, &rumpcall); 2094 2095 if (hostcall && rumpcall) { 2096 struct pollfd *pfd_host = NULL, *pfd_rump = NULL; 2097 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1}; 2098 struct pollarg parg; 2099 void *trv_val; 2100 int sverrno = 0, rv_rump, rv_host, errno_rump, errno_host; 2101 2102 /* 2103 * ok, this is where it gets tricky. We must support 2104 * this since it's a very common operation in certain 2105 * types of software (telnet, netcat, etc). We allocate 2106 * two vectors and run two poll commands in separate 2107 * threads. Whichever returns first "wins" and the 2108 * other kernel's fds won't show activity. 2109 */ 2110 rv = -1; 2111 2112 /* allocate full vector for O(n) joining after call */ 2113 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1)); 2114 if (!pfd_host) 2115 goto out; 2116 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1)); 2117 if (!pfd_rump) { 2118 goto out; 2119 } 2120 2121 /* 2122 * then, open two pipes, one for notifications 2123 * to each kernel. 2124 * 2125 * At least the rump pipe should probably be 2126 * cached, along with the helper threads. This 2127 * should give a microbenchmark improvement (haven't 2128 * experienced a macro-level problem yet, though). 2129 */ 2130 if ((rv = rump_sys_pipe(rpipe)) == -1) { 2131 sverrno = errno; 2132 } 2133 if (rv == 0 && (rv = pipe(hpipe)) == -1) { 2134 sverrno = errno; 2135 } 2136 2137 /* split vectors (or signal errors) */ 2138 for (i = 0; i < nfds; i++) { 2139 int fd; 2140 2141 fds[i].revents = 0; 2142 if (fds[i].fd == -1) { 2143 pfd_host[i].fd = -1; 2144 pfd_rump[i].fd = -1; 2145 } else if (fd_isrump(fds[i].fd)) { 2146 pfd_host[i].fd = -1; 2147 fd = fd_host2rump(fds[i].fd); 2148 if (fd == rpipe[0] || fd == rpipe[1]) { 2149 fds[i].revents = POLLNVAL; 2150 if (rv != -1) 2151 rv++; 2152 } 2153 pfd_rump[i].fd = fd; 2154 pfd_rump[i].events = fds[i].events; 2155 } else { 2156 pfd_rump[i].fd = -1; 2157 fd = fds[i].fd; 2158 if (fd == hpipe[0] || fd == hpipe[1]) { 2159 fds[i].revents = POLLNVAL; 2160 if (rv != -1) 2161 rv++; 2162 } 2163 pfd_host[i].fd = fd; 2164 pfd_host[i].events = fds[i].events; 2165 } 2166 pfd_rump[i].revents = pfd_host[i].revents = 0; 2167 } 2168 if (rv) { 2169 goto out; 2170 } 2171 2172 pfd_host[nfds].fd = hpipe[0]; 2173 pfd_host[nfds].events = POLLIN; 2174 pfd_rump[nfds].fd = rpipe[0]; 2175 pfd_rump[nfds].events = POLLIN; 2176 2177 /* 2178 * then, create a thread to do host part and meanwhile 2179 * do rump kernel part right here 2180 */ 2181 2182 parg.pfds = pfd_host; 2183 parg.nfds = nfds+1; 2184 parg.ts = ts; 2185 parg.sigmask = sigmask; 2186 parg.pipefd = rpipe[1]; 2187 pthread_create(&pt, NULL, hostpoll, &parg); 2188 2189 op_pollts = GETSYSCALL(rump, POLLTS); 2190 rv_rump = op_pollts(pfd_rump, nfds+1, ts, NULL); 2191 errno_rump = errno; 2192 write(hpipe[1], &rv, sizeof(rv)); 2193 pthread_join(pt, &trv_val); 2194 rv_host = (int)(intptr_t)trv_val; 2195 errno_host = parg.errnum; 2196 2197 /* strip cross-thread notification from real results */ 2198 if (rv_host > 0 && pfd_host[nfds].revents & POLLIN) { 2199 rv_host--; 2200 } 2201 if (rv_rump > 0 && pfd_rump[nfds].revents & POLLIN) { 2202 rv_rump--; 2203 } 2204 2205 /* then merge the results into what's reported to the caller */ 2206 if (rv_rump > 0 || rv_host > 0) { 2207 /* SUCCESS */ 2208 2209 rv = 0; 2210 if (rv_rump > 0) { 2211 for (i = 0; i < nfds; i++) { 2212 if (pfd_rump[i].fd != -1) 2213 fds[i].revents 2214 = pfd_rump[i].revents; 2215 } 2216 rv += rv_rump; 2217 } 2218 if (rv_host > 0) { 2219 for (i = 0; i < nfds; i++) { 2220 if (pfd_host[i].fd != -1) 2221 fds[i].revents 2222 = pfd_host[i].revents; 2223 } 2224 rv += rv_host; 2225 } 2226 assert(rv > 0); 2227 sverrno = 0; 2228 } else if (rv_rump == -1 || rv_host == -1) { 2229 /* ERROR */ 2230 2231 /* just pick one kernel at "random" */ 2232 rv = -1; 2233 if (rv_host == -1) { 2234 sverrno = errno_host; 2235 } else if (rv_rump == -1) { 2236 sverrno = errno_rump; 2237 } 2238 } else { 2239 /* TIMEOUT */ 2240 2241 rv = 0; 2242 assert(rv_rump == 0 && rv_host == 0); 2243 } 2244 2245 out: 2246 host_close = GETSYSCALL(host, CLOSE); 2247 if (rpipe[0] != -1) 2248 rump_sys_close(rpipe[0]); 2249 if (rpipe[1] != -1) 2250 rump_sys_close(rpipe[1]); 2251 if (hpipe[0] != -1) 2252 host_close(hpipe[0]); 2253 if (hpipe[1] != -1) 2254 host_close(hpipe[1]); 2255 free(pfd_host); 2256 free(pfd_rump); 2257 errno = sverrno; 2258 } else { 2259 if (hostcall) { 2260 op_pollts = GETSYSCALL(host, POLLTS); 2261 } else { 2262 op_pollts = GETSYSCALL(rump, POLLTS); 2263 adjustpoll(fds, nfds, fd_host2rump); 2264 } 2265 2266 rv = op_pollts(fds, nfds, ts, sigmask); 2267 if (rumpcall) 2268 adjustpoll(fds, nfds, fd_rump2host_withdup); 2269 } 2270 2271 return rv; 2272 } 2273 2274 int 2275 poll(struct pollfd *fds, nfds_t nfds, int timeout) 2276 { 2277 struct timespec ts; 2278 struct timespec *tsp = NULL; 2279 2280 if (timeout != INFTIM) { 2281 ts.tv_sec = timeout / 1000; 2282 ts.tv_nsec = (timeout % 1000) * 1000*1000; 2283 2284 tsp = &ts; 2285 } 2286 2287 return REALPOLLTS(fds, nfds, tsp, NULL); 2288 } 2289 2290 #ifdef HAVE_KQUEUE 2291 int 2292 REALKEVENT(int kq, const struct kevent *changelist, size_t nchanges, 2293 struct kevent *eventlist, size_t nevents, 2294 const struct timespec *timeout) 2295 { 2296 int (*op_kevent)(int, const struct kevent *, size_t, 2297 struct kevent *, size_t, const struct timespec *); 2298 const struct kevent *ev; 2299 size_t i; 2300 2301 /* 2302 * Check that we don't attempt to kevent rump kernel fd's. 2303 * That needs similar treatment to select/poll, but is slightly 2304 * trickier since we need to manage to different kq descriptors. 2305 * (TODO, in case you're wondering). 2306 */ 2307 for (i = 0; i < nchanges; i++) { 2308 ev = &changelist[i]; 2309 if (ev->filter == EVFILT_READ || ev->filter == EVFILT_WRITE || 2310 ev->filter == EVFILT_VNODE) { 2311 if (fd_isrump((int)ev->ident)) { 2312 errno = ENOTSUP; 2313 return -1; 2314 } 2315 } 2316 } 2317 2318 op_kevent = GETSYSCALL(host, KEVENT); 2319 return op_kevent(kq, changelist, nchanges, eventlist, nevents, timeout); 2320 } 2321 #endif /* HAVE_KQUEUE */ 2322 2323 /* 2324 * mmapping from a rump kernel is not supported, so disallow it. 2325 */ 2326 void * 2327 mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) 2328 { 2329 2330 if (flags & MAP_FILE && fd_isrump(fd)) { 2331 errno = ENOSYS; 2332 return MAP_FAILED; 2333 } 2334 if (__predict_false(host_mmap == NULL)) { 2335 host_mmap = rumphijack_dlsym(RTLD_NEXT, "mmap"); 2336 } 2337 return host_mmap(addr, len, prot, flags, fd, offset); 2338 } 2339 2340 #ifdef __NetBSD__ 2341 /* 2342 * these go to one or the other on a per-process configuration 2343 */ 2344 int __sysctl(const int *, unsigned int, void *, size_t *, const void *, size_t); 2345 int 2346 __sysctl(const int *name, unsigned int namelen, void *old, size_t *oldlenp, 2347 const void *new, size_t newlen) 2348 { 2349 int (*op___sysctl)(const int *, unsigned int, void *, size_t *, 2350 const void *, size_t); 2351 2352 if (rumpsysctl) { 2353 op___sysctl = GETSYSCALL(rump, __SYSCTL); 2354 } else { 2355 op___sysctl = GETSYSCALL(host, __SYSCTL); 2356 /* we haven't inited yet */ 2357 if (__predict_false(op___sysctl == NULL)) { 2358 op___sysctl = rumphijack_dlsym(RTLD_NEXT, "__sysctl"); 2359 } 2360 } 2361 2362 return op___sysctl(name, namelen, old, oldlenp, new, newlen); 2363 } 2364 int modctl(int, void *); 2365 int 2366 modctl(int operation, void *argp) 2367 { 2368 int (*op_modctl)(int operation, void *argp); 2369 2370 if (rumpmodctl) { 2371 op_modctl = GETSYSCALL(rump, MODCTL); 2372 } else { 2373 op_modctl = GETSYSCALL(host, MODCTL); 2374 } 2375 2376 return op_modctl(operation, argp); 2377 } 2378 #endif 2379 2380 /* 2381 * Rest are std type calls. 2382 */ 2383 2384 #ifdef HAVE_UTIMENSAT 2385 ATCALL(int, utimensat, DUALCALL_UTIMENSAT, \ 2386 (int fd, const char *path, const struct timespec t[2], int f), \ 2387 (int, const char *, const struct timespec [2], int), 2388 (fd, path, t, f)) 2389 #endif 2390 2391 FDCALL(int, bind, DUALCALL_BIND, \ 2392 (int fd, const struct sockaddr *name, socklen_t namelen), \ 2393 (int, const struct sockaddr *, socklen_t), \ 2394 (fd, name, namelen)) 2395 2396 FDCALL(int, connect, DUALCALL_CONNECT, \ 2397 (int fd, const struct sockaddr *name, socklen_t namelen), \ 2398 (int, const struct sockaddr *, socklen_t), \ 2399 (fd, name, namelen)) 2400 2401 FDCALL(int, getpeername, DUALCALL_GETPEERNAME, \ 2402 (int fd, struct sockaddr *name, socklen_t *namelen), \ 2403 (int, struct sockaddr *, socklen_t *), \ 2404 (fd, name, namelen)) 2405 2406 FDCALL(int, getsockname, DUALCALL_GETSOCKNAME, \ 2407 (int fd, struct sockaddr *name, socklen_t *namelen), \ 2408 (int, struct sockaddr *, socklen_t *), \ 2409 (fd, name, namelen)) 2410 2411 FDCALL(int, listen, DUALCALL_LISTEN, \ 2412 (int fd, int backlog), \ 2413 (int, int), \ 2414 (fd, backlog)) 2415 2416 FDCALL(ssize_t, recvfrom, DUALCALL_RECVFROM, \ 2417 (int fd, void *buf, size_t len, int flags, \ 2418 struct sockaddr *from, socklen_t *fromlen), \ 2419 (int, void *, size_t, int, struct sockaddr *, socklen_t *), \ 2420 (fd, buf, len, flags, from, fromlen)) 2421 2422 FDCALL(ssize_t, sendto, DUALCALL_SENDTO, \ 2423 (int fd, const void *buf, size_t len, int flags, \ 2424 const struct sockaddr *to, socklen_t tolen), \ 2425 (int, const void *, size_t, int, \ 2426 const struct sockaddr *, socklen_t), \ 2427 (fd, buf, len, flags, to, tolen)) 2428 2429 FDCALL(int, getsockopt, DUALCALL_GETSOCKOPT, \ 2430 (int fd, int level, int optn, void *optval, socklen_t *optlen), \ 2431 (int, int, int, void *, socklen_t *), \ 2432 (fd, level, optn, optval, optlen)) 2433 2434 FDCALL(int, setsockopt, DUALCALL_SETSOCKOPT, \ 2435 (int fd, int level, int optn, \ 2436 const void *optval, socklen_t optlen), \ 2437 (int, int, int, const void *, socklen_t), \ 2438 (fd, level, optn, optval, optlen)) 2439 2440 FDCALL(int, shutdown, DUALCALL_SHUTDOWN, \ 2441 (int fd, int how), \ 2442 (int, int), \ 2443 (fd, how)) 2444 2445 FDCALL(ssize_t, REALREAD, DUALCALL_READ, \ 2446 (int fd, void *buf, size_t buflen), \ 2447 (int, void *, size_t), \ 2448 (fd, buf, buflen)) 2449 2450 #ifdef __linux__ 2451 ssize_t __read_chk(int, void *, size_t) 2452 __attribute__((alias("read"))); 2453 #endif 2454 2455 FDCALL(ssize_t, readv, DUALCALL_READV, \ 2456 (int fd, const struct iovec *iov, int iovcnt), \ 2457 (int, const struct iovec *, int), \ 2458 (fd, iov, iovcnt)) 2459 2460 FDCALL(ssize_t, REALPREAD, DUALCALL_PREAD, \ 2461 (int fd, void *buf, size_t nbytes, off_t offset), \ 2462 (int, void *, size_t, off_t), \ 2463 (fd, buf, nbytes, offset)) 2464 2465 FDCALL(ssize_t, preadv, DUALCALL_PREADV, \ 2466 (int fd, const struct iovec *iov, int iovcnt, off_t offset), \ 2467 (int, const struct iovec *, int, off_t), \ 2468 (fd, iov, iovcnt, offset)) 2469 2470 FDCALL(ssize_t, writev, DUALCALL_WRITEV, \ 2471 (int fd, const struct iovec *iov, int iovcnt), \ 2472 (int, const struct iovec *, int), \ 2473 (fd, iov, iovcnt)) 2474 2475 FDCALL(ssize_t, REALPWRITE, DUALCALL_PWRITE, \ 2476 (int fd, const void *buf, size_t nbytes, off_t offset), \ 2477 (int, const void *, size_t, off_t), \ 2478 (fd, buf, nbytes, offset)) 2479 2480 FDCALL(ssize_t, pwritev, DUALCALL_PWRITEV, \ 2481 (int fd, const struct iovec *iov, int iovcnt, off_t offset), \ 2482 (int, const struct iovec *, int, off_t), \ 2483 (fd, iov, iovcnt, offset)) 2484 2485 #ifndef __linux__ 2486 FDCALL(int, REALFSTAT, DUALCALL_FSTAT, \ 2487 (int fd, struct stat *sb), \ 2488 (int, struct stat *), \ 2489 (fd, sb)) 2490 #endif 2491 2492 #ifdef __NetBSD__ 2493 FDCALL(int, fstatvfs1, DUALCALL_FSTATVFS1, \ 2494 (int fd, struct statvfs *buf, int flags), \ 2495 (int, struct statvfs *, int), \ 2496 (fd, buf, flags)) 2497 #endif 2498 2499 FDCALL(off_t, lseek, DUALCALL_LSEEK, \ 2500 (int fd, off_t offset, int whence), \ 2501 (int, off_t, int), \ 2502 (fd, offset, whence)) 2503 #ifdef LSEEK_ALIAS 2504 __strong_alias(LSEEK_ALIAS,lseek); 2505 #endif 2506 2507 #ifndef __linux__ 2508 FDCALL(int, REALGETDENTS, DUALCALL_GETDENTS, \ 2509 (int fd, char *buf, size_t nbytes), \ 2510 (int, char *, size_t), \ 2511 (fd, buf, nbytes)) 2512 #endif 2513 2514 FDCALL(int, fchown, DUALCALL_FCHOWN, \ 2515 (int fd, uid_t owner, gid_t group), \ 2516 (int, uid_t, gid_t), \ 2517 (fd, owner, group)) 2518 2519 FDCALL(int, fchmod, DUALCALL_FCHMOD, \ 2520 (int fd, mode_t mode), \ 2521 (int, mode_t), \ 2522 (fd, mode)) 2523 2524 FDCALL(int, ftruncate, DUALCALL_FTRUNCATE, \ 2525 (int fd, off_t length), \ 2526 (int, off_t), \ 2527 (fd, length)) 2528 2529 FDCALL(int, fsync, DUALCALL_FSYNC, \ 2530 (int fd), \ 2531 (int), \ 2532 (fd)) 2533 2534 #ifdef HAVE_FSYNC_RANGE 2535 FDCALL(int, fsync_range, DUALCALL_FSYNC_RANGE, \ 2536 (int fd, int how, off_t start, off_t length), \ 2537 (int, int, off_t, off_t), \ 2538 (fd, how, start, length)) 2539 #endif 2540 2541 FDCALL(int, futimes, DUALCALL_FUTIMES, \ 2542 (int fd, const struct timeval *tv), \ 2543 (int, const struct timeval *), \ 2544 (fd, tv)) 2545 2546 FDCALL(int, futimens, DUALCALL_FUTIMENS, \ 2547 (int fd, const struct timespec *ts), \ 2548 (int, const struct timespec *), \ 2549 (fd, ts)) 2550 2551 #ifdef HAVE_CHFLAGS 2552 FDCALL(int, fchflags, DUALCALL_FCHFLAGS, \ 2553 (int fd, u_long flags), \ 2554 (int, u_long), \ 2555 (fd, flags)) 2556 #endif 2557 2558 /* 2559 * path-based selectors 2560 */ 2561 2562 #ifndef __linux__ 2563 PATHCALL(int, REALSTAT, DUALCALL_STAT, \ 2564 (const char *path, struct stat *sb), \ 2565 (const char *, struct stat *), \ 2566 (path, sb)) 2567 2568 PATHCALL(int, REALLSTAT, DUALCALL_LSTAT, \ 2569 (const char *path, struct stat *sb), \ 2570 (const char *, struct stat *), \ 2571 (path, sb)) 2572 #endif 2573 2574 PATHCALL(int, chown, DUALCALL_CHOWN, \ 2575 (const char *path, uid_t owner, gid_t group), \ 2576 (const char *, uid_t, gid_t), \ 2577 (path, owner, group)) 2578 2579 PATHCALL(int, lchown, DUALCALL_LCHOWN, \ 2580 (const char *path, uid_t owner, gid_t group), \ 2581 (const char *, uid_t, gid_t), \ 2582 (path, owner, group)) 2583 2584 PATHCALL(int, chmod, DUALCALL_CHMOD, \ 2585 (const char *path, mode_t mode), \ 2586 (const char *, mode_t), \ 2587 (path, mode)) 2588 2589 PATHCALL(int, lchmod, DUALCALL_LCHMOD, \ 2590 (const char *path, mode_t mode), \ 2591 (const char *, mode_t), \ 2592 (path, mode)) 2593 2594 #ifdef __NetBSD__ 2595 PATHCALL(int, statvfs1, DUALCALL_STATVFS1, \ 2596 (const char *path, struct statvfs *buf, int flags), \ 2597 (const char *, struct statvfs *, int), \ 2598 (path, buf, flags)) 2599 #endif 2600 2601 PATHCALL(int, unlink, DUALCALL_UNLINK, \ 2602 (const char *path), \ 2603 (const char *), \ 2604 (path)) 2605 2606 PATHCALL(int, symlink, DUALCALL_SYMLINK, \ 2607 (const char *target, const char *path), \ 2608 (const char *, const char *), \ 2609 (target, path)) 2610 2611 /* 2612 * readlink() can be called from malloc which can be called 2613 * from dlsym() during init 2614 */ 2615 ssize_t 2616 readlink(const char *path, char *buf, size_t bufsiz) 2617 { 2618 int (*op_readlink)(const char *, char *, size_t); 2619 enum pathtype pt; 2620 2621 if ((pt = path_isrump(path)) != PATH_HOST) { 2622 op_readlink = GETSYSCALL(rump, READLINK); 2623 if (pt == PATH_RUMP) 2624 path = path_host2rump(path); 2625 } else { 2626 op_readlink = GETSYSCALL(host, READLINK); 2627 } 2628 2629 if (__predict_false(op_readlink == NULL)) { 2630 errno = ENOENT; 2631 return -1; 2632 } 2633 2634 return op_readlink(path, buf, bufsiz); 2635 } 2636 2637 PATHCALL(int, mkdir, DUALCALL_MKDIR, \ 2638 (const char *path, mode_t mode), \ 2639 (const char *, mode_t), \ 2640 (path, mode)) 2641 2642 PATHCALL(int, rmdir, DUALCALL_RMDIR, \ 2643 (const char *path), \ 2644 (const char *), \ 2645 (path)) 2646 2647 PATHCALL(int, utimes, DUALCALL_UTIMES, \ 2648 (const char *path, const struct timeval *tv), \ 2649 (const char *, const struct timeval *), \ 2650 (path, tv)) 2651 2652 PATHCALL(int, lutimes, DUALCALL_LUTIMES, \ 2653 (const char *path, const struct timeval *tv), \ 2654 (const char *, const struct timeval *), \ 2655 (path, tv)) 2656 2657 #ifdef HAVE_CHFLAGS 2658 PATHCALL(int, chflags, DUALCALL_CHFLAGS, \ 2659 (const char *path, u_long flags), \ 2660 (const char *, u_long), \ 2661 (path, flags)) 2662 2663 PATHCALL(int, lchflags, DUALCALL_LCHFLAGS, \ 2664 (const char *path, u_long flags), \ 2665 (const char *, u_long), \ 2666 (path, flags)) 2667 #endif /* HAVE_CHFLAGS */ 2668 2669 PATHCALL(int, truncate, DUALCALL_TRUNCATE, \ 2670 (const char *path, off_t length), \ 2671 (const char *, off_t), \ 2672 (path, length)) 2673 2674 PATHCALL(int, access, DUALCALL_ACCESS, \ 2675 (const char *path, int mode), \ 2676 (const char *, int), \ 2677 (path, mode)) 2678 2679 #ifndef __linux__ 2680 PATHCALL(int, REALMKNOD, DUALCALL_MKNOD, \ 2681 (const char *path, mode_t mode, dev_t dev), \ 2682 (const char *, mode_t, dev_t), \ 2683 (path, mode, dev)) 2684 #endif 2685 2686 /* 2687 * Note: with mount the decisive parameter is the mount 2688 * destination directory. This is because we don't really know 2689 * about the "source" directory in a generic call (and besides, 2690 * it might not even exist, cf. nfs). 2691 */ 2692 #ifdef __NetBSD__ 2693 PATHCALL(int, REALMOUNT, DUALCALL_MOUNT, \ 2694 (const char *type, const char *path, int flags, \ 2695 void *data, size_t dlen), \ 2696 (const char *, const char *, int, void *, size_t), \ 2697 (type, path, flags, data, dlen)) 2698 2699 PATHCALL(int, unmount, DUALCALL_UNMOUNT, \ 2700 (const char *path, int flags), \ 2701 (const char *, int), \ 2702 (path, flags)) 2703 #endif /* __NetBSD__ */ 2704 2705 #ifdef HAVE___QUOTACTL 2706 PATHCALL(int, __quotactl, DUALCALL_QUOTACTL, \ 2707 (const char *path, struct quotactl_args *args), \ 2708 (const char *, struct quotactl_args *), \ 2709 (path, args)) 2710 #endif /* HAVE___QUOTACTL */ 2711 2712 #ifdef __NetBSD__ 2713 PATHCALL(int, REALGETFH, DUALCALL_GETFH, \ 2714 (const char *path, void *fhp, size_t *fh_size), \ 2715 (const char *, void *, size_t *), \ 2716 (path, fhp, fh_size)) 2717 #endif 2718 2719 /* 2720 * These act different on a per-process vfs configuration 2721 */ 2722 2723 #ifdef __NetBSD__ 2724 VFSCALL(VFSBIT_GETVFSSTAT, int, getvfsstat, DUALCALL_GETVFSSTAT, \ 2725 (struct statvfs *buf, size_t buflen, int flags), \ 2726 (struct statvfs *, size_t, int), \ 2727 (buf, buflen, flags)) 2728 #endif 2729 2730 #ifdef __NetBSD__ 2731 VFSCALL(VFSBIT_FHCALLS, int, REALFHOPEN, DUALCALL_FHOPEN, \ 2732 (const void *fhp, size_t fh_size, int flags), \ 2733 (const char *, size_t, int), \ 2734 (fhp, fh_size, flags)) 2735 2736 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTAT, DUALCALL_FHSTAT, \ 2737 (const void *fhp, size_t fh_size, struct stat *sb), \ 2738 (const char *, size_t, struct stat *), \ 2739 (fhp, fh_size, sb)) 2740 2741 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTATVFS1, DUALCALL_FHSTATVFS1, \ 2742 (const void *fhp, size_t fh_size, struct statvfs *sb, int flgs),\ 2743 (const char *, size_t, struct statvfs *, int), \ 2744 (fhp, fh_size, sb, flgs)) 2745 #endif 2746 2747 2748 #ifdef __NetBSD__ 2749 2750 /* finally, put nfssvc here. "keep the namespace clean" */ 2751 #include <nfs/rpcv2.h> 2752 #include <nfs/nfs.h> 2753 2754 int 2755 nfssvc(int flags, void *argstructp) 2756 { 2757 int (*op_nfssvc)(int, void *); 2758 2759 if (vfsbits & VFSBIT_NFSSVC){ 2760 struct nfsd_args *nfsdargs; 2761 2762 /* massage the socket descriptor if necessary */ 2763 if (flags == NFSSVC_ADDSOCK) { 2764 nfsdargs = argstructp; 2765 nfsdargs->sock = fd_host2rump(nfsdargs->sock); 2766 } 2767 op_nfssvc = GETSYSCALL(rump, NFSSVC); 2768 } else 2769 op_nfssvc = GETSYSCALL(host, NFSSVC); 2770 2771 return op_nfssvc(flags, argstructp); 2772 } 2773 #endif /* __NetBSD__ */ 2774