1 /* $NetBSD: hijack.c,v 1.132 2021/09/10 20:33:38 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * XXX: rumphijack sort of works on glibc Linux. But it's not 30 * the same quality working as on NetBSD. 31 * autoconf HAVE_FOO vs. __NetBSD__ / __linux__ could be further 32 * improved. 
33 */ 34 #include <rump/rumpuser_port.h> 35 36 #if !defined(lint) 37 __RCSID("$NetBSD: hijack.c,v 1.132 2021/09/10 20:33:38 christos Exp $"); 38 #endif 39 40 #include <sys/param.h> 41 #include <sys/types.h> 42 #include <sys/ioctl.h> 43 #include <sys/mman.h> 44 #include <sys/mount.h> 45 #include <sys/socket.h> 46 #include <sys/stat.h> 47 #include <sys/time.h> 48 #include <sys/uio.h> 49 50 #ifdef __NetBSD__ 51 #include <sys/statvfs.h> 52 #endif 53 54 #ifdef HAVE_KQUEUE 55 #include <sys/event.h> 56 #endif 57 58 #ifdef __NetBSD__ 59 #include <sys/quotactl.h> 60 #endif 61 62 #include <assert.h> 63 #include <dlfcn.h> 64 #include <err.h> 65 #include <errno.h> 66 #include <fcntl.h> 67 #include <poll.h> 68 #include <pthread.h> 69 #include <signal.h> 70 #include <stdarg.h> 71 #include <stdbool.h> 72 #include <stdint.h> 73 #include <stdio.h> 74 #include <stdlib.h> 75 #include <string.h> 76 #include <time.h> 77 #include <unistd.h> 78 79 #include <rump/rumpclient.h> 80 #include <rump/rump_syscalls.h> 81 82 #include "hijack.h" 83 84 /* 85 * XXX: Consider autogenerating this, syscnames[] and syscalls[] with 86 * a DSL where the tool also checks the symbols exported by this library 87 * to make sure all relevant calls are accounted for. 
88 */ 89 enum dualcall { 90 DUALCALL_WRITE, DUALCALL_WRITEV, DUALCALL_PWRITE, DUALCALL_PWRITEV, 91 DUALCALL_IOCTL, DUALCALL_FCNTL, 92 DUALCALL_SOCKET, DUALCALL_ACCEPT, 93 #ifndef __linux__ 94 DUALCALL_PACCEPT, 95 #endif 96 DUALCALL_BIND, DUALCALL_CONNECT, 97 DUALCALL_GETPEERNAME, DUALCALL_GETSOCKNAME, DUALCALL_LISTEN, 98 DUALCALL_RECVFROM, DUALCALL_RECVMSG, 99 DUALCALL_SENDTO, DUALCALL_SENDMSG, 100 DUALCALL_GETSOCKOPT, DUALCALL_SETSOCKOPT, 101 DUALCALL_SHUTDOWN, 102 DUALCALL_READ, DUALCALL_READV, DUALCALL_PREAD, DUALCALL_PREADV, 103 DUALCALL_DUP2, 104 DUALCALL_CLOSE, 105 DUALCALL_POLLTS, 106 107 #ifndef __linux__ 108 DUALCALL_STAT, DUALCALL_LSTAT, DUALCALL_FSTAT, 109 #endif 110 111 DUALCALL_CHMOD, DUALCALL_LCHMOD, DUALCALL_FCHMOD, 112 DUALCALL_CHOWN, DUALCALL_LCHOWN, DUALCALL_FCHOWN, 113 DUALCALL_OPEN, 114 DUALCALL_CHDIR, DUALCALL_FCHDIR, 115 DUALCALL_LSEEK, 116 DUALCALL_UNLINK, DUALCALL_SYMLINK, DUALCALL_READLINK, 117 DUALCALL_LINK, DUALCALL_RENAME, 118 DUALCALL_MKDIR, DUALCALL_RMDIR, 119 DUALCALL_UTIMES, DUALCALL_LUTIMES, DUALCALL_FUTIMES, 120 DUALCALL_UTIMENSAT, DUALCALL_FUTIMENS, 121 DUALCALL_TRUNCATE, DUALCALL_FTRUNCATE, 122 DUALCALL_FSYNC, 123 DUALCALL_ACCESS, 124 125 #ifndef __linux__ 126 DUALCALL___GETCWD, 127 DUALCALL_GETDENTS, 128 #endif 129 130 #ifndef __linux__ 131 DUALCALL_MKNOD, 132 #endif 133 134 #ifdef __NetBSD__ 135 DUALCALL_GETFH, DUALCALL_FHOPEN, DUALCALL_FHSTAT, DUALCALL_FHSTATVFS1, 136 #endif 137 138 #ifdef HAVE_KQUEUE 139 DUALCALL_KEVENT, 140 #endif 141 142 #ifdef __NetBSD__ 143 DUALCALL___SYSCTL, 144 DUALCALL_MODCTL, 145 #endif 146 147 #ifdef __NetBSD__ 148 DUALCALL_NFSSVC, 149 #endif 150 151 #ifdef __NetBSD__ 152 DUALCALL_STATVFS1, DUALCALL_FSTATVFS1, DUALCALL_GETVFSSTAT, 153 #endif 154 155 #ifdef __NetBSD__ 156 DUALCALL_MOUNT, DUALCALL_UNMOUNT, 157 #endif 158 159 #ifdef HAVE_FSYNC_RANGE 160 DUALCALL_FSYNC_RANGE, 161 #endif 162 163 #ifdef HAVE_CHFLAGS 164 DUALCALL_CHFLAGS, DUALCALL_LCHFLAGS, DUALCALL_FCHFLAGS, 165 #endif 166 167 #ifdef 
HAVE___QUOTACTL 168 DUALCALL_QUOTACTL, 169 #endif 170 #ifdef __NetBSD__ 171 DUALCALL_LINKAT, 172 #endif 173 DUALCALL_PATHCONF, 174 DUALCALL_LPATHCONF, 175 176 DUALCALL__NUM 177 }; 178 179 #define RSYS_STRING(a) __STRING(a) 180 #define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a)) 181 182 /* 183 * Would be nice to get this automatically in sync with libc. 184 * Also, this does not work for compat-using binaries (we should 185 * provide all previous interfaces, not just the current ones) 186 */ 187 #if defined(__NetBSD__) 188 189 #if !__NetBSD_Prereq__(5,99,7) 190 #define REALSELECT pselect 191 #define REALSELECT select 192 #define REALPOLLTS pollts 193 #define REALKEVENT kevent 194 #define REALSTAT __stat30 195 #define REALLSTAT __lstat30 196 #define REALFSTAT __fstat30 197 #define REALUTIMES utimes 198 #define REALLUTIMES lutimes 199 #define REALFUTIMES futimes 200 #define REALMKNOD mknod 201 #define REALFHSTAT __fhstat40 202 #else /* >= 5.99.7 */ 203 #define REALPSELECT _sys___pselect50 204 #define REALSELECT _sys___select50 205 #define REALPOLLTS _sys___pollts50 206 #define REALKEVENT _sys___kevent50 207 #define REALSTAT __stat50 208 #define REALLSTAT __lstat50 209 #define REALFSTAT __fstat50 210 #define REALUTIMES __utimes50 211 #define REALLUTIMES __lutimes50 212 #define REALFUTIMES __futimes50 213 #define REALMKNOD __mknod50 214 #define REALFHSTAT __fhstat50 215 #endif /* < 5.99.7 */ 216 217 #define REALREAD _sys_read 218 #define REALPREAD _sys_pread 219 #define REALPWRITE _sys_pwrite 220 #define REALGETDENTS __getdents30 221 #define REALMOUNT __mount50 222 #define REALGETFH __getfh30 223 #define REALFHOPEN __fhopen40 224 #if !__NetBSD_Prereq__(9,99,13) 225 #define REALSTATVFS1 statvfs1 226 #define REALFSTATVFS1 fstatvfs1 227 #define REALGETVFSSTAT getvfsstat 228 #define REALFHSTATVFS1 __fhstatvfs140 229 #else 230 #define REALSTATVFS1 __statvfs190 231 #define REALFSTATVFS1 __fstatvfs190 232 #define REALGETVFSSTAT __getvfsstat90 233 #define 
REALFHSTATVFS1 __fhstatvfs190 234 #endif 235 #define REALSOCKET __socket30 236 237 #define LSEEK_ALIAS _lseek 238 #define VFORK __vfork14 239 240 int REALSTAT(const char *, struct stat *); 241 int REALLSTAT(const char *, struct stat *); 242 int REALFSTAT(int, struct stat *); 243 int REALMKNOD(const char *, mode_t, dev_t); 244 int REALGETDENTS(int, char *, size_t); 245 246 int __getcwd(char *, size_t); 247 248 #elif defined(__linux__) /* glibc, really */ 249 250 #define REALREAD read 251 #define REALPREAD pread 252 #define REALPWRITE pwrite 253 #define REALPSELECT pselect 254 #define REALSELECT select 255 #define REALPOLLTS ppoll 256 #define REALUTIMES utimes 257 #define REALLUTIMES lutimes 258 #define REALFUTIMES futimes 259 #define REALFHSTAT fhstat 260 #define REALSOCKET socket 261 262 #else /* !NetBSD && !linux */ 263 264 #error platform not supported 265 266 #endif /* platform */ 267 268 int REALPSELECT(int, fd_set *, fd_set *, fd_set *, const struct timespec *, 269 const sigset_t *); 270 int REALSELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *); 271 int REALPOLLTS(struct pollfd *, nfds_t, 272 const struct timespec *, const sigset_t *); 273 int REALKEVENT(int, const struct kevent *, size_t, struct kevent *, size_t, 274 const struct timespec *); 275 ssize_t REALREAD(int, void *, size_t); 276 ssize_t REALPREAD(int, void *, size_t, off_t); 277 ssize_t REALPWRITE(int, const void *, size_t, off_t); 278 int REALUTIMES(const char *, const struct timeval [2]); 279 int REALLUTIMES(const char *, const struct timeval [2]); 280 int REALFUTIMES(int, const struct timeval [2]); 281 int REALMOUNT(const char *, const char *, int, void *, size_t); 282 int REALGETFH(const char *, void *, size_t *); 283 int REALFHOPEN(const void *, size_t, int); 284 int REALFHSTAT(const void *, size_t, struct stat *); 285 int REALSTATVFS1(const char *, struct statvfs *, int); 286 int REALFSTATVFS1(int, struct statvfs *, int); 287 int REALFHSTATVFS1(const void *, size_t, struct statvfs *, 
int); 288 int REALGETVFSSTAT(struct statvfs *, size_t, int); 289 int REALSOCKET(int, int, int); 290 291 #define S(a) __STRING(a) 292 struct sysnames { 293 enum dualcall scm_callnum; 294 const char *scm_hostname; 295 const char *scm_rumpname; 296 } syscnames[] = { 297 { DUALCALL_SOCKET, S(REALSOCKET), RSYS_NAME(SOCKET) }, 298 { DUALCALL_ACCEPT, "accept", RSYS_NAME(ACCEPT) }, 299 #ifndef __linux__ 300 { DUALCALL_PACCEPT, "paccept", RSYS_NAME(PACCEPT) }, 301 #endif 302 { DUALCALL_BIND, "bind", RSYS_NAME(BIND) }, 303 { DUALCALL_CONNECT, "connect", RSYS_NAME(CONNECT) }, 304 { DUALCALL_GETPEERNAME, "getpeername", RSYS_NAME(GETPEERNAME) }, 305 { DUALCALL_GETSOCKNAME, "getsockname", RSYS_NAME(GETSOCKNAME) }, 306 { DUALCALL_LISTEN, "listen", RSYS_NAME(LISTEN) }, 307 { DUALCALL_RECVFROM, "recvfrom", RSYS_NAME(RECVFROM) }, 308 { DUALCALL_RECVMSG, "recvmsg", RSYS_NAME(RECVMSG) }, 309 { DUALCALL_SENDTO, "sendto", RSYS_NAME(SENDTO) }, 310 { DUALCALL_SENDMSG, "sendmsg", RSYS_NAME(SENDMSG) }, 311 { DUALCALL_GETSOCKOPT, "getsockopt", RSYS_NAME(GETSOCKOPT) }, 312 { DUALCALL_SETSOCKOPT, "setsockopt", RSYS_NAME(SETSOCKOPT) }, 313 { DUALCALL_SHUTDOWN, "shutdown", RSYS_NAME(SHUTDOWN) }, 314 { DUALCALL_READ, S(REALREAD), RSYS_NAME(READ) }, 315 { DUALCALL_READV, "readv", RSYS_NAME(READV) }, 316 { DUALCALL_PREAD, S(REALPREAD), RSYS_NAME(PREAD) }, 317 { DUALCALL_PREADV, "preadv", RSYS_NAME(PREADV) }, 318 { DUALCALL_WRITE, "write", RSYS_NAME(WRITE) }, 319 { DUALCALL_WRITEV, "writev", RSYS_NAME(WRITEV) }, 320 { DUALCALL_PWRITE, S(REALPWRITE), RSYS_NAME(PWRITE) }, 321 { DUALCALL_PWRITEV, "pwritev", RSYS_NAME(PWRITEV) }, 322 { DUALCALL_IOCTL, "ioctl", RSYS_NAME(IOCTL) }, 323 { DUALCALL_FCNTL, "fcntl", RSYS_NAME(FCNTL) }, 324 { DUALCALL_DUP2, "dup2", RSYS_NAME(DUP2) }, 325 { DUALCALL_CLOSE, "close", RSYS_NAME(CLOSE) }, 326 { DUALCALL_POLLTS, S(REALPOLLTS), RSYS_NAME(POLLTS) }, 327 #ifndef __linux__ 328 { DUALCALL_STAT, S(REALSTAT), RSYS_NAME(STAT) }, 329 { DUALCALL_LSTAT, S(REALLSTAT), 
RSYS_NAME(LSTAT) }, 330 { DUALCALL_FSTAT, S(REALFSTAT), RSYS_NAME(FSTAT) }, 331 #endif 332 { DUALCALL_CHOWN, "chown", RSYS_NAME(CHOWN) }, 333 { DUALCALL_LCHOWN, "lchown", RSYS_NAME(LCHOWN) }, 334 { DUALCALL_FCHOWN, "fchown", RSYS_NAME(FCHOWN) }, 335 { DUALCALL_CHMOD, "chmod", RSYS_NAME(CHMOD) }, 336 { DUALCALL_LCHMOD, "lchmod", RSYS_NAME(LCHMOD) }, 337 { DUALCALL_FCHMOD, "fchmod", RSYS_NAME(FCHMOD) }, 338 { DUALCALL_UTIMES, S(REALUTIMES), RSYS_NAME(UTIMES) }, 339 { DUALCALL_LUTIMES, S(REALLUTIMES), RSYS_NAME(LUTIMES) }, 340 { DUALCALL_FUTIMES, S(REALFUTIMES), RSYS_NAME(FUTIMES) }, 341 { DUALCALL_UTIMENSAT, "utimensat", RSYS_NAME(UTIMENSAT) }, 342 { DUALCALL_FUTIMENS, "futimens", RSYS_NAME(FUTIMENS) }, 343 { DUALCALL_OPEN, "open", RSYS_NAME(OPEN) }, 344 { DUALCALL_CHDIR, "chdir", RSYS_NAME(CHDIR) }, 345 { DUALCALL_FCHDIR, "fchdir", RSYS_NAME(FCHDIR) }, 346 { DUALCALL_LSEEK, "lseek", RSYS_NAME(LSEEK) }, 347 { DUALCALL_UNLINK, "unlink", RSYS_NAME(UNLINK) }, 348 { DUALCALL_SYMLINK, "symlink", RSYS_NAME(SYMLINK) }, 349 { DUALCALL_READLINK, "readlink", RSYS_NAME(READLINK) }, 350 { DUALCALL_LINK, "link", RSYS_NAME(LINK) }, 351 { DUALCALL_RENAME, "rename", RSYS_NAME(RENAME) }, 352 { DUALCALL_MKDIR, "mkdir", RSYS_NAME(MKDIR) }, 353 { DUALCALL_RMDIR, "rmdir", RSYS_NAME(RMDIR) }, 354 { DUALCALL_TRUNCATE, "truncate", RSYS_NAME(TRUNCATE) }, 355 { DUALCALL_FTRUNCATE, "ftruncate", RSYS_NAME(FTRUNCATE) }, 356 { DUALCALL_FSYNC, "fsync", RSYS_NAME(FSYNC) }, 357 { DUALCALL_ACCESS, "access", RSYS_NAME(ACCESS) }, 358 359 #ifndef __linux__ 360 { DUALCALL___GETCWD, "__getcwd", RSYS_NAME(__GETCWD) }, 361 { DUALCALL_GETDENTS, S(REALGETDENTS),RSYS_NAME(GETDENTS) }, 362 #endif 363 364 #ifndef __linux__ 365 { DUALCALL_MKNOD, S(REALMKNOD), RSYS_NAME(MKNOD) }, 366 #endif 367 368 #ifdef __NetBSD__ 369 { DUALCALL_GETFH, S(REALGETFH), RSYS_NAME(GETFH) }, 370 { DUALCALL_FHOPEN, S(REALFHOPEN), RSYS_NAME(FHOPEN) }, 371 { DUALCALL_FHSTAT, S(REALFHSTAT), RSYS_NAME(FHSTAT) }, 372 { DUALCALL_FHSTATVFS1, 
S(REALFHSTATVFS1),RSYS_NAME(FHSTATVFS1) }, 373 #endif 374 375 #ifdef HAVE_KQUEUE 376 { DUALCALL_KEVENT, S(REALKEVENT), RSYS_NAME(KEVENT) }, 377 #endif 378 379 #ifdef __NetBSD__ 380 { DUALCALL___SYSCTL, "__sysctl", RSYS_NAME(__SYSCTL) }, 381 { DUALCALL_MODCTL, "modctl", RSYS_NAME(MODCTL) }, 382 #endif 383 384 #ifdef __NetBSD__ 385 { DUALCALL_NFSSVC, "nfssvc", RSYS_NAME(NFSSVC) }, 386 #endif 387 388 #ifdef __NetBSD__ 389 { DUALCALL_STATVFS1, S(REALSTATVFS1),RSYS_NAME(STATVFS1) }, 390 { DUALCALL_FSTATVFS1, S(REALFSTATVFS1),RSYS_NAME(FSTATVFS1) }, 391 { DUALCALL_GETVFSSTAT, S(REALGETVFSSTAT),RSYS_NAME(GETVFSSTAT) }, 392 #endif 393 394 #ifdef __NetBSD__ 395 { DUALCALL_MOUNT, S(REALMOUNT), RSYS_NAME(MOUNT) }, 396 { DUALCALL_UNMOUNT, "unmount", RSYS_NAME(UNMOUNT) }, 397 #endif 398 399 #ifdef HAVE_FSYNC_RANGE 400 { DUALCALL_FSYNC_RANGE, "fsync_range", RSYS_NAME(FSYNC_RANGE) }, 401 #endif 402 403 #ifdef HAVE_CHFLAGS 404 { DUALCALL_CHFLAGS, "chflags", RSYS_NAME(CHFLAGS) }, 405 { DUALCALL_LCHFLAGS, "lchflags", RSYS_NAME(LCHFLAGS) }, 406 { DUALCALL_FCHFLAGS, "fchflags", RSYS_NAME(FCHFLAGS) }, 407 #endif /* HAVE_CHFLAGS */ 408 409 #ifdef HAVE___QUOTACTL 410 { DUALCALL_QUOTACTL, "__quotactl", RSYS_NAME(__QUOTACTL) }, 411 #endif /* HAVE___QUOTACTL */ 412 413 #ifdef __NetBSD__ 414 { DUALCALL_LINKAT, "linkat", RSYS_NAME(LINKAT) }, 415 #endif 416 { DUALCALL_PATHCONF, "pathconf", RSYS_NAME(PATHCONF) }, 417 { DUALCALL_LPATHCONF, "lpathconf", RSYS_NAME(LPATHCONF) }, 418 }; 419 #undef S 420 421 struct bothsys { 422 void *bs_host; 423 void *bs_rump; 424 } syscalls[DUALCALL__NUM]; 425 #define GETSYSCALL(which, name) syscalls[DUALCALL_##name].bs_##which 426 427 static pid_t (*host_fork)(void); 428 static int (*host_daemon)(int, int); 429 static void * (*host_mmap)(void *, size_t, int, int, int, off_t); 430 431 /* 432 * This tracks if our process is in a subdirectory of /rump. 433 * It's preserved over exec. 
434 */ 435 static bool pwdinrump; 436 437 enum pathtype { PATH_HOST, PATH_RUMP, PATH_RUMPBLANKET }; 438 439 static bool fd_isrump(int); 440 static enum pathtype path_isrump(const char *); 441 442 /* default FD_SETSIZE is 256 ==> default fdoff is 128 */ 443 static int hijack_fdoff = FD_SETSIZE/2; 444 445 /* 446 * Maintain a mapping table for the usual dup2 suspects. 447 * Could use atomic ops to operate on dup2vec, but an application 448 * racing there is not well-defined, so don't bother. 449 */ 450 /* note: you cannot change this without editing the env-passing code */ 451 #define DUP2HIGH 2 452 static uint32_t dup2vec[DUP2HIGH+1]; 453 #define DUP2BIT (1U<<31) 454 #define DUP2ALIAS (1U<<30) 455 #define DUP2FDMASK ((1U<<30)-1) 456 457 static bool 458 isdup2d(int fd) 459 { 460 461 return fd <= DUP2HIGH && fd >= 0 && dup2vec[fd] & DUP2BIT; 462 } 463 464 static int 465 mapdup2(int hostfd) 466 { 467 468 _DIAGASSERT(isdup2d(hostfd)); 469 return dup2vec[hostfd] & DUP2FDMASK; 470 } 471 472 static int 473 unmapdup2(int rumpfd) 474 { 475 int i; 476 477 for (i = 0; i <= DUP2HIGH; i++) { 478 if (dup2vec[i] & DUP2BIT && 479 (dup2vec[i] & DUP2FDMASK) == (unsigned)rumpfd) 480 return i; 481 } 482 return -1; 483 } 484 485 static void 486 setdup2(int hostfd, int rumpfd) 487 { 488 489 if (hostfd > DUP2HIGH) { 490 _DIAGASSERT(/*CONSTCOND*/0); 491 return; 492 } 493 494 dup2vec[hostfd] = DUP2BIT | DUP2ALIAS | rumpfd; 495 } 496 497 static void 498 clrdup2(int hostfd) 499 { 500 501 if (hostfd > DUP2HIGH) { 502 _DIAGASSERT(/*CONSTCOND*/0); 503 return; 504 } 505 506 dup2vec[hostfd] = 0; 507 } 508 509 static bool 510 killdup2alias(int rumpfd) 511 { 512 int hostfd; 513 514 if ((hostfd = unmapdup2(rumpfd)) == -1) 515 return false; 516 517 if (dup2vec[hostfd] & DUP2ALIAS) { 518 dup2vec[hostfd] &= ~DUP2ALIAS; 519 return true; 520 } 521 return false; 522 } 523 524 //#define DEBUGJACK 525 #ifdef DEBUGJACK 526 #define DPRINTF(x) mydprintf x 527 static void 528 mydprintf(const char *fmt, ...) 
529 { 530 va_list ap; 531 532 if (isdup2d(STDERR_FILENO)) 533 return; 534 535 va_start(ap, fmt); 536 vfprintf(stderr, fmt, ap); 537 va_end(ap); 538 } 539 540 static const char * 541 whichfd(int fd) 542 { 543 544 if (fd == -1) 545 return "-1"; 546 else if (fd_isrump(fd)) 547 return "rump"; 548 else 549 return "host"; 550 } 551 552 static const char * 553 whichpath(const char *path) 554 { 555 556 if (path_isrump(path)) 557 return "rump"; 558 else 559 return "host"; 560 } 561 562 #else 563 #define DPRINTF(x) 564 #endif 565 566 #define ATCALL(type, name, rcname, args, proto, vars) \ 567 type name args \ 568 { \ 569 type (*fun) proto; \ 570 int isrump = -1; \ 571 \ 572 if (fd == AT_FDCWD || *path == '/') { \ 573 isrump = path_isrump(path); \ 574 } else { \ 575 isrump = fd_isrump(fd); \ 576 } \ 577 \ 578 DPRINTF(("%s -> %d:%s (%s)\n", __STRING(name), \ 579 fd, path, isrump ? "rump" : "host")); \ 580 \ 581 assert(isrump != -1); \ 582 if (isrump) { \ 583 fun = syscalls[rcname].bs_rump; \ 584 if (fd != AT_FDCWD) \ 585 fd = fd_host2rump(fd); \ 586 path = path_host2rump(path); \ 587 } else { \ 588 fun = syscalls[rcname].bs_host; \ 589 } \ 590 return fun vars; \ 591 } 592 593 #define FDCALL(type, name, rcname, args, proto, vars) \ 594 type name args \ 595 { \ 596 type (*fun) proto; \ 597 \ 598 DPRINTF(("%s -> %d (%s)\n", __STRING(name), fd, whichfd(fd))); \ 599 if (fd_isrump(fd)) { \ 600 fun = syscalls[rcname].bs_rump; \ 601 fd = fd_host2rump(fd); \ 602 } else { \ 603 fun = syscalls[rcname].bs_host; \ 604 } \ 605 \ 606 return fun vars; \ 607 } 608 609 #define PATHCALL(type, name, rcname, args, proto, vars) \ 610 type name args \ 611 { \ 612 type (*fun) proto; \ 613 enum pathtype pt; \ 614 \ 615 DPRINTF(("%s -> %s (%s)\n", __STRING(name), path, \ 616 whichpath(path))); \ 617 if ((pt = path_isrump(path)) != PATH_HOST) { \ 618 fun = syscalls[rcname].bs_rump; \ 619 if (pt == PATH_RUMP) \ 620 path = path_host2rump(path); \ 621 } else { \ 622 fun = syscalls[rcname].bs_host; \ 623 } 
\ 624 \ 625 return fun vars; \ 626 } 627 628 #define VFSCALL(bit, type, name, rcname, args, proto, vars) \ 629 type name args \ 630 { \ 631 type (*fun) proto; \ 632 \ 633 DPRINTF(("%s (0x%x, 0x%x)\n", __STRING(name), bit, vfsbits)); \ 634 if (vfsbits & bit) { \ 635 fun = syscalls[rcname].bs_rump; \ 636 } else { \ 637 fun = syscalls[rcname].bs_host; \ 638 } \ 639 \ 640 return fun vars; \ 641 } 642 643 /* 644 * These variables are set from the RUMPHIJACK string and control 645 * which operations can product rump kernel file descriptors. 646 * This should be easily extendable for future needs. 647 */ 648 #define RUMPHIJACK_DEFAULT "path=/rump,socket=all:nolocal" 649 static bool rumpsockets[PF_MAX]; 650 static const char *rumpprefix; 651 static size_t rumpprefixlen; 652 653 static struct { 654 int pf; 655 const char *name; 656 } socketmap[] = { 657 { PF_LOCAL, "local" }, 658 { PF_INET, "inet" }, 659 #ifdef PF_LINK 660 { PF_LINK, "link" }, 661 #endif 662 #ifdef PF_OROUTE 663 { PF_OROUTE, "oroute" }, 664 #endif 665 { PF_ROUTE, "route" }, 666 { PF_INET6, "inet6" }, 667 #ifdef PF_MPLS 668 { PF_MPLS, "mpls" }, 669 #endif 670 { -1, NULL } 671 }; 672 673 static void 674 sockparser(char *buf) 675 { 676 char *p, *l = NULL; 677 bool value; 678 int i; 679 680 /* if "all" is present, it must be specified first */ 681 if (strncmp(buf, "all", strlen("all")) == 0) { 682 for (i = 0; i < (int)__arraycount(rumpsockets); i++) { 683 rumpsockets[i] = true; 684 } 685 buf += strlen("all"); 686 if (*buf == ':') 687 buf++; 688 } 689 690 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) { 691 value = true; 692 if (strncmp(p, "no", strlen("no")) == 0) { 693 value = false; 694 p += strlen("no"); 695 } 696 697 for (i = 0; socketmap[i].name; i++) { 698 if (strcmp(p, socketmap[i].name) == 0) { 699 rumpsockets[socketmap[i].pf] = value; 700 break; 701 } 702 } 703 if (socketmap[i].name == NULL) { 704 errx(EXIT_FAILURE, "invalid socket specifier %s", p); 705 } 706 } 707 } 708 709 static 
void 710 pathparser(char *buf) 711 { 712 713 /* sanity-check */ 714 if (*buf != '/') 715 errx(EXIT_FAILURE, 716 "hijack path specifier must begin with ``/''"); 717 rumpprefixlen = strlen(buf); 718 if (rumpprefixlen < 2) 719 errx(EXIT_FAILURE, "invalid hijack prefix: %s", buf); 720 if (buf[rumpprefixlen-1] == '/' && strspn(buf, "/") != rumpprefixlen) 721 errx(EXIT_FAILURE, "hijack prefix may end in slash only if " 722 "pure slash, gave %s", buf); 723 724 if ((rumpprefix = strdup(buf)) == NULL) 725 err(EXIT_FAILURE, "strdup"); 726 rumpprefixlen = strlen(rumpprefix); 727 } 728 729 static struct blanket { 730 const char *pfx; 731 size_t len; 732 } *blanket; 733 static int nblanket; 734 735 static void 736 blanketparser(char *buf) 737 { 738 char *p, *l = NULL; 739 int i; 740 741 for (nblanket = 0, p = buf; p; p = strchr(p+1, ':'), nblanket++) 742 continue; 743 744 blanket = malloc(nblanket * sizeof(*blanket)); 745 if (blanket == NULL) 746 err(EXIT_FAILURE, "alloc blanket %d", nblanket); 747 748 for (p = strtok_r(buf, ":", &l), i = 0; p; 749 p = strtok_r(NULL, ":", &l), i++) { 750 blanket[i].pfx = strdup(p); 751 if (blanket[i].pfx == NULL) 752 err(EXIT_FAILURE, "strdup blanket"); 753 blanket[i].len = strlen(p); 754 755 if (blanket[i].len == 0 || *blanket[i].pfx != '/') 756 errx(EXIT_FAILURE, "invalid blanket specifier %s", p); 757 if (*(blanket[i].pfx + blanket[i].len-1) == '/') 758 errx(EXIT_FAILURE, "invalid blanket specifier %s", p); 759 } 760 } 761 762 #define VFSBIT_NFSSVC 0x01 763 #define VFSBIT_GETVFSSTAT 0x02 764 #define VFSBIT_FHCALLS 0x04 765 static unsigned vfsbits; 766 767 static struct { 768 int bit; 769 const char *name; 770 } vfscalls[] = { 771 { VFSBIT_NFSSVC, "nfssvc" }, 772 { VFSBIT_GETVFSSTAT, "getvfsstat" }, 773 { VFSBIT_FHCALLS, "fhcalls" }, 774 { -1, NULL } 775 }; 776 777 static void 778 vfsparser(char *buf) 779 { 780 char *p, *l = NULL; 781 bool turnon; 782 unsigned int fullmask; 783 int i; 784 785 /* build the full mask and sanity-check while 
we're at it */ 786 fullmask = 0; 787 for (i = 0; vfscalls[i].name != NULL; i++) { 788 if (fullmask & vfscalls[i].bit) 789 errx(EXIT_FAILURE, 790 "problem exists between vi and chair"); 791 fullmask |= vfscalls[i].bit; 792 } 793 794 795 /* if "all" is present, it must be specified first */ 796 if (strncmp(buf, "all", strlen("all")) == 0) { 797 vfsbits = fullmask; 798 buf += strlen("all"); 799 if (*buf == ':') 800 buf++; 801 } 802 803 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) { 804 turnon = true; 805 if (strncmp(p, "no", strlen("no")) == 0) { 806 turnon = false; 807 p += strlen("no"); 808 } 809 810 for (i = 0; vfscalls[i].name; i++) { 811 if (strcmp(p, vfscalls[i].name) == 0) { 812 if (turnon) 813 vfsbits |= vfscalls[i].bit; 814 else 815 vfsbits &= ~vfscalls[i].bit; 816 break; 817 } 818 } 819 if (vfscalls[i].name == NULL) { 820 errx(EXIT_FAILURE, "invalid vfscall specifier %s", p); 821 } 822 } 823 } 824 825 static bool rumpsysctl = false; 826 827 static void 828 sysctlparser(char *buf) 829 { 830 831 if (buf == NULL) { 832 rumpsysctl = true; 833 return; 834 } 835 836 if (strcasecmp(buf, "y") == 0 || strcasecmp(buf, "yes") == 0 || 837 strcasecmp(buf, "yep") == 0 || strcasecmp(buf, "tottakai") == 0) { 838 rumpsysctl = true; 839 return; 840 } 841 if (strcasecmp(buf, "n") == 0 || strcasecmp(buf, "no") == 0) { 842 rumpsysctl = false; 843 return; 844 } 845 846 errx(EXIT_FAILURE, "sysctl value should be y(es)/n(o), gave: %s", buf); 847 } 848 849 static bool rumpmodctl = false; 850 851 static void 852 modctlparser(char *buf) 853 { 854 855 if (buf == NULL) { 856 rumpmodctl = true; 857 return; 858 } 859 860 if (strcasecmp(buf, "y") == 0 || strcasecmp(buf, "yes") == 0 || 861 strcasecmp(buf, "yep") == 0 || strcasecmp(buf, "tottakai") == 0) { 862 rumpmodctl = true; 863 return; 864 } 865 if (strcasecmp(buf, "n") == 0 || strcasecmp(buf, "no") == 0) { 866 rumpmodctl = false; 867 return; 868 } 869 870 errx(EXIT_FAILURE, "modctl value should be y(es)/n(o), gave: 
%s", buf); 871 } 872 873 static void 874 fdoffparser(char *buf) 875 { 876 unsigned long fdoff; 877 char *ep; 878 879 if (*buf == '-') { 880 errx(EXIT_FAILURE, "fdoff must not be negative"); 881 } 882 fdoff = strtoul(buf, &ep, 10); 883 if (*ep != '\0') 884 errx(EXIT_FAILURE, "invalid fdoff specifier \"%s\"", buf); 885 if (fdoff >= INT_MAX/2 || fdoff < 3) 886 errx(EXIT_FAILURE, "fdoff out of range"); 887 hijack_fdoff = (int)fdoff; 888 } 889 890 static struct { 891 void (*parsefn)(char *); 892 const char *name; 893 bool needvalues; 894 } hijackparse[] = { 895 { sockparser, "socket", true }, 896 { pathparser, "path", true }, 897 { blanketparser, "blanket", true }, 898 { vfsparser, "vfs", true }, 899 { sysctlparser, "sysctl", false }, 900 { modctlparser, "modctl", false }, 901 { fdoffparser, "fdoff", true }, 902 { NULL, NULL, false }, 903 }; 904 905 static void 906 parsehijack(char *hijack) 907 { 908 char *p, *p2, *l; 909 const char *hijackcopy; 910 bool nop2; 911 int i; 912 913 if ((hijackcopy = strdup(hijack)) == NULL) 914 err(EXIT_FAILURE, "strdup"); 915 916 /* disable everything explicitly */ 917 for (i = 0; i < PF_MAX; i++) 918 rumpsockets[i] = false; 919 920 for (p = strtok_r(hijack, ",", &l); p; p = strtok_r(NULL, ",", &l)) { 921 nop2 = false; 922 p2 = strchr(p, '='); 923 if (!p2) { 924 nop2 = true; 925 p2 = p + strlen(p); 926 } 927 928 for (i = 0; hijackparse[i].parsefn; i++) { 929 if (strncmp(hijackparse[i].name, p, 930 (size_t)(p2-p)) == 0) { 931 if (nop2 && hijackparse[i].needvalues) 932 errx(EXIT_FAILURE, "invalid hijack specifier: %s", 933 hijackcopy); 934 hijackparse[i].parsefn(nop2 ? 
NULL : p2+1); 935 break; 936 } 937 } 938 939 if (hijackparse[i].parsefn == NULL) 940 errx(EXIT_FAILURE, 941 "invalid hijack specifier name in %s", p); 942 } 943 944 } 945 946 static void __attribute__((__constructor__)) 947 rcinit(void) 948 { 949 char buf[1024]; 950 unsigned i, j; 951 952 host_fork = dlsym(RTLD_NEXT, "fork"); 953 host_daemon = dlsym(RTLD_NEXT, "daemon"); 954 if (host_mmap == NULL) 955 host_mmap = dlsym(RTLD_NEXT, "mmap"); 956 957 /* 958 * In theory cannot print anything during lookups because 959 * we might not have the call vector set up. so, the errx() 960 * is a bit of a strech, but it might work. 961 */ 962 963 for (i = 0; i < DUALCALL__NUM; i++) { 964 /* build runtime O(1) access */ 965 for (j = 0; j < __arraycount(syscnames); j++) { 966 if (syscnames[j].scm_callnum == i) 967 break; 968 } 969 970 if (j == __arraycount(syscnames)) 971 errx(EXIT_FAILURE, 972 "rumphijack error: syscall pos %d missing", i); 973 974 syscalls[i].bs_host = dlsym(RTLD_NEXT, 975 syscnames[j].scm_hostname); 976 if (syscalls[i].bs_host == NULL) 977 errx(EXIT_FAILURE, "hostcall %s not found!", 978 syscnames[j].scm_hostname); 979 980 syscalls[i].bs_rump = dlsym(RTLD_NEXT, 981 syscnames[j].scm_rumpname); 982 if (syscalls[i].bs_rump == NULL) 983 errx(EXIT_FAILURE, "rumpcall %s not found!", 984 syscnames[j].scm_rumpname); 985 #if 0 986 fprintf(stderr, "%s %p %s %p\n", 987 syscnames[j].scm_hostname, syscalls[i].bs_host, 988 syscnames[j].scm_rumpname, syscalls[i].bs_rump); 989 #endif 990 } 991 992 if (rumpclient_init() == -1) 993 err(EXIT_FAILURE, "rumpclient init"); 994 995 /* check which syscalls we're supposed to hijack */ 996 if (getenv_r("RUMPHIJACK", buf, sizeof(buf)) == -1) { 997 strcpy(buf, RUMPHIJACK_DEFAULT); 998 } 999 parsehijack(buf); 1000 1001 /* set client persistence level */ 1002 if (getenv_r("RUMPHIJACK_RETRYCONNECT", buf, sizeof(buf)) != -1) { 1003 if (strcmp(buf, "die") == 0) 1004 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_DIE); 1005 else if (strcmp(buf, 
"inftime") == 0) 1006 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME); 1007 else if (strcmp(buf, "once") == 0) 1008 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_ONCE); 1009 else { 1010 time_t timeout; 1011 char *ep; 1012 1013 timeout = (time_t)strtoll(buf, &ep, 10); 1014 if (timeout <= 0 || ep != buf + strlen(buf)) 1015 errx(EXIT_FAILURE, 1016 "RUMPHIJACK_RETRYCONNECT must be " 1017 "keyword or integer, got: %s", buf); 1018 1019 rumpclient_setconnretry(timeout); 1020 } 1021 } 1022 1023 if (getenv_r("RUMPHIJACK__DUP2INFO", buf, sizeof(buf)) == 0) { 1024 if (sscanf(buf, "%u,%u,%u", 1025 &dup2vec[0], &dup2vec[1], &dup2vec[2]) != 3) { 1026 warnx("invalid dup2mask: %s", buf); 1027 memset(dup2vec, 0, sizeof(dup2vec)); 1028 } 1029 unsetenv("RUMPHIJACK__DUP2INFO"); 1030 } 1031 if (getenv_r("RUMPHIJACK__PWDINRUMP", buf, sizeof(buf)) == 0) { 1032 pwdinrump = true; 1033 unsetenv("RUMPHIJACK__PWDINRUMP"); 1034 } 1035 } 1036 1037 static int 1038 fd_rump2host(int fd) 1039 { 1040 1041 if (fd == -1) 1042 return fd; 1043 return fd + hijack_fdoff; 1044 } 1045 1046 static int 1047 fd_rump2host_withdup(int fd) 1048 { 1049 int hfd; 1050 1051 _DIAGASSERT(fd != -1); 1052 hfd = unmapdup2(fd); 1053 if (hfd != -1) { 1054 _DIAGASSERT(hfd <= DUP2HIGH); 1055 return hfd; 1056 } 1057 return fd_rump2host(fd); 1058 } 1059 1060 static int 1061 fd_host2rump(int fd) 1062 { 1063 if (!isdup2d(fd)) 1064 return fd - hijack_fdoff; 1065 else 1066 return mapdup2(fd); 1067 } 1068 1069 static bool 1070 fd_isrump(int fd) 1071 { 1072 1073 return isdup2d(fd) || fd >= hijack_fdoff; 1074 } 1075 1076 #define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= hijack_fdoff) 1077 1078 static enum pathtype 1079 path_isrump(const char *path) 1080 { 1081 size_t plen; 1082 int i; 1083 1084 if (rumpprefix == NULL && nblanket == 0) 1085 return PATH_HOST; 1086 1087 if (*path == '/') { 1088 plen = strlen(path); 1089 if (rumpprefix && plen >= rumpprefixlen) { 1090 if (strncmp(path, rumpprefix, rumpprefixlen) == 0 1091 && 
(plen == rumpprefixlen 1092 || *(path + rumpprefixlen) == '/')) { 1093 return PATH_RUMP; 1094 } 1095 } 1096 for (i = 0; i < nblanket; i++) { 1097 if (strncmp(path, blanket[i].pfx, blanket[i].len) == 0) 1098 return PATH_RUMPBLANKET; 1099 } 1100 1101 return PATH_HOST; 1102 } else { 1103 return pwdinrump ? PATH_RUMP : PATH_HOST; 1104 } 1105 } 1106 1107 static const char *rootpath = "/"; 1108 static const char * 1109 path_host2rump(const char *path) 1110 { 1111 const char *rv; 1112 1113 if (*path == '/') { 1114 rv = path + rumpprefixlen; 1115 if (*rv == '\0') 1116 rv = rootpath; 1117 } else { 1118 rv = path; 1119 } 1120 1121 return rv; 1122 } 1123 1124 static int 1125 dodup(int oldd, int minfd) 1126 { 1127 int (*op_fcntl)(int, int, ...); 1128 int newd; 1129 int isrump; 1130 1131 DPRINTF(("dup -> %d (minfd %d)\n", oldd, minfd)); 1132 if (fd_isrump(oldd)) { 1133 op_fcntl = GETSYSCALL(rump, FCNTL); 1134 oldd = fd_host2rump(oldd); 1135 if (minfd >= hijack_fdoff) 1136 minfd -= hijack_fdoff; 1137 isrump = 1; 1138 } else { 1139 if (minfd >= hijack_fdoff) { 1140 errno = EINVAL; 1141 return -1; 1142 } 1143 op_fcntl = GETSYSCALL(host, FCNTL); 1144 isrump = 0; 1145 } 1146 1147 newd = op_fcntl(oldd, F_DUPFD, minfd); 1148 1149 if (isrump) 1150 newd = fd_rump2host(newd); 1151 DPRINTF(("dup <- %d\n", newd)); 1152 1153 return newd; 1154 } 1155 1156 /* 1157 * Check that host fd value does not exceed fdoffset and if necessary 1158 * dup the file descriptor so that it doesn't collide with the dup2mask. 1159 */ 1160 static int 1161 fd_host2host(int fd) 1162 { 1163 int (*op_fcntl)(int, int, ...) 
= GETSYSCALL(host, FCNTL); 1164 int (*op_close)(int) = GETSYSCALL(host, CLOSE); 1165 int ofd, i; 1166 1167 if (fd >= hijack_fdoff) { 1168 op_close(fd); 1169 errno = ENFILE; 1170 return -1; 1171 } 1172 1173 for (i = 1; isdup2d(fd); i++) { 1174 ofd = fd; 1175 fd = op_fcntl(ofd, F_DUPFD, i); 1176 op_close(ofd); 1177 } 1178 1179 return fd; 1180 } 1181 1182 int 1183 open(const char *path, int flags, ...) 1184 { 1185 int (*op_open)(const char *, int, ...); 1186 bool isrump; 1187 va_list ap; 1188 enum pathtype pt; 1189 int fd, rfd; 1190 1191 DPRINTF(("open -> %s (%s)", path, whichpath(path))); 1192 1193 if ((pt = path_isrump(path)) != PATH_HOST) { 1194 if (pt == PATH_RUMP) 1195 path = path_host2rump(path); 1196 op_open = GETSYSCALL(rump, OPEN); 1197 isrump = true; 1198 } else { 1199 op_open = GETSYSCALL(host, OPEN); 1200 isrump = false; 1201 } 1202 1203 va_start(ap, flags); 1204 fd = op_open(path, flags, va_arg(ap, mode_t)); 1205 va_end(ap); 1206 1207 if (isrump) 1208 rfd = fd_rump2host(fd); 1209 else 1210 rfd = fd_host2host(fd); 1211 1212 DPRINTF((" <- %d/%d (%s)\n", fd, rfd, whichfd(rfd))); 1213 return rfd; 1214 } 1215 1216 int 1217 chdir(const char *path) 1218 { 1219 int (*op_chdir)(const char *); 1220 enum pathtype pt; 1221 int rv; 1222 1223 if ((pt = path_isrump(path)) != PATH_HOST) { 1224 op_chdir = GETSYSCALL(rump, CHDIR); 1225 if (pt == PATH_RUMP) 1226 path = path_host2rump(path); 1227 } else { 1228 op_chdir = GETSYSCALL(host, CHDIR); 1229 } 1230 1231 rv = op_chdir(path); 1232 if (rv == 0) 1233 pwdinrump = pt != PATH_HOST; 1234 1235 return rv; 1236 } 1237 1238 int 1239 fchdir(int fd) 1240 { 1241 int (*op_fchdir)(int); 1242 bool isrump; 1243 int rv; 1244 1245 if (fd_isrump(fd)) { 1246 op_fchdir = GETSYSCALL(rump, FCHDIR); 1247 isrump = true; 1248 fd = fd_host2rump(fd); 1249 } else { 1250 op_fchdir = GETSYSCALL(host, FCHDIR); 1251 isrump = false; 1252 } 1253 1254 rv = op_fchdir(fd); 1255 if (rv == 0) { 1256 pwdinrump = isrump; 1257 } 1258 1259 return rv; 1260 } 1261 
1262 #ifndef __linux__ 1263 int 1264 __getcwd(char *bufp, size_t len) 1265 { 1266 int (*op___getcwd)(char *, size_t); 1267 size_t prefixgap; 1268 bool iamslash; 1269 int rv; 1270 1271 if (pwdinrump && rumpprefix) { 1272 if (rumpprefix[rumpprefixlen-1] == '/') 1273 iamslash = true; 1274 else 1275 iamslash = false; 1276 1277 if (iamslash) 1278 prefixgap = rumpprefixlen - 1; /* ``//+path'' */ 1279 else 1280 prefixgap = rumpprefixlen; /* ``/pfx+/path'' */ 1281 if (len <= prefixgap) { 1282 errno = ERANGE; 1283 return -1; 1284 } 1285 1286 op___getcwd = GETSYSCALL(rump, __GETCWD); 1287 rv = op___getcwd(bufp + prefixgap, len - prefixgap); 1288 if (rv == -1) 1289 return rv; 1290 1291 /* augment the "/" part only for a non-root path */ 1292 memcpy(bufp, rumpprefix, rumpprefixlen); 1293 1294 /* append / only to non-root cwd */ 1295 if (rv != 2) 1296 bufp[prefixgap] = '/'; 1297 1298 /* don't append extra slash in the purely-slash case */ 1299 if (rv == 2 && !iamslash) 1300 bufp[rumpprefixlen] = '\0'; 1301 } else if (pwdinrump) { 1302 /* assume blanket. 
we can't provide a prefix here */ 1303 op___getcwd = GETSYSCALL(rump, __GETCWD); 1304 rv = op___getcwd(bufp, len); 1305 } else { 1306 op___getcwd = GETSYSCALL(host, __GETCWD); 1307 rv = op___getcwd(bufp, len); 1308 } 1309 1310 return rv; 1311 } 1312 #endif 1313 1314 static int 1315 moveish(const char *from, const char *to, 1316 int (*rump_op)(const char *, const char *), 1317 int (*host_op)(const char *, const char *)) 1318 { 1319 int (*op)(const char *, const char *); 1320 enum pathtype ptf, ptt; 1321 1322 if ((ptf = path_isrump(from)) != PATH_HOST) { 1323 if ((ptt = path_isrump(to)) == PATH_HOST) { 1324 errno = EXDEV; 1325 return -1; 1326 } 1327 1328 if (ptf == PATH_RUMP) 1329 from = path_host2rump(from); 1330 if (ptt == PATH_RUMP) 1331 to = path_host2rump(to); 1332 op = rump_op; 1333 } else { 1334 if (path_isrump(to) != PATH_HOST) { 1335 errno = EXDEV; 1336 return -1; 1337 } 1338 1339 op = host_op; 1340 } 1341 1342 return op(from, to); 1343 } 1344 1345 #ifdef __NetBSD__ 1346 int 1347 linkat(int fromfd, const char *from, int tofd, const char *to, int flags) 1348 { 1349 if (fromfd != AT_FDCWD || tofd != AT_FDCWD 1350 || flags != AT_SYMLINK_FOLLOW) 1351 return ENOSYS; 1352 1353 return moveish(from, to, 1354 GETSYSCALL(rump, LINK), GETSYSCALL(host, LINK)); 1355 } 1356 #endif 1357 1358 static long 1359 do_pathconf(const char *path, int name, int link) 1360 { 1361 long (*op_pathconf)(const char *, int); 1362 enum pathtype pt; 1363 1364 if ((pt = path_isrump(path)) != PATH_HOST) { 1365 op_pathconf = link ? 1366 GETSYSCALL(rump, LPATHCONF) : 1367 GETSYSCALL(rump, PATHCONF); 1368 if (pt == PATH_RUMP) 1369 path = path_host2rump(path); 1370 } else { 1371 op_pathconf = link ? 
1372 GETSYSCALL(host, LPATHCONF) : 1373 GETSYSCALL(host, PATHCONF); 1374 } 1375 1376 return op_pathconf(path, name); 1377 } 1378 1379 long 1380 lpathconf(const char *path, int name) 1381 { 1382 return do_pathconf(path, name, 1); 1383 } 1384 1385 long 1386 pathconf(const char *path, int name) 1387 { 1388 return do_pathconf(path, name, 0); 1389 } 1390 1391 int 1392 link(const char *from, const char *to) 1393 { 1394 return moveish(from, to, 1395 GETSYSCALL(rump, LINK), GETSYSCALL(host, LINK)); 1396 } 1397 1398 int 1399 rename(const char *from, const char *to) 1400 { 1401 return moveish(from, to, 1402 GETSYSCALL(rump, RENAME), GETSYSCALL(host, RENAME)); 1403 } 1404 1405 int 1406 REALSOCKET(int domain, int type, int protocol) 1407 { 1408 int (*op_socket)(int, int, int); 1409 int fd, rfd; 1410 bool isrump; 1411 1412 isrump = domain < PF_MAX && rumpsockets[domain]; 1413 1414 if (isrump) 1415 op_socket = GETSYSCALL(rump, SOCKET); 1416 else 1417 op_socket = GETSYSCALL(host, SOCKET); 1418 fd = op_socket(domain, type, protocol); 1419 1420 if (isrump) 1421 rfd = fd_rump2host(fd); 1422 else 1423 rfd = fd_host2host(fd); 1424 DPRINTF(("socket <- %d/%d (%s)\n", fd, rfd, whichfd(rfd))); 1425 1426 return rfd; 1427 } 1428 1429 int 1430 accept(int s, struct sockaddr *addr, socklen_t *addrlen) 1431 { 1432 int (*op_accept)(int, struct sockaddr *, socklen_t *); 1433 int fd, rfd; 1434 bool isrump; 1435 1436 isrump = fd_isrump(s); 1437 1438 DPRINTF(("accept -> %d", s)); 1439 if (isrump) { 1440 op_accept = GETSYSCALL(rump, ACCEPT); 1441 s = fd_host2rump(s); 1442 } else { 1443 op_accept = GETSYSCALL(host, ACCEPT); 1444 } 1445 fd = op_accept(s, addr, addrlen); 1446 if (fd != -1 && isrump) 1447 rfd = fd_rump2host(fd); 1448 else 1449 rfd = fd_host2host(fd); 1450 1451 DPRINTF((" <- %d/%d (%s)\n", fd, rfd, whichfd(rfd))); 1452 1453 return rfd; 1454 } 1455 1456 #ifndef __linux__ 1457 int 1458 paccept(int s, struct sockaddr *addr, socklen_t *addrlen, 1459 const sigset_t * restrict sigmask, int 
flags) 1460 { 1461 int (*op_paccept)(int, struct sockaddr *, socklen_t *, 1462 const sigset_t * restrict, int); 1463 int fd, rfd; 1464 bool isrump; 1465 1466 isrump = fd_isrump(s); 1467 1468 DPRINTF(("paccept -> %d", s)); 1469 if (isrump) { 1470 op_paccept = GETSYSCALL(rump, PACCEPT); 1471 s = fd_host2rump(s); 1472 } else { 1473 op_paccept = GETSYSCALL(host, PACCEPT); 1474 } 1475 fd = op_paccept(s, addr, addrlen, sigmask, flags); 1476 if (fd != -1 && isrump) 1477 rfd = fd_rump2host(fd); 1478 else 1479 rfd = fd_host2host(fd); 1480 1481 DPRINTF((" <- %d/%d (%s)\n", fd, rfd, whichfd(rfd))); 1482 1483 return rfd; 1484 } 1485 #endif 1486 1487 /* 1488 * ioctl() and fcntl() are varargs calls and need special treatment. 1489 */ 1490 1491 /* 1492 * Various [Linux] libc's have various signatures for ioctl so we 1493 * need to handle the discrepancies. On NetBSD, we use the 1494 * one with unsigned long cmd. 1495 */ 1496 int 1497 #ifdef HAVE_IOCTL_CMD_INT 1498 ioctl(int fd, int cmd, ...) 1499 { 1500 int (*op_ioctl)(int, int cmd, ...); 1501 #else 1502 ioctl(int fd, unsigned long cmd, ...) 1503 { 1504 int (*op_ioctl)(int, unsigned long cmd, ...); 1505 #endif 1506 va_list ap; 1507 int rv; 1508 1509 DPRINTF(("ioctl -> %d (%s)\n", fd, whichfd(fd))); 1510 if (fd_isrump(fd)) { 1511 fd = fd_host2rump(fd); 1512 op_ioctl = GETSYSCALL(rump, IOCTL); 1513 } else { 1514 op_ioctl = GETSYSCALL(host, IOCTL); 1515 } 1516 1517 va_start(ap, cmd); 1518 rv = op_ioctl(fd, cmd, va_arg(ap, void *)); 1519 va_end(ap); 1520 DPRINTF(("ioctl <- %d\n", rv)); 1521 return rv; 1522 } 1523 1524 int 1525 fcntl(int fd, int cmd, ...) 
1526 { 1527 int (*op_fcntl)(int, int, ...); 1528 va_list ap; 1529 int rv, minfd; 1530 1531 DPRINTF(("fcntl -> %d (cmd %d)\n", fd, cmd)); 1532 1533 switch (cmd) { 1534 case F_DUPFD_CLOEXEC: /* Ignore CLOEXEC bit for now */ 1535 case F_DUPFD: 1536 va_start(ap, cmd); 1537 minfd = va_arg(ap, int); 1538 va_end(ap); 1539 return dodup(fd, minfd); 1540 1541 #ifdef F_CLOSEM 1542 case F_CLOSEM: { 1543 int maxdup2, i; 1544 1545 /* 1546 * So, if fd < HIJACKOFF, we want to do a host closem. 1547 */ 1548 1549 if (fd < hijack_fdoff) { 1550 int closemfd = fd; 1551 1552 if (rumpclient__closenotify(&closemfd, 1553 RUMPCLIENT_CLOSE_FCLOSEM) == -1) 1554 return -1; 1555 op_fcntl = GETSYSCALL(host, FCNTL); 1556 rv = op_fcntl(closemfd, cmd); 1557 if (rv) 1558 return rv; 1559 } 1560 1561 /* 1562 * Additionally, we want to do a rump closem, but only 1563 * for the file descriptors not dup2'd. 1564 */ 1565 1566 for (i = 0, maxdup2 = -1; i <= DUP2HIGH; i++) { 1567 if (dup2vec[i] & DUP2BIT) { 1568 int val; 1569 1570 val = dup2vec[i] & DUP2FDMASK; 1571 maxdup2 = MAX(val, maxdup2); 1572 } 1573 } 1574 1575 if (fd >= hijack_fdoff) 1576 fd -= hijack_fdoff; 1577 else 1578 fd = 0; 1579 fd = MAX(maxdup2+1, fd); 1580 1581 /* hmm, maybe we should close rump fd's not within dup2mask? */ 1582 return rump_sys_fcntl(fd, F_CLOSEM); 1583 } 1584 #endif /* F_CLOSEM */ 1585 1586 #ifdef F_MAXFD 1587 case F_MAXFD: 1588 /* 1589 * For maxfd, if there's a rump kernel fd, return 1590 * it hostified. Otherwise, return host's MAXFD 1591 * return value. 1592 */ 1593 if ((rv = rump_sys_fcntl(fd, F_MAXFD)) != -1) { 1594 /* 1595 * This might go a little wrong in case 1596 * of dup2 to [012], but I'm not sure if 1597 * there's a justification for tracking 1598 * that info. Consider e.g. 1599 * dup2(rumpfd, 2) followed by rump_sys_open() 1600 * returning 1. We should return 1+HIJACKOFF, 1601 * not 2+HIJACKOFF. However, if [01] is not 1602 * open, the correct return value is 2. 
1603 */ 1604 return fd_rump2host(fd); 1605 } else { 1606 op_fcntl = GETSYSCALL(host, FCNTL); 1607 return op_fcntl(fd, F_MAXFD); 1608 } 1609 /*NOTREACHED*/ 1610 #endif /* F_MAXFD */ 1611 1612 default: 1613 if (fd_isrump(fd)) { 1614 fd = fd_host2rump(fd); 1615 op_fcntl = GETSYSCALL(rump, FCNTL); 1616 } else { 1617 op_fcntl = GETSYSCALL(host, FCNTL); 1618 } 1619 1620 va_start(ap, cmd); 1621 rv = op_fcntl(fd, cmd, va_arg(ap, void *)); 1622 va_end(ap); 1623 return rv; 1624 } 1625 /*NOTREACHED*/ 1626 } 1627 1628 int 1629 close(int fd) 1630 { 1631 int (*op_close)(int); 1632 int rv; 1633 1634 DPRINTF(("close -> %d\n", fd)); 1635 if (fd_isrump(fd)) { 1636 bool undup2 = false; 1637 int ofd; 1638 1639 if (isdup2d(ofd = fd)) { 1640 undup2 = true; 1641 } 1642 1643 fd = fd_host2rump(fd); 1644 if (!undup2 && killdup2alias(fd)) { 1645 return 0; 1646 } 1647 1648 op_close = GETSYSCALL(rump, CLOSE); 1649 rv = op_close(fd); 1650 if (rv == 0 && undup2) { 1651 clrdup2(ofd); 1652 } 1653 } else { 1654 if (rumpclient__closenotify(&fd, RUMPCLIENT_CLOSE_CLOSE) == -1) 1655 return -1; 1656 op_close = GETSYSCALL(host, CLOSE); 1657 rv = op_close(fd); 1658 } 1659 1660 return rv; 1661 } 1662 1663 /* 1664 * write cannot issue a standard debug printf due to recursion 1665 */ 1666 ssize_t 1667 write(int fd, const void *buf, size_t blen) 1668 { 1669 ssize_t (*op_write)(int, const void *, size_t); 1670 1671 if (fd_isrump(fd)) { 1672 fd = fd_host2rump(fd); 1673 op_write = GETSYSCALL(rump, WRITE); 1674 } else { 1675 op_write = GETSYSCALL(host, WRITE); 1676 } 1677 1678 return op_write(fd, buf, blen); 1679 } 1680 1681 /* 1682 * file descriptor passing 1683 * 1684 * we intercept sendmsg and recvmsg to convert file descriptors in 1685 * control messages. an attempt to send a descriptor from a different kernel 1686 * is rejected. 
(ENOTSUP) 1687 */ 1688 1689 static int 1690 _msg_convert_fds(struct msghdr *msg, int (*func)(int), bool dryrun) 1691 { 1692 struct cmsghdr *cmsg; 1693 1694 for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; 1695 cmsg = CMSG_NXTHDR(msg, cmsg)) { 1696 if (cmsg->cmsg_level == SOL_SOCKET && 1697 cmsg->cmsg_type == SCM_RIGHTS) { 1698 int *fdp = (void *)CMSG_DATA(cmsg); 1699 const size_t size = 1700 cmsg->cmsg_len - __CMSG_ALIGN(sizeof(*cmsg)); 1701 const int nfds = (int)(size / sizeof(int)); 1702 const int * const efdp = fdp + nfds; 1703 1704 while (fdp < efdp) { 1705 const int newval = func(*fdp); 1706 1707 if (newval < 0) { 1708 return ENOTSUP; 1709 } 1710 if (!dryrun) 1711 *fdp = newval; 1712 fdp++; 1713 } 1714 } 1715 } 1716 return 0; 1717 } 1718 1719 static int 1720 msg_convert_fds(struct msghdr *msg, int (*func)(int)) 1721 { 1722 1723 return _msg_convert_fds(msg, func, false); 1724 } 1725 1726 static int 1727 msg_check_fds(struct msghdr *msg, int (*func)(int)) 1728 { 1729 1730 return _msg_convert_fds(msg, func, true); 1731 } 1732 1733 ssize_t 1734 recvmsg(int fd, struct msghdr *msg, int flags) 1735 { 1736 ssize_t (*op_recvmsg)(int, struct msghdr *, int); 1737 ssize_t ret; 1738 const bool isrump = fd_isrump(fd); 1739 1740 DPRINTF(("%s -> %d (%s)\n", __func__, fd, whichfd(fd))); 1741 if (isrump) { 1742 fd = fd_host2rump(fd); 1743 op_recvmsg = GETSYSCALL(rump, RECVMSG); 1744 } else { 1745 op_recvmsg = GETSYSCALL(host, RECVMSG); 1746 } 1747 ret = op_recvmsg(fd, msg, flags); 1748 if (ret == -1) { 1749 return ret; 1750 } 1751 /* 1752 * convert descriptors in the message. 
1753 */ 1754 if (isrump) { 1755 msg_convert_fds(msg, fd_rump2host); 1756 } else { 1757 msg_convert_fds(msg, fd_host2host); 1758 } 1759 return ret; 1760 } 1761 1762 ssize_t 1763 recv(int fd, void *buf, size_t len, int flags) 1764 { 1765 1766 return recvfrom(fd, buf, len, flags, NULL, NULL); 1767 } 1768 1769 ssize_t 1770 send(int fd, const void *buf, size_t len, int flags) 1771 { 1772 1773 return sendto(fd, buf, len, flags, NULL, 0); 1774 } 1775 1776 static int 1777 fd_check_rump(int fd) 1778 { 1779 1780 return fd_isrump(fd) ? 0 : -1; 1781 } 1782 1783 static int 1784 fd_check_host(int fd) 1785 { 1786 1787 return !fd_isrump(fd) ? 0 : -1; 1788 } 1789 1790 ssize_t 1791 sendmsg(int fd, const struct msghdr *msg, int flags) 1792 { 1793 ssize_t (*op_sendmsg)(int, const struct msghdr *, int); 1794 const bool isrump = fd_isrump(fd); 1795 int error; 1796 1797 DPRINTF(("%s -> %d (%s)\n", __func__, fd, whichfd(fd))); 1798 /* 1799 * reject descriptors from a different kernel. 1800 */ 1801 error = msg_check_fds(__UNCONST(msg), 1802 isrump ? fd_check_rump: fd_check_host); 1803 if (error != 0) { 1804 errno = error; 1805 return -1; 1806 } 1807 /* 1808 * convert descriptors in the message to raw values. 1809 */ 1810 if (isrump) { 1811 fd = fd_host2rump(fd); 1812 /* 1813 * XXX we directly modify the given message assuming: 1814 * - cmsg is writable (typically on caller's stack) 1815 * - caller don't care cmsg's contents after calling sendmsg. 1816 * (thus no need to restore values) 1817 * 1818 * it's safer to copy and modify instead. 1819 */ 1820 msg_convert_fds(__UNCONST(msg), fd_host2rump); 1821 op_sendmsg = GETSYSCALL(rump, SENDMSG); 1822 } else { 1823 op_sendmsg = GETSYSCALL(host, SENDMSG); 1824 } 1825 return op_sendmsg(fd, msg, flags); 1826 } 1827 1828 /* 1829 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since 1830 * many programs do that. dup2 of a rump kernel fd to another value 1831 * not >= fdoff is an error. 
1832 * 1833 * Note: cannot rump2host newd, because it is often hardcoded. 1834 */ 1835 int 1836 dup2(int oldd, int newd) 1837 { 1838 int (*host_dup2)(int, int); 1839 int rv; 1840 1841 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd)); 1842 1843 if (fd_isrump(oldd)) { 1844 int (*op_close)(int) = GETSYSCALL(host, CLOSE); 1845 1846 /* only allow fd 0-2 for cross-kernel dup */ 1847 if (!(newd >= 0 && newd <= 2 && !fd_isrump(newd))) { 1848 errno = EBADF; 1849 return -1; 1850 } 1851 1852 /* regular dup2? */ 1853 if (fd_isrump(newd)) { 1854 newd = fd_host2rump(newd); 1855 rv = rump_sys_dup2(oldd, newd); 1856 return fd_rump2host(rv); 1857 } 1858 1859 /* 1860 * dup2 rump => host? just establish an 1861 * entry in the mapping table. 1862 */ 1863 op_close(newd); 1864 setdup2(newd, fd_host2rump(oldd)); 1865 rv = 0; 1866 } else { 1867 host_dup2 = syscalls[DUALCALL_DUP2].bs_host; 1868 if (rumpclient__closenotify(&newd, RUMPCLIENT_CLOSE_DUP2) == -1) 1869 return -1; 1870 rv = host_dup2(oldd, newd); 1871 } 1872 1873 return rv; 1874 } 1875 1876 int 1877 dup(int oldd) 1878 { 1879 1880 return dodup(oldd, 0); 1881 } 1882 1883 pid_t 1884 fork(void) 1885 { 1886 pid_t rv; 1887 1888 DPRINTF(("fork\n")); 1889 1890 rv = rumpclient__dofork(host_fork); 1891 1892 DPRINTF(("fork returns %d\n", rv)); 1893 return rv; 1894 } 1895 #ifdef VFORK 1896 /* we do not have the luxury of not requiring a stackframe */ 1897 #define __strong_alias_macro(m, f) __strong_alias(m, f) 1898 __strong_alias_macro(VFORK,fork) 1899 #endif 1900 1901 int 1902 daemon(int nochdir, int noclose) 1903 { 1904 struct rumpclient_fork *rf; 1905 1906 if ((rf = rumpclient_prefork()) == NULL) 1907 return -1; 1908 1909 if (host_daemon(nochdir, noclose) == -1) 1910 return -1; 1911 1912 if (rumpclient_fork_init(rf) == -1) 1913 return -1; 1914 1915 return 0; 1916 } 1917 1918 int 1919 execve(const char *path, char *const argv[], char *const envp[]) 1920 { 1921 char buf[128]; 1922 char *dup2str; 1923 const char *pwdinrumpstr; 1924 char 
**newenv; 1925 size_t nelem; 1926 int rv, sverrno; 1927 int bonus = 2, i = 0; 1928 1929 snprintf(buf, sizeof(buf), "RUMPHIJACK__DUP2INFO=%u,%u,%u", 1930 dup2vec[0], dup2vec[1], dup2vec[2]); 1931 dup2str = strdup(buf); 1932 if (dup2str == NULL) { 1933 errno = ENOMEM; 1934 return -1; 1935 } 1936 1937 if (pwdinrump) { 1938 pwdinrumpstr = "RUMPHIJACK__PWDINRUMP=true"; 1939 bonus++; 1940 } else { 1941 pwdinrumpstr = NULL; 1942 } 1943 1944 for (nelem = 0; envp && envp[nelem]; nelem++) 1945 continue; 1946 newenv = malloc(sizeof(*newenv) * (nelem+bonus)); 1947 if (newenv == NULL) { 1948 free(dup2str); 1949 errno = ENOMEM; 1950 return -1; 1951 } 1952 memcpy(newenv, envp, nelem*sizeof(*newenv)); 1953 newenv[nelem+i] = dup2str; 1954 i++; 1955 1956 if (pwdinrumpstr) { 1957 newenv[nelem+i] = __UNCONST(pwdinrumpstr); 1958 i++; 1959 } 1960 newenv[nelem+i] = NULL; 1961 _DIAGASSERT(i < bonus); 1962 1963 rv = rumpclient_exec(path, argv, newenv); 1964 1965 _DIAGASSERT(rv != 0); 1966 sverrno = errno; 1967 free(newenv); 1968 free(dup2str); 1969 errno = sverrno; 1970 return rv; 1971 } 1972 1973 /* 1974 * select is done by calling poll. 1975 */ 1976 int 1977 REALPSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 1978 const struct timespec *timeout, const sigset_t *sigmask) 1979 { 1980 struct pollfd *pfds; 1981 nfds_t realnfds; 1982 int i, j; 1983 int rv, incr; 1984 1985 DPRINTF(("pselect %d %p %p %p %p %p\n", nfds, 1986 readfds, writefds, exceptfds, timeout, sigmask)); 1987 1988 /* 1989 * Well, first we must scan the fds to figure out how many 1990 * fds there really are. This is because up to and including 1991 * nb5 poll() silently refuses nfds > process_maxopen_fds. 1992 * Seems to be fixed in current, thank the maker. 1993 * god damn cluster...bomb. 
1994 */ 1995 1996 for (i = 0, realnfds = 0; i < nfds; i++) { 1997 if (readfds && FD_ISSET(i, readfds)) { 1998 realnfds++; 1999 continue; 2000 } 2001 if (writefds && FD_ISSET(i, writefds)) { 2002 realnfds++; 2003 continue; 2004 } 2005 if (exceptfds && FD_ISSET(i, exceptfds)) { 2006 realnfds++; 2007 continue; 2008 } 2009 } 2010 2011 if (realnfds) { 2012 pfds = calloc(realnfds, sizeof(*pfds)); 2013 if (!pfds) 2014 return -1; 2015 } else { 2016 pfds = NULL; 2017 } 2018 2019 for (i = 0, j = 0; i < nfds; i++) { 2020 incr = 0; 2021 if (readfds && FD_ISSET(i, readfds)) { 2022 pfds[j].fd = i; 2023 pfds[j].events |= POLLIN; 2024 incr=1; 2025 } 2026 if (writefds && FD_ISSET(i, writefds)) { 2027 pfds[j].fd = i; 2028 pfds[j].events |= POLLOUT; 2029 incr=1; 2030 } 2031 if (exceptfds && FD_ISSET(i, exceptfds)) { 2032 pfds[j].fd = i; 2033 pfds[j].events |= POLLHUP|POLLERR; 2034 incr=1; 2035 } 2036 if (incr) 2037 j++; 2038 } 2039 assert(j == (int)realnfds); 2040 2041 rv = REALPOLLTS(pfds, realnfds, timeout, sigmask); 2042 /* 2043 * "If select() returns with an error the descriptor sets 2044 * will be unmodified" 2045 */ 2046 if (rv < 0) 2047 goto out; 2048 2049 /* 2050 * zero out results (can't use FD_ZERO for the 2051 * obvious select-me-not reason). whee. 2052 * 2053 * We do this here since some software ignores the return 2054 * value of select, and hence if the timeout expires, it may 2055 * assume all input descriptors have activity. 2056 */ 2057 for (i = 0; i < nfds; i++) { 2058 if (readfds) 2059 FD_CLR(i, readfds); 2060 if (writefds) 2061 FD_CLR(i, writefds); 2062 if (exceptfds) 2063 FD_CLR(i, exceptfds); 2064 } 2065 if (rv == 0) 2066 goto out; 2067 2068 /* 2069 * We have >0 fds with activity. Harvest the results. 
2070 */ 2071 for (i = 0; i < (int)realnfds; i++) { 2072 if (readfds) { 2073 if (pfds[i].revents & POLLIN) { 2074 FD_SET(pfds[i].fd, readfds); 2075 } 2076 } 2077 if (writefds) { 2078 if (pfds[i].revents & POLLOUT) { 2079 FD_SET(pfds[i].fd, writefds); 2080 } 2081 } 2082 if (exceptfds) { 2083 if (pfds[i].revents & (POLLHUP|POLLERR)) { 2084 FD_SET(pfds[i].fd, exceptfds); 2085 } 2086 } 2087 } 2088 2089 out: 2090 free(pfds); 2091 return rv; 2092 } 2093 2094 int 2095 REALSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 2096 struct timeval *timeout) 2097 { 2098 struct timespec ts, *tsp = NULL; 2099 if (timeout) { 2100 TIMEVAL_TO_TIMESPEC(timeout, &ts); 2101 tsp = &ts; 2102 } 2103 return REALPSELECT(nfds, readfds, writefds, exceptfds, tsp, NULL); 2104 } 2105 2106 2107 static void 2108 checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall) 2109 { 2110 nfds_t i; 2111 2112 for (i = 0; i < nfds; i++) { 2113 if (fds[i].fd == -1) 2114 continue; 2115 2116 if (fd_isrump(fds[i].fd)) 2117 (*rumpcall)++; 2118 else 2119 (*hostcall)++; 2120 } 2121 } 2122 2123 static void 2124 adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int)) 2125 { 2126 nfds_t i; 2127 2128 for (i = 0; i < nfds; i++) { 2129 fds[i].fd = fdadj(fds[i].fd); 2130 } 2131 } 2132 2133 /* 2134 * poll is easy as long as the call comes in the fds only in one 2135 * kernel. otherwise its quite tricky... 
2136 */ 2137 struct pollarg { 2138 struct pollfd *pfds; 2139 nfds_t nfds; 2140 const struct timespec *ts; 2141 const sigset_t *sigmask; 2142 int pipefd; 2143 int errnum; 2144 }; 2145 2146 static void * 2147 hostpoll(void *arg) 2148 { 2149 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *, 2150 const sigset_t *); 2151 struct pollarg *parg = arg; 2152 intptr_t rv; 2153 2154 op_pollts = GETSYSCALL(host, POLLTS); 2155 rv = op_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask); 2156 if (rv == -1) 2157 parg->errnum = errno; 2158 rump_sys_write(parg->pipefd, &rv, sizeof(rv)); 2159 2160 return (void *)rv; 2161 } 2162 2163 int 2164 REALPOLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts, 2165 const sigset_t *sigmask) 2166 { 2167 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *, 2168 const sigset_t *); 2169 int (*host_close)(int); 2170 int hostcall = 0, rumpcall = 0; 2171 pthread_t pt; 2172 nfds_t i; 2173 int rv; 2174 2175 DPRINTF(("poll %p %d %p %p\n", fds, (int)nfds, ts, sigmask)); 2176 checkpoll(fds, nfds, &hostcall, &rumpcall); 2177 2178 if (hostcall && rumpcall) { 2179 struct pollfd *pfd_host = NULL, *pfd_rump = NULL; 2180 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1}; 2181 struct pollarg parg; 2182 void *trv_val; 2183 int sverrno = 0, rv_rump, rv_host, errno_rump, errno_host; 2184 2185 /* 2186 * ok, this is where it gets tricky. We must support 2187 * this since it's a very common operation in certain 2188 * types of software (telnet, netcat, etc). We allocate 2189 * two vectors and run two poll commands in separate 2190 * threads. Whichever returns first "wins" and the 2191 * other kernel's fds won't show activity. 
2192 */ 2193 rv = -1; 2194 2195 /* allocate full vector for O(n) joining after call */ 2196 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1)); 2197 if (!pfd_host) 2198 goto out; 2199 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1)); 2200 if (!pfd_rump) { 2201 goto out; 2202 } 2203 2204 /* 2205 * then, open two pipes, one for notifications 2206 * to each kernel. 2207 * 2208 * At least the rump pipe should probably be 2209 * cached, along with the helper threads. This 2210 * should give a microbenchmark improvement (haven't 2211 * experienced a macro-level problem yet, though). 2212 */ 2213 if ((rv = rump_sys_pipe(rpipe)) == -1) { 2214 sverrno = errno; 2215 } 2216 if (rv == 0 && (rv = pipe(hpipe)) == -1) { 2217 sverrno = errno; 2218 } 2219 2220 /* split vectors (or signal errors) */ 2221 for (i = 0; i < nfds; i++) { 2222 int fd; 2223 2224 fds[i].revents = 0; 2225 if (fds[i].fd == -1) { 2226 pfd_host[i].fd = -1; 2227 pfd_rump[i].fd = -1; 2228 } else if (fd_isrump(fds[i].fd)) { 2229 pfd_host[i].fd = -1; 2230 fd = fd_host2rump(fds[i].fd); 2231 if (fd == rpipe[0] || fd == rpipe[1]) { 2232 fds[i].revents = POLLNVAL; 2233 if (rv != -1) 2234 rv++; 2235 } 2236 pfd_rump[i].fd = fd; 2237 pfd_rump[i].events = fds[i].events; 2238 } else { 2239 pfd_rump[i].fd = -1; 2240 fd = fds[i].fd; 2241 if (fd == hpipe[0] || fd == hpipe[1]) { 2242 fds[i].revents = POLLNVAL; 2243 if (rv != -1) 2244 rv++; 2245 } 2246 pfd_host[i].fd = fd; 2247 pfd_host[i].events = fds[i].events; 2248 } 2249 pfd_rump[i].revents = pfd_host[i].revents = 0; 2250 } 2251 if (rv) { 2252 goto out; 2253 } 2254 2255 pfd_host[nfds].fd = hpipe[0]; 2256 pfd_host[nfds].events = POLLIN; 2257 pfd_rump[nfds].fd = rpipe[0]; 2258 pfd_rump[nfds].events = POLLIN; 2259 2260 /* 2261 * then, create a thread to do host part and meanwhile 2262 * do rump kernel part right here 2263 */ 2264 2265 parg.pfds = pfd_host; 2266 parg.nfds = nfds+1; 2267 parg.ts = ts; 2268 parg.sigmask = sigmask; 2269 parg.pipefd = rpipe[1]; 2270 pthread_create(&pt, 
NULL, hostpoll, &parg); 2271 2272 op_pollts = GETSYSCALL(rump, POLLTS); 2273 rv_rump = op_pollts(pfd_rump, nfds+1, ts, NULL); 2274 errno_rump = errno; 2275 write(hpipe[1], &rv, sizeof(rv)); 2276 pthread_join(pt, &trv_val); 2277 rv_host = (int)(intptr_t)trv_val; 2278 errno_host = parg.errnum; 2279 2280 /* strip cross-thread notification from real results */ 2281 if (rv_host > 0 && pfd_host[nfds].revents & POLLIN) { 2282 rv_host--; 2283 } 2284 if (rv_rump > 0 && pfd_rump[nfds].revents & POLLIN) { 2285 rv_rump--; 2286 } 2287 2288 /* then merge the results into what's reported to the caller */ 2289 if (rv_rump > 0 || rv_host > 0) { 2290 /* SUCCESS */ 2291 2292 rv = 0; 2293 if (rv_rump > 0) { 2294 for (i = 0; i < nfds; i++) { 2295 if (pfd_rump[i].fd != -1) 2296 fds[i].revents 2297 = pfd_rump[i].revents; 2298 } 2299 rv += rv_rump; 2300 } 2301 if (rv_host > 0) { 2302 for (i = 0; i < nfds; i++) { 2303 if (pfd_host[i].fd != -1) 2304 fds[i].revents 2305 = pfd_host[i].revents; 2306 } 2307 rv += rv_host; 2308 } 2309 assert(rv > 0); 2310 sverrno = 0; 2311 } else if (rv_rump == -1 || rv_host == -1) { 2312 /* ERROR */ 2313 2314 /* just pick one kernel at "random" */ 2315 rv = -1; 2316 if (rv_host == -1) { 2317 sverrno = errno_host; 2318 } else if (rv_rump == -1) { 2319 sverrno = errno_rump; 2320 } 2321 } else { 2322 /* TIMEOUT */ 2323 2324 rv = 0; 2325 assert(rv_rump == 0 && rv_host == 0); 2326 } 2327 2328 out: 2329 host_close = GETSYSCALL(host, CLOSE); 2330 if (rpipe[0] != -1) 2331 rump_sys_close(rpipe[0]); 2332 if (rpipe[1] != -1) 2333 rump_sys_close(rpipe[1]); 2334 if (hpipe[0] != -1) 2335 host_close(hpipe[0]); 2336 if (hpipe[1] != -1) 2337 host_close(hpipe[1]); 2338 free(pfd_host); 2339 free(pfd_rump); 2340 errno = sverrno; 2341 } else { 2342 if (hostcall) { 2343 op_pollts = GETSYSCALL(host, POLLTS); 2344 } else { 2345 op_pollts = GETSYSCALL(rump, POLLTS); 2346 adjustpoll(fds, nfds, fd_host2rump); 2347 } 2348 2349 rv = op_pollts(fds, nfds, ts, sigmask); 2350 if (rumpcall) 
2351 adjustpoll(fds, nfds, fd_rump2host_withdup); 2352 } 2353 2354 return rv; 2355 } 2356 2357 int 2358 poll(struct pollfd *fds, nfds_t nfds, int timeout) 2359 { 2360 struct timespec ts; 2361 struct timespec *tsp = NULL; 2362 2363 if (timeout != INFTIM) { 2364 ts.tv_sec = timeout / 1000; 2365 ts.tv_nsec = (timeout % 1000) * 1000*1000; 2366 2367 tsp = &ts; 2368 } 2369 2370 return REALPOLLTS(fds, nfds, tsp, NULL); 2371 } 2372 2373 #ifdef HAVE_KQUEUE 2374 int 2375 REALKEVENT(int kq, const struct kevent *changelist, size_t nchanges, 2376 struct kevent *eventlist, size_t nevents, 2377 const struct timespec *timeout) 2378 { 2379 int (*op_kevent)(int, const struct kevent *, size_t, 2380 struct kevent *, size_t, const struct timespec *); 2381 const struct kevent *ev; 2382 size_t i; 2383 2384 /* 2385 * Check that we don't attempt to kevent rump kernel fd's. 2386 * That needs similar treatment to select/poll, but is slightly 2387 * trickier since we need to manage to different kq descriptors. 2388 * (TODO, in case you're wondering). 2389 */ 2390 for (i = 0; i < nchanges; i++) { 2391 ev = &changelist[i]; 2392 if (ev->filter == EVFILT_READ || ev->filter == EVFILT_WRITE || 2393 ev->filter == EVFILT_VNODE) { 2394 if (fd_isrump((int)ev->ident)) { 2395 errno = ENOTSUP; 2396 return -1; 2397 } 2398 } 2399 } 2400 2401 op_kevent = GETSYSCALL(host, KEVENT); 2402 return op_kevent(kq, changelist, nchanges, eventlist, nevents, timeout); 2403 } 2404 #endif /* HAVE_KQUEUE */ 2405 2406 /* 2407 * mmapping from a rump kernel is not supported, so disallow it. 
2408 */ 2409 void * 2410 mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) 2411 { 2412 2413 if (flags & MAP_FILE && fd_isrump(fd)) { 2414 errno = ENOSYS; 2415 return MAP_FAILED; 2416 } 2417 if (__predict_false(host_mmap == NULL)) { 2418 host_mmap = rumphijack_dlsym(RTLD_NEXT, "mmap"); 2419 } 2420 return host_mmap(addr, len, prot, flags, fd, offset); 2421 } 2422 2423 #ifdef __NetBSD__ 2424 /* 2425 * these go to one or the other on a per-process configuration 2426 */ 2427 int __sysctl(const int *, unsigned int, void *, size_t *, const void *, size_t); 2428 int 2429 __sysctl(const int *name, unsigned int namelen, void *old, size_t *oldlenp, 2430 const void *new, size_t newlen) 2431 { 2432 int (*op___sysctl)(const int *, unsigned int, void *, size_t *, 2433 const void *, size_t); 2434 2435 if (rumpsysctl) { 2436 op___sysctl = GETSYSCALL(rump, __SYSCTL); 2437 } else { 2438 op___sysctl = GETSYSCALL(host, __SYSCTL); 2439 /* we haven't inited yet */ 2440 if (__predict_false(op___sysctl == NULL)) { 2441 op___sysctl = rumphijack_dlsym(RTLD_NEXT, "__sysctl"); 2442 } 2443 } 2444 2445 return op___sysctl(name, namelen, old, oldlenp, new, newlen); 2446 } 2447 int modctl(int, void *); 2448 int 2449 modctl(int operation, void *argp) 2450 { 2451 int (*op_modctl)(int operation, void *argp); 2452 2453 if (rumpmodctl) { 2454 op_modctl = GETSYSCALL(rump, MODCTL); 2455 } else { 2456 op_modctl = GETSYSCALL(host, MODCTL); 2457 } 2458 2459 return op_modctl(operation, argp); 2460 } 2461 #endif 2462 2463 /* 2464 * Rest are std type calls. 
 */

/*
 * Every invocation below has the same shape:
 *
 *	MACRO(return type, function name, DUALCALL_* index,
 *	    (full parameter list), (parameter types), (call arguments))
 *
 * NOTE(review): ATCALL, FDCALL and PATHCALL are defined outside this
 * part of the file; presumably each expands to a wrapper function that
 * picks the rump kernel or host implementation -- see the macro
 * definitions for the exact dispatch rule.
 */

#ifdef HAVE_UTIMENSAT
ATCALL(int, utimensat, DUALCALL_UTIMENSAT, \
	(int fd, const char *path, const struct timespec t[2], int f), \
	(int, const char *, const struct timespec [2], int),
	(fd, path, t, f))
#endif

/* socket calls */

FDCALL(int, bind, DUALCALL_BIND, \
	(int fd, const struct sockaddr *name, socklen_t namelen), \
	(int, const struct sockaddr *, socklen_t), \
	(fd, name, namelen))

FDCALL(int, connect, DUALCALL_CONNECT, \
	(int fd, const struct sockaddr *name, socklen_t namelen), \
	(int, const struct sockaddr *, socklen_t), \
	(fd, name, namelen))

FDCALL(int, getpeername, DUALCALL_GETPEERNAME, \
	(int fd, struct sockaddr *name, socklen_t *namelen), \
	(int, struct sockaddr *, socklen_t *), \
	(fd, name, namelen))

FDCALL(int, getsockname, DUALCALL_GETSOCKNAME, \
	(int fd, struct sockaddr *name, socklen_t *namelen), \
	(int, struct sockaddr *, socklen_t *), \
	(fd, name, namelen))

FDCALL(int, listen, DUALCALL_LISTEN, \
	(int fd, int backlog), \
	(int, int), \
	(fd, backlog))

FDCALL(ssize_t, recvfrom, DUALCALL_RECVFROM, \
	(int fd, void *buf, size_t len, int flags, \
	    struct sockaddr *from, socklen_t *fromlen), \
	(int, void *, size_t, int, struct sockaddr *, socklen_t *), \
	(fd, buf, len, flags, from, fromlen))

FDCALL(ssize_t, sendto, DUALCALL_SENDTO, \
	(int fd, const void *buf, size_t len, int flags, \
	    const struct sockaddr *to, socklen_t tolen), \
	(int, const void *, size_t, int, \
	    const struct sockaddr *, socklen_t), \
	(fd, buf, len, flags, to, tolen))

FDCALL(int, getsockopt, DUALCALL_GETSOCKOPT, \
	(int fd, int level, int optn, void *optval, socklen_t *optlen), \
	(int, int, int, void *, socklen_t *), \
	(fd, level, optn, optval, optlen))

FDCALL(int, setsockopt, DUALCALL_SETSOCKOPT, \
	(int fd, int level, int optn, \
	    const void *optval, socklen_t optlen), \
	(int, int, int, const void *, socklen_t), \
	(fd, level, optn, optval, optlen))

FDCALL(int, shutdown, DUALCALL_SHUTDOWN, \
	(int fd, int how), \
	(int, int), \
	(fd, how))

/* descriptor I/O */

FDCALL(ssize_t, REALREAD, DUALCALL_READ, \
	(int fd, void *buf, size_t buflen), \
	(int, void *, size_t), \
	(fd, buf, buflen))

#ifdef __linux__
/* __read_chk is made an alias of the read wrapper on Linux */
ssize_t __read_chk(int, void *, size_t)
    __attribute__((alias("read")));
#endif

FDCALL(ssize_t, readv, DUALCALL_READV, \
	(int fd, const struct iovec *iov, int iovcnt), \
	(int, const struct iovec *, int), \
	(fd, iov, iovcnt))

FDCALL(ssize_t, REALPREAD, DUALCALL_PREAD, \
	(int fd, void *buf, size_t nbytes, off_t offset), \
	(int, void *, size_t, off_t), \
	(fd, buf, nbytes, offset))

FDCALL(ssize_t, preadv, DUALCALL_PREADV, \
	(int fd, const struct iovec *iov, int iovcnt, off_t offset), \
	(int, const struct iovec *, int, off_t), \
	(fd, iov, iovcnt, offset))

FDCALL(ssize_t, writev, DUALCALL_WRITEV, \
	(int fd, const struct iovec *iov, int iovcnt), \
	(int, const struct iovec *, int), \
	(fd, iov, iovcnt))

FDCALL(ssize_t, REALPWRITE, DUALCALL_PWRITE, \
	(int fd, const void *buf, size_t nbytes, off_t offset), \
	(int, const void *, size_t, off_t), \
	(fd, buf, nbytes, offset))

FDCALL(ssize_t, pwritev, DUALCALL_PWRITEV, \
	(int fd, const struct iovec *iov, int iovcnt, off_t offset), \
	(int, const struct iovec *, int, off_t), \
	(fd, iov, iovcnt, offset))

/* descriptor metadata */

#ifndef __linux__
FDCALL(int, REALFSTAT, DUALCALL_FSTAT, \
	(int fd, struct stat *sb), \
	(int, struct stat *), \
	(fd, sb))
#endif

#ifdef __NetBSD__
FDCALL(int, REALFSTATVFS1, DUALCALL_FSTATVFS1, \
	(int fd, struct statvfs *buf, int flags), \
	(int, struct statvfs *, int), \
	(fd, buf, flags))
#endif 2581 2582 FDCALL(off_t, lseek, DUALCALL_LSEEK, \ 2583 (int fd, off_t offset, int whence), \ 2584 (int, off_t, int), \ 2585 (fd, offset, whence)) 2586 #ifdef LSEEK_ALIAS 2587 __strong_alias(LSEEK_ALIAS,lseek) 2588 #endif 2589 2590 #ifndef __linux__ 2591 FDCALL(int, REALGETDENTS, DUALCALL_GETDENTS, \ 2592 (int fd, char *buf, size_t nbytes), \ 2593 (int, char *, size_t), \ 2594 (fd, buf, nbytes)) 2595 #endif 2596 2597 FDCALL(int, fchown, DUALCALL_FCHOWN, \ 2598 (int fd, uid_t owner, gid_t group), \ 2599 (int, uid_t, gid_t), \ 2600 (fd, owner, group)) 2601 2602 FDCALL(int, fchmod, DUALCALL_FCHMOD, \ 2603 (int fd, mode_t mode), \ 2604 (int, mode_t), \ 2605 (fd, mode)) 2606 2607 FDCALL(int, ftruncate, DUALCALL_FTRUNCATE, \ 2608 (int fd, off_t length), \ 2609 (int, off_t), \ 2610 (fd, length)) 2611 2612 FDCALL(int, fsync, DUALCALL_FSYNC, \ 2613 (int fd), \ 2614 (int), \ 2615 (fd)) 2616 2617 #ifdef HAVE_FSYNC_RANGE 2618 FDCALL(int, fsync_range, DUALCALL_FSYNC_RANGE, \ 2619 (int fd, int how, off_t start, off_t length), \ 2620 (int, int, off_t, off_t), \ 2621 (fd, how, start, length)) 2622 #endif 2623 2624 FDCALL(int, futimes, DUALCALL_FUTIMES, \ 2625 (int fd, const struct timeval *tv), \ 2626 (int, const struct timeval *), \ 2627 (fd, tv)) 2628 2629 FDCALL(int, futimens, DUALCALL_FUTIMENS, \ 2630 (int fd, const struct timespec *ts), \ 2631 (int, const struct timespec *), \ 2632 (fd, ts)) 2633 2634 #ifdef HAVE_CHFLAGS 2635 FDCALL(int, fchflags, DUALCALL_FCHFLAGS, \ 2636 (int fd, u_long flags), \ 2637 (int, u_long), \ 2638 (fd, flags)) 2639 #endif 2640 2641 /* 2642 * path-based selectors 2643 */ 2644 2645 #ifndef __linux__ 2646 PATHCALL(int, REALSTAT, DUALCALL_STAT, \ 2647 (const char *path, struct stat *sb), \ 2648 (const char *, struct stat *), \ 2649 (path, sb)) 2650 2651 PATHCALL(int, REALLSTAT, DUALCALL_LSTAT, \ 2652 (const char *path, struct stat *sb), \ 2653 (const char *, struct stat *), \ 2654 (path, sb)) 2655 #endif 2656 2657 PATHCALL(int, chown, 
DUALCALL_CHOWN, \ 2658 (const char *path, uid_t owner, gid_t group), \ 2659 (const char *, uid_t, gid_t), \ 2660 (path, owner, group)) 2661 2662 PATHCALL(int, lchown, DUALCALL_LCHOWN, \ 2663 (const char *path, uid_t owner, gid_t group), \ 2664 (const char *, uid_t, gid_t), \ 2665 (path, owner, group)) 2666 2667 PATHCALL(int, chmod, DUALCALL_CHMOD, \ 2668 (const char *path, mode_t mode), \ 2669 (const char *, mode_t), \ 2670 (path, mode)) 2671 2672 PATHCALL(int, lchmod, DUALCALL_LCHMOD, \ 2673 (const char *path, mode_t mode), \ 2674 (const char *, mode_t), \ 2675 (path, mode)) 2676 2677 #ifdef __NetBSD__ 2678 PATHCALL(int, REALSTATVFS1, DUALCALL_STATVFS1, \ 2679 (const char *path, struct statvfs *buf, int flags), \ 2680 (const char *, struct statvfs *, int), \ 2681 (path, buf, flags)) 2682 #endif 2683 2684 PATHCALL(int, unlink, DUALCALL_UNLINK, \ 2685 (const char *path), \ 2686 (const char *), \ 2687 (path)) 2688 2689 PATHCALL(int, symlink, DUALCALL_SYMLINK, \ 2690 (const char *target, const char *path), \ 2691 (const char *, const char *), \ 2692 (target, path)) 2693 2694 /* 2695 * readlink() can be called from malloc which can be called 2696 * from dlsym() during init 2697 */ 2698 ssize_t 2699 readlink(const char *path, char *buf, size_t bufsiz) 2700 { 2701 int (*op_readlink)(const char *, char *, size_t); 2702 enum pathtype pt; 2703 2704 if ((pt = path_isrump(path)) != PATH_HOST) { 2705 op_readlink = GETSYSCALL(rump, READLINK); 2706 if (pt == PATH_RUMP) 2707 path = path_host2rump(path); 2708 } else { 2709 op_readlink = GETSYSCALL(host, READLINK); 2710 } 2711 2712 if (__predict_false(op_readlink == NULL)) { 2713 errno = ENOENT; 2714 return -1; 2715 } 2716 2717 return op_readlink(path, buf, bufsiz); 2718 } 2719 2720 PATHCALL(int, mkdir, DUALCALL_MKDIR, \ 2721 (const char *path, mode_t mode), \ 2722 (const char *, mode_t), \ 2723 (path, mode)) 2724 2725 PATHCALL(int, rmdir, DUALCALL_RMDIR, \ 2726 (const char *path), \ 2727 (const char *), \ 2728 (path)) 2729 2730 
PATHCALL(int, utimes, DUALCALL_UTIMES, \ 2731 (const char *path, const struct timeval *tv), \ 2732 (const char *, const struct timeval *), \ 2733 (path, tv)) 2734 2735 PATHCALL(int, lutimes, DUALCALL_LUTIMES, \ 2736 (const char *path, const struct timeval *tv), \ 2737 (const char *, const struct timeval *), \ 2738 (path, tv)) 2739 2740 #ifdef HAVE_CHFLAGS 2741 PATHCALL(int, chflags, DUALCALL_CHFLAGS, \ 2742 (const char *path, u_long flags), \ 2743 (const char *, u_long), \ 2744 (path, flags)) 2745 2746 PATHCALL(int, lchflags, DUALCALL_LCHFLAGS, \ 2747 (const char *path, u_long flags), \ 2748 (const char *, u_long), \ 2749 (path, flags)) 2750 #endif /* HAVE_CHFLAGS */ 2751 2752 PATHCALL(int, truncate, DUALCALL_TRUNCATE, \ 2753 (const char *path, off_t length), \ 2754 (const char *, off_t), \ 2755 (path, length)) 2756 2757 PATHCALL(int, access, DUALCALL_ACCESS, \ 2758 (const char *path, int mode), \ 2759 (const char *, int), \ 2760 (path, mode)) 2761 2762 #ifndef __linux__ 2763 PATHCALL(int, REALMKNOD, DUALCALL_MKNOD, \ 2764 (const char *path, mode_t mode, dev_t dev), \ 2765 (const char *, mode_t, dev_t), \ 2766 (path, mode, dev)) 2767 #endif 2768 2769 /* 2770 * Note: with mount the decisive parameter is the mount 2771 * destination directory. This is because we don't really know 2772 * about the "source" directory in a generic call (and besides, 2773 * it might not even exist, cf. nfs). 
2774 */ 2775 #ifdef __NetBSD__ 2776 PATHCALL(int, REALMOUNT, DUALCALL_MOUNT, \ 2777 (const char *type, const char *path, int flags, \ 2778 void *data, size_t dlen), \ 2779 (const char *, const char *, int, void *, size_t), \ 2780 (type, path, flags, data, dlen)) 2781 2782 PATHCALL(int, unmount, DUALCALL_UNMOUNT, \ 2783 (const char *path, int flags), \ 2784 (const char *, int), \ 2785 (path, flags)) 2786 #endif /* __NetBSD__ */ 2787 2788 #ifdef HAVE___QUOTACTL 2789 PATHCALL(int, __quotactl, DUALCALL_QUOTACTL, \ 2790 (const char *path, struct quotactl_args *args), \ 2791 (const char *, struct quotactl_args *), \ 2792 (path, args)) 2793 #endif /* HAVE___QUOTACTL */ 2794 2795 #ifdef __NetBSD__ 2796 PATHCALL(int, REALGETFH, DUALCALL_GETFH, \ 2797 (const char *path, void *fhp, size_t *fh_size), \ 2798 (const char *, void *, size_t *), \ 2799 (path, fhp, fh_size)) 2800 #endif 2801 2802 /* 2803 * These act different on a per-process vfs configuration 2804 */ 2805 2806 #ifdef __NetBSD__ 2807 VFSCALL(VFSBIT_GETVFSSTAT, int, REALGETVFSSTAT, DUALCALL_GETVFSSTAT, \ 2808 (struct statvfs *buf, size_t buflen, int flags), \ 2809 (struct statvfs *, size_t, int), \ 2810 (buf, buflen, flags)) 2811 #endif 2812 2813 #ifdef __NetBSD__ 2814 VFSCALL(VFSBIT_FHCALLS, int, REALFHOPEN, DUALCALL_FHOPEN, \ 2815 (const void *fhp, size_t fh_size, int flags), \ 2816 (const char *, size_t, int), \ 2817 (fhp, fh_size, flags)) 2818 2819 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTAT, DUALCALL_FHSTAT, \ 2820 (const void *fhp, size_t fh_size, struct stat *sb), \ 2821 (const char *, size_t, struct stat *), \ 2822 (fhp, fh_size, sb)) 2823 2824 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTATVFS1, DUALCALL_FHSTATVFS1, \ 2825 (const void *fhp, size_t fh_size, struct statvfs *sb, int flgs),\ 2826 (const char *, size_t, struct statvfs *, int), \ 2827 (fhp, fh_size, sb, flgs)) 2828 #endif 2829 2830 2831 #ifdef __NetBSD__ 2832 2833 /* finally, put nfssvc here. 
"keep the namespace clean" */ 2834 #include <nfs/rpcv2.h> 2835 #include <nfs/nfs.h> 2836 2837 int 2838 nfssvc(int flags, void *argstructp) 2839 { 2840 int (*op_nfssvc)(int, void *); 2841 2842 if (vfsbits & VFSBIT_NFSSVC){ 2843 struct nfsd_args *nfsdargs; 2844 2845 /* massage the socket descriptor if necessary */ 2846 if (flags == NFSSVC_ADDSOCK) { 2847 nfsdargs = argstructp; 2848 nfsdargs->sock = fd_host2rump(nfsdargs->sock); 2849 } 2850 op_nfssvc = GETSYSCALL(rump, NFSSVC); 2851 } else 2852 op_nfssvc = GETSYSCALL(host, NFSSVC); 2853 2854 return op_nfssvc(flags, argstructp); 2855 } 2856 #endif /* __NetBSD__ */ 2857