1 /* $NetBSD: rumpuser.c,v 1.14 2011/01/22 14:22:10 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 #if !defined(lint) 30 __RCSID("$NetBSD: rumpuser.c,v 1.14 2011/01/22 14:22:10 pooka Exp $"); 31 #endif /* !lint */ 32 33 /* thank the maker for this */ 34 #ifdef __linux__ 35 #define _XOPEN_SOURCE 500 36 #define _BSD_SOURCE 37 #define _FILE_OFFSET_BITS 64 38 #include <features.h> 39 #endif 40 41 #include <sys/param.h> 42 #include <sys/event.h> 43 #include <sys/ioctl.h> 44 #include <sys/mman.h> 45 #include <sys/uio.h> 46 47 #ifdef __NetBSD__ 48 #include <sys/disklabel.h> 49 #include <sys/sysctl.h> 50 #endif 51 52 #include <assert.h> 53 #include <err.h> 54 #include <errno.h> 55 #include <fcntl.h> 56 #include <poll.h> 57 #include <signal.h> 58 #include <stdarg.h> 59 #include <stdint.h> 60 #include <stdio.h> 61 #include <stdlib.h> 62 #include <string.h> 63 #include <time.h> 64 #include <unistd.h> 65 66 #include <rump/rumpuser.h> 67 68 #include "rumpuser_int.h" 69 70 int 71 rumpuser_getversion() 72 { 73 74 return RUMPUSER_VERSION; 75 } 76 77 int 78 rumpuser_getfileinfo(const char *path, uint64_t *sizep, int *ftp, int *error) 79 { 80 struct stat sb; 81 uint64_t size; 82 int needsdev = 0, rv = 0, ft; 83 int fd = -1; 84 85 if (stat(path, &sb) == -1) { 86 seterror(errno); 87 return -1; 88 } 89 90 switch (sb.st_mode & S_IFMT) { 91 case S_IFDIR: 92 ft = RUMPUSER_FT_DIR; 93 break; 94 case S_IFREG: 95 ft = RUMPUSER_FT_REG; 96 break; 97 case S_IFBLK: 98 ft = RUMPUSER_FT_BLK; 99 needsdev = 1; 100 break; 101 case S_IFCHR: 102 ft = RUMPUSER_FT_CHR; 103 needsdev = 1; 104 break; 105 default: 106 ft = RUMPUSER_FT_OTHER; 107 break; 108 } 109 110 if (!needsdev) { 111 size = sb.st_size; 112 } else if (sizep) { 113 /* 114 * Welcome to the jungle. Of course querying the kernel 115 * for a device partition size is supposed to be far from 116 * trivial. On NetBSD we use ioctl. On $other platform 117 * we have a problem. We try "the lseek trick" and just 118 * fail if that fails. Platform specific code can later 119 * be written here if appropriate. 120 * 121 * On NetBSD we hope and pray that for block devices nobody 122 * else is holding them open, because otherwise the kernel 123 * will not permit us to open it. Thankfully, this is 124 * usually called only in bootstrap and then we can 125 * forget about it. 126 */ 127 #ifndef __NetBSD__ 128 off_t off; 129 130 fd = open(path, O_RDONLY); 131 if (fd == -1) { 132 seterror(errno); 133 rv = -1; 134 goto out; 135 } 136 137 off = lseek(fd, 0, SEEK_END); 138 if (off != 0) { 139 size = off; 140 goto out; 141 } 142 fprintf(stderr, "error: device size query not implemented on " 143 "this platform\n"); 144 seterror(EOPNOTSUPP); 145 rv = -1; 146 goto out; 147 #else 148 struct disklabel lab; 149 struct partition *parta; 150 151 fd = open(path, O_RDONLY); 152 if (fd == -1) { 153 seterror(errno); 154 rv = -1; 155 goto out; 156 } 157 158 if (ioctl(fd, DIOCGDINFO, &lab) == -1) { 159 seterror(errno); 160 rv = -1; 161 goto out; 162 } 163 164 parta = &lab.d_partitions[DISKPART(sb.st_rdev)]; 165 size = (uint64_t)lab.d_secsize * parta->p_size; 166 #endif /* __NetBSD__ */ 167 } 168 169 out: 170 if (rv == 0 && sizep) 171 *sizep = size; 172 if (rv == 0 && ftp) 173 *ftp = ft; 174 if (fd != -1) 175 close(fd); 176 177 return rv; 178 } 179 180 int 181 rumpuser_nanosleep(uint64_t *sec, uint64_t *nsec, int *error) 182 { 183 struct timespec rqt, rmt; 184 int rv; 185 186 /*LINTED*/ 187 rqt.tv_sec = *sec; 188 /*LINTED*/ 189 rqt.tv_nsec = *nsec; 190 191 KLOCK_WRAP(rv = nanosleep(&rqt, &rmt)); 192 if (rv == -1) 193 seterror(errno); 194 195 *sec = rmt.tv_sec; 196 *nsec = rmt.tv_nsec; 197 198 return rv; 199 } 200 201 void * 202 rumpuser_malloc(size_t howmuch, int alignment) 203 { 204 void *mem; 205 int rv; 206 207 if (alignment == 0) 208 alignment = sizeof(void *); 209 210 rv = posix_memalign(&mem, (size_t)alignment, howmuch); 211 if (__predict_false(rv != 0)) { 212 if (rv == EINVAL) { 213 printf("rumpuser_malloc: invalid alignment %d\n", 214 alignment); 215 abort(); 216 } 217 mem = NULL; 218 } 219 220 return mem; 221 } 222 223 void * 224 rumpuser_realloc(void *ptr, size_t howmuch) 225 { 226 227 return realloc(ptr, howmuch); 228 } 229 230 void 231 rumpuser_free(void *ptr) 232 { 233 234 free(ptr); 235 } 236 237 void * 238 rumpuser_anonmmap(void *prefaddr, size_t size, int alignbit, 239 int exec, int *error) 240 { 241 void *rv; 242 int prot; 243 244 prot = PROT_READ|PROT_WRITE; 245 if (exec) 246 prot |= PROT_EXEC; 247 /* XXX: MAP_ALIGNED() is not portable */ 248 rv = mmap(prefaddr, size, prot, 249 MAP_ANON | MAP_ALIGNED(alignbit), -1, 0); 250 if (rv == MAP_FAILED) { 251 seterror(errno); 252 return NULL; 253 } 254 return rv; 255 } 256 257 void 258 rumpuser_unmap(void *addr, size_t len) 259 { 260 int rv; 261 262 rv = munmap(addr, len); 263 assert(rv == 0); 264 } 265 266 void * 267 rumpuser_filemmap(int fd, off_t offset, size_t len, int flags, int *error) 268 { 269 void *rv; 270 int mmflags, prot; 271 272 if (flags & RUMPUSER_FILEMMAP_TRUNCATE) 273 ftruncate(fd, offset + len); 274 275 mmflags = MAP_FILE; 276 if (flags & RUMPUSER_FILEMMAP_SHARED) 277 mmflags |= MAP_SHARED; 278 else 279 mmflags |= MAP_PRIVATE; 280 281 prot = 0; 282 if (flags & RUMPUSER_FILEMMAP_READ) 283 prot |= PROT_READ; 284 if (flags & RUMPUSER_FILEMMAP_WRITE) 285 prot |= PROT_WRITE; 286 287 rv = mmap(NULL, len, PROT_READ|PROT_WRITE, mmflags, fd, offset); 288 if (rv == MAP_FAILED) { 289 seterror(errno); 290 return NULL; 291 } 292 293 seterror(0); 294 return rv; 295 } 296 297 int 298 rumpuser_memsync(void *addr, size_t len, int *error) 299 { 300 301 DOCALL_KLOCK(int, (msync(addr, len, MS_SYNC))); 302 } 303 304 int 305 rumpuser_open(const char *path, int flags, int *error) 306 { 307 308 DOCALL(int, (open(path, flags, 0644))); 309 } 310 311 int 312 rumpuser_ioctl(int fd, u_long cmd, void *data, int *error) 313 { 314 315 DOCALL_KLOCK(int, (ioctl(fd, cmd, data))); 316 } 317 318 int 319 rumpuser_close(int fd, int *error) 320 { 321 322 DOCALL(int, close(fd)); 323 } 324 325 int 326 rumpuser_fsync(int fd, int *error) 327 { 328 329 DOCALL_KLOCK(int, fsync(fd)); 330 } 331 332 ssize_t 333 rumpuser_read(int fd, void *data, size_t size, int *error) 334 { 335 ssize_t rv; 336 337 KLOCK_WRAP(rv = read(fd, data, size)); 338 if (rv == -1) 339 seterror(errno); 340 341 return rv; 342 } 343 344 ssize_t 345 rumpuser_pread(int fd, void *data, size_t size, off_t offset, int *error) 346 { 347 ssize_t rv; 348 349 KLOCK_WRAP(rv = pread(fd, data, size, offset)); 350 if (rv == -1) 351 seterror(errno); 352 353 return rv; 354 } 355 356 void 357 rumpuser_read_bio(int fd, void *data, size_t size, off_t offset, 358 rump_biodone_fn biodone, void *biodonecookie) 359 { 360 ssize_t rv; 361 int error = 0; 362 363 rv = rumpuser_pread(fd, data, size, offset, &error); 364 /* check against <0 instead of ==-1 to get typing below right */ 365 if (rv < 0) 366 rv = 0; 367 368 /* LINTED: see above */ 369 biodone(biodonecookie, rv, error); 370 } 371 372 ssize_t 373 rumpuser_write(int fd, const void *data, size_t size, int *error) 374 { 375 ssize_t rv; 376 377 KLOCK_WRAP(rv = write(fd, data, size)); 378 if (rv == -1) 379 seterror(errno); 380 381 return rv; 382 } 383 384 ssize_t 385 rumpuser_pwrite(int fd, const void *data, size_t size, off_t offset, int *error) 386 { 387 ssize_t rv; 388 389 KLOCK_WRAP(rv = pwrite(fd, data, size, offset)); 390 if (rv == -1) 391 seterror(errno); 392 393 return rv; 394 } 395 396 void 397 rumpuser_write_bio(int fd, const void *data, size_t size, off_t offset, 398 rump_biodone_fn biodone, void *biodonecookie) 399 { 400 ssize_t rv; 401 int error = 0; 402 403 rv = rumpuser_pwrite(fd, data, size, offset, &error); 404 /* check against <0 instead of ==-1 to get typing below right */ 405 if (rv < 0) 406 rv = 0; 407 408 /* LINTED: see above */ 409 biodone(biodonecookie, rv, error); 410 } 411 412 ssize_t 413 rumpuser_readv(int fd, const struct rumpuser_iovec *riov, int iovcnt, 414 int *error) 415 { 416 struct iovec *iovp; 417 ssize_t rv; 418 int i; 419 420 iovp = malloc(iovcnt * sizeof(struct iovec)); 421 if (iovp == NULL) { 422 seterror(ENOMEM); 423 return -1; 424 } 425 for (i = 0; i < iovcnt; i++) { 426 iovp[i].iov_base = riov[i].iov_base; 427 /*LINTED*/ 428 iovp[i].iov_len = riov[i].iov_len; 429 } 430 431 KLOCK_WRAP(rv = readv(fd, iovp, iovcnt)); 432 if (rv == -1) 433 seterror(errno); 434 free(iovp); 435 436 return rv; 437 } 438 439 ssize_t 440 rumpuser_writev(int fd, const struct rumpuser_iovec *riov, int iovcnt, 441 int *error) 442 { 443 struct iovec *iovp; 444 ssize_t rv; 445 int i; 446 447 iovp = malloc(iovcnt * sizeof(struct iovec)); 448 if (iovp == NULL) { 449 seterror(ENOMEM); 450 return -1; 451 } 452 for (i = 0; i < iovcnt; i++) { 453 iovp[i].iov_base = riov[i].iov_base; 454 /*LINTED*/ 455 iovp[i].iov_len = riov[i].iov_len; 456 } 457 458 KLOCK_WRAP(rv = writev(fd, iovp, iovcnt)); 459 if (rv == -1) 460 seterror(errno); 461 free(iovp); 462 463 return rv; 464 } 465 466 int 467 rumpuser_gettime(uint64_t *sec, uint64_t *nsec, int *error) 468 { 469 struct timeval tv; 470 int rv; 471 472 rv = gettimeofday(&tv, NULL); 473 if (rv == -1) { 474 seterror(errno); 475 return rv; 476 } 477 478 *sec = tv.tv_sec; 479 *nsec = tv.tv_usec * 1000; 480 481 return 0; 482 } 483 484 int 485 rumpuser_getenv(const char *name, char *buf, size_t blen, int *error) 486 { 487 488 DOCALL(int, getenv_r(name, buf, blen)); 489 } 490 491 int 492 rumpuser_gethostname(char *name, size_t namelen, int *error) 493 { 494 char tmp[MAXHOSTNAMELEN]; 495 496 if (gethostname(tmp, sizeof(tmp)) == -1) { 497 snprintf(name, namelen, "rump-%05d.rumpdomain", getpid()); 498 } else { 499 snprintf(name, namelen, "rump-%05d.%s.rumpdomain", 500 getpid(), tmp); 501 } 502 503 *error = 0; 504 return 0; 505 } 506 507 int 508 rumpuser_poll(struct pollfd *fds, int nfds, int timeout, int *error) 509 { 510 511 DOCALL_KLOCK(int, (poll(fds, (nfds_t)nfds, timeout))); 512 } 513 514 int 515 rumpuser_putchar(int c, int *error) 516 { 517 518 DOCALL(int, (putchar(c))); 519 } 520 521 void 522 rumpuser_exit(int rv) 523 { 524 525 if (rv == RUMPUSER_PANIC) 526 abort(); 527 else 528 exit(rv); 529 } 530 531 void 532 rumpuser_seterrno(int error) 533 { 534 535 errno = error; 536 } 537 538 int 539 rumpuser_writewatchfile_setup(int kq, int fd, intptr_t opaque, int *error) 540 { 541 struct kevent kev; 542 543 if (kq == -1) { 544 kq = kqueue(); 545 if (kq == -1) { 546 seterror(errno); 547 return -1; 548 } 549 } 550 551 EV_SET(&kev, fd, EVFILT_VNODE, EV_ADD|EV_ENABLE|EV_CLEAR, 552 NOTE_WRITE, 0, opaque); 553 if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1) { 554 seterror(errno); 555 return -1; 556 } 557 558 return kq; 559 } 560 561 int 562 rumpuser_writewatchfile_wait(int kq, intptr_t *opaque, int *error) 563 { 564 struct kevent kev; 565 int rv; 566 567 again: 568 KLOCK_WRAP(rv = kevent(kq, NULL, 0, &kev, 1, NULL)); 569 if (rv == -1) { 570 if (errno == EINTR) 571 goto again; 572 seterror(errno); 573 return -1; 574 } 575 576 if (opaque) 577 *opaque = kev.udata; 578 return rv; 579 } 580 581 /* 582 * This is meant for safe debugging prints from the kernel. 583 */ 584 int 585 rumpuser_dprintf(const char *format, ...) 586 { 587 va_list ap; 588 int rv; 589 590 va_start(ap, format); 591 rv = vfprintf(stderr, format, ap); 592 va_end(ap); 593 594 return rv; 595 } 596 597 int 598 rumpuser_kill(int64_t pid, int sig, int *error) 599 { 600 601 #ifdef __NetBSD__ 602 if (pid == RUMPUSER_PID_SELF) { 603 DOCALL(int, raise(sig)); 604 } else { 605 DOCALL(int, kill((pid_t)pid, sig)); 606 } 607 #else 608 /* XXXfixme: signal numbers may not match on non-NetBSD */ 609 seterror(EOPNOTSUPP); 610 return -1; 611 #endif 612 } 613 614 int 615 rumpuser_getnhostcpu(void) 616 { 617 int ncpu; 618 size_t sz = sizeof(ncpu); 619 620 #ifdef __NetBSD__ 621 if (sysctlbyname("hw.ncpu", &ncpu, &sz, NULL, 0) == -1) 622 return 1; 623 return ncpu; 624 #else 625 return 1; 626 #endif 627 } 628