1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * Copyright (c) 2009 Red Hat, Inc. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "net/tap.h" 27 28 #include "config-host.h" 29 30 #include <signal.h> 31 #include <sys/ioctl.h> 32 #include <sys/stat.h> 33 #include <sys/wait.h> 34 #include <net/if.h> 35 36 #include "net.h" 37 #include "sysemu.h" 38 #include "qemu-char.h" 39 #include "qemu-common.h" 40 41 #ifdef __linux__ 42 #include "net/tap-linux.h" 43 #endif 44 45 #if !defined(_AIX) 46 47 /* Maximum GSO packet size (64k) plus plenty of room for 48 * the ethernet and virtio_net headers 49 */ 50 #define TAP_BUFSIZE (4096 + 65536) 51 52 typedef struct TAPState { 53 VLANClientState *vc; 54 int fd; 55 char down_script[1024]; 56 char down_script_arg[128]; 57 uint8_t buf[TAP_BUFSIZE]; 58 unsigned int read_poll : 1; 59 unsigned int write_poll : 1; 60 unsigned int has_vnet_hdr : 1; 61 unsigned int using_vnet_hdr : 1; 62 unsigned int has_ufo: 1; 63 } TAPState; 64 65 static int launch_script(const char *setup_script, const char *ifname, int fd); 66 67 static int tap_can_send(void *opaque); 68 static void tap_send(void *opaque); 69 static void tap_writable(void *opaque); 70 71 static void tap_update_fd_handler(TAPState *s) 72 { 73 qemu_set_fd_handler2(s->fd, 74 s->read_poll ? tap_can_send : NULL, 75 s->read_poll ? tap_send : NULL, 76 s->write_poll ? tap_writable : NULL, 77 s); 78 } 79 80 static void tap_read_poll(TAPState *s, int enable) 81 { 82 s->read_poll = !!enable; 83 tap_update_fd_handler(s); 84 } 85 86 static void tap_write_poll(TAPState *s, int enable) 87 { 88 s->write_poll = !!enable; 89 tap_update_fd_handler(s); 90 } 91 92 static void tap_writable(void *opaque) 93 { 94 TAPState *s = opaque; 95 96 tap_write_poll(s, 0); 97 98 qemu_flush_queued_packets(s->vc); 99 } 100 101 static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt) 102 { 103 ssize_t len; 104 105 do { 106 len = writev(s->fd, iov, iovcnt); 107 } while (len == -1 && errno == EINTR); 108 109 if (len == -1 && errno == EAGAIN) { 110 tap_write_poll(s, 1); 111 return 0; 112 } 113 114 return len; 115 } 116 117 static ssize_t tap_receive_iov(VLANClientState *vc, const struct iovec *iov, 118 int iovcnt) 119 { 120 TAPState *s = vc->opaque; 121 const struct iovec *iovp = iov; 122 struct iovec iov_copy[iovcnt + 1]; 123 struct virtio_net_hdr hdr = { 0, }; 124 125 if (s->has_vnet_hdr && !s->using_vnet_hdr) { 126 iov_copy[0].iov_base = &hdr; 127 iov_copy[0].iov_len = sizeof(hdr); 128 memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov)); 129 iovp = iov_copy; 130 iovcnt++; 131 } 132 133 return tap_write_packet(s, iovp, iovcnt); 134 } 135 136 static ssize_t tap_receive_raw(VLANClientState *vc, const uint8_t *buf, size_t size) 137 { 138 TAPState *s = vc->opaque; 139 struct iovec iov[2]; 140 int iovcnt = 0; 141 struct virtio_net_hdr hdr = { 0, }; 142 143 if (s->has_vnet_hdr) { 144 iov[iovcnt].iov_base = &hdr; 145 iov[iovcnt].iov_len = sizeof(hdr); 146 iovcnt++; 147 } 148 149 iov[iovcnt].iov_base = (char *)buf; 150 iov[iovcnt].iov_len = size; 151 iovcnt++; 152 153 return tap_write_packet(s, iov, iovcnt); 154 } 155 156 static ssize_t tap_receive(VLANClientState *vc, const uint8_t *buf, size_t size) 157 { 158 TAPState *s = vc->opaque; 159 struct iovec iov[1]; 160 161 if (s->has_vnet_hdr && !s->using_vnet_hdr) { 162 return tap_receive_raw(vc, buf, size); 163 } 164 165 iov[0].iov_base = (char *)buf; 166 iov[0].iov_len = size; 167 168 return tap_write_packet(s, iov, 1); 169 } 170 171 static int tap_can_send(void *opaque) 172 { 173 TAPState *s = opaque; 174 175 return qemu_can_send_packet(s->vc); 176 } 177 178 #ifndef __sun__ 179 ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen) 180 { 181 return read(tapfd, buf, maxlen); 182 } 183 #endif 184 185 static void tap_send_completed(VLANClientState *vc, ssize_t len) 186 { 187 TAPState *s = vc->opaque; 188 tap_read_poll(s, 1); 189 } 190 191 static void tap_send(void *opaque) 192 { 193 TAPState *s = opaque; 194 int size; 195 196 do { 197 uint8_t *buf = s->buf; 198 199 size = tap_read_packet(s->fd, s->buf, sizeof(s->buf)); 200 if (size <= 0) { 201 break; 202 } 203 204 if (s->has_vnet_hdr && !s->using_vnet_hdr) { 205 buf += sizeof(struct virtio_net_hdr); 206 size -= sizeof(struct virtio_net_hdr); 207 } 208 209 size = qemu_send_packet_async(s->vc, buf, size, tap_send_completed); 210 if (size == 0) { 211 tap_read_poll(s, 0); 212 } 213 } while (size > 0); 214 } 215 216 /* sndbuf should be set to a value lower than the tx queue 217 * capacity of any destination network interface. 218 * Ethernet NICs generally have txqueuelen=1000, so 1Mb is 219 * a good default, given a 1500 byte MTU. 220 */ 221 #define TAP_DEFAULT_SNDBUF 1024*1024 222 223 static int tap_set_sndbuf(TAPState *s, QemuOpts *opts) 224 { 225 int sndbuf; 226 227 sndbuf = qemu_opt_get_size(opts, "sndbuf", TAP_DEFAULT_SNDBUF); 228 if (!sndbuf) { 229 sndbuf = INT_MAX; 230 } 231 232 if (ioctl(s->fd, TUNSETSNDBUF, &sndbuf) == -1 && qemu_opt_get(opts, "sndbuf")) { 233 qemu_error("TUNSETSNDBUF ioctl failed: %s\n", strerror(errno)); 234 return -1; 235 } 236 return 0; 237 } 238 239 int tap_has_ufo(VLANClientState *vc) 240 { 241 TAPState *s = vc->opaque; 242 243 assert(vc->type == NET_CLIENT_TYPE_TAP); 244 245 return s->has_ufo; 246 } 247 248 int tap_has_vnet_hdr(VLANClientState *vc) 249 { 250 TAPState *s = vc->opaque; 251 252 assert(vc->type == NET_CLIENT_TYPE_TAP); 253 254 return s->has_vnet_hdr; 255 } 256 257 void tap_using_vnet_hdr(VLANClientState *vc, int using_vnet_hdr) 258 { 259 TAPState *s = vc->opaque; 260 261 using_vnet_hdr = using_vnet_hdr != 0; 262 263 assert(vc->type == NET_CLIENT_TYPE_TAP); 264 assert(s->has_vnet_hdr == using_vnet_hdr); 265 266 s->using_vnet_hdr = using_vnet_hdr; 267 } 268 269 static int tap_probe_vnet_hdr(int fd) 270 { 271 struct ifreq ifr; 272 273 if (ioctl(fd, TUNGETIFF, &ifr) != 0) { 274 qemu_error("TUNGETIFF ioctl() failed: %s\n", strerror(errno)); 275 return 0; 276 } 277 278 return ifr.ifr_flags & IFF_VNET_HDR; 279 } 280 281 void tap_set_offload(VLANClientState *vc, int csum, int tso4, 282 int tso6, int ecn, int ufo) 283 { 284 TAPState *s = vc->opaque; 285 unsigned int offload = 0; 286 287 if (csum) { 288 offload |= TUN_F_CSUM; 289 if (tso4) 290 offload |= TUN_F_TSO4; 291 if (tso6) 292 offload |= TUN_F_TSO6; 293 if ((tso4 || tso6) && ecn) 294 offload |= TUN_F_TSO_ECN; 295 if (ufo) 296 offload |= TUN_F_UFO; 297 } 298 299 if (ioctl(s->fd, TUNSETOFFLOAD, offload) != 0) { 300 offload &= ~TUN_F_UFO; 301 if (ioctl(s->fd, TUNSETOFFLOAD, offload) != 0) { 302 fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n", 303 strerror(errno)); 304 } 305 } 306 } 307 308 static void tap_cleanup(VLANClientState *vc) 309 { 310 TAPState *s = vc->opaque; 311 312 qemu_purge_queued_packets(vc); 313 314 if (s->down_script[0]) 315 launch_script(s->down_script, s->down_script_arg, s->fd); 316 317 tap_read_poll(s, 0); 318 tap_write_poll(s, 0); 319 close(s->fd); 320 qemu_free(s); 321 } 322 323 /* fd support */ 324 325 static TAPState *net_tap_fd_init(VLANState *vlan, 326 const char *model, 327 const char *name, 328 int fd, 329 int vnet_hdr) 330 { 331 TAPState *s; 332 unsigned int offload; 333 334 s = qemu_mallocz(sizeof(TAPState)); 335 s->fd = fd; 336 s->has_vnet_hdr = vnet_hdr != 0; 337 s->using_vnet_hdr = 0; 338 s->vc = qemu_new_vlan_client(NET_CLIENT_TYPE_TAP, 339 vlan, NULL, model, name, NULL, 340 tap_receive, tap_receive_raw, 341 tap_receive_iov, tap_cleanup, s); 342 s->has_ufo = 0; 343 /* Check if tap supports UFO */ 344 offload = TUN_F_CSUM | TUN_F_UFO; 345 if (ioctl(s->fd, TUNSETOFFLOAD, offload) == 0) 346 s->has_ufo = 1; 347 tap_set_offload(s->vc, 0, 0, 0, 0, 0); 348 tap_read_poll(s, 1); 349 return s; 350 } 351 352 #ifdef _AIX 353 int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required) 354 { 355 fprintf (stderr, "no tap on AIX\n"); 356 return -1; 357 } 358 #else 359 int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required) 360 { 361 struct ifreq ifr; 362 int fd, ret; 363 364 TFR(fd = open("/dev/net/tun", O_RDWR)); 365 if (fd < 0) { 366 fprintf(stderr, "warning: could not open /dev/net/tun: no virtual network emulation\n"); 367 return -1; 368 } 369 memset(&ifr, 0, sizeof(ifr)); 370 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 371 372 if (*vnet_hdr) { 373 unsigned int features; 374 375 if (ioctl(fd, TUNGETFEATURES, &features) == 0 && 376 features & IFF_VNET_HDR) { 377 *vnet_hdr = 1; 378 ifr.ifr_flags |= IFF_VNET_HDR; 379 } 380 381 if (vnet_hdr_required && !*vnet_hdr) { 382 qemu_error("vnet_hdr=1 requested, but no kernel " 383 "support for IFF_VNET_HDR available"); 384 close(fd); 385 return -1; 386 } 387 } 388 389 if (ifname[0] != '\0') 390 pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname); 391 else 392 pstrcpy(ifr.ifr_name, IFNAMSIZ, "tap%d"); 393 ret = ioctl(fd, TUNSETIFF, (void *) &ifr); 394 if (ret != 0) { 395 fprintf(stderr, "warning: could not configure /dev/net/tun: no virtual network emulation\n"); 396 close(fd); 397 return -1; 398 } 399 pstrcpy(ifname, ifname_size, ifr.ifr_name); 400 fcntl(fd, F_SETFL, O_NONBLOCK); 401 return fd; 402 } 403 #endif 404 405 static int launch_script(const char *setup_script, const char *ifname, int fd) 406 { 407 sigset_t oldmask, mask; 408 int pid, status; 409 char *args[3]; 410 char **parg; 411 412 sigemptyset(&mask); 413 sigaddset(&mask, SIGCHLD); 414 sigprocmask(SIG_BLOCK, &mask, &oldmask); 415 416 /* try to launch network script */ 417 pid = fork(); 418 if (pid == 0) { 419 int open_max = sysconf(_SC_OPEN_MAX), i; 420 421 for (i = 0; i < open_max; i++) { 422 if (i != STDIN_FILENO && 423 i != STDOUT_FILENO && 424 i != STDERR_FILENO && 425 i != fd) { 426 close(i); 427 } 428 } 429 parg = args; 430 *parg++ = (char *)setup_script; 431 *parg++ = (char *)ifname; 432 *parg++ = NULL; 433 execv(setup_script, args); 434 _exit(1); 435 } else if (pid > 0) { 436 while (waitpid(pid, &status, 0) != pid) { 437 /* loop */ 438 } 439 sigprocmask(SIG_SETMASK, &oldmask, NULL); 440 441 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 442 return 0; 443 } 444 } 445 fprintf(stderr, "%s: could not launch network script\n", setup_script); 446 return -1; 447 } 448 449 static int net_tap_init(QemuOpts *opts, int *vnet_hdr) 450 { 451 int fd, vnet_hdr_required; 452 char ifname[128] = {0,}; 453 const char *setup_script; 454 455 if (qemu_opt_get(opts, "ifname")) { 456 pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname")); 457 } 458 459 *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1); 460 if (qemu_opt_get(opts, "vnet_hdr")) { 461 vnet_hdr_required = *vnet_hdr; 462 } else { 463 vnet_hdr_required = 0; 464 } 465 466 TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required)); 467 if (fd < 0) { 468 return -1; 469 } 470 471 setup_script = qemu_opt_get(opts, "script"); 472 if (setup_script && 473 setup_script[0] != '\0' && 474 strcmp(setup_script, "no") != 0 && 475 launch_script(setup_script, ifname, fd)) { 476 close(fd); 477 return -1; 478 } 479 480 qemu_opt_set(opts, "ifname", ifname); 481 482 return fd; 483 } 484 485 int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan) 486 { 487 TAPState *s; 488 int fd, vnet_hdr; 489 490 if (qemu_opt_get(opts, "fd")) { 491 if (qemu_opt_get(opts, "ifname") || 492 qemu_opt_get(opts, "script") || 493 qemu_opt_get(opts, "downscript") || 494 qemu_opt_get(opts, "vnet_hdr")) { 495 qemu_error("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=\n"); 496 return -1; 497 } 498 499 fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd")); 500 if (fd == -1) { 501 return -1; 502 } 503 504 fcntl(fd, F_SETFL, O_NONBLOCK); 505 506 vnet_hdr = tap_probe_vnet_hdr(fd); 507 } else { 508 if (!qemu_opt_get(opts, "script")) { 509 qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT); 510 } 511 512 if (!qemu_opt_get(opts, "downscript")) { 513 qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT); 514 } 515 516 fd = net_tap_init(opts, &vnet_hdr); 517 } 518 519 s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr); 520 if (!s) { 521 close(fd); 522 return -1; 523 } 524 525 if (tap_set_sndbuf(s, opts) < 0) { 526 return -1; 527 } 528 529 if (qemu_opt_get(opts, "fd")) { 530 snprintf(s->vc->info_str, sizeof(s->vc->info_str), "fd=%d", fd); 531 } else { 532 const char *ifname, *script, *downscript; 533 534 ifname = qemu_opt_get(opts, "ifname"); 535 script = qemu_opt_get(opts, "script"); 536 downscript = qemu_opt_get(opts, "downscript"); 537 538 snprintf(s->vc->info_str, sizeof(s->vc->info_str), 539 "ifname=%s,script=%s,downscript=%s", 540 ifname, script, downscript); 541 542 if (strcmp(downscript, "no") != 0) { 543 snprintf(s->down_script, sizeof(s->down_script), "%s", downscript); 544 snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname); 545 } 546 } 547 548 if (vlan) { 549 vlan->nb_host_devs++; 550 } 551 552 return 0; 553 } 554 555 #endif /* !defined(_AIX) */ 556