xref: /qemu/net/tap.c (revision 966ea5ec)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2009 Red Hat, Inc.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "net/tap.h"
27 
28 #include "config-host.h"
29 
30 #include <signal.h>
31 #include <sys/ioctl.h>
32 #include <sys/stat.h>
33 #include <sys/wait.h>
34 #include <net/if.h>
35 
36 #include "net.h"
37 #include "sysemu.h"
38 #include "qemu-char.h"
39 #include "qemu-common.h"
40 
41 #ifdef __linux__
42 #include "net/tap-linux.h"
43 #endif
44 
45 #if !defined(_AIX)
46 
47 /* Maximum GSO packet size (64k) plus plenty of room for
48  * the ethernet and virtio_net headers
49  */
50 #define TAP_BUFSIZE (4096 + 65536)
51 
52 typedef struct TAPState {
53     VLANClientState *vc;
54     int fd;
55     char down_script[1024];
56     char down_script_arg[128];
57     uint8_t buf[TAP_BUFSIZE];
58     unsigned int read_poll : 1;
59     unsigned int write_poll : 1;
60     unsigned int has_vnet_hdr : 1;
61     unsigned int using_vnet_hdr : 1;
62     unsigned int has_ufo: 1;
63 } TAPState;
64 
/* Forward declarations: fd handler callbacks installed by tap_update_fd_handler(). */
static int tap_can_send(void *opaque);
static void tap_send(void *opaque);
static void tap_writable(void *opaque);

/* Forward declaration: script runner, defined further down and used by tap_cleanup(). */
static int launch_script(const char *setup_script, const char *ifname, int fd);
70 
71 static void tap_update_fd_handler(TAPState *s)
72 {
73     qemu_set_fd_handler2(s->fd,
74                          s->read_poll  ? tap_can_send : NULL,
75                          s->read_poll  ? tap_send     : NULL,
76                          s->write_poll ? tap_writable : NULL,
77                          s);
78 }
79 
80 static void tap_read_poll(TAPState *s, int enable)
81 {
82     s->read_poll = !!enable;
83     tap_update_fd_handler(s);
84 }
85 
86 static void tap_write_poll(TAPState *s, int enable)
87 {
88     s->write_poll = !!enable;
89     tap_update_fd_handler(s);
90 }
91 
92 static void tap_writable(void *opaque)
93 {
94     TAPState *s = opaque;
95 
96     tap_write_poll(s, 0);
97 
98     qemu_flush_queued_packets(s->vc);
99 }
100 
101 static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
102 {
103     ssize_t len;
104 
105     do {
106         len = writev(s->fd, iov, iovcnt);
107     } while (len == -1 && errno == EINTR);
108 
109     if (len == -1 && errno == EAGAIN) {
110         tap_write_poll(s, 1);
111         return 0;
112     }
113 
114     return len;
115 }
116 
117 static ssize_t tap_receive_iov(VLANClientState *vc, const struct iovec *iov,
118                                int iovcnt)
119 {
120     TAPState *s = vc->opaque;
121     const struct iovec *iovp = iov;
122     struct iovec iov_copy[iovcnt + 1];
123     struct virtio_net_hdr hdr = { 0, };
124 
125     if (s->has_vnet_hdr && !s->using_vnet_hdr) {
126         iov_copy[0].iov_base = &hdr;
127         iov_copy[0].iov_len =  sizeof(hdr);
128         memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
129         iovp = iov_copy;
130         iovcnt++;
131     }
132 
133     return tap_write_packet(s, iovp, iovcnt);
134 }
135 
136 static ssize_t tap_receive_raw(VLANClientState *vc, const uint8_t *buf, size_t size)
137 {
138     TAPState *s = vc->opaque;
139     struct iovec iov[2];
140     int iovcnt = 0;
141     struct virtio_net_hdr hdr = { 0, };
142 
143     if (s->has_vnet_hdr) {
144         iov[iovcnt].iov_base = &hdr;
145         iov[iovcnt].iov_len  = sizeof(hdr);
146         iovcnt++;
147     }
148 
149     iov[iovcnt].iov_base = (char *)buf;
150     iov[iovcnt].iov_len  = size;
151     iovcnt++;
152 
153     return tap_write_packet(s, iov, iovcnt);
154 }
155 
156 static ssize_t tap_receive(VLANClientState *vc, const uint8_t *buf, size_t size)
157 {
158     TAPState *s = vc->opaque;
159     struct iovec iov[1];
160 
161     if (s->has_vnet_hdr && !s->using_vnet_hdr) {
162         return tap_receive_raw(vc, buf, size);
163     }
164 
165     iov[0].iov_base = (char *)buf;
166     iov[0].iov_len  = size;
167 
168     return tap_write_packet(s, iov, 1);
169 }
170 
171 static int tap_can_send(void *opaque)
172 {
173     TAPState *s = opaque;
174 
175     return qemu_can_send_packet(s->vc);
176 }
177 
#ifndef __sun__
/*
 * Read at most maxlen bytes from tapfd into buf with a single read().
 * Returns the read() result unchanged.  Not compiled when __sun__ is defined.
 */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t nread = read(tapfd, buf, maxlen);

    return nread;
}
#endif
184 
185 static void tap_send_completed(VLANClientState *vc, ssize_t len)
186 {
187     TAPState *s = vc->opaque;
188     tap_read_poll(s, 1);
189 }
190 
191 static void tap_send(void *opaque)
192 {
193     TAPState *s = opaque;
194     int size;
195 
196     do {
197         uint8_t *buf = s->buf;
198 
199         size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
200         if (size <= 0) {
201             break;
202         }
203 
204         if (s->has_vnet_hdr && !s->using_vnet_hdr) {
205             buf  += sizeof(struct virtio_net_hdr);
206             size -= sizeof(struct virtio_net_hdr);
207         }
208 
209         size = qemu_send_packet_async(s->vc, buf, size, tap_send_completed);
210         if (size == 0) {
211             tap_read_poll(s, 0);
212         }
213     } while (size > 0);
214 }
215 
216 /* sndbuf should be set to a value lower than the tx queue
217  * capacity of any destination network interface.
218  * Ethernet NICs generally have txqueuelen=1000, so 1Mb is
219  * a good default, given a 1500 byte MTU.
220  */
221 #define TAP_DEFAULT_SNDBUF 1024*1024
222 
223 static int tap_set_sndbuf(TAPState *s, QemuOpts *opts)
224 {
225     int sndbuf;
226 
227     sndbuf = qemu_opt_get_size(opts, "sndbuf", TAP_DEFAULT_SNDBUF);
228     if (!sndbuf) {
229         sndbuf = INT_MAX;
230     }
231 
232     if (ioctl(s->fd, TUNSETSNDBUF, &sndbuf) == -1 && qemu_opt_get(opts, "sndbuf")) {
233         qemu_error("TUNSETSNDBUF ioctl failed: %s\n", strerror(errno));
234         return -1;
235     }
236     return 0;
237 }
238 
239 int tap_has_ufo(VLANClientState *vc)
240 {
241     TAPState *s = vc->opaque;
242 
243     assert(vc->type == NET_CLIENT_TYPE_TAP);
244 
245     return s->has_ufo;
246 }
247 
248 int tap_has_vnet_hdr(VLANClientState *vc)
249 {
250     TAPState *s = vc->opaque;
251 
252     assert(vc->type == NET_CLIENT_TYPE_TAP);
253 
254     return s->has_vnet_hdr;
255 }
256 
257 void tap_using_vnet_hdr(VLANClientState *vc, int using_vnet_hdr)
258 {
259     TAPState *s = vc->opaque;
260 
261     using_vnet_hdr = using_vnet_hdr != 0;
262 
263     assert(vc->type == NET_CLIENT_TYPE_TAP);
264     assert(s->has_vnet_hdr == using_vnet_hdr);
265 
266     s->using_vnet_hdr = using_vnet_hdr;
267 }
268 
269 static int tap_probe_vnet_hdr(int fd)
270 {
271     struct ifreq ifr;
272 
273     if (ioctl(fd, TUNGETIFF, &ifr) != 0) {
274         qemu_error("TUNGETIFF ioctl() failed: %s\n", strerror(errno));
275         return 0;
276     }
277 
278     return ifr.ifr_flags & IFF_VNET_HDR;
279 }
280 
281 void tap_set_offload(VLANClientState *vc, int csum, int tso4,
282                      int tso6, int ecn, int ufo)
283 {
284     TAPState *s = vc->opaque;
285     unsigned int offload = 0;
286 
287     if (csum) {
288         offload |= TUN_F_CSUM;
289         if (tso4)
290             offload |= TUN_F_TSO4;
291         if (tso6)
292             offload |= TUN_F_TSO6;
293         if ((tso4 || tso6) && ecn)
294             offload |= TUN_F_TSO_ECN;
295         if (ufo)
296             offload |= TUN_F_UFO;
297     }
298 
299     if (ioctl(s->fd, TUNSETOFFLOAD, offload) != 0) {
300         offload &= ~TUN_F_UFO;
301         if (ioctl(s->fd, TUNSETOFFLOAD, offload) != 0) {
302             fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n",
303                     strerror(errno));
304         }
305     }
306 }
307 
308 static void tap_cleanup(VLANClientState *vc)
309 {
310     TAPState *s = vc->opaque;
311 
312     qemu_purge_queued_packets(vc);
313 
314     if (s->down_script[0])
315         launch_script(s->down_script, s->down_script_arg, s->fd);
316 
317     tap_read_poll(s, 0);
318     tap_write_poll(s, 0);
319     close(s->fd);
320     qemu_free(s);
321 }
322 
323 /* fd support */
324 
325 static TAPState *net_tap_fd_init(VLANState *vlan,
326                                  const char *model,
327                                  const char *name,
328                                  int fd,
329                                  int vnet_hdr)
330 {
331     TAPState *s;
332     unsigned int offload;
333 
334     s = qemu_mallocz(sizeof(TAPState));
335     s->fd = fd;
336     s->has_vnet_hdr = vnet_hdr != 0;
337     s->using_vnet_hdr = 0;
338     s->vc = qemu_new_vlan_client(NET_CLIENT_TYPE_TAP,
339                                  vlan, NULL, model, name, NULL,
340                                  tap_receive, tap_receive_raw,
341                                  tap_receive_iov, tap_cleanup, s);
342     s->has_ufo = 0;
343     /* Check if tap supports UFO */
344     offload = TUN_F_CSUM | TUN_F_UFO;
345     if (ioctl(s->fd, TUNSETOFFLOAD, offload) == 0)
346        s->has_ufo = 1;
347     tap_set_offload(s->vc, 0, 0, 0, 0, 0);
348     tap_read_poll(s, 1);
349     return s;
350 }
351 
352 #ifdef _AIX
353 int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
354 {
355     fprintf (stderr, "no tap on AIX\n");
356     return -1;
357 }
358 #else
359 int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
360 {
361     struct ifreq ifr;
362     int fd, ret;
363 
364     TFR(fd = open("/dev/net/tun", O_RDWR));
365     if (fd < 0) {
366         fprintf(stderr, "warning: could not open /dev/net/tun: no virtual network emulation\n");
367         return -1;
368     }
369     memset(&ifr, 0, sizeof(ifr));
370     ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
371 
372     if (*vnet_hdr) {
373         unsigned int features;
374 
375         if (ioctl(fd, TUNGETFEATURES, &features) == 0 &&
376             features & IFF_VNET_HDR) {
377             *vnet_hdr = 1;
378             ifr.ifr_flags |= IFF_VNET_HDR;
379         }
380 
381         if (vnet_hdr_required && !*vnet_hdr) {
382             qemu_error("vnet_hdr=1 requested, but no kernel "
383                        "support for IFF_VNET_HDR available");
384             close(fd);
385             return -1;
386         }
387     }
388 
389     if (ifname[0] != '\0')
390         pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname);
391     else
392         pstrcpy(ifr.ifr_name, IFNAMSIZ, "tap%d");
393     ret = ioctl(fd, TUNSETIFF, (void *) &ifr);
394     if (ret != 0) {
395         fprintf(stderr, "warning: could not configure /dev/net/tun: no virtual network emulation\n");
396         close(fd);
397         return -1;
398     }
399     pstrcpy(ifname, ifname_size, ifr.ifr_name);
400     fcntl(fd, F_SETFL, O_NONBLOCK);
401     return fd;
402 }
403 #endif
404 
/*
 * Run a network setup/teardown script and wait for it to finish.
 *
 * The script is executed as "<setup_script> <ifname>" in a forked child that
 * keeps only stdin, stdout, stderr and 'fd' open.  SIGCHLD is blocked in the
 * calling process for the duration of the call and the previous signal mask
 * is restored before returning on every parent path, including fork()
 * failure.
 *
 * Returns 0 if the script exited normally with status 0.  Otherwise (fork
 * or wait failure, abnormal termination, non-zero exit status) an error is
 * printed to stderr and -1 is returned.
 */
static int launch_script(const char *setup_script, const char *ifname, int fd)
{
    sigset_t oldmask, mask;
    int pid, status = 0;
    int reaped = 0;
    char *args[3];
    char **parg;

    sigemptyset(&mask);
    sigaddset(&mask, SIGCHLD);
    sigprocmask(SIG_BLOCK, &mask, &oldmask);

    /* try to launch network script */
    pid = fork();
    if (pid == 0) {
        int open_max = sysconf(_SC_OPEN_MAX), i;

        /* Do not leak unrelated descriptors into the script. */
        for (i = 0; i < open_max; i++) {
            if (i != STDIN_FILENO &&
                i != STDOUT_FILENO &&
                i != STDERR_FILENO &&
                i != fd) {
                close(i);
            }
        }
        parg = args;
        *parg++ = (char *)setup_script;
        *parg++ = (char *)ifname;
        *parg++ = NULL;
        execv(setup_script, args);
        /* Only reached if execv() failed. */
        _exit(1);
    }

    if (pid > 0) {
        pid_t waited;

        /* Retry only when interrupted; any other error ends the wait. */
        do {
            waited = waitpid(pid, &status, 0);
        } while (waited == -1 && errno == EINTR);

        reaped = (waited == pid);
    }

    /* Restore the caller's signal mask whether or not the fork succeeded. */
    sigprocmask(SIG_SETMASK, &oldmask, NULL);

    if (reaped && WIFEXITED(status) && WEXITSTATUS(status) == 0) {
        return 0;
    }

    fprintf(stderr, "%s: could not launch network script\n", setup_script);
    return -1;
}
448 
449 static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
450 {
451     int fd, vnet_hdr_required;
452     char ifname[128] = {0,};
453     const char *setup_script;
454 
455     if (qemu_opt_get(opts, "ifname")) {
456         pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname"));
457     }
458 
459     *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1);
460     if (qemu_opt_get(opts, "vnet_hdr")) {
461         vnet_hdr_required = *vnet_hdr;
462     } else {
463         vnet_hdr_required = 0;
464     }
465 
466     TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
467     if (fd < 0) {
468         return -1;
469     }
470 
471     setup_script = qemu_opt_get(opts, "script");
472     if (setup_script &&
473         setup_script[0] != '\0' &&
474         strcmp(setup_script, "no") != 0 &&
475         launch_script(setup_script, ifname, fd)) {
476         close(fd);
477         return -1;
478     }
479 
480     qemu_opt_set(opts, "ifname", ifname);
481 
482     return fd;
483 }
484 
485 int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
486 {
487     TAPState *s;
488     int fd, vnet_hdr;
489 
490     if (qemu_opt_get(opts, "fd")) {
491         if (qemu_opt_get(opts, "ifname") ||
492             qemu_opt_get(opts, "script") ||
493             qemu_opt_get(opts, "downscript") ||
494             qemu_opt_get(opts, "vnet_hdr")) {
495             qemu_error("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=\n");
496             return -1;
497         }
498 
499         fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
500         if (fd == -1) {
501             return -1;
502         }
503 
504         fcntl(fd, F_SETFL, O_NONBLOCK);
505 
506         vnet_hdr = tap_probe_vnet_hdr(fd);
507     } else {
508         if (!qemu_opt_get(opts, "script")) {
509             qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
510         }
511 
512         if (!qemu_opt_get(opts, "downscript")) {
513             qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
514         }
515 
516         fd = net_tap_init(opts, &vnet_hdr);
517     }
518 
519     s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
520     if (!s) {
521         close(fd);
522         return -1;
523     }
524 
525     if (tap_set_sndbuf(s, opts) < 0) {
526         return -1;
527     }
528 
529     if (qemu_opt_get(opts, "fd")) {
530         snprintf(s->vc->info_str, sizeof(s->vc->info_str), "fd=%d", fd);
531     } else {
532         const char *ifname, *script, *downscript;
533 
534         ifname     = qemu_opt_get(opts, "ifname");
535         script     = qemu_opt_get(opts, "script");
536         downscript = qemu_opt_get(opts, "downscript");
537 
538         snprintf(s->vc->info_str, sizeof(s->vc->info_str),
539                  "ifname=%s,script=%s,downscript=%s",
540                  ifname, script, downscript);
541 
542         if (strcmp(downscript, "no") != 0) {
543             snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
544             snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
545         }
546     }
547 
548     if (vlan) {
549         vlan->nb_host_devs++;
550     }
551 
552     return 0;
553 }
554 
555 #endif /* !defined(_AIX) */
556