Annotation of qemu/net/tap.c, revision 1.1.1.5

1.1       root        1: /*
                      2:  * QEMU System Emulator
                      3:  *
                      4:  * Copyright (c) 2003-2008 Fabrice Bellard
                      5:  * Copyright (c) 2009 Red Hat, Inc.
                      6:  *
                      7:  * Permission is hereby granted, free of charge, to any person obtaining a copy
                      8:  * of this software and associated documentation files (the "Software"), to deal
                      9:  * in the Software without restriction, including without limitation the rights
                     10:  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
                     11:  * copies of the Software, and to permit persons to whom the Software is
                     12:  * furnished to do so, subject to the following conditions:
                     13:  *
                     14:  * The above copyright notice and this permission notice shall be included in
                     15:  * all copies or substantial portions of the Software.
                     16:  *
                     17:  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
                     18:  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
                     19:  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
                     20:  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
                     21:  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
                     22:  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
                     23:  * THE SOFTWARE.
                     24:  */
                     25: 
                     26: #include "net/tap.h"
                     27: 
                     28: #include "config-host.h"
                     29: 
                     30: #include <sys/ioctl.h>
                     31: #include <sys/stat.h>
                     32: #include <sys/wait.h>
                     33: #include <sys/socket.h>
                     34: #include <net/if.h>
                     35: 
                     36: #include "net.h"
                     37: #include "sysemu.h"
                     38: #include "qemu-char.h"
                     39: #include "qemu-common.h"
1.1.1.3   root       40: #include "qemu-error.h"
1.1       root       41: 
                     42: #include "net/tap-linux.h"
                     43: 
1.1.1.3   root       44: #include "hw/vhost_net.h"
                     45: 
/*
 * Size of the per-backend packet buffer: the largest GSO packet (64k)
 * with generous headroom for the ethernet and virtio_net headers.
 */
#define TAP_BUFSIZE (65536 + 4096)
                     50: 
                     51: typedef struct TAPState {
                     52:     VLANClientState nc;
                     53:     int fd;
                     54:     char down_script[1024];
                     55:     char down_script_arg[128];
                     56:     uint8_t buf[TAP_BUFSIZE];
                     57:     unsigned int read_poll : 1;
                     58:     unsigned int write_poll : 1;
                     59:     unsigned int using_vnet_hdr : 1;
                     60:     unsigned int has_ufo: 1;
1.1.1.3   root       61:     VHostNetState *vhost_net;
1.1.1.4   root       62:     unsigned host_vnet_hdr_len;
1.1       root       63: } TAPState;
                     64: 
                     65: static int launch_script(const char *setup_script, const char *ifname, int fd);
                     66: 
                     67: static int tap_can_send(void *opaque);
                     68: static void tap_send(void *opaque);
                     69: static void tap_writable(void *opaque);
                     70: 
                     71: static void tap_update_fd_handler(TAPState *s)
                     72: {
                     73:     qemu_set_fd_handler2(s->fd,
                     74:                          s->read_poll  ? tap_can_send : NULL,
                     75:                          s->read_poll  ? tap_send     : NULL,
                     76:                          s->write_poll ? tap_writable : NULL,
                     77:                          s);
                     78: }
                     79: 
                     80: static void tap_read_poll(TAPState *s, int enable)
                     81: {
                     82:     s->read_poll = !!enable;
                     83:     tap_update_fd_handler(s);
                     84: }
                     85: 
                     86: static void tap_write_poll(TAPState *s, int enable)
                     87: {
                     88:     s->write_poll = !!enable;
                     89:     tap_update_fd_handler(s);
                     90: }
                     91: 
                     92: static void tap_writable(void *opaque)
                     93: {
                     94:     TAPState *s = opaque;
                     95: 
                     96:     tap_write_poll(s, 0);
                     97: 
                     98:     qemu_flush_queued_packets(&s->nc);
                     99: }
                    100: 
                    101: static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
                    102: {
                    103:     ssize_t len;
                    104: 
                    105:     do {
                    106:         len = writev(s->fd, iov, iovcnt);
                    107:     } while (len == -1 && errno == EINTR);
                    108: 
                    109:     if (len == -1 && errno == EAGAIN) {
                    110:         tap_write_poll(s, 1);
                    111:         return 0;
                    112:     }
                    113: 
                    114:     return len;
                    115: }
                    116: 
                    117: static ssize_t tap_receive_iov(VLANClientState *nc, const struct iovec *iov,
                    118:                                int iovcnt)
                    119: {
                    120:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    121:     const struct iovec *iovp = iov;
                    122:     struct iovec iov_copy[iovcnt + 1];
1.1.1.4   root      123:     struct virtio_net_hdr_mrg_rxbuf hdr = { };
1.1       root      124: 
1.1.1.4   root      125:     if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
1.1       root      126:         iov_copy[0].iov_base = &hdr;
1.1.1.4   root      127:         iov_copy[0].iov_len =  s->host_vnet_hdr_len;
1.1       root      128:         memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
                    129:         iovp = iov_copy;
                    130:         iovcnt++;
                    131:     }
                    132: 
                    133:     return tap_write_packet(s, iovp, iovcnt);
                    134: }
                    135: 
                    136: static ssize_t tap_receive_raw(VLANClientState *nc, const uint8_t *buf, size_t size)
                    137: {
                    138:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    139:     struct iovec iov[2];
                    140:     int iovcnt = 0;
1.1.1.4   root      141:     struct virtio_net_hdr_mrg_rxbuf hdr = { };
1.1       root      142: 
1.1.1.4   root      143:     if (s->host_vnet_hdr_len) {
1.1       root      144:         iov[iovcnt].iov_base = &hdr;
1.1.1.4   root      145:         iov[iovcnt].iov_len  = s->host_vnet_hdr_len;
1.1       root      146:         iovcnt++;
                    147:     }
                    148: 
                    149:     iov[iovcnt].iov_base = (char *)buf;
                    150:     iov[iovcnt].iov_len  = size;
                    151:     iovcnt++;
                    152: 
                    153:     return tap_write_packet(s, iov, iovcnt);
                    154: }
                    155: 
                    156: static ssize_t tap_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
                    157: {
                    158:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    159:     struct iovec iov[1];
                    160: 
1.1.1.4   root      161:     if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
1.1       root      162:         return tap_receive_raw(nc, buf, size);
                    163:     }
                    164: 
                    165:     iov[0].iov_base = (char *)buf;
                    166:     iov[0].iov_len  = size;
                    167: 
                    168:     return tap_write_packet(s, iov, 1);
                    169: }
                    170: 
                    171: static int tap_can_send(void *opaque)
                    172: {
                    173:     TAPState *s = opaque;
                    174: 
                    175:     return qemu_can_send_packet(&s->nc);
                    176: }
                    177: 
                    178: #ifndef __sun__
                    179: ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
                    180: {
                    181:     return read(tapfd, buf, maxlen);
                    182: }
                    183: #endif
                    184: 
                    185: static void tap_send_completed(VLANClientState *nc, ssize_t len)
                    186: {
                    187:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    188:     tap_read_poll(s, 1);
                    189: }
                    190: 
                    191: static void tap_send(void *opaque)
                    192: {
                    193:     TAPState *s = opaque;
                    194:     int size;
                    195: 
                    196:     do {
                    197:         uint8_t *buf = s->buf;
                    198: 
                    199:         size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
                    200:         if (size <= 0) {
                    201:             break;
                    202:         }
                    203: 
1.1.1.4   root      204:         if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
                    205:             buf  += s->host_vnet_hdr_len;
                    206:             size -= s->host_vnet_hdr_len;
1.1       root      207:         }
                    208: 
                    209:         size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
                    210:         if (size == 0) {
                    211:             tap_read_poll(s, 0);
                    212:         }
                    213:     } while (size > 0 && qemu_can_send_packet(&s->nc));
                    214: }
                    215: 
                    216: int tap_has_ufo(VLANClientState *nc)
                    217: {
                    218:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    219: 
                    220:     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
                    221: 
                    222:     return s->has_ufo;
                    223: }
                    224: 
                    225: int tap_has_vnet_hdr(VLANClientState *nc)
                    226: {
                    227:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    228: 
                    229:     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
                    230: 
1.1.1.4   root      231:     return !!s->host_vnet_hdr_len;
                    232: }
                    233: 
                    234: int tap_has_vnet_hdr_len(VLANClientState *nc, int len)
                    235: {
                    236:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    237: 
                    238:     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
                    239: 
                    240:     return tap_probe_vnet_hdr_len(s->fd, len);
                    241: }
                    242: 
                    243: void tap_set_vnet_hdr_len(VLANClientState *nc, int len)
                    244: {
                    245:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    246: 
                    247:     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
                    248:     assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
                    249:            len == sizeof(struct virtio_net_hdr));
                    250: 
                    251:     tap_fd_set_vnet_hdr_len(s->fd, len);
                    252:     s->host_vnet_hdr_len = len;
1.1       root      253: }
                    254: 
                    255: void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr)
                    256: {
                    257:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    258: 
                    259:     using_vnet_hdr = using_vnet_hdr != 0;
                    260: 
                    261:     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
1.1.1.4   root      262:     assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
1.1       root      263: 
                    264:     s->using_vnet_hdr = using_vnet_hdr;
                    265: }
                    266: 
                    267: void tap_set_offload(VLANClientState *nc, int csum, int tso4,
                    268:                      int tso6, int ecn, int ufo)
                    269: {
                    270:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
1.1.1.4   root      271:     if (s->fd < 0) {
                    272:         return;
                    273:     }
1.1       root      274: 
1.1.1.4   root      275:     tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
1.1       root      276: }
                    277: 
                    278: static void tap_cleanup(VLANClientState *nc)
                    279: {
                    280:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    281: 
1.1.1.3   root      282:     if (s->vhost_net) {
                    283:         vhost_net_cleanup(s->vhost_net);
1.1.1.4   root      284:         s->vhost_net = NULL;
1.1.1.3   root      285:     }
                    286: 
1.1       root      287:     qemu_purge_queued_packets(nc);
                    288: 
                    289:     if (s->down_script[0])
                    290:         launch_script(s->down_script, s->down_script_arg, s->fd);
                    291: 
                    292:     tap_read_poll(s, 0);
                    293:     tap_write_poll(s, 0);
                    294:     close(s->fd);
1.1.1.4   root      295:     s->fd = -1;
1.1       root      296: }
                    297: 
1.1.1.3   root      298: static void tap_poll(VLANClientState *nc, bool enable)
                    299: {
                    300:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    301:     tap_read_poll(s, enable);
                    302:     tap_write_poll(s, enable);
                    303: }
                    304: 
                    305: int tap_get_fd(VLANClientState *nc)
                    306: {
                    307:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    308:     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
                    309:     return s->fd;
                    310: }
                    311: 
1.1       root      312: /* fd support */
                    313: 
                    314: static NetClientInfo net_tap_info = {
                    315:     .type = NET_CLIENT_TYPE_TAP,
                    316:     .size = sizeof(TAPState),
                    317:     .receive = tap_receive,
                    318:     .receive_raw = tap_receive_raw,
                    319:     .receive_iov = tap_receive_iov,
1.1.1.3   root      320:     .poll = tap_poll,
1.1       root      321:     .cleanup = tap_cleanup,
                    322: };
                    323: 
                    324: static TAPState *net_tap_fd_init(VLANState *vlan,
                    325:                                  const char *model,
                    326:                                  const char *name,
                    327:                                  int fd,
                    328:                                  int vnet_hdr)
                    329: {
                    330:     VLANClientState *nc;
                    331:     TAPState *s;
                    332: 
                    333:     nc = qemu_new_net_client(&net_tap_info, vlan, NULL, model, name);
                    334: 
                    335:     s = DO_UPCAST(TAPState, nc, nc);
                    336: 
                    337:     s->fd = fd;
1.1.1.4   root      338:     s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
1.1       root      339:     s->using_vnet_hdr = 0;
                    340:     s->has_ufo = tap_probe_has_ufo(s->fd);
                    341:     tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
                    342:     tap_read_poll(s, 1);
1.1.1.3   root      343:     s->vhost_net = NULL;
1.1       root      344:     return s;
                    345: }
                    346: 
                    347: static int launch_script(const char *setup_script, const char *ifname, int fd)
                    348: {
                    349:     sigset_t oldmask, mask;
                    350:     int pid, status;
                    351:     char *args[3];
                    352:     char **parg;
                    353: 
                    354:     sigemptyset(&mask);
                    355:     sigaddset(&mask, SIGCHLD);
                    356:     sigprocmask(SIG_BLOCK, &mask, &oldmask);
                    357: 
                    358:     /* try to launch network script */
                    359:     pid = fork();
                    360:     if (pid == 0) {
                    361:         int open_max = sysconf(_SC_OPEN_MAX), i;
                    362: 
                    363:         for (i = 0; i < open_max; i++) {
                    364:             if (i != STDIN_FILENO &&
                    365:                 i != STDOUT_FILENO &&
                    366:                 i != STDERR_FILENO &&
                    367:                 i != fd) {
                    368:                 close(i);
                    369:             }
                    370:         }
                    371:         parg = args;
                    372:         *parg++ = (char *)setup_script;
                    373:         *parg++ = (char *)ifname;
1.1.1.3   root      374:         *parg = NULL;
1.1       root      375:         execv(setup_script, args);
                    376:         _exit(1);
                    377:     } else if (pid > 0) {
                    378:         while (waitpid(pid, &status, 0) != pid) {
                    379:             /* loop */
                    380:         }
                    381:         sigprocmask(SIG_SETMASK, &oldmask, NULL);
                    382: 
                    383:         if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
                    384:             return 0;
                    385:         }
                    386:     }
                    387:     fprintf(stderr, "%s: could not launch network script\n", setup_script);
                    388:     return -1;
                    389: }
                    390: 
                    391: static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
                    392: {
                    393:     int fd, vnet_hdr_required;
                    394:     char ifname[128] = {0,};
                    395:     const char *setup_script;
                    396: 
                    397:     if (qemu_opt_get(opts, "ifname")) {
                    398:         pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname"));
                    399:     }
                    400: 
                    401:     *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1);
                    402:     if (qemu_opt_get(opts, "vnet_hdr")) {
                    403:         vnet_hdr_required = *vnet_hdr;
                    404:     } else {
                    405:         vnet_hdr_required = 0;
                    406:     }
                    407: 
                    408:     TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
                    409:     if (fd < 0) {
                    410:         return -1;
                    411:     }
                    412: 
                    413:     setup_script = qemu_opt_get(opts, "script");
                    414:     if (setup_script &&
                    415:         setup_script[0] != '\0' &&
                    416:         strcmp(setup_script, "no") != 0 &&
                    417:         launch_script(setup_script, ifname, fd)) {
                    418:         close(fd);
                    419:         return -1;
                    420:     }
                    421: 
                    422:     qemu_opt_set(opts, "ifname", ifname);
                    423: 
                    424:     return fd;
                    425: }
                    426: 
                    427: int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
                    428: {
                    429:     TAPState *s;
                    430:     int fd, vnet_hdr = 0;
                    431: 
                    432:     if (qemu_opt_get(opts, "fd")) {
                    433:         if (qemu_opt_get(opts, "ifname") ||
                    434:             qemu_opt_get(opts, "script") ||
                    435:             qemu_opt_get(opts, "downscript") ||
                    436:             qemu_opt_get(opts, "vnet_hdr")) {
1.1.1.3   root      437:             error_report("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=");
1.1       root      438:             return -1;
                    439:         }
                    440: 
                    441:         fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
                    442:         if (fd == -1) {
                    443:             return -1;
                    444:         }
                    445: 
                    446:         fcntl(fd, F_SETFL, O_NONBLOCK);
                    447: 
                    448:         vnet_hdr = tap_probe_vnet_hdr(fd);
                    449:     } else {
                    450:         if (!qemu_opt_get(opts, "script")) {
                    451:             qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
                    452:         }
                    453: 
                    454:         if (!qemu_opt_get(opts, "downscript")) {
                    455:             qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
                    456:         }
                    457: 
                    458:         fd = net_tap_init(opts, &vnet_hdr);
                    459:         if (fd == -1) {
                    460:             return -1;
                    461:         }
                    462:     }
                    463: 
                    464:     s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
                    465:     if (!s) {
                    466:         close(fd);
                    467:         return -1;
                    468:     }
                    469: 
                    470:     if (tap_set_sndbuf(s->fd, opts) < 0) {
                    471:         return -1;
                    472:     }
                    473: 
                    474:     if (qemu_opt_get(opts, "fd")) {
                    475:         snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
                    476:     } else {
                    477:         const char *ifname, *script, *downscript;
                    478: 
                    479:         ifname     = qemu_opt_get(opts, "ifname");
                    480:         script     = qemu_opt_get(opts, "script");
                    481:         downscript = qemu_opt_get(opts, "downscript");
                    482: 
                    483:         snprintf(s->nc.info_str, sizeof(s->nc.info_str),
                    484:                  "ifname=%s,script=%s,downscript=%s",
                    485:                  ifname, script, downscript);
                    486: 
                    487:         if (strcmp(downscript, "no") != 0) {
                    488:             snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
                    489:             snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
                    490:         }
                    491:     }
                    492: 
1.1.1.4   root      493:     if (qemu_opt_get_bool(opts, "vhost", !!qemu_opt_get(opts, "vhostfd") ||
                    494:                           qemu_opt_get_bool(opts, "vhostforce", false))) {
1.1.1.3   root      495:         int vhostfd, r;
1.1.1.4   root      496:         bool force = qemu_opt_get_bool(opts, "vhostforce", false);
1.1.1.3   root      497:         if (qemu_opt_get(opts, "vhostfd")) {
                    498:             r = net_handle_fd_param(mon, qemu_opt_get(opts, "vhostfd"));
                    499:             if (r == -1) {
                    500:                 return -1;
                    501:             }
                    502:             vhostfd = r;
                    503:         } else {
                    504:             vhostfd = -1;
                    505:         }
1.1.1.4   root      506:         s->vhost_net = vhost_net_init(&s->nc, vhostfd, force);
1.1.1.3   root      507:         if (!s->vhost_net) {
                    508:             error_report("vhost-net requested but could not be initialized");
                    509:             return -1;
                    510:         }
                    511:     } else if (qemu_opt_get(opts, "vhostfd")) {
                    512:         error_report("vhostfd= is not valid without vhost");
                    513:         return -1;
                    514:     }
                    515: 
1.1       root      516:     return 0;
                    517: }
1.1.1.3   root      518: 
                    519: VHostNetState *tap_get_vhost_net(VLANClientState *nc)
                    520: {
                    521:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    522:     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
                    523:     return s->vhost_net;
                    524: }

unix.superglobalmegacorp.com