Annotation of qemu/net/tap.c, revision 1.1.1.3

1.1       root        1: /*
                      2:  * QEMU System Emulator
                      3:  *
                      4:  * Copyright (c) 2003-2008 Fabrice Bellard
                      5:  * Copyright (c) 2009 Red Hat, Inc.
                      6:  *
                      7:  * Permission is hereby granted, free of charge, to any person obtaining a copy
                      8:  * of this software and associated documentation files (the "Software"), to deal
                      9:  * in the Software without restriction, including without limitation the rights
                     10:  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
                     11:  * copies of the Software, and to permit persons to whom the Software is
                     12:  * furnished to do so, subject to the following conditions:
                     13:  *
                     14:  * The above copyright notice and this permission notice shall be included in
                     15:  * all copies or substantial portions of the Software.
                     16:  *
                     17:  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
                     18:  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
                     19:  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
                     20:  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
                     21:  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
                     22:  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
                     23:  * THE SOFTWARE.
                     24:  */
                     25: 
                     26: #include "net/tap.h"
                     27: 
                     28: #include "config-host.h"
                     29: 
                     30: #include <signal.h>
                     31: #include <sys/ioctl.h>
                     32: #include <sys/stat.h>
                     33: #include <sys/wait.h>
                     34: #include <sys/socket.h>
                     35: #include <net/if.h>
                     36: 
                     37: #include "net.h"
                     38: #include "sysemu.h"
                     39: #include "qemu-char.h"
                     40: #include "qemu-common.h"
1.1.1.3 ! root       41: #include "qemu-error.h"
1.1       root       42: 
                     43: #include "net/tap-linux.h"
                     44: 
1.1.1.3 ! root       45: #include "hw/vhost_net.h"
        !            46: 
1.1       root       47: /* Maximum GSO packet size (64k) plus plenty of room for
                     48:  * the ethernet and virtio_net headers
                     49:  */
                     50: #define TAP_BUFSIZE (4096 + 65536)
                     51: 
                     52: typedef struct TAPState {
                     53:     VLANClientState nc;
                     54:     int fd;
                     55:     char down_script[1024];
                     56:     char down_script_arg[128];
                     57:     uint8_t buf[TAP_BUFSIZE];
                     58:     unsigned int read_poll : 1;
                     59:     unsigned int write_poll : 1;
                     60:     unsigned int has_vnet_hdr : 1;
                     61:     unsigned int using_vnet_hdr : 1;
                     62:     unsigned int has_ufo: 1;
1.1.1.3 ! root       63:     VHostNetState *vhost_net;
1.1       root       64: } TAPState;
                     65: 
                     66: static int launch_script(const char *setup_script, const char *ifname, int fd);
                     67: 
                     68: static int tap_can_send(void *opaque);
                     69: static void tap_send(void *opaque);
                     70: static void tap_writable(void *opaque);
                     71: 
                     72: static void tap_update_fd_handler(TAPState *s)
                     73: {
                     74:     qemu_set_fd_handler2(s->fd,
                     75:                          s->read_poll  ? tap_can_send : NULL,
                     76:                          s->read_poll  ? tap_send     : NULL,
                     77:                          s->write_poll ? tap_writable : NULL,
                     78:                          s);
                     79: }
                     80: 
                     81: static void tap_read_poll(TAPState *s, int enable)
                     82: {
                     83:     s->read_poll = !!enable;
                     84:     tap_update_fd_handler(s);
                     85: }
                     86: 
                     87: static void tap_write_poll(TAPState *s, int enable)
                     88: {
                     89:     s->write_poll = !!enable;
                     90:     tap_update_fd_handler(s);
                     91: }
                     92: 
                     93: static void tap_writable(void *opaque)
                     94: {
                     95:     TAPState *s = opaque;
                     96: 
                     97:     tap_write_poll(s, 0);
                     98: 
                     99:     qemu_flush_queued_packets(&s->nc);
                    100: }
                    101: 
                    102: static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
                    103: {
                    104:     ssize_t len;
                    105: 
                    106:     do {
                    107:         len = writev(s->fd, iov, iovcnt);
                    108:     } while (len == -1 && errno == EINTR);
                    109: 
                    110:     if (len == -1 && errno == EAGAIN) {
                    111:         tap_write_poll(s, 1);
                    112:         return 0;
                    113:     }
                    114: 
                    115:     return len;
                    116: }
                    117: 
                    118: static ssize_t tap_receive_iov(VLANClientState *nc, const struct iovec *iov,
                    119:                                int iovcnt)
                    120: {
                    121:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    122:     const struct iovec *iovp = iov;
                    123:     struct iovec iov_copy[iovcnt + 1];
                    124:     struct virtio_net_hdr hdr = { 0, };
                    125: 
                    126:     if (s->has_vnet_hdr && !s->using_vnet_hdr) {
                    127:         iov_copy[0].iov_base = &hdr;
                    128:         iov_copy[0].iov_len =  sizeof(hdr);
                    129:         memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
                    130:         iovp = iov_copy;
                    131:         iovcnt++;
                    132:     }
                    133: 
                    134:     return tap_write_packet(s, iovp, iovcnt);
                    135: }
                    136: 
                    137: static ssize_t tap_receive_raw(VLANClientState *nc, const uint8_t *buf, size_t size)
                    138: {
                    139:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    140:     struct iovec iov[2];
                    141:     int iovcnt = 0;
                    142:     struct virtio_net_hdr hdr = { 0, };
                    143: 
                    144:     if (s->has_vnet_hdr) {
                    145:         iov[iovcnt].iov_base = &hdr;
                    146:         iov[iovcnt].iov_len  = sizeof(hdr);
                    147:         iovcnt++;
                    148:     }
                    149: 
                    150:     iov[iovcnt].iov_base = (char *)buf;
                    151:     iov[iovcnt].iov_len  = size;
                    152:     iovcnt++;
                    153: 
                    154:     return tap_write_packet(s, iov, iovcnt);
                    155: }
                    156: 
                    157: static ssize_t tap_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
                    158: {
                    159:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    160:     struct iovec iov[1];
                    161: 
                    162:     if (s->has_vnet_hdr && !s->using_vnet_hdr) {
                    163:         return tap_receive_raw(nc, buf, size);
                    164:     }
                    165: 
                    166:     iov[0].iov_base = (char *)buf;
                    167:     iov[0].iov_len  = size;
                    168: 
                    169:     return tap_write_packet(s, iov, 1);
                    170: }
                    171: 
                    172: static int tap_can_send(void *opaque)
                    173: {
                    174:     TAPState *s = opaque;
                    175: 
                    176:     return qemu_can_send_packet(&s->nc);
                    177: }
                    178: 
                    179: #ifndef __sun__
                    180: ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
                    181: {
                    182:     return read(tapfd, buf, maxlen);
                    183: }
                    184: #endif
                    185: 
                    186: static void tap_send_completed(VLANClientState *nc, ssize_t len)
                    187: {
                    188:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    189:     tap_read_poll(s, 1);
                    190: }
                    191: 
                    192: static void tap_send(void *opaque)
                    193: {
                    194:     TAPState *s = opaque;
                    195:     int size;
                    196: 
                    197:     do {
                    198:         uint8_t *buf = s->buf;
                    199: 
                    200:         size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
                    201:         if (size <= 0) {
                    202:             break;
                    203:         }
                    204: 
                    205:         if (s->has_vnet_hdr && !s->using_vnet_hdr) {
                    206:             buf  += sizeof(struct virtio_net_hdr);
                    207:             size -= sizeof(struct virtio_net_hdr);
                    208:         }
                    209: 
                    210:         size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
                    211:         if (size == 0) {
                    212:             tap_read_poll(s, 0);
                    213:         }
                    214:     } while (size > 0 && qemu_can_send_packet(&s->nc));
                    215: }
                    216: 
                    217: int tap_has_ufo(VLANClientState *nc)
                    218: {
                    219:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    220: 
                    221:     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
                    222: 
                    223:     return s->has_ufo;
                    224: }
                    225: 
                    226: int tap_has_vnet_hdr(VLANClientState *nc)
                    227: {
                    228:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    229: 
                    230:     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
                    231: 
                    232:     return s->has_vnet_hdr;
                    233: }
                    234: 
                    235: void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr)
                    236: {
                    237:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    238: 
                    239:     using_vnet_hdr = using_vnet_hdr != 0;
                    240: 
                    241:     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
                    242:     assert(s->has_vnet_hdr == using_vnet_hdr);
                    243: 
                    244:     s->using_vnet_hdr = using_vnet_hdr;
                    245: }
                    246: 
                    247: void tap_set_offload(VLANClientState *nc, int csum, int tso4,
                    248:                      int tso6, int ecn, int ufo)
                    249: {
                    250:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    251: 
                    252:     return tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
                    253: }
                    254: 
                    255: static void tap_cleanup(VLANClientState *nc)
                    256: {
                    257:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
                    258: 
1.1.1.3 ! root      259:     if (s->vhost_net) {
        !           260:         vhost_net_cleanup(s->vhost_net);
        !           261:     }
        !           262: 
1.1       root      263:     qemu_purge_queued_packets(nc);
                    264: 
                    265:     if (s->down_script[0])
                    266:         launch_script(s->down_script, s->down_script_arg, s->fd);
                    267: 
                    268:     tap_read_poll(s, 0);
                    269:     tap_write_poll(s, 0);
                    270:     close(s->fd);
                    271: }
                    272: 
1.1.1.3 ! root      273: static void tap_poll(VLANClientState *nc, bool enable)
        !           274: {
        !           275:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
        !           276:     tap_read_poll(s, enable);
        !           277:     tap_write_poll(s, enable);
        !           278: }
        !           279: 
        !           280: int tap_get_fd(VLANClientState *nc)
        !           281: {
        !           282:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
        !           283:     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
        !           284:     return s->fd;
        !           285: }
        !           286: 
1.1       root      287: /* fd support */
                    288: 
                    289: static NetClientInfo net_tap_info = {
                    290:     .type = NET_CLIENT_TYPE_TAP,
                    291:     .size = sizeof(TAPState),
                    292:     .receive = tap_receive,
                    293:     .receive_raw = tap_receive_raw,
                    294:     .receive_iov = tap_receive_iov,
1.1.1.3 ! root      295:     .poll = tap_poll,
1.1       root      296:     .cleanup = tap_cleanup,
                    297: };
                    298: 
                    299: static TAPState *net_tap_fd_init(VLANState *vlan,
                    300:                                  const char *model,
                    301:                                  const char *name,
                    302:                                  int fd,
                    303:                                  int vnet_hdr)
                    304: {
                    305:     VLANClientState *nc;
                    306:     TAPState *s;
                    307: 
                    308:     nc = qemu_new_net_client(&net_tap_info, vlan, NULL, model, name);
                    309: 
                    310:     s = DO_UPCAST(TAPState, nc, nc);
                    311: 
                    312:     s->fd = fd;
                    313:     s->has_vnet_hdr = vnet_hdr != 0;
                    314:     s->using_vnet_hdr = 0;
                    315:     s->has_ufo = tap_probe_has_ufo(s->fd);
                    316:     tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
                    317:     tap_read_poll(s, 1);
1.1.1.3 ! root      318:     s->vhost_net = NULL;
1.1       root      319:     return s;
                    320: }
                    321: 
                    322: static int launch_script(const char *setup_script, const char *ifname, int fd)
                    323: {
                    324:     sigset_t oldmask, mask;
                    325:     int pid, status;
                    326:     char *args[3];
                    327:     char **parg;
                    328: 
                    329:     sigemptyset(&mask);
                    330:     sigaddset(&mask, SIGCHLD);
                    331:     sigprocmask(SIG_BLOCK, &mask, &oldmask);
                    332: 
                    333:     /* try to launch network script */
                    334:     pid = fork();
                    335:     if (pid == 0) {
                    336:         int open_max = sysconf(_SC_OPEN_MAX), i;
                    337: 
                    338:         for (i = 0; i < open_max; i++) {
                    339:             if (i != STDIN_FILENO &&
                    340:                 i != STDOUT_FILENO &&
                    341:                 i != STDERR_FILENO &&
                    342:                 i != fd) {
                    343:                 close(i);
                    344:             }
                    345:         }
                    346:         parg = args;
                    347:         *parg++ = (char *)setup_script;
                    348:         *parg++ = (char *)ifname;
1.1.1.3 ! root      349:         *parg = NULL;
1.1       root      350:         execv(setup_script, args);
                    351:         _exit(1);
                    352:     } else if (pid > 0) {
                    353:         while (waitpid(pid, &status, 0) != pid) {
                    354:             /* loop */
                    355:         }
                    356:         sigprocmask(SIG_SETMASK, &oldmask, NULL);
                    357: 
                    358:         if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
                    359:             return 0;
                    360:         }
                    361:     }
                    362:     fprintf(stderr, "%s: could not launch network script\n", setup_script);
                    363:     return -1;
                    364: }
                    365: 
                    366: static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
                    367: {
                    368:     int fd, vnet_hdr_required;
                    369:     char ifname[128] = {0,};
                    370:     const char *setup_script;
                    371: 
                    372:     if (qemu_opt_get(opts, "ifname")) {
                    373:         pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname"));
                    374:     }
                    375: 
                    376:     *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1);
                    377:     if (qemu_opt_get(opts, "vnet_hdr")) {
                    378:         vnet_hdr_required = *vnet_hdr;
                    379:     } else {
                    380:         vnet_hdr_required = 0;
                    381:     }
                    382: 
                    383:     TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
                    384:     if (fd < 0) {
                    385:         return -1;
                    386:     }
                    387: 
                    388:     setup_script = qemu_opt_get(opts, "script");
                    389:     if (setup_script &&
                    390:         setup_script[0] != '\0' &&
                    391:         strcmp(setup_script, "no") != 0 &&
                    392:         launch_script(setup_script, ifname, fd)) {
                    393:         close(fd);
                    394:         return -1;
                    395:     }
                    396: 
                    397:     qemu_opt_set(opts, "ifname", ifname);
                    398: 
                    399:     return fd;
                    400: }
                    401: 
                    402: int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
                    403: {
                    404:     TAPState *s;
                    405:     int fd, vnet_hdr = 0;
                    406: 
                    407:     if (qemu_opt_get(opts, "fd")) {
                    408:         if (qemu_opt_get(opts, "ifname") ||
                    409:             qemu_opt_get(opts, "script") ||
                    410:             qemu_opt_get(opts, "downscript") ||
                    411:             qemu_opt_get(opts, "vnet_hdr")) {
1.1.1.3 ! root      412:             error_report("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=");
1.1       root      413:             return -1;
                    414:         }
                    415: 
                    416:         fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
                    417:         if (fd == -1) {
                    418:             return -1;
                    419:         }
                    420: 
                    421:         fcntl(fd, F_SETFL, O_NONBLOCK);
                    422: 
                    423:         vnet_hdr = tap_probe_vnet_hdr(fd);
                    424:     } else {
                    425:         if (!qemu_opt_get(opts, "script")) {
                    426:             qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
                    427:         }
                    428: 
                    429:         if (!qemu_opt_get(opts, "downscript")) {
                    430:             qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
                    431:         }
                    432: 
                    433:         fd = net_tap_init(opts, &vnet_hdr);
                    434:         if (fd == -1) {
                    435:             return -1;
                    436:         }
                    437:     }
                    438: 
                    439:     s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
                    440:     if (!s) {
                    441:         close(fd);
                    442:         return -1;
                    443:     }
                    444: 
                    445:     if (tap_set_sndbuf(s->fd, opts) < 0) {
                    446:         return -1;
                    447:     }
                    448: 
                    449:     if (qemu_opt_get(opts, "fd")) {
                    450:         snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
                    451:     } else {
                    452:         const char *ifname, *script, *downscript;
                    453: 
                    454:         ifname     = qemu_opt_get(opts, "ifname");
                    455:         script     = qemu_opt_get(opts, "script");
                    456:         downscript = qemu_opt_get(opts, "downscript");
                    457: 
                    458:         snprintf(s->nc.info_str, sizeof(s->nc.info_str),
                    459:                  "ifname=%s,script=%s,downscript=%s",
                    460:                  ifname, script, downscript);
                    461: 
                    462:         if (strcmp(downscript, "no") != 0) {
                    463:             snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
                    464:             snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
                    465:         }
                    466:     }
                    467: 
1.1.1.3 ! root      468:     if (qemu_opt_get_bool(opts, "vhost", !!qemu_opt_get(opts, "vhostfd"))) {
        !           469:         int vhostfd, r;
        !           470:         if (qemu_opt_get(opts, "vhostfd")) {
        !           471:             r = net_handle_fd_param(mon, qemu_opt_get(opts, "vhostfd"));
        !           472:             if (r == -1) {
        !           473:                 return -1;
        !           474:             }
        !           475:             vhostfd = r;
        !           476:         } else {
        !           477:             vhostfd = -1;
        !           478:         }
        !           479:         s->vhost_net = vhost_net_init(&s->nc, vhostfd);
        !           480:         if (!s->vhost_net) {
        !           481:             error_report("vhost-net requested but could not be initialized");
        !           482:             return -1;
        !           483:         }
        !           484:     } else if (qemu_opt_get(opts, "vhostfd")) {
        !           485:         error_report("vhostfd= is not valid without vhost");
        !           486:         return -1;
        !           487:     }
        !           488: 
1.1       root      489:     return 0;
                    490: }
1.1.1.3 ! root      491: 
        !           492: VHostNetState *tap_get_vhost_net(VLANClientState *nc)
        !           493: {
        !           494:     TAPState *s = DO_UPCAST(TAPState, nc, nc);
        !           495:     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
        !           496:     return s->vhost_net;
        !           497: }

unix.superglobalmegacorp.com