15281d757SMark McLoughlin /* 25281d757SMark McLoughlin * QEMU System Emulator 35281d757SMark McLoughlin * 45281d757SMark McLoughlin * Copyright (c) 2003-2008 Fabrice Bellard 55281d757SMark McLoughlin * Copyright (c) 2009 Red Hat, Inc. 65281d757SMark McLoughlin * 75281d757SMark McLoughlin * Permission is hereby granted, free of charge, to any person obtaining a copy 85281d757SMark McLoughlin * of this software and associated documentation files (the "Software"), to deal 95281d757SMark McLoughlin * in the Software without restriction, including without limitation the rights 105281d757SMark McLoughlin * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 115281d757SMark McLoughlin * copies of the Software, and to permit persons to whom the Software is 125281d757SMark McLoughlin * furnished to do so, subject to the following conditions: 135281d757SMark McLoughlin * 145281d757SMark McLoughlin * The above copyright notice and this permission notice shall be included in 155281d757SMark McLoughlin * all copies or substantial portions of the Software. 165281d757SMark McLoughlin * 175281d757SMark McLoughlin * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 185281d757SMark McLoughlin * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 195281d757SMark McLoughlin * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 205281d757SMark McLoughlin * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 215281d757SMark McLoughlin * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 225281d757SMark McLoughlin * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 235281d757SMark McLoughlin * THE SOFTWARE. 245281d757SMark McLoughlin */ 255281d757SMark McLoughlin 262744d920SPeter Maydell #include "qemu/osdep.h" 271422e32dSPaolo Bonzini #include "tap_int.h" 285281d757SMark McLoughlin 295281d757SMark McLoughlin 305281d757SMark McLoughlin #include <sys/ioctl.h> 315281d757SMark McLoughlin #include <sys/wait.h> 3271f4effcSAlexander Graf #include <sys/socket.h> 335281d757SMark McLoughlin #include <net/if.h> 345281d757SMark McLoughlin 35969e50b6SBin Meng #include "net/eth.h" 361422e32dSPaolo Bonzini #include "net/net.h" 37a245fc18SPaolo Bonzini #include "clients.h" 3883c9089eSPaolo Bonzini #include "monitor/monitor.h" 399c17d615SPaolo Bonzini #include "sysemu/sysemu.h" 40da34e65cSMarkus Armbruster #include "qapi/error.h" 41f348b6d1SVeronia Bahaa #include "qemu/cutils.h" 421de7afc9SPaolo Bonzini #include "qemu/error-report.h" 43db725815SMarkus Armbruster #include "qemu/main-loop.h" 44d542800dSBrijesh Singh #include "qemu/sockets.h" 455281d757SMark McLoughlin 461422e32dSPaolo Bonzini #include "net/tap.h" 475281d757SMark McLoughlin 480d09e41aSPaolo Bonzini #include "net/vhost_net.h" 4982b0d80eSMichael S. Tsirkin 505281d757SMark McLoughlin typedef struct TAPState { 514e68f7a0SStefan Hajnoczi NetClientState nc; 525281d757SMark McLoughlin int fd; 535281d757SMark McLoughlin char down_script[1024]; 545281d757SMark McLoughlin char down_script_arg[128]; 55d32fcad3SScott Feldman uint8_t buf[NET_BUFSIZE]; 56ec45f083SJason Wang bool read_poll; 57ec45f083SJason Wang bool write_poll; 58ec45f083SJason Wang bool using_vnet_hdr; 59ec45f083SJason Wang bool has_ufo; 6016dbaf90SJason Wang bool enabled; 6182b0d80eSMichael S. Tsirkin VHostNetState *vhost_net; 62ef4252b1SMichael S. Tsirkin unsigned host_vnet_hdr_len; 639e32ff32SMarc-André Lureau Notifier exit; 645281d757SMark McLoughlin } TAPState; 655281d757SMark McLoughlin 66ac4fcf56SMarkus Armbruster static void launch_script(const char *setup_script, const char *ifname, 67ac4fcf56SMarkus Armbruster int fd, Error **errp); 685281d757SMark McLoughlin 695281d757SMark McLoughlin static void tap_send(void *opaque); 705281d757SMark McLoughlin static void tap_writable(void *opaque); 715281d757SMark McLoughlin 725281d757SMark McLoughlin static void tap_update_fd_handler(TAPState *s) 735281d757SMark McLoughlin { 7482e1cc4bSFam Zheng qemu_set_fd_handler(s->fd, 7516dbaf90SJason Wang s->read_poll && s->enabled ? tap_send : NULL, 7616dbaf90SJason Wang s->write_poll && s->enabled ? tap_writable : NULL, 775281d757SMark McLoughlin s); 785281d757SMark McLoughlin } 795281d757SMark McLoughlin 80ec45f083SJason Wang static void tap_read_poll(TAPState *s, bool enable) 815281d757SMark McLoughlin { 82ec45f083SJason Wang s->read_poll = enable; 835281d757SMark McLoughlin tap_update_fd_handler(s); 845281d757SMark McLoughlin } 855281d757SMark McLoughlin 86ec45f083SJason Wang static void tap_write_poll(TAPState *s, bool enable) 875281d757SMark McLoughlin { 88ec45f083SJason Wang s->write_poll = enable; 895281d757SMark McLoughlin tap_update_fd_handler(s); 905281d757SMark McLoughlin } 915281d757SMark McLoughlin 925281d757SMark McLoughlin static void tap_writable(void *opaque) 935281d757SMark McLoughlin { 945281d757SMark McLoughlin TAPState *s = opaque; 955281d757SMark McLoughlin 96ec45f083SJason Wang tap_write_poll(s, false); 975281d757SMark McLoughlin 983e35ba93SMark McLoughlin qemu_flush_queued_packets(&s->nc); 995281d757SMark McLoughlin } 1005281d757SMark McLoughlin 1015281d757SMark McLoughlin static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt) 1025281d757SMark McLoughlin { 1035281d757SMark McLoughlin ssize_t len; 1045281d757SMark McLoughlin 10537b0b24eSNikita Ivanov len = RETRY_ON_EINTR(writev(s->fd, iov, iovcnt)); 1065281d757SMark McLoughlin 1075281d757SMark McLoughlin if (len == -1 && errno == EAGAIN) { 108ec45f083SJason Wang tap_write_poll(s, true); 1095281d757SMark McLoughlin return 0; 1105281d757SMark McLoughlin } 1115281d757SMark McLoughlin 1125281d757SMark McLoughlin return len; 1135281d757SMark McLoughlin } 1145281d757SMark McLoughlin 1154e68f7a0SStefan Hajnoczi static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov, 1165281d757SMark McLoughlin int iovcnt) 1175281d757SMark McLoughlin { 1183e35ba93SMark McLoughlin TAPState *s = DO_UPCAST(TAPState, nc, nc); 1195281d757SMark McLoughlin const struct iovec *iovp = iov; 1205281d757SMark McLoughlin struct iovec iov_copy[iovcnt + 1]; 121ef4252b1SMichael S. Tsirkin struct virtio_net_hdr_mrg_rxbuf hdr = { }; 1225281d757SMark McLoughlin 123ef4252b1SMichael S. Tsirkin if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { 1245281d757SMark McLoughlin iov_copy[0].iov_base = &hdr; 125ef4252b1SMichael S. Tsirkin iov_copy[0].iov_len = s->host_vnet_hdr_len; 1265281d757SMark McLoughlin memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov)); 1275281d757SMark McLoughlin iovp = iov_copy; 1285281d757SMark McLoughlin iovcnt++; 1295281d757SMark McLoughlin } 1305281d757SMark McLoughlin 1315281d757SMark McLoughlin return tap_write_packet(s, iovp, iovcnt); 1325281d757SMark McLoughlin } 1335281d757SMark McLoughlin 1344e68f7a0SStefan Hajnoczi static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size) 1355281d757SMark McLoughlin { 1363e35ba93SMark McLoughlin TAPState *s = DO_UPCAST(TAPState, nc, nc); 1375281d757SMark McLoughlin struct iovec iov[2]; 1385281d757SMark McLoughlin int iovcnt = 0; 139ef4252b1SMichael S. Tsirkin struct virtio_net_hdr_mrg_rxbuf hdr = { }; 1405281d757SMark McLoughlin 141ef4252b1SMichael S. Tsirkin if (s->host_vnet_hdr_len) { 1425281d757SMark McLoughlin iov[iovcnt].iov_base = &hdr; 143ef4252b1SMichael S. Tsirkin iov[iovcnt].iov_len = s->host_vnet_hdr_len; 1445281d757SMark McLoughlin iovcnt++; 1455281d757SMark McLoughlin } 1465281d757SMark McLoughlin 1475281d757SMark McLoughlin iov[iovcnt].iov_base = (char *)buf; 1485281d757SMark McLoughlin iov[iovcnt].iov_len = size; 1495281d757SMark McLoughlin iovcnt++; 1505281d757SMark McLoughlin 1515281d757SMark McLoughlin return tap_write_packet(s, iov, iovcnt); 1525281d757SMark McLoughlin } 1535281d757SMark McLoughlin 1544e68f7a0SStefan Hajnoczi static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size) 1555281d757SMark McLoughlin { 1563e35ba93SMark McLoughlin TAPState *s = DO_UPCAST(TAPState, nc, nc); 1575281d757SMark McLoughlin struct iovec iov[1]; 1585281d757SMark McLoughlin 159ef4252b1SMichael S. Tsirkin if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { 1603e35ba93SMark McLoughlin return tap_receive_raw(nc, buf, size); 1615281d757SMark McLoughlin } 1625281d757SMark McLoughlin 1635281d757SMark McLoughlin iov[0].iov_base = (char *)buf; 1645281d757SMark McLoughlin iov[0].iov_len = size; 1655281d757SMark McLoughlin 1665281d757SMark McLoughlin return tap_write_packet(s, iov, 1); 1675281d757SMark McLoughlin } 1685281d757SMark McLoughlin 169966ea5ecSMark McLoughlin #ifndef __sun__ 170966ea5ecSMark McLoughlin ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen) 1715281d757SMark McLoughlin { 1725281d757SMark McLoughlin return read(tapfd, buf, maxlen); 1735281d757SMark McLoughlin } 1745281d757SMark McLoughlin #endif 1755281d757SMark McLoughlin 1764e68f7a0SStefan Hajnoczi static void tap_send_completed(NetClientState *nc, ssize_t len) 1775281d757SMark McLoughlin { 1783e35ba93SMark McLoughlin TAPState *s = DO_UPCAST(TAPState, nc, nc); 179ec45f083SJason Wang tap_read_poll(s, true); 1805281d757SMark McLoughlin } 1815281d757SMark McLoughlin 1825281d757SMark McLoughlin static void tap_send(void *opaque) 1835281d757SMark McLoughlin { 1845281d757SMark McLoughlin TAPState *s = opaque; 185be1636b3SMark McLoughlin int size; 186756ae78bSWangkai (Kevin,C) int packets = 0; 1875281d757SMark McLoughlin 188a90a7425SFam Zheng while (true) { 1895819c918SMark McLoughlin uint8_t *buf = s->buf; 190969e50b6SBin Meng uint8_t min_pkt[ETH_ZLEN]; 191969e50b6SBin Meng size_t min_pktsz = sizeof(min_pkt); 1925819c918SMark McLoughlin 1935281d757SMark McLoughlin size = tap_read_packet(s->fd, s->buf, sizeof(s->buf)); 1945281d757SMark McLoughlin if (size <= 0) { 1955819c918SMark McLoughlin break; 1965281d757SMark McLoughlin } 1975281d757SMark McLoughlin 198ef4252b1SMichael S. Tsirkin if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { 199ef4252b1SMichael S. Tsirkin buf += s->host_vnet_hdr_len; 200ef4252b1SMichael S. Tsirkin size -= s->host_vnet_hdr_len; 2015281d757SMark McLoughlin } 2025281d757SMark McLoughlin 203bc38e31bSJason Wang if (net_peer_needs_padding(&s->nc)) { 204969e50b6SBin Meng if (eth_pad_short_frame(min_pkt, &min_pktsz, buf, size)) { 205969e50b6SBin Meng buf = min_pkt; 206969e50b6SBin Meng size = min_pktsz; 207969e50b6SBin Meng } 208969e50b6SBin Meng } 209969e50b6SBin Meng 2103e35ba93SMark McLoughlin size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed); 2115281d757SMark McLoughlin if (size == 0) { 212ec45f083SJason Wang tap_read_poll(s, false); 21368e5ec64SStefan Hajnoczi break; 21468e5ec64SStefan Hajnoczi } else if (size < 0) { 21568e5ec64SStefan Hajnoczi break; 2165281d757SMark McLoughlin } 217756ae78bSWangkai (Kevin,C) 218756ae78bSWangkai (Kevin,C) /* 219756ae78bSWangkai (Kevin,C) * When the host keeps receiving more packets while tap_send() is 220756ae78bSWangkai (Kevin,C) * running we can hog the QEMU global mutex. Limit the number of 221756ae78bSWangkai (Kevin,C) * packets that are processed per tap_send() callback to prevent 222756ae78bSWangkai (Kevin,C) * stalling the guest. 223756ae78bSWangkai (Kevin,C) */ 224756ae78bSWangkai (Kevin,C) packets++; 225756ae78bSWangkai (Kevin,C) if (packets >= 50) { 226756ae78bSWangkai (Kevin,C) break; 227756ae78bSWangkai (Kevin,C) } 22868e5ec64SStefan Hajnoczi } 2295281d757SMark McLoughlin } 2305281d757SMark McLoughlin 2313bac80d3SVincenzo Maffione static bool tap_has_ufo(NetClientState *nc) 2325281d757SMark McLoughlin { 2333e35ba93SMark McLoughlin TAPState *s = DO_UPCAST(TAPState, nc, nc); 2345281d757SMark McLoughlin 235f394b2e2SEric Blake assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 2365281d757SMark McLoughlin 2375281d757SMark McLoughlin return s->has_ufo; 2385281d757SMark McLoughlin } 2395281d757SMark McLoughlin 2403bac80d3SVincenzo Maffione static bool tap_has_vnet_hdr(NetClientState *nc) 2415281d757SMark McLoughlin { 2423e35ba93SMark McLoughlin TAPState *s = DO_UPCAST(TAPState, nc, nc); 2435281d757SMark McLoughlin 244f394b2e2SEric Blake assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 2455281d757SMark McLoughlin 246ef4252b1SMichael S. Tsirkin return !!s->host_vnet_hdr_len; 2475281d757SMark McLoughlin } 2485281d757SMark McLoughlin 2493bac80d3SVincenzo Maffione static bool tap_has_vnet_hdr_len(NetClientState *nc, int len) 250445d892fSMichael S. Tsirkin { 251445d892fSMichael S. Tsirkin TAPState *s = DO_UPCAST(TAPState, nc, nc); 252445d892fSMichael S. Tsirkin 253f394b2e2SEric Blake assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 254445d892fSMichael S. Tsirkin 255e96dfd11SVincenzo Maffione return !!tap_probe_vnet_hdr_len(s->fd, len); 256445d892fSMichael S. Tsirkin } 257445d892fSMichael S. Tsirkin 258*481c5232SAkihiko Odaki static int tap_get_vnet_hdr_len(NetClientState *nc) 259*481c5232SAkihiko Odaki { 260*481c5232SAkihiko Odaki TAPState *s = DO_UPCAST(TAPState, nc, nc); 261*481c5232SAkihiko Odaki 262*481c5232SAkihiko Odaki return s->host_vnet_hdr_len; 263*481c5232SAkihiko Odaki } 264*481c5232SAkihiko Odaki 2653bac80d3SVincenzo Maffione static void tap_set_vnet_hdr_len(NetClientState *nc, int len) 266445d892fSMichael S. Tsirkin { 267445d892fSMichael S. Tsirkin TAPState *s = DO_UPCAST(TAPState, nc, nc); 268445d892fSMichael S. Tsirkin 269f394b2e2SEric Blake assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 270445d892fSMichael S. Tsirkin assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) || 271fbbdbddeSYuri Benditovich len == sizeof(struct virtio_net_hdr) || 272fbbdbddeSYuri Benditovich len == sizeof(struct virtio_net_hdr_v1_hash)); 273445d892fSMichael S. Tsirkin 274445d892fSMichael S. Tsirkin tap_fd_set_vnet_hdr_len(s->fd, len); 275445d892fSMichael S. Tsirkin s->host_vnet_hdr_len = len; 276445d892fSMichael S. Tsirkin } 277445d892fSMichael S. Tsirkin 278*481c5232SAkihiko Odaki static bool tap_get_using_vnet_hdr(NetClientState *nc) 279*481c5232SAkihiko Odaki { 280*481c5232SAkihiko Odaki TAPState *s = DO_UPCAST(TAPState, nc, nc); 281*481c5232SAkihiko Odaki 282*481c5232SAkihiko Odaki return s->using_vnet_hdr; 283*481c5232SAkihiko Odaki } 284*481c5232SAkihiko Odaki 2853bac80d3SVincenzo Maffione static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr) 2865281d757SMark McLoughlin { 2873e35ba93SMark McLoughlin TAPState *s = DO_UPCAST(TAPState, nc, nc); 2885281d757SMark McLoughlin 289f394b2e2SEric Blake assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 290ef4252b1SMichael S. Tsirkin assert(!!s->host_vnet_hdr_len == using_vnet_hdr); 2915281d757SMark McLoughlin 2925281d757SMark McLoughlin s->using_vnet_hdr = using_vnet_hdr; 2935281d757SMark McLoughlin } 2945281d757SMark McLoughlin 295c80cd6bbSGreg Kurz static int tap_set_vnet_le(NetClientState *nc, bool is_le) 296c80cd6bbSGreg Kurz { 297c80cd6bbSGreg Kurz TAPState *s = DO_UPCAST(TAPState, nc, nc); 298c80cd6bbSGreg Kurz 299c80cd6bbSGreg Kurz return tap_fd_set_vnet_le(s->fd, is_le); 300c80cd6bbSGreg Kurz } 301c80cd6bbSGreg Kurz 302c80cd6bbSGreg Kurz static int tap_set_vnet_be(NetClientState *nc, bool is_be) 303c80cd6bbSGreg Kurz { 304c80cd6bbSGreg Kurz TAPState *s = DO_UPCAST(TAPState, nc, nc); 305c80cd6bbSGreg Kurz 306c80cd6bbSGreg Kurz return tap_fd_set_vnet_be(s->fd, is_be); 307c80cd6bbSGreg Kurz } 308c80cd6bbSGreg Kurz 3093bac80d3SVincenzo Maffione static void tap_set_offload(NetClientState *nc, int csum, int tso4, 3105281d757SMark McLoughlin int tso6, int ecn, int ufo) 3115281d757SMark McLoughlin { 3123e35ba93SMark McLoughlin TAPState *s = DO_UPCAST(TAPState, nc, nc); 31327a6375dSMichael S. Tsirkin if (s->fd < 0) { 31427a6375dSMichael S. Tsirkin return; 31527a6375dSMichael S. Tsirkin } 3165281d757SMark McLoughlin 31727a6375dSMichael S. Tsirkin tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo); 3185281d757SMark McLoughlin } 3195281d757SMark McLoughlin 3209e32ff32SMarc-André Lureau static void tap_exit_notify(Notifier *notifier, void *data) 3219e32ff32SMarc-André Lureau { 3229e32ff32SMarc-André Lureau TAPState *s = container_of(notifier, TAPState, exit); 3239e32ff32SMarc-André Lureau Error *err = NULL; 3249e32ff32SMarc-André Lureau 3259e32ff32SMarc-André Lureau if (s->down_script[0]) { 3269e32ff32SMarc-André Lureau launch_script(s->down_script, s->down_script_arg, s->fd, &err); 3279e32ff32SMarc-André Lureau if (err) { 3289e32ff32SMarc-André Lureau error_report_err(err); 3299e32ff32SMarc-André Lureau } 3309e32ff32SMarc-André Lureau } 3319e32ff32SMarc-André Lureau } 3329e32ff32SMarc-André Lureau 3334e68f7a0SStefan Hajnoczi static void tap_cleanup(NetClientState *nc) 3345281d757SMark McLoughlin { 3353e35ba93SMark McLoughlin TAPState *s = DO_UPCAST(TAPState, nc, nc); 3365281d757SMark McLoughlin 33782b0d80eSMichael S. Tsirkin if (s->vhost_net) { 33882b0d80eSMichael S. Tsirkin vhost_net_cleanup(s->vhost_net); 339e6bcb1b6SMarc-André Lureau g_free(s->vhost_net); 34043849424SMichael S. Tsirkin s->vhost_net = NULL; 34182b0d80eSMichael S. Tsirkin } 34282b0d80eSMichael S. Tsirkin 3433e35ba93SMark McLoughlin qemu_purge_queued_packets(nc); 3445281d757SMark McLoughlin 3459e32ff32SMarc-André Lureau tap_exit_notify(&s->exit, NULL); 3469e32ff32SMarc-André Lureau qemu_remove_exit_notifier(&s->exit); 3475281d757SMark McLoughlin 348ec45f083SJason Wang tap_read_poll(s, false); 349ec45f083SJason Wang tap_write_poll(s, false); 3505281d757SMark McLoughlin close(s->fd); 35127a6375dSMichael S. Tsirkin s->fd = -1; 3525281d757SMark McLoughlin } 3535281d757SMark McLoughlin 3544e68f7a0SStefan Hajnoczi static void tap_poll(NetClientState *nc, bool enable) 355ceb69615SMichael S. Tsirkin { 356ceb69615SMichael S. Tsirkin TAPState *s = DO_UPCAST(TAPState, nc, nc); 357ceb69615SMichael S. Tsirkin tap_read_poll(s, enable); 358ceb69615SMichael S. Tsirkin tap_write_poll(s, enable); 359ceb69615SMichael S. Tsirkin } 360ceb69615SMichael S. Tsirkin 3618f364e34SAndrew Melnychenko static bool tap_set_steering_ebpf(NetClientState *nc, int prog_fd) 3628f364e34SAndrew Melnychenko { 3638f364e34SAndrew Melnychenko TAPState *s = DO_UPCAST(TAPState, nc, nc); 3648f364e34SAndrew Melnychenko assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 3658f364e34SAndrew Melnychenko 3668f364e34SAndrew Melnychenko return tap_fd_set_steering_ebpf(s->fd, prog_fd) == 0; 3678f364e34SAndrew Melnychenko } 3688f364e34SAndrew Melnychenko 3694e68f7a0SStefan Hajnoczi int tap_get_fd(NetClientState *nc) 37095d528a2SMichael S. Tsirkin { 37195d528a2SMichael S. Tsirkin TAPState *s = DO_UPCAST(TAPState, nc, nc); 372f394b2e2SEric Blake assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 37395d528a2SMichael S. Tsirkin return s->fd; 37495d528a2SMichael S. Tsirkin } 37595d528a2SMichael S. Tsirkin 3765281d757SMark McLoughlin /* fd support */ 3775281d757SMark McLoughlin 3783e35ba93SMark McLoughlin static NetClientInfo net_tap_info = { 379f394b2e2SEric Blake .type = NET_CLIENT_DRIVER_TAP, 3803e35ba93SMark McLoughlin .size = sizeof(TAPState), 3813e35ba93SMark McLoughlin .receive = tap_receive, 3823e35ba93SMark McLoughlin .receive_raw = tap_receive_raw, 3833e35ba93SMark McLoughlin .receive_iov = tap_receive_iov, 384ceb69615SMichael S. Tsirkin .poll = tap_poll, 3853e35ba93SMark McLoughlin .cleanup = tap_cleanup, 3862e753bccSVincenzo Maffione .has_ufo = tap_has_ufo, 3872e753bccSVincenzo Maffione .has_vnet_hdr = tap_has_vnet_hdr, 3882e753bccSVincenzo Maffione .has_vnet_hdr_len = tap_has_vnet_hdr_len, 389*481c5232SAkihiko Odaki .get_using_vnet_hdr = tap_get_using_vnet_hdr, 3902e753bccSVincenzo Maffione .using_vnet_hdr = tap_using_vnet_hdr, 3912e753bccSVincenzo Maffione .set_offload = tap_set_offload, 392*481c5232SAkihiko Odaki .get_vnet_hdr_len = tap_get_vnet_hdr_len, 3932e753bccSVincenzo Maffione .set_vnet_hdr_len = tap_set_vnet_hdr_len, 394c80cd6bbSGreg Kurz .set_vnet_le = tap_set_vnet_le, 395c80cd6bbSGreg Kurz .set_vnet_be = tap_set_vnet_be, 3968f364e34SAndrew Melnychenko .set_steering_ebpf = tap_set_steering_ebpf, 3973e35ba93SMark McLoughlin }; 3983e35ba93SMark McLoughlin 3994e68f7a0SStefan Hajnoczi static TAPState *net_tap_fd_init(NetClientState *peer, 4005281d757SMark McLoughlin const char *model, 4015281d757SMark McLoughlin const char *name, 4025281d757SMark McLoughlin int fd, 4035281d757SMark McLoughlin int vnet_hdr) 4045281d757SMark McLoughlin { 4054e68f7a0SStefan Hajnoczi NetClientState *nc; 4065281d757SMark McLoughlin TAPState *s; 4075281d757SMark McLoughlin 408ab5f3f84SStefan Hajnoczi nc = qemu_new_net_client(&net_tap_info, peer, model, name); 4093e35ba93SMark McLoughlin 4103e35ba93SMark McLoughlin s = DO_UPCAST(TAPState, nc, nc); 4113e35ba93SMark McLoughlin 4125281d757SMark McLoughlin s->fd = fd; 413ef4252b1SMichael S. Tsirkin s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0; 414ec45f083SJason Wang s->using_vnet_hdr = false; 4159c282718SMark McLoughlin s->has_ufo = tap_probe_has_ufo(s->fd); 41616dbaf90SJason Wang s->enabled = true; 4173e35ba93SMark McLoughlin tap_set_offload(&s->nc, 0, 0, 0, 0, 0); 41858ddcd50SMichael S. Tsirkin /* 41958ddcd50SMichael S. Tsirkin * Make sure host header length is set correctly in tap: 42058ddcd50SMichael S. Tsirkin * it might have been modified by another instance of qemu. 42158ddcd50SMichael S. Tsirkin */ 42258ddcd50SMichael S. Tsirkin if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) { 42358ddcd50SMichael S. Tsirkin tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len); 42458ddcd50SMichael S. Tsirkin } 425ec45f083SJason Wang tap_read_poll(s, true); 42682b0d80eSMichael S. Tsirkin s->vhost_net = NULL; 4279e32ff32SMarc-André Lureau 4289e32ff32SMarc-André Lureau s->exit.notify = tap_exit_notify; 4299e32ff32SMarc-André Lureau qemu_add_exit_notifier(&s->exit); 4309e32ff32SMarc-André Lureau 4315281d757SMark McLoughlin return s; 4325281d757SMark McLoughlin } 4335281d757SMark McLoughlin 434ac4fcf56SMarkus Armbruster static void launch_script(const char *setup_script, const char *ifname, 435ac4fcf56SMarkus Armbruster int fd, Error **errp) 4365281d757SMark McLoughlin { 4375281d757SMark McLoughlin int pid, status; 4385281d757SMark McLoughlin char *args[3]; 4395281d757SMark McLoughlin char **parg; 4405281d757SMark McLoughlin 4415281d757SMark McLoughlin /* try to launch network script */ 4425281d757SMark McLoughlin pid = fork(); 443ac4fcf56SMarkus Armbruster if (pid < 0) { 444ac4fcf56SMarkus Armbruster error_setg_errno(errp, errno, "could not launch network script %s", 445ac4fcf56SMarkus Armbruster setup_script); 446ac4fcf56SMarkus Armbruster return; 447ac4fcf56SMarkus Armbruster } 4485281d757SMark McLoughlin if (pid == 0) { 4495281d757SMark McLoughlin int open_max = sysconf(_SC_OPEN_MAX), i; 4505281d757SMark McLoughlin 45113a12f86SPankaj Gupta for (i = 3; i < open_max; i++) { 45213a12f86SPankaj Gupta if (i != fd) { 4535281d757SMark McLoughlin close(i); 4545281d757SMark McLoughlin } 4555281d757SMark McLoughlin } 4565281d757SMark McLoughlin parg = args; 4575281d757SMark McLoughlin *parg++ = (char *)setup_script; 4585281d757SMark McLoughlin *parg++ = (char *)ifname; 4599678d950SBlue Swirl *parg = NULL; 4605281d757SMark McLoughlin execv(setup_script, args); 4615281d757SMark McLoughlin _exit(1); 462ac4fcf56SMarkus Armbruster } else { 4635281d757SMark McLoughlin while (waitpid(pid, &status, 0) != pid) { 4645281d757SMark McLoughlin /* loop */ 4655281d757SMark McLoughlin } 4665281d757SMark McLoughlin 4675281d757SMark McLoughlin if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 468ac4fcf56SMarkus Armbruster return; 4695281d757SMark McLoughlin } 470ac4fcf56SMarkus Armbruster error_setg(errp, "network script %s failed with status %d", 471ac4fcf56SMarkus Armbruster setup_script, status); 4725281d757SMark McLoughlin } 4735281d757SMark McLoughlin } 4745281d757SMark McLoughlin 475a7c36ee4SCorey Bryant static int recv_fd(int c) 476a7c36ee4SCorey Bryant { 477a7c36ee4SCorey Bryant int fd; 478a7c36ee4SCorey Bryant uint8_t msgbuf[CMSG_SPACE(sizeof(fd))]; 479a7c36ee4SCorey Bryant struct msghdr msg = { 480a7c36ee4SCorey Bryant .msg_control = msgbuf, 481a7c36ee4SCorey Bryant .msg_controllen = sizeof(msgbuf), 482a7c36ee4SCorey Bryant }; 483a7c36ee4SCorey Bryant struct cmsghdr *cmsg; 484a7c36ee4SCorey Bryant struct iovec iov; 485a7c36ee4SCorey Bryant uint8_t req[1]; 486a7c36ee4SCorey Bryant ssize_t len; 487a7c36ee4SCorey Bryant 488a7c36ee4SCorey Bryant cmsg = CMSG_FIRSTHDR(&msg); 489a7c36ee4SCorey Bryant cmsg->cmsg_level = SOL_SOCKET; 490a7c36ee4SCorey Bryant cmsg->cmsg_type = SCM_RIGHTS; 491a7c36ee4SCorey Bryant cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); 492a7c36ee4SCorey Bryant msg.msg_controllen = cmsg->cmsg_len; 493a7c36ee4SCorey Bryant 494a7c36ee4SCorey Bryant iov.iov_base = req; 495a7c36ee4SCorey Bryant iov.iov_len = sizeof(req); 496a7c36ee4SCorey Bryant 497a7c36ee4SCorey Bryant msg.msg_iov = &iov; 498a7c36ee4SCorey Bryant msg.msg_iovlen = 1; 499a7c36ee4SCorey Bryant 500a7c36ee4SCorey Bryant len = recvmsg(c, &msg, 0); 501a7c36ee4SCorey Bryant if (len > 0) { 502a7c36ee4SCorey Bryant memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd)); 503a7c36ee4SCorey Bryant return fd; 504a7c36ee4SCorey Bryant } 505a7c36ee4SCorey Bryant 506a7c36ee4SCorey Bryant return len; 507a7c36ee4SCorey Bryant } 508a7c36ee4SCorey Bryant 509a8a21be9SMarkus Armbruster static int net_bridge_run_helper(const char *helper, const char *bridge, 510a8a21be9SMarkus Armbruster Error **errp) 511a7c36ee4SCorey Bryant { 512a7c36ee4SCorey Bryant sigset_t oldmask, mask; 51363c4db4cSPaolo Bonzini g_autofree char *default_helper = NULL; 514a7c36ee4SCorey Bryant int pid, status; 515a7c36ee4SCorey Bryant char *args[5]; 516a7c36ee4SCorey Bryant char **parg; 517a7c36ee4SCorey Bryant int sv[2]; 518a7c36ee4SCorey Bryant 519a7c36ee4SCorey Bryant sigemptyset(&mask); 520a7c36ee4SCorey Bryant sigaddset(&mask, SIGCHLD); 521a7c36ee4SCorey Bryant sigprocmask(SIG_BLOCK, &mask, &oldmask); 522a7c36ee4SCorey Bryant 52363c4db4cSPaolo Bonzini if (!helper) { 52463c4db4cSPaolo Bonzini helper = default_helper = get_relocated_path(DEFAULT_BRIDGE_HELPER); 52563c4db4cSPaolo Bonzini } 52663c4db4cSPaolo Bonzini 527a7c36ee4SCorey Bryant if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 528a8a21be9SMarkus Armbruster error_setg_errno(errp, errno, "socketpair() failed"); 529a7c36ee4SCorey Bryant return -1; 530a7c36ee4SCorey Bryant } 531a7c36ee4SCorey Bryant 532a7c36ee4SCorey Bryant /* try to launch bridge helper */ 533a7c36ee4SCorey Bryant pid = fork(); 534a8a21be9SMarkus Armbruster if (pid < 0) { 535a8a21be9SMarkus Armbruster error_setg_errno(errp, errno, "Can't fork bridge helper"); 536a8a21be9SMarkus Armbruster return -1; 537a8a21be9SMarkus Armbruster } 538a7c36ee4SCorey Bryant if (pid == 0) { 539a7c36ee4SCorey Bryant int open_max = sysconf(_SC_OPEN_MAX), i; 540389abe1dSPrasad J Pandit char *fd_buf = NULL; 541389abe1dSPrasad J Pandit char *br_buf = NULL; 542389abe1dSPrasad J Pandit char *helper_cmd = NULL; 543a7c36ee4SCorey Bryant 54413a12f86SPankaj Gupta for (i = 3; i < open_max; i++) { 54513a12f86SPankaj Gupta if (i != sv[1]) { 546a7c36ee4SCorey Bryant close(i); 547a7c36ee4SCorey Bryant } 548a7c36ee4SCorey Bryant } 549a7c36ee4SCorey Bryant 550389abe1dSPrasad J Pandit fd_buf = g_strdup_printf("%s%d", "--fd=", sv[1]); 551a7c36ee4SCorey Bryant 552a7c36ee4SCorey Bryant if (strrchr(helper, ' ') || strrchr(helper, '\t')) { 553a7c36ee4SCorey Bryant /* assume helper is a command */ 554a7c36ee4SCorey Bryant 555a7c36ee4SCorey Bryant if (strstr(helper, "--br=") == NULL) { 556389abe1dSPrasad J Pandit br_buf = g_strdup_printf("%s%s", "--br=", bridge); 557a7c36ee4SCorey Bryant } 558a7c36ee4SCorey Bryant 559389abe1dSPrasad J Pandit helper_cmd = g_strdup_printf("%s %s %s %s", helper, 560389abe1dSPrasad J Pandit "--use-vnet", fd_buf, br_buf ? br_buf : ""); 561a7c36ee4SCorey Bryant 562a7c36ee4SCorey Bryant parg = args; 563a7c36ee4SCorey Bryant *parg++ = (char *)"sh"; 564a7c36ee4SCorey Bryant *parg++ = (char *)"-c"; 565a7c36ee4SCorey Bryant *parg++ = helper_cmd; 566a7c36ee4SCorey Bryant *parg++ = NULL; 567a7c36ee4SCorey Bryant 568a7c36ee4SCorey Bryant execv("/bin/sh", args); 569389abe1dSPrasad J Pandit g_free(helper_cmd); 570a7c36ee4SCorey Bryant } else { 571a7c36ee4SCorey Bryant /* assume helper is just the executable path name */ 572a7c36ee4SCorey Bryant 573389abe1dSPrasad J Pandit br_buf = g_strdup_printf("%s%s", "--br=", bridge); 574a7c36ee4SCorey Bryant 575a7c36ee4SCorey Bryant parg = args; 576a7c36ee4SCorey Bryant *parg++ = (char *)helper; 577a7c36ee4SCorey Bryant *parg++ = (char *)"--use-vnet"; 578a7c36ee4SCorey Bryant *parg++ = fd_buf; 579a7c36ee4SCorey Bryant *parg++ = br_buf; 580a7c36ee4SCorey Bryant *parg++ = NULL; 581a7c36ee4SCorey Bryant 582a7c36ee4SCorey Bryant execv(helper, args); 583a7c36ee4SCorey Bryant } 584389abe1dSPrasad J Pandit g_free(fd_buf); 585389abe1dSPrasad J Pandit g_free(br_buf); 586a7c36ee4SCorey Bryant _exit(1); 587a7c36ee4SCorey Bryant 588a8a21be9SMarkus Armbruster } else { 589a7c36ee4SCorey Bryant int fd; 590a8a21be9SMarkus Armbruster int saved_errno; 591a7c36ee4SCorey Bryant 592a7c36ee4SCorey Bryant close(sv[1]); 593a7c36ee4SCorey Bryant 59437b0b24eSNikita Ivanov fd = RETRY_ON_EINTR(recv_fd(sv[0])); 595a8a21be9SMarkus Armbruster saved_errno = errno; 596a7c36ee4SCorey Bryant 597a7c36ee4SCorey Bryant close(sv[0]); 598a7c36ee4SCorey Bryant 599a7c36ee4SCorey Bryant while (waitpid(pid, &status, 0) != pid) { 600a7c36ee4SCorey Bryant /* loop */ 601a7c36ee4SCorey Bryant } 602a7c36ee4SCorey Bryant sigprocmask(SIG_SETMASK, &oldmask, NULL); 603a7c36ee4SCorey Bryant if (fd < 0) { 604a8a21be9SMarkus Armbruster error_setg_errno(errp, saved_errno, 605a8a21be9SMarkus Armbruster "failed to recv file descriptor"); 606a7c36ee4SCorey Bryant return -1; 607a7c36ee4SCorey Bryant } 608a8a21be9SMarkus Armbruster if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { 609a8a21be9SMarkus Armbruster error_setg(errp, "bridge helper failed"); 610a8a21be9SMarkus Armbruster return -1; 611a8a21be9SMarkus Armbruster } 612a7c36ee4SCorey Bryant return fd; 613a7c36ee4SCorey Bryant } 614a7c36ee4SCorey Bryant } 615a7c36ee4SCorey Bryant 616cebea510SKővágó, Zoltán int net_init_bridge(const Netdev *netdev, const char *name, 617a30ecde6SMarkus Armbruster NetClientState *peer, Error **errp) 618a7c36ee4SCorey Bryant { 619f79b51b0SLaszlo Ersek const NetdevBridgeOptions *bridge; 620f79b51b0SLaszlo Ersek const char *helper, *br; 621a7c36ee4SCorey Bryant TAPState *s; 622a7c36ee4SCorey Bryant int fd, vnet_hdr; 623a7c36ee4SCorey Bryant 624f394b2e2SEric Blake assert(netdev->type == NET_CLIENT_DRIVER_BRIDGE); 625f394b2e2SEric Blake bridge = &netdev->u.bridge; 6267480874aSMarkus Armbruster helper = bridge->helper; 6277480874aSMarkus Armbruster br = bridge->br ?: DEFAULT_BRIDGE_INTERFACE; 628f79b51b0SLaszlo Ersek 629a8a21be9SMarkus Armbruster fd = net_bridge_run_helper(helper, br, errp); 630a7c36ee4SCorey Bryant if (fd == -1) { 631a7c36ee4SCorey Bryant return -1; 632a7c36ee4SCorey Bryant } 633a7c36ee4SCorey Bryant 634a8208626SMarc-André Lureau if (!g_unix_set_fd_nonblocking(fd, true, NULL)) { 635a8208626SMarc-André Lureau error_setg_errno(errp, errno, "Failed to set FD nonblocking"); 636a8208626SMarc-André Lureau return -1; 637a8208626SMarc-André Lureau } 638e7b347d0SDaniel P. Berrange vnet_hdr = tap_probe_vnet_hdr(fd, errp); 639e7b347d0SDaniel P. Berrange if (vnet_hdr < 0) { 640e7b347d0SDaniel P. Berrange close(fd); 641e7b347d0SDaniel P. Berrange return -1; 642e7b347d0SDaniel P. Berrange } 643d33d93b2SStefan Hajnoczi s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr); 644a7c36ee4SCorey Bryant 64553b85d95SLaurent Vivier qemu_set_info_str(&s->nc, "helper=%s,br=%s", helper, br); 646d89b4f83SJason Wang 647a7c36ee4SCorey Bryant return 0; 648a7c36ee4SCorey Bryant } 649a7c36ee4SCorey Bryant 65008c573a8SLaszlo Ersek static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr, 65108c573a8SLaszlo Ersek const char *setup_script, char *ifname, 652468dd824SMarkus Armbruster size_t ifname_sz, int mq_required, Error **errp) 6535281d757SMark McLoughlin { 654ac4fcf56SMarkus Armbruster Error *err = NULL; 6555281d757SMark McLoughlin int fd, vnet_hdr_required; 6565281d757SMark McLoughlin 65708c573a8SLaszlo Ersek if (tap->has_vnet_hdr) { 65808c573a8SLaszlo Ersek *vnet_hdr = tap->vnet_hdr; 6595281d757SMark McLoughlin vnet_hdr_required = *vnet_hdr; 6605281d757SMark McLoughlin } else { 66108c573a8SLaszlo Ersek *vnet_hdr = 1; 6625281d757SMark McLoughlin vnet_hdr_required = 0; 6635281d757SMark McLoughlin } 6645281d757SMark McLoughlin 6658b6aa693SNikita Ivanov fd = RETRY_ON_EINTR(tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required, 666468dd824SMarkus Armbruster mq_required, errp)); 6675281d757SMark McLoughlin if (fd < 0) { 6685281d757SMark McLoughlin return -1; 6695281d757SMark McLoughlin } 6705281d757SMark McLoughlin 6715281d757SMark McLoughlin if (setup_script && 6725281d757SMark McLoughlin setup_script[0] != '\0' && 673ac4fcf56SMarkus Armbruster strcmp(setup_script, "no") != 0) { 674ac4fcf56SMarkus Armbruster launch_script(setup_script, ifname, fd, &err); 675ac4fcf56SMarkus Armbruster if (err) { 676468dd824SMarkus Armbruster error_propagate(errp, err); 6775281d757SMark McLoughlin close(fd); 6785281d757SMark McLoughlin return -1; 6795281d757SMark McLoughlin } 680ac4fcf56SMarkus Armbruster } 6815281d757SMark McLoughlin 6825281d757SMark McLoughlin return fd; 6835281d757SMark McLoughlin } 6845281d757SMark McLoughlin 685264986e2SJason Wang #define MAX_TAP_QUEUES 1024 686264986e2SJason Wang 687445f116cSMarkus Armbruster static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, 6885193e5fbSJason Wang const char *model, const char *name, 6895193e5fbSJason Wang const char *ifname, const char *script, 6905193e5fbSJason Wang const char *downscript, const char *vhostfdname, 691f9bb0c1fSJason Wang int vnet_hdr, int fd, Error **errp) 6925193e5fbSJason Wang { 6931677f4c6SMarkus Armbruster Error *err = NULL; 694da4a4eacSMarkus Armbruster TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr); 69581647a65SNikolay Nikolaev int vhostfd; 6965193e5fbSJason Wang 69780b832c3SMarkus Armbruster tap_set_sndbuf(s->fd, tap, &err); 69880b832c3SMarkus Armbruster if (err) { 699445f116cSMarkus Armbruster error_propagate(errp, err); 700bf769f74Slu zhipeng goto failed; 7015193e5fbSJason Wang } 7025193e5fbSJason Wang 7037480874aSMarkus Armbruster if (tap->fd || tap->fds) { 70453b85d95SLaurent Vivier qemu_set_info_str(&s->nc, "fd=%d", fd); 7057480874aSMarkus Armbruster } else if (tap->helper) { 70653b85d95SLaurent Vivier qemu_set_info_str(&s->nc, "helper=%s", tap->helper); 7075193e5fbSJason Wang } else { 70853b85d95SLaurent Vivier qemu_set_info_str(&s->nc, "ifname=%s,script=%s,downscript=%s", ifname, 70953b85d95SLaurent Vivier script, downscript); 710d89b4f83SJason Wang 7115193e5fbSJason Wang if (strcmp(downscript, "no") != 0) { 7125193e5fbSJason Wang snprintf(s->down_script, sizeof(s->down_script), "%s", downscript); 7135193e5fbSJason Wang snprintf(s->down_script_arg, sizeof(s->down_script_arg), 7145193e5fbSJason Wang "%s", ifname); 7155193e5fbSJason Wang } 7165193e5fbSJason Wang } 7175193e5fbSJason Wang 7185193e5fbSJason Wang if (tap->has_vhost ? tap->vhost : 7195193e5fbSJason Wang vhostfdname || (tap->has_vhostforce && tap->vhostforce)) { 72081647a65SNikolay Nikolaev VhostNetOptions options; 72181647a65SNikolay Nikolaev 7221a1bfac9SNikolay Nikolaev options.backend_type = VHOST_BACKEND_TYPE_KERNEL; 72381647a65SNikolay Nikolaev options.net_backend = &s->nc; 72469e87b32SJason Wang if (tap->has_poll_us) { 72569e87b32SJason Wang options.busyloop_timeout = tap->poll_us; 72669e87b32SJason Wang } else { 72769e87b32SJason Wang options.busyloop_timeout = 0; 72869e87b32SJason Wang } 7295193e5fbSJason Wang 7303a2d44f6SPaolo Bonzini if (vhostfdname) { 731947e4744SKevin Wolf vhostfd = monitor_fd_param(monitor_cur(), vhostfdname, &err); 7325193e5fbSJason Wang if (vhostfd == -1) { 73346d4d36dSJay Zhou if (tap->has_vhostforce && tap->vhostforce) { 734445f116cSMarkus Armbruster error_propagate(errp, err); 73546d4d36dSJay Zhou } else { 73646d4d36dSJay Zhou warn_report_err(err); 73746d4d36dSJay Zhou } 738bf769f74Slu zhipeng goto failed; 7395193e5fbSJason Wang } 740a8208626SMarc-André Lureau if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) { 741a8208626SMarc-André Lureau error_setg_errno(errp, errno, "%s: Can't use file descriptor %d", 742894022e6SLaurent Vivier name, fd); 743bf769f74Slu zhipeng goto failed; 744894022e6SLaurent Vivier } 7455193e5fbSJason Wang } else { 74681647a65SNikolay Nikolaev vhostfd = open("/dev/vhost-net", O_RDWR); 74781647a65SNikolay Nikolaev if (vhostfd < 0) { 74846d4d36dSJay Zhou if (tap->has_vhostforce && tap->vhostforce) { 749445f116cSMarkus Armbruster error_setg_errno(errp, errno, 750445f116cSMarkus Armbruster "tap: open vhost char device failed"); 75146d4d36dSJay Zhou } else { 75246d4d36dSJay Zhou warn_report("tap: open vhost char device failed: %s", 75346d4d36dSJay Zhou strerror(errno)); 75446d4d36dSJay Zhou } 755bf769f74Slu zhipeng goto failed; 7565193e5fbSJason Wang } 757a8208626SMarc-André Lureau if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) { 758a8208626SMarc-André Lureau error_setg_errno(errp, errno, "Failed to set FD nonblocking"); 759bf769f74Slu zhipeng goto failed; 760a8208626SMarc-André Lureau } 76181647a65SNikolay Nikolaev } 76281647a65SNikolay Nikolaev options.opaque = (void *)(uintptr_t)vhostfd; 7636a756d14SJason Wang options.nvqs = 2; 7645193e5fbSJason Wang 76581647a65SNikolay Nikolaev s->vhost_net = vhost_net_init(&options); 7665193e5fbSJason Wang if (!s->vhost_net) { 76746d4d36dSJay Zhou if (tap->has_vhostforce && tap->vhostforce) { 76846d4d36dSJay Zhou error_setg(errp, VHOST_NET_INIT_FAILED); 76946d4d36dSJay Zhou } else { 77046d4d36dSJay Zhou warn_report(VHOST_NET_INIT_FAILED); 77146d4d36dSJay Zhou } 772bf769f74Slu zhipeng goto failed; 7735193e5fbSJason Wang } 7743a2d44f6SPaolo Bonzini } else if (vhostfdname) { 77569e87b32SJason Wang error_setg(errp, "vhostfd(s)= is not valid without vhost"); 776bf769f74Slu zhipeng goto failed; 7775193e5fbSJason Wang } 778bf769f74Slu zhipeng 779bf769f74Slu zhipeng return; 780bf769f74Slu zhipeng 781bf769f74Slu zhipeng failed: 782bf769f74Slu zhipeng qemu_del_net_client(&s->nc); 7835193e5fbSJason Wang } 7845193e5fbSJason Wang 785264986e2SJason Wang static int get_fds(char *str, char *fds[], int max) 786264986e2SJason Wang { 787264986e2SJason Wang char *ptr = str, *this; 788264986e2SJason Wang size_t len = strlen(str); 789264986e2SJason Wang int i = 0; 790264986e2SJason Wang 791264986e2SJason Wang while (i < max && ptr < str + len) { 792264986e2SJason Wang this = strchr(ptr, ':'); 793264986e2SJason Wang 794264986e2SJason Wang if (this == NULL) { 795264986e2SJason Wang fds[i] = g_strdup(ptr); 796264986e2SJason Wang } else { 797264986e2SJason Wang fds[i] = g_strndup(ptr, this - ptr); 798264986e2SJason Wang } 799264986e2SJason Wang 800264986e2SJason Wang i++; 801264986e2SJason Wang if (this == NULL) { 802264986e2SJason Wang break; 803264986e2SJason Wang } else { 804264986e2SJason Wang ptr = this + 1; 805264986e2SJason Wang } 806264986e2SJason Wang } 807264986e2SJason Wang 808264986e2SJason Wang return i; 809264986e2SJason Wang } 810264986e2SJason Wang 811cebea510SKővágó, Zoltán int net_init_tap(const Netdev *netdev, const char *name, 812a30ecde6SMarkus Armbruster NetClientState *peer, Error **errp) 8135281d757SMark McLoughlin { 81408c573a8SLaszlo Ersek const NetdevTapOptions *tap; 815264986e2SJason Wang int fd, vnet_hdr = 0, i = 0, queues; 81608c573a8SLaszlo Ersek /* for the no-fd, no-helper case */ 81763c4db4cSPaolo Bonzini const char *script; 81863c4db4cSPaolo Bonzini const char *downscript; 8191677f4c6SMarkus Armbruster Error *err = NULL; 820264986e2SJason Wang const char *vhostfdname; 82108c573a8SLaszlo Ersek char ifname[128]; 822894022e6SLaurent Vivier int ret = 0; 82308c573a8SLaszlo Ersek 824f394b2e2SEric Blake assert(netdev->type == NET_CLIENT_DRIVER_TAP); 825f394b2e2SEric Blake tap = &netdev->u.tap; 826264986e2SJason Wang queues = tap->has_queues ? tap->queues : 1; 8277480874aSMarkus Armbruster vhostfdname = tap->vhostfd; 8287480874aSMarkus Armbruster script = tap->script; 8297480874aSMarkus Armbruster downscript = tap->downscript; 83008c573a8SLaszlo Ersek 831442da403SThomas Huth /* QEMU hubs do not support multiqueue tap, in this case peer is set. 832ce675a75SJason Wang * For -netdev, peer is always NULL. */ 8337480874aSMarkus Armbruster if (peer && (tap->has_queues || tap->fds || tap->vhostfds)) { 834442da403SThomas Huth error_setg(errp, "Multiqueue tap cannot be used with hubs"); 835ce675a75SJason Wang return -1; 836ce675a75SJason Wang } 837ce675a75SJason Wang 8387480874aSMarkus Armbruster if (tap->fd) { 8397480874aSMarkus Armbruster if (tap->ifname || tap->script || tap->downscript || 8407480874aSMarkus Armbruster tap->has_vnet_hdr || tap->helper || tap->has_queues || 8417480874aSMarkus Armbruster tap->fds || tap->vhostfds) { 842a3088177SMarkus Armbruster error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, " 843c87826a8SJason Wang "helper=, queues=, fds=, and vhostfds= " 844c87826a8SJason Wang "are invalid with fd="); 8455281d757SMark McLoughlin return -1; 8465281d757SMark McLoughlin } 8475281d757SMark McLoughlin 848947e4744SKevin Wolf fd = monitor_fd_param(monitor_cur(), tap->fd, errp); 8495281d757SMark McLoughlin if (fd == -1) { 8505281d757SMark McLoughlin return -1; 8515281d757SMark McLoughlin } 8525281d757SMark McLoughlin 853a8208626SMarc-André Lureau if (!g_unix_set_fd_nonblocking(fd, true, NULL)) { 854a8208626SMarc-André Lureau error_setg_errno(errp, errno, "%s: Can't use file descriptor %d", 855894022e6SLaurent Vivier name, fd); 856f012bec8Syuanjungong close(fd); 857894022e6SLaurent Vivier return -1; 858894022e6SLaurent Vivier } 8595281d757SMark McLoughlin 860e7b347d0SDaniel P. Berrange vnet_hdr = tap_probe_vnet_hdr(fd, errp); 861e7b347d0SDaniel P. Berrange if (vnet_hdr < 0) { 862e7b347d0SDaniel P. Berrange close(fd); 863e7b347d0SDaniel P. Berrange return -1; 864e7b347d0SDaniel P. Berrange } 865a7c36ee4SCorey Bryant 866445f116cSMarkus Armbruster net_init_tap_one(tap, peer, "tap", name, NULL, 867264986e2SJason Wang script, downscript, 868f9bb0c1fSJason Wang vhostfdname, vnet_hdr, fd, &err); 869445f116cSMarkus Armbruster if (err) { 870a3088177SMarkus Armbruster error_propagate(errp, err); 871f012bec8Syuanjungong close(fd); 872264986e2SJason Wang return -1; 873264986e2SJason Wang } 8747480874aSMarkus Armbruster } else if (tap->fds) { 875fac7d7b1SPeter Maydell char **fds; 876fac7d7b1SPeter Maydell char **vhost_fds; 877323e7c11SYunjian Wang int nfds = 0, nvhosts = 0; 878a7c36ee4SCorey Bryant 8797480874aSMarkus Armbruster if (tap->ifname || tap->script || tap->downscript || 8807480874aSMarkus Armbruster tap->has_vnet_hdr || tap->helper || tap->has_queues || 8817480874aSMarkus Armbruster tap->vhostfd) { 882a3088177SMarkus Armbruster error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, " 883c87826a8SJason Wang "helper=, queues=, and vhostfd= " 884c87826a8SJason Wang "are invalid with fds="); 885264986e2SJason Wang return -1; 886264986e2SJason Wang } 887264986e2SJason Wang 888fac7d7b1SPeter Maydell fds = g_new0(char *, MAX_TAP_QUEUES); 889fac7d7b1SPeter Maydell vhost_fds = g_new0(char *, MAX_TAP_QUEUES); 890fac7d7b1SPeter Maydell 891264986e2SJason Wang nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES); 8927480874aSMarkus Armbruster if (tap->vhostfds) { 893264986e2SJason Wang nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES); 894264986e2SJason Wang if (nfds != nvhosts) { 895a3088177SMarkus Armbruster error_setg(errp, "The number of fds passed does not match " 896a3088177SMarkus Armbruster "the number of vhostfds passed"); 897323e7c11SYunjian Wang ret = -1; 898091a6b2aSPaolo Bonzini goto free_fail; 899264986e2SJason Wang } 900264986e2SJason Wang } 901264986e2SJason Wang 902264986e2SJason Wang for (i = 0; i < nfds; i++) { 903947e4744SKevin Wolf fd = monitor_fd_param(monitor_cur(), fds[i], errp); 904264986e2SJason Wang if (fd == -1) { 905323e7c11SYunjian Wang ret = -1; 906091a6b2aSPaolo Bonzini goto free_fail; 907264986e2SJason Wang } 908264986e2SJason Wang 909a8208626SMarc-André Lureau ret = g_unix_set_fd_nonblocking(fd, true, NULL); 910a8208626SMarc-André Lureau if (!ret) { 911a8208626SMarc-André Lureau error_setg_errno(errp, errno, "%s: Can't use file descriptor %d", 912894022e6SLaurent Vivier name, fd); 913894022e6SLaurent Vivier goto free_fail; 914894022e6SLaurent Vivier } 915264986e2SJason Wang 916264986e2SJason Wang if (i == 0) { 917e7b347d0SDaniel P. Berrange vnet_hdr = tap_probe_vnet_hdr(fd, errp); 918e7b347d0SDaniel P. Berrange if (vnet_hdr < 0) { 91941bcea7bSPeter Foley ret = -1; 920e7b347d0SDaniel P. Berrange goto free_fail; 921e7b347d0SDaniel P. Berrange } 922e7b347d0SDaniel P. Berrange } else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) { 923a3088177SMarkus Armbruster error_setg(errp, 924a3088177SMarkus Armbruster "vnet_hdr not consistent across given tap fds"); 925323e7c11SYunjian Wang ret = -1; 926091a6b2aSPaolo Bonzini goto free_fail; 927264986e2SJason Wang } 928264986e2SJason Wang 929445f116cSMarkus Armbruster net_init_tap_one(tap, peer, "tap", name, ifname, 930264986e2SJason Wang script, downscript, 9317480874aSMarkus Armbruster tap->vhostfds ? vhost_fds[i] : NULL, 932f9bb0c1fSJason Wang vnet_hdr, fd, &err); 933445f116cSMarkus Armbruster if (err) { 934a3088177SMarkus Armbruster error_propagate(errp, err); 935323e7c11SYunjian Wang ret = -1; 936091a6b2aSPaolo Bonzini goto free_fail; 937264986e2SJason Wang } 938264986e2SJason Wang } 939091a6b2aSPaolo Bonzini 940091a6b2aSPaolo Bonzini free_fail: 941323e7c11SYunjian Wang for (i = 0; i < nvhosts; i++) { 942323e7c11SYunjian Wang g_free(vhost_fds[i]); 943323e7c11SYunjian Wang } 944091a6b2aSPaolo Bonzini for (i = 0; i < nfds; i++) { 945091a6b2aSPaolo Bonzini g_free(fds[i]); 946091a6b2aSPaolo Bonzini } 947091a6b2aSPaolo Bonzini g_free(fds); 948091a6b2aSPaolo Bonzini g_free(vhost_fds); 949323e7c11SYunjian Wang return ret; 9507480874aSMarkus Armbruster } else if (tap->helper) { 9517480874aSMarkus Armbruster if (tap->ifname || tap->script || tap->downscript || 9527480874aSMarkus Armbruster tap->has_vnet_hdr || tap->has_queues || tap->vhostfds) { 953a3088177SMarkus Armbruster error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, " 954c87826a8SJason Wang "queues=, and vhostfds= are invalid with helper="); 955a7c36ee4SCorey Bryant return -1; 956a7c36ee4SCorey Bryant } 957a7c36ee4SCorey Bryant 958584613eaSAlexey Kardashevskiy fd = net_bridge_run_helper(tap->helper, 9597480874aSMarkus Armbruster tap->br ?: DEFAULT_BRIDGE_INTERFACE, 960a8a21be9SMarkus Armbruster errp); 961a7c36ee4SCorey Bryant if (fd == -1) { 962a7c36ee4SCorey Bryant return -1; 963a7c36ee4SCorey Bryant } 964a7c36ee4SCorey Bryant 965a8208626SMarc-André Lureau if (!g_unix_set_fd_nonblocking(fd, true, NULL)) { 966a8208626SMarc-André Lureau error_setg_errno(errp, errno, "Failed to set FD nonblocking"); 967a8208626SMarc-André Lureau return -1; 968a8208626SMarc-André Lureau } 969e7b347d0SDaniel P. Berrange vnet_hdr = tap_probe_vnet_hdr(fd, errp); 970e7b347d0SDaniel P. Berrange if (vnet_hdr < 0) { 971e7b347d0SDaniel P. Berrange close(fd); 972e7b347d0SDaniel P. Berrange return -1; 973e7b347d0SDaniel P. Berrange } 974a7c36ee4SCorey Bryant 975445f116cSMarkus Armbruster net_init_tap_one(tap, peer, "bridge", name, ifname, 976264986e2SJason Wang script, downscript, vhostfdname, 977f9bb0c1fSJason Wang vnet_hdr, fd, &err); 978445f116cSMarkus Armbruster if (err) { 979a3088177SMarkus Armbruster error_propagate(errp, err); 98084f8f3daSGonglei close(fd); 981264986e2SJason Wang return -1; 982264986e2SJason Wang } 9835281d757SMark McLoughlin } else { 98463c4db4cSPaolo Bonzini g_autofree char *default_script = NULL; 98563c4db4cSPaolo Bonzini g_autofree char *default_downscript = NULL; 9867480874aSMarkus Armbruster if (tap->vhostfds) { 987a3088177SMarkus Armbruster error_setg(errp, "vhostfds= is invalid if fds= wasn't specified"); 988c87826a8SJason Wang return -1; 989c87826a8SJason Wang } 99063c4db4cSPaolo Bonzini 99163c4db4cSPaolo Bonzini if (!script) { 99263c4db4cSPaolo Bonzini script = default_script = get_relocated_path(DEFAULT_NETWORK_SCRIPT); 99363c4db4cSPaolo Bonzini } 99463c4db4cSPaolo Bonzini if (!downscript) { 9959925990dSKeqian Zhu downscript = default_downscript = 9969925990dSKeqian Zhu get_relocated_path(DEFAULT_NETWORK_DOWN_SCRIPT); 99763c4db4cSPaolo Bonzini } 998264986e2SJason Wang 9997480874aSMarkus Armbruster if (tap->ifname) { 1000264986e2SJason Wang pstrcpy(ifname, sizeof ifname, tap->ifname); 1001264986e2SJason Wang } else { 1002264986e2SJason Wang ifname[0] = '\0'; 1003264986e2SJason Wang } 1004264986e2SJason Wang 1005264986e2SJason Wang for (i = 0; i < queues; i++) { 1006264986e2SJason Wang fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script, 1007a3088177SMarkus Armbruster ifname, sizeof ifname, queues > 1, errp); 1008929fe497SJuergen Lock if (fd == -1) { 1009929fe497SJuergen Lock return -1; 1010929fe497SJuergen Lock } 1011a7c36ee4SCorey Bryant 10127480874aSMarkus Armbruster if (queues > 1 && i == 0 && !tap->ifname) { 1013264986e2SJason Wang if (tap_fd_get_ifname(fd, ifname)) { 1014a3088177SMarkus Armbruster error_setg(errp, "Fail to get ifname"); 101584f8f3daSGonglei close(fd); 1016264986e2SJason Wang return -1; 1017264986e2SJason Wang } 10185281d757SMark McLoughlin } 10195281d757SMark McLoughlin 1020445f116cSMarkus Armbruster net_init_tap_one(tap, peer, "tap", name, ifname, 1021264986e2SJason Wang i >= 1 ? "no" : script, 1022264986e2SJason Wang i >= 1 ? "no" : downscript, 1023f9bb0c1fSJason Wang vhostfdname, vnet_hdr, fd, &err); 1024445f116cSMarkus Armbruster if (err) { 1025a3088177SMarkus Armbruster error_propagate(errp, err); 102684f8f3daSGonglei close(fd); 1027264986e2SJason Wang return -1; 1028264986e2SJason Wang } 1029264986e2SJason Wang } 1030264986e2SJason Wang } 1031264986e2SJason Wang 1032264986e2SJason Wang return 0; 10335281d757SMark McLoughlin } 1034b202554cSMichael S. Tsirkin 10354e68f7a0SStefan Hajnoczi VHostNetState *tap_get_vhost_net(NetClientState *nc) 1036b202554cSMichael S. Tsirkin { 1037b202554cSMichael S. Tsirkin TAPState *s = DO_UPCAST(TAPState, nc, nc); 1038f394b2e2SEric Blake assert(nc->info->type == NET_CLIENT_DRIVER_TAP); 1039b202554cSMichael S. Tsirkin return s->vhost_net; 1040b202554cSMichael S. Tsirkin } 104116dbaf90SJason Wang 104216dbaf90SJason Wang int tap_enable(NetClientState *nc) 104316dbaf90SJason Wang { 104416dbaf90SJason Wang TAPState *s = DO_UPCAST(TAPState, nc, nc); 104516dbaf90SJason Wang int ret; 104616dbaf90SJason Wang 104716dbaf90SJason Wang if (s->enabled) { 104816dbaf90SJason Wang return 0; 104916dbaf90SJason Wang } else { 105016dbaf90SJason Wang ret = tap_fd_enable(s->fd); 105116dbaf90SJason Wang if (ret == 0) { 105216dbaf90SJason Wang s->enabled = true; 105316dbaf90SJason Wang tap_update_fd_handler(s); 105416dbaf90SJason Wang } 105516dbaf90SJason Wang return ret; 105616dbaf90SJason Wang } 105716dbaf90SJason Wang } 105816dbaf90SJason Wang 105916dbaf90SJason Wang int tap_disable(NetClientState *nc) 106016dbaf90SJason Wang { 106116dbaf90SJason Wang TAPState *s = DO_UPCAST(TAPState, nc, nc); 106216dbaf90SJason Wang int ret; 106316dbaf90SJason Wang 106416dbaf90SJason Wang if (s->enabled == 0) { 106516dbaf90SJason Wang return 0; 106616dbaf90SJason Wang } else { 106716dbaf90SJason Wang ret = tap_fd_disable(s->fd); 106816dbaf90SJason Wang if (ret == 0) { 106916dbaf90SJason Wang qemu_purge_queued_packets(nc); 107016dbaf90SJason Wang s->enabled = false; 107116dbaf90SJason Wang tap_update_fd_handler(s); 107216dbaf90SJason Wang } 107316dbaf90SJason Wang return ret; 107416dbaf90SJason Wang } 107516dbaf90SJason Wang } 1076