15281d757SMark McLoughlin /* 25281d757SMark McLoughlin * QEMU System Emulator 35281d757SMark McLoughlin * 45281d757SMark McLoughlin * Copyright (c) 2003-2008 Fabrice Bellard 55281d757SMark McLoughlin * Copyright (c) 2009 Red Hat, Inc. 65281d757SMark McLoughlin * 75281d757SMark McLoughlin * Permission is hereby granted, free of charge, to any person obtaining a copy 85281d757SMark McLoughlin * of this software and associated documentation files (the "Software"), to deal 95281d757SMark McLoughlin * in the Software without restriction, including without limitation the rights 105281d757SMark McLoughlin * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 115281d757SMark McLoughlin * copies of the Software, and to permit persons to whom the Software is 125281d757SMark McLoughlin * furnished to do so, subject to the following conditions: 135281d757SMark McLoughlin * 145281d757SMark McLoughlin * The above copyright notice and this permission notice shall be included in 155281d757SMark McLoughlin * all copies or substantial portions of the Software. 165281d757SMark McLoughlin * 175281d757SMark McLoughlin * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 185281d757SMark McLoughlin * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 195281d757SMark McLoughlin * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 205281d757SMark McLoughlin * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 215281d757SMark McLoughlin * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 225281d757SMark McLoughlin * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 235281d757SMark McLoughlin * THE SOFTWARE. 245281d757SMark McLoughlin */ 255281d757SMark McLoughlin 265281d757SMark McLoughlin #include "net/tap.h" 275281d757SMark McLoughlin 285281d757SMark McLoughlin #include "config-host.h" 295281d757SMark McLoughlin 305281d757SMark McLoughlin #include <signal.h> 315281d757SMark McLoughlin #include <sys/ioctl.h> 325281d757SMark McLoughlin #include <sys/stat.h> 335281d757SMark McLoughlin #include <sys/wait.h> 345281d757SMark McLoughlin #include <net/if.h> 355281d757SMark McLoughlin 365281d757SMark McLoughlin #include "net.h" 375281d757SMark McLoughlin #include "sysemu.h" 385281d757SMark McLoughlin #include "qemu-char.h" 395281d757SMark McLoughlin #include "qemu-common.h" 405281d757SMark McLoughlin 415281d757SMark McLoughlin #ifdef __linux__ 425281d757SMark McLoughlin #include "net/tap-linux.h" 435281d757SMark McLoughlin #endif 445281d757SMark McLoughlin 455281d757SMark McLoughlin #ifdef __sun__ 465281d757SMark McLoughlin #include <sys/stat.h> 475281d757SMark McLoughlin #include <sys/ethernet.h> 485281d757SMark McLoughlin #include <sys/sockio.h> 495281d757SMark McLoughlin #include <netinet/arp.h> 505281d757SMark McLoughlin #include <netinet/in.h> 515281d757SMark McLoughlin #include <netinet/in_systm.h> 525281d757SMark McLoughlin #include <netinet/ip.h> 535281d757SMark McLoughlin #include <netinet/ip_icmp.h> // must come after ip.h 545281d757SMark McLoughlin #include <netinet/udp.h> 555281d757SMark McLoughlin #include <netinet/tcp.h> 565281d757SMark McLoughlin #include <net/if.h> 575281d757SMark McLoughlin #include <syslog.h> 585281d757SMark McLoughlin #include <stropts.h> 595281d757SMark McLoughlin #endif 605281d757SMark McLoughlin 615281d757SMark McLoughlin #if !defined(_AIX) 625281d757SMark McLoughlin 635281d757SMark McLoughlin /* Maximum GSO packet size (64k) plus plenty of room for 645281d757SMark McLoughlin * the ethernet and virtio_net headers 655281d757SMark McLoughlin */ 665281d757SMark McLoughlin #define TAP_BUFSIZE (4096 + 65536) 675281d757SMark McLoughlin 685281d757SMark McLoughlin typedef struct TAPState { 695281d757SMark McLoughlin VLANClientState *vc; 705281d757SMark McLoughlin int fd; 715281d757SMark McLoughlin char down_script[1024]; 725281d757SMark McLoughlin char down_script_arg[128]; 735281d757SMark McLoughlin uint8_t buf[TAP_BUFSIZE]; 745281d757SMark McLoughlin unsigned int read_poll : 1; 755281d757SMark McLoughlin unsigned int write_poll : 1; 765281d757SMark McLoughlin unsigned int has_vnet_hdr : 1; 775281d757SMark McLoughlin unsigned int using_vnet_hdr : 1; 785281d757SMark McLoughlin unsigned int has_ufo: 1; 795281d757SMark McLoughlin } TAPState; 805281d757SMark McLoughlin 815281d757SMark McLoughlin static int launch_script(const char *setup_script, const char *ifname, int fd); 825281d757SMark McLoughlin 835281d757SMark McLoughlin static int tap_can_send(void *opaque); 845281d757SMark McLoughlin static void tap_send(void *opaque); 855281d757SMark McLoughlin static void tap_writable(void *opaque); 865281d757SMark McLoughlin 875281d757SMark McLoughlin static void tap_update_fd_handler(TAPState *s) 885281d757SMark McLoughlin { 895281d757SMark McLoughlin qemu_set_fd_handler2(s->fd, 905281d757SMark McLoughlin s->read_poll ? tap_can_send : NULL, 915281d757SMark McLoughlin s->read_poll ? tap_send : NULL, 925281d757SMark McLoughlin s->write_poll ? tap_writable : NULL, 935281d757SMark McLoughlin s); 945281d757SMark McLoughlin } 955281d757SMark McLoughlin 965281d757SMark McLoughlin static void tap_read_poll(TAPState *s, int enable) 975281d757SMark McLoughlin { 985281d757SMark McLoughlin s->read_poll = !!enable; 995281d757SMark McLoughlin tap_update_fd_handler(s); 1005281d757SMark McLoughlin } 1015281d757SMark McLoughlin 1025281d757SMark McLoughlin static void tap_write_poll(TAPState *s, int enable) 1035281d757SMark McLoughlin { 1045281d757SMark McLoughlin s->write_poll = !!enable; 1055281d757SMark McLoughlin tap_update_fd_handler(s); 1065281d757SMark McLoughlin } 1075281d757SMark McLoughlin 1085281d757SMark McLoughlin static void tap_writable(void *opaque) 1095281d757SMark McLoughlin { 1105281d757SMark McLoughlin TAPState *s = opaque; 1115281d757SMark McLoughlin 1125281d757SMark McLoughlin tap_write_poll(s, 0); 1135281d757SMark McLoughlin 1145281d757SMark McLoughlin qemu_flush_queued_packets(s->vc); 1155281d757SMark McLoughlin } 1165281d757SMark McLoughlin 1175281d757SMark McLoughlin static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt) 1185281d757SMark McLoughlin { 1195281d757SMark McLoughlin ssize_t len; 1205281d757SMark McLoughlin 1215281d757SMark McLoughlin do { 1225281d757SMark McLoughlin len = writev(s->fd, iov, iovcnt); 1235281d757SMark McLoughlin } while (len == -1 && errno == EINTR); 1245281d757SMark McLoughlin 1255281d757SMark McLoughlin if (len == -1 && errno == EAGAIN) { 1265281d757SMark McLoughlin tap_write_poll(s, 1); 1275281d757SMark McLoughlin return 0; 1285281d757SMark McLoughlin } 1295281d757SMark McLoughlin 1305281d757SMark McLoughlin return len; 1315281d757SMark McLoughlin } 1325281d757SMark McLoughlin 1335281d757SMark McLoughlin static ssize_t tap_receive_iov(VLANClientState *vc, const struct iovec *iov, 1345281d757SMark McLoughlin int iovcnt) 1355281d757SMark McLoughlin { 1365281d757SMark McLoughlin TAPState *s = vc->opaque; 1375281d757SMark McLoughlin const struct iovec *iovp = iov; 1385281d757SMark McLoughlin struct iovec iov_copy[iovcnt + 1]; 1395281d757SMark McLoughlin struct virtio_net_hdr hdr = { 0, }; 1405281d757SMark McLoughlin 1415281d757SMark McLoughlin if (s->has_vnet_hdr && !s->using_vnet_hdr) { 1425281d757SMark McLoughlin iov_copy[0].iov_base = &hdr; 1435281d757SMark McLoughlin iov_copy[0].iov_len = sizeof(hdr); 1445281d757SMark McLoughlin memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov)); 1455281d757SMark McLoughlin iovp = iov_copy; 1465281d757SMark McLoughlin iovcnt++; 1475281d757SMark McLoughlin } 1485281d757SMark McLoughlin 1495281d757SMark McLoughlin return tap_write_packet(s, iovp, iovcnt); 1505281d757SMark McLoughlin } 1515281d757SMark McLoughlin 1525281d757SMark McLoughlin static ssize_t tap_receive_raw(VLANClientState *vc, const uint8_t *buf, size_t size) 1535281d757SMark McLoughlin { 1545281d757SMark McLoughlin TAPState *s = vc->opaque; 1555281d757SMark McLoughlin struct iovec iov[2]; 1565281d757SMark McLoughlin int iovcnt = 0; 1575281d757SMark McLoughlin struct virtio_net_hdr hdr = { 0, }; 1585281d757SMark McLoughlin 1595281d757SMark McLoughlin if (s->has_vnet_hdr) { 1605281d757SMark McLoughlin iov[iovcnt].iov_base = &hdr; 1615281d757SMark McLoughlin iov[iovcnt].iov_len = sizeof(hdr); 1625281d757SMark McLoughlin iovcnt++; 1635281d757SMark McLoughlin } 1645281d757SMark McLoughlin 1655281d757SMark McLoughlin iov[iovcnt].iov_base = (char *)buf; 1665281d757SMark McLoughlin iov[iovcnt].iov_len = size; 1675281d757SMark McLoughlin iovcnt++; 1685281d757SMark McLoughlin 1695281d757SMark McLoughlin return tap_write_packet(s, iov, iovcnt); 1705281d757SMark McLoughlin } 1715281d757SMark McLoughlin 1725281d757SMark McLoughlin static ssize_t tap_receive(VLANClientState *vc, const uint8_t *buf, size_t size) 1735281d757SMark McLoughlin { 1745281d757SMark McLoughlin TAPState *s = vc->opaque; 1755281d757SMark McLoughlin struct iovec iov[1]; 1765281d757SMark McLoughlin 1775281d757SMark McLoughlin if (s->has_vnet_hdr && !s->using_vnet_hdr) { 1785281d757SMark McLoughlin return tap_receive_raw(vc, buf, size); 1795281d757SMark McLoughlin } 1805281d757SMark McLoughlin 1815281d757SMark McLoughlin iov[0].iov_base = (char *)buf; 1825281d757SMark McLoughlin iov[0].iov_len = size; 1835281d757SMark McLoughlin 1845281d757SMark McLoughlin return tap_write_packet(s, iov, 1); 1855281d757SMark McLoughlin } 1865281d757SMark McLoughlin 1875281d757SMark McLoughlin static int tap_can_send(void *opaque) 1885281d757SMark McLoughlin { 1895281d757SMark McLoughlin TAPState *s = opaque; 1905281d757SMark McLoughlin 1915281d757SMark McLoughlin return qemu_can_send_packet(s->vc); 1925281d757SMark McLoughlin } 1935281d757SMark McLoughlin 1945281d757SMark McLoughlin #ifdef __sun__ 1955281d757SMark McLoughlin static ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen) 1965281d757SMark McLoughlin { 1975281d757SMark McLoughlin struct strbuf sbuf; 1985281d757SMark McLoughlin int f = 0; 1995281d757SMark McLoughlin 2005281d757SMark McLoughlin sbuf.maxlen = maxlen; 2015281d757SMark McLoughlin sbuf.buf = (char *)buf; 2025281d757SMark McLoughlin 2035281d757SMark McLoughlin return getmsg(tapfd, NULL, &sbuf, &f) >= 0 ? sbuf.len : -1; 2045281d757SMark McLoughlin } 2055281d757SMark McLoughlin #else 2065281d757SMark McLoughlin static ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen) 2075281d757SMark McLoughlin { 2085281d757SMark McLoughlin return read(tapfd, buf, maxlen); 2095281d757SMark McLoughlin } 2105281d757SMark McLoughlin #endif 2115281d757SMark McLoughlin 2125281d757SMark McLoughlin static void tap_send_completed(VLANClientState *vc, ssize_t len) 2135281d757SMark McLoughlin { 2145281d757SMark McLoughlin TAPState *s = vc->opaque; 2155281d757SMark McLoughlin tap_read_poll(s, 1); 2165281d757SMark McLoughlin } 2175281d757SMark McLoughlin 2185281d757SMark McLoughlin static void tap_send(void *opaque) 2195281d757SMark McLoughlin { 2205281d757SMark McLoughlin TAPState *s = opaque; 2215281d757SMark McLoughlin int size; 2225281d757SMark McLoughlin 2235281d757SMark McLoughlin do { 2245281d757SMark McLoughlin uint8_t *buf = s->buf; 2255281d757SMark McLoughlin 2265281d757SMark McLoughlin size = tap_read_packet(s->fd, s->buf, sizeof(s->buf)); 2275281d757SMark McLoughlin if (size <= 0) { 2285281d757SMark McLoughlin break; 2295281d757SMark McLoughlin } 2305281d757SMark McLoughlin 2315281d757SMark McLoughlin if (s->has_vnet_hdr && !s->using_vnet_hdr) { 2325281d757SMark McLoughlin buf += sizeof(struct virtio_net_hdr); 2335281d757SMark McLoughlin size -= sizeof(struct virtio_net_hdr); 2345281d757SMark McLoughlin } 2355281d757SMark McLoughlin 2365281d757SMark McLoughlin size = qemu_send_packet_async(s->vc, buf, size, tap_send_completed); 2375281d757SMark McLoughlin if (size == 0) { 2385281d757SMark McLoughlin tap_read_poll(s, 0); 2395281d757SMark McLoughlin } 2405281d757SMark McLoughlin } while (size > 0); 2415281d757SMark McLoughlin } 2425281d757SMark McLoughlin 2435281d757SMark McLoughlin /* sndbuf should be set to a value lower than the tx queue 2445281d757SMark McLoughlin * capacity of any destination network interface. 2455281d757SMark McLoughlin * Ethernet NICs generally have txqueuelen=1000, so 1Mb is 2465281d757SMark McLoughlin * a good default, given a 1500 byte MTU. 2475281d757SMark McLoughlin */ 2485281d757SMark McLoughlin #define TAP_DEFAULT_SNDBUF 1024*1024 2495281d757SMark McLoughlin 2505281d757SMark McLoughlin static int tap_set_sndbuf(TAPState *s, QemuOpts *opts) 2515281d757SMark McLoughlin { 2525281d757SMark McLoughlin int sndbuf; 2535281d757SMark McLoughlin 2545281d757SMark McLoughlin sndbuf = qemu_opt_get_size(opts, "sndbuf", TAP_DEFAULT_SNDBUF); 2555281d757SMark McLoughlin if (!sndbuf) { 2565281d757SMark McLoughlin sndbuf = INT_MAX; 2575281d757SMark McLoughlin } 2585281d757SMark McLoughlin 2595281d757SMark McLoughlin if (ioctl(s->fd, TUNSETSNDBUF, &sndbuf) == -1 && qemu_opt_get(opts, "sndbuf")) { 2605281d757SMark McLoughlin qemu_error("TUNSETSNDBUF ioctl failed: %s\n", strerror(errno)); 2615281d757SMark McLoughlin return -1; 2625281d757SMark McLoughlin } 2635281d757SMark McLoughlin return 0; 2645281d757SMark McLoughlin } 2655281d757SMark McLoughlin 2665281d757SMark McLoughlin int tap_has_ufo(VLANClientState *vc) 2675281d757SMark McLoughlin { 2685281d757SMark McLoughlin TAPState *s = vc->opaque; 2695281d757SMark McLoughlin 2705281d757SMark McLoughlin assert(vc->type == NET_CLIENT_TYPE_TAP); 2715281d757SMark McLoughlin 2725281d757SMark McLoughlin return s->has_ufo; 2735281d757SMark McLoughlin } 2745281d757SMark McLoughlin 2755281d757SMark McLoughlin int tap_has_vnet_hdr(VLANClientState *vc) 2765281d757SMark McLoughlin { 2775281d757SMark McLoughlin TAPState *s = vc->opaque; 2785281d757SMark McLoughlin 2795281d757SMark McLoughlin assert(vc->type == NET_CLIENT_TYPE_TAP); 2805281d757SMark McLoughlin 2815281d757SMark McLoughlin return s->has_vnet_hdr; 2825281d757SMark McLoughlin } 2835281d757SMark McLoughlin 2845281d757SMark McLoughlin void tap_using_vnet_hdr(VLANClientState *vc, int using_vnet_hdr) 2855281d757SMark McLoughlin { 2865281d757SMark McLoughlin TAPState *s = vc->opaque; 2875281d757SMark McLoughlin 2885281d757SMark McLoughlin using_vnet_hdr = using_vnet_hdr != 0; 2895281d757SMark McLoughlin 2905281d757SMark McLoughlin assert(vc->type == NET_CLIENT_TYPE_TAP); 2915281d757SMark McLoughlin assert(s->has_vnet_hdr == using_vnet_hdr); 2925281d757SMark McLoughlin 2935281d757SMark McLoughlin s->using_vnet_hdr = using_vnet_hdr; 2945281d757SMark McLoughlin } 2955281d757SMark McLoughlin 2965281d757SMark McLoughlin static int tap_probe_vnet_hdr(int fd) 2975281d757SMark McLoughlin { 2985281d757SMark McLoughlin struct ifreq ifr; 2995281d757SMark McLoughlin 3005281d757SMark McLoughlin if (ioctl(fd, TUNGETIFF, &ifr) != 0) { 3015281d757SMark McLoughlin qemu_error("TUNGETIFF ioctl() failed: %s\n", strerror(errno)); 3025281d757SMark McLoughlin return 0; 3035281d757SMark McLoughlin } 3045281d757SMark McLoughlin 3055281d757SMark McLoughlin return ifr.ifr_flags & IFF_VNET_HDR; 3065281d757SMark McLoughlin } 3075281d757SMark McLoughlin 3085281d757SMark McLoughlin void tap_set_offload(VLANClientState *vc, int csum, int tso4, 3095281d757SMark McLoughlin int tso6, int ecn, int ufo) 3105281d757SMark McLoughlin { 3115281d757SMark McLoughlin TAPState *s = vc->opaque; 3125281d757SMark McLoughlin unsigned int offload = 0; 3135281d757SMark McLoughlin 3145281d757SMark McLoughlin if (csum) { 3155281d757SMark McLoughlin offload |= TUN_F_CSUM; 3165281d757SMark McLoughlin if (tso4) 3175281d757SMark McLoughlin offload |= TUN_F_TSO4; 3185281d757SMark McLoughlin if (tso6) 3195281d757SMark McLoughlin offload |= TUN_F_TSO6; 3205281d757SMark McLoughlin if ((tso4 || tso6) && ecn) 3215281d757SMark McLoughlin offload |= TUN_F_TSO_ECN; 3225281d757SMark McLoughlin if (ufo) 3235281d757SMark McLoughlin offload |= TUN_F_UFO; 3245281d757SMark McLoughlin } 3255281d757SMark McLoughlin 3265281d757SMark McLoughlin if (ioctl(s->fd, TUNSETOFFLOAD, offload) != 0) { 3275281d757SMark McLoughlin offload &= ~TUN_F_UFO; 3285281d757SMark McLoughlin if (ioctl(s->fd, TUNSETOFFLOAD, offload) != 0) { 3295281d757SMark McLoughlin fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n", 3305281d757SMark McLoughlin strerror(errno)); 3315281d757SMark McLoughlin } 3325281d757SMark McLoughlin } 3335281d757SMark McLoughlin } 3345281d757SMark McLoughlin 3355281d757SMark McLoughlin static void tap_cleanup(VLANClientState *vc) 3365281d757SMark McLoughlin { 3375281d757SMark McLoughlin TAPState *s = vc->opaque; 3385281d757SMark McLoughlin 3395281d757SMark McLoughlin qemu_purge_queued_packets(vc); 3405281d757SMark McLoughlin 3415281d757SMark McLoughlin if (s->down_script[0]) 3425281d757SMark McLoughlin launch_script(s->down_script, s->down_script_arg, s->fd); 3435281d757SMark McLoughlin 3445281d757SMark McLoughlin tap_read_poll(s, 0); 3455281d757SMark McLoughlin tap_write_poll(s, 0); 3465281d757SMark McLoughlin close(s->fd); 3475281d757SMark McLoughlin qemu_free(s); 3485281d757SMark McLoughlin } 3495281d757SMark McLoughlin 3505281d757SMark McLoughlin /* fd support */ 3515281d757SMark McLoughlin 3525281d757SMark McLoughlin static TAPState *net_tap_fd_init(VLANState *vlan, 3535281d757SMark McLoughlin const char *model, 3545281d757SMark McLoughlin const char *name, 3555281d757SMark McLoughlin int fd, 3565281d757SMark McLoughlin int vnet_hdr) 3575281d757SMark McLoughlin { 3585281d757SMark McLoughlin TAPState *s; 3595281d757SMark McLoughlin unsigned int offload; 3605281d757SMark McLoughlin 3615281d757SMark McLoughlin s = qemu_mallocz(sizeof(TAPState)); 3625281d757SMark McLoughlin s->fd = fd; 3635281d757SMark McLoughlin s->has_vnet_hdr = vnet_hdr != 0; 3645281d757SMark McLoughlin s->using_vnet_hdr = 0; 3655281d757SMark McLoughlin s->vc = qemu_new_vlan_client(NET_CLIENT_TYPE_TAP, 3665281d757SMark McLoughlin vlan, NULL, model, name, NULL, 3675281d757SMark McLoughlin tap_receive, tap_receive_raw, 3685281d757SMark McLoughlin tap_receive_iov, tap_cleanup, s); 3695281d757SMark McLoughlin s->has_ufo = 0; 3705281d757SMark McLoughlin /* Check if tap supports UFO */ 3715281d757SMark McLoughlin offload = TUN_F_CSUM | TUN_F_UFO; 3725281d757SMark McLoughlin if (ioctl(s->fd, TUNSETOFFLOAD, offload) == 0) 3735281d757SMark McLoughlin s->has_ufo = 1; 3745281d757SMark McLoughlin tap_set_offload(s->vc, 0, 0, 0, 0, 0); 3755281d757SMark McLoughlin tap_read_poll(s, 1); 3765281d757SMark McLoughlin return s; 3775281d757SMark McLoughlin } 3785281d757SMark McLoughlin 379*e7e92325SMark McLoughlin #ifdef __sun__ 3805281d757SMark McLoughlin #define TUNNEWPPA (('T'<<16) | 0x0001) 3815281d757SMark McLoughlin /* 3825281d757SMark McLoughlin * Allocate TAP device, returns opened fd. 3835281d757SMark McLoughlin * Stores dev name in the first arg(must be large enough). 3845281d757SMark McLoughlin */ 3855281d757SMark McLoughlin static int tap_alloc(char *dev, size_t dev_size) 3865281d757SMark McLoughlin { 3875281d757SMark McLoughlin int tap_fd, if_fd, ppa = -1; 3885281d757SMark McLoughlin static int ip_fd = 0; 3895281d757SMark McLoughlin char *ptr; 3905281d757SMark McLoughlin 3915281d757SMark McLoughlin static int arp_fd = 0; 3925281d757SMark McLoughlin int ip_muxid, arp_muxid; 3935281d757SMark McLoughlin struct strioctl strioc_if, strioc_ppa; 3945281d757SMark McLoughlin int link_type = I_PLINK;; 3955281d757SMark McLoughlin struct lifreq ifr; 3965281d757SMark McLoughlin char actual_name[32] = ""; 3975281d757SMark McLoughlin 3985281d757SMark McLoughlin memset(&ifr, 0x0, sizeof(ifr)); 3995281d757SMark McLoughlin 4005281d757SMark McLoughlin if( *dev ){ 4015281d757SMark McLoughlin ptr = dev; 4025281d757SMark McLoughlin while( *ptr && !qemu_isdigit((int)*ptr) ) ptr++; 4035281d757SMark McLoughlin ppa = atoi(ptr); 4045281d757SMark McLoughlin } 4055281d757SMark McLoughlin 4065281d757SMark McLoughlin /* Check if IP device was opened */ 4075281d757SMark McLoughlin if( ip_fd ) 4085281d757SMark McLoughlin close(ip_fd); 4095281d757SMark McLoughlin 4105281d757SMark McLoughlin TFR(ip_fd = open("/dev/udp", O_RDWR, 0)); 4115281d757SMark McLoughlin if (ip_fd < 0) { 4125281d757SMark McLoughlin syslog(LOG_ERR, "Can't open /dev/ip (actually /dev/udp)"); 4135281d757SMark McLoughlin return -1; 4145281d757SMark McLoughlin } 4155281d757SMark McLoughlin 4165281d757SMark McLoughlin TFR(tap_fd = open("/dev/tap", O_RDWR, 0)); 4175281d757SMark McLoughlin if (tap_fd < 0) { 4185281d757SMark McLoughlin syslog(LOG_ERR, "Can't open /dev/tap"); 4195281d757SMark McLoughlin return -1; 4205281d757SMark McLoughlin } 4215281d757SMark McLoughlin 4225281d757SMark McLoughlin /* Assign a new PPA and get its unit number. */ 4235281d757SMark McLoughlin strioc_ppa.ic_cmd = TUNNEWPPA; 4245281d757SMark McLoughlin strioc_ppa.ic_timout = 0; 4255281d757SMark McLoughlin strioc_ppa.ic_len = sizeof(ppa); 4265281d757SMark McLoughlin strioc_ppa.ic_dp = (char *)&ppa; 4275281d757SMark McLoughlin if ((ppa = ioctl (tap_fd, I_STR, &strioc_ppa)) < 0) 4285281d757SMark McLoughlin syslog (LOG_ERR, "Can't assign new interface"); 4295281d757SMark McLoughlin 4305281d757SMark McLoughlin TFR(if_fd = open("/dev/tap", O_RDWR, 0)); 4315281d757SMark McLoughlin if (if_fd < 0) { 4325281d757SMark McLoughlin syslog(LOG_ERR, "Can't open /dev/tap (2)"); 4335281d757SMark McLoughlin return -1; 4345281d757SMark McLoughlin } 4355281d757SMark McLoughlin if(ioctl(if_fd, I_PUSH, "ip") < 0){ 4365281d757SMark McLoughlin syslog(LOG_ERR, "Can't push IP module"); 4375281d757SMark McLoughlin return -1; 4385281d757SMark McLoughlin } 4395281d757SMark McLoughlin 4405281d757SMark McLoughlin if (ioctl(if_fd, SIOCGLIFFLAGS, &ifr) < 0) 4415281d757SMark McLoughlin syslog(LOG_ERR, "Can't get flags\n"); 4425281d757SMark McLoughlin 4435281d757SMark McLoughlin snprintf (actual_name, 32, "tap%d", ppa); 4445281d757SMark McLoughlin pstrcpy(ifr.lifr_name, sizeof(ifr.lifr_name), actual_name); 4455281d757SMark McLoughlin 4465281d757SMark McLoughlin ifr.lifr_ppa = ppa; 4475281d757SMark McLoughlin /* Assign ppa according to the unit number returned by tun device */ 4485281d757SMark McLoughlin 4495281d757SMark McLoughlin if (ioctl (if_fd, SIOCSLIFNAME, &ifr) < 0) 4505281d757SMark McLoughlin syslog (LOG_ERR, "Can't set PPA %d", ppa); 4515281d757SMark McLoughlin if (ioctl(if_fd, SIOCGLIFFLAGS, &ifr) <0) 4525281d757SMark McLoughlin syslog (LOG_ERR, "Can't get flags\n"); 4535281d757SMark McLoughlin /* Push arp module to if_fd */ 4545281d757SMark McLoughlin if (ioctl (if_fd, I_PUSH, "arp") < 0) 4555281d757SMark McLoughlin syslog (LOG_ERR, "Can't push ARP module (2)"); 4565281d757SMark McLoughlin 4575281d757SMark McLoughlin /* Push arp module to ip_fd */ 4585281d757SMark McLoughlin if (ioctl (ip_fd, I_POP, NULL) < 0) 4595281d757SMark McLoughlin syslog (LOG_ERR, "I_POP failed\n"); 4605281d757SMark McLoughlin if (ioctl (ip_fd, I_PUSH, "arp") < 0) 4615281d757SMark McLoughlin syslog (LOG_ERR, "Can't push ARP module (3)\n"); 4625281d757SMark McLoughlin /* Open arp_fd */ 4635281d757SMark McLoughlin TFR(arp_fd = open ("/dev/tap", O_RDWR, 0)); 4645281d757SMark McLoughlin if (arp_fd < 0) 4655281d757SMark McLoughlin syslog (LOG_ERR, "Can't open %s\n", "/dev/tap"); 4665281d757SMark McLoughlin 4675281d757SMark McLoughlin /* Set ifname to arp */ 4685281d757SMark McLoughlin strioc_if.ic_cmd = SIOCSLIFNAME; 4695281d757SMark McLoughlin strioc_if.ic_timout = 0; 4705281d757SMark McLoughlin strioc_if.ic_len = sizeof(ifr); 4715281d757SMark McLoughlin strioc_if.ic_dp = (char *)𝔦 4725281d757SMark McLoughlin if (ioctl(arp_fd, I_STR, &strioc_if) < 0){ 4735281d757SMark McLoughlin syslog (LOG_ERR, "Can't set ifname to arp\n"); 4745281d757SMark McLoughlin } 4755281d757SMark McLoughlin 4765281d757SMark McLoughlin if((ip_muxid = ioctl(ip_fd, I_LINK, if_fd)) < 0){ 4775281d757SMark McLoughlin syslog(LOG_ERR, "Can't link TAP device to IP"); 4785281d757SMark McLoughlin return -1; 4795281d757SMark McLoughlin } 4805281d757SMark McLoughlin 4815281d757SMark McLoughlin if ((arp_muxid = ioctl (ip_fd, link_type, arp_fd)) < 0) 4825281d757SMark McLoughlin syslog (LOG_ERR, "Can't link TAP device to ARP"); 4835281d757SMark McLoughlin 4845281d757SMark McLoughlin close (if_fd); 4855281d757SMark McLoughlin 4865281d757SMark McLoughlin memset(&ifr, 0x0, sizeof(ifr)); 4875281d757SMark McLoughlin pstrcpy(ifr.lifr_name, sizeof(ifr.lifr_name), actual_name); 4885281d757SMark McLoughlin ifr.lifr_ip_muxid = ip_muxid; 4895281d757SMark McLoughlin ifr.lifr_arp_muxid = arp_muxid; 4905281d757SMark McLoughlin 4915281d757SMark McLoughlin if (ioctl (ip_fd, SIOCSLIFMUXID, &ifr) < 0) 4925281d757SMark McLoughlin { 4935281d757SMark McLoughlin ioctl (ip_fd, I_PUNLINK , arp_muxid); 4945281d757SMark McLoughlin ioctl (ip_fd, I_PUNLINK, ip_muxid); 4955281d757SMark McLoughlin syslog (LOG_ERR, "Can't set multiplexor id"); 4965281d757SMark McLoughlin } 4975281d757SMark McLoughlin 4985281d757SMark McLoughlin snprintf(dev, dev_size, "tap%d", ppa); 4995281d757SMark McLoughlin return tap_fd; 5005281d757SMark McLoughlin } 5015281d757SMark McLoughlin 502*e7e92325SMark McLoughlin int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required) 5035281d757SMark McLoughlin { 5045281d757SMark McLoughlin char dev[10]=""; 5055281d757SMark McLoughlin int fd; 5065281d757SMark McLoughlin if( (fd = tap_alloc(dev, sizeof(dev))) < 0 ){ 5075281d757SMark McLoughlin fprintf(stderr, "Cannot allocate TAP device\n"); 5085281d757SMark McLoughlin return -1; 5095281d757SMark McLoughlin } 5105281d757SMark McLoughlin pstrcpy(ifname, ifname_size, dev); 5115281d757SMark McLoughlin fcntl(fd, F_SETFL, O_NONBLOCK); 5125281d757SMark McLoughlin return fd; 5135281d757SMark McLoughlin } 5145281d757SMark McLoughlin #elif defined (_AIX) 515*e7e92325SMark McLoughlin int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required) 5165281d757SMark McLoughlin { 5175281d757SMark McLoughlin fprintf (stderr, "no tap on AIX\n"); 5185281d757SMark McLoughlin return -1; 5195281d757SMark McLoughlin } 5205281d757SMark McLoughlin #else 521*e7e92325SMark McLoughlin int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required) 5225281d757SMark McLoughlin { 5235281d757SMark McLoughlin struct ifreq ifr; 5245281d757SMark McLoughlin int fd, ret; 5255281d757SMark McLoughlin 5265281d757SMark McLoughlin TFR(fd = open("/dev/net/tun", O_RDWR)); 5275281d757SMark McLoughlin if (fd < 0) { 5285281d757SMark McLoughlin fprintf(stderr, "warning: could not open /dev/net/tun: no virtual network emulation\n"); 5295281d757SMark McLoughlin return -1; 5305281d757SMark McLoughlin } 5315281d757SMark McLoughlin memset(&ifr, 0, sizeof(ifr)); 5325281d757SMark McLoughlin ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 5335281d757SMark McLoughlin 5345281d757SMark McLoughlin if (*vnet_hdr) { 5355281d757SMark McLoughlin unsigned int features; 5365281d757SMark McLoughlin 5375281d757SMark McLoughlin if (ioctl(fd, TUNGETFEATURES, &features) == 0 && 5385281d757SMark McLoughlin features & IFF_VNET_HDR) { 5395281d757SMark McLoughlin *vnet_hdr = 1; 5405281d757SMark McLoughlin ifr.ifr_flags |= IFF_VNET_HDR; 5415281d757SMark McLoughlin } 5425281d757SMark McLoughlin 5435281d757SMark McLoughlin if (vnet_hdr_required && !*vnet_hdr) { 5445281d757SMark McLoughlin qemu_error("vnet_hdr=1 requested, but no kernel " 5455281d757SMark McLoughlin "support for IFF_VNET_HDR available"); 5465281d757SMark McLoughlin close(fd); 5475281d757SMark McLoughlin return -1; 5485281d757SMark McLoughlin } 5495281d757SMark McLoughlin } 5505281d757SMark McLoughlin 5515281d757SMark McLoughlin if (ifname[0] != '\0') 5525281d757SMark McLoughlin pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname); 5535281d757SMark McLoughlin else 5545281d757SMark McLoughlin pstrcpy(ifr.ifr_name, IFNAMSIZ, "tap%d"); 5555281d757SMark McLoughlin ret = ioctl(fd, TUNSETIFF, (void *) &ifr); 5565281d757SMark McLoughlin if (ret != 0) { 5575281d757SMark McLoughlin fprintf(stderr, "warning: could not configure /dev/net/tun: no virtual network emulation\n"); 5585281d757SMark McLoughlin close(fd); 5595281d757SMark McLoughlin return -1; 5605281d757SMark McLoughlin } 5615281d757SMark McLoughlin pstrcpy(ifname, ifname_size, ifr.ifr_name); 5625281d757SMark McLoughlin fcntl(fd, F_SETFL, O_NONBLOCK); 5635281d757SMark McLoughlin return fd; 5645281d757SMark McLoughlin } 5655281d757SMark McLoughlin #endif 5665281d757SMark McLoughlin 5675281d757SMark McLoughlin static int launch_script(const char *setup_script, const char *ifname, int fd) 5685281d757SMark McLoughlin { 5695281d757SMark McLoughlin sigset_t oldmask, mask; 5705281d757SMark McLoughlin int pid, status; 5715281d757SMark McLoughlin char *args[3]; 5725281d757SMark McLoughlin char **parg; 5735281d757SMark McLoughlin 5745281d757SMark McLoughlin sigemptyset(&mask); 5755281d757SMark McLoughlin sigaddset(&mask, SIGCHLD); 5765281d757SMark McLoughlin sigprocmask(SIG_BLOCK, &mask, &oldmask); 5775281d757SMark McLoughlin 5785281d757SMark McLoughlin /* try to launch network script */ 5795281d757SMark McLoughlin pid = fork(); 5805281d757SMark McLoughlin if (pid == 0) { 5815281d757SMark McLoughlin int open_max = sysconf(_SC_OPEN_MAX), i; 5825281d757SMark McLoughlin 5835281d757SMark McLoughlin for (i = 0; i < open_max; i++) { 5845281d757SMark McLoughlin if (i != STDIN_FILENO && 5855281d757SMark McLoughlin i != STDOUT_FILENO && 5865281d757SMark McLoughlin i != STDERR_FILENO && 5875281d757SMark McLoughlin i != fd) { 5885281d757SMark McLoughlin close(i); 5895281d757SMark McLoughlin } 5905281d757SMark McLoughlin } 5915281d757SMark McLoughlin parg = args; 5925281d757SMark McLoughlin *parg++ = (char *)setup_script; 5935281d757SMark McLoughlin *parg++ = (char *)ifname; 5945281d757SMark McLoughlin *parg++ = NULL; 5955281d757SMark McLoughlin execv(setup_script, args); 5965281d757SMark McLoughlin _exit(1); 5975281d757SMark McLoughlin } else if (pid > 0) { 5985281d757SMark McLoughlin while (waitpid(pid, &status, 0) != pid) { 5995281d757SMark McLoughlin /* loop */ 6005281d757SMark McLoughlin } 6015281d757SMark McLoughlin sigprocmask(SIG_SETMASK, &oldmask, NULL); 6025281d757SMark McLoughlin 6035281d757SMark McLoughlin if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 6045281d757SMark McLoughlin return 0; 6055281d757SMark McLoughlin } 6065281d757SMark McLoughlin } 6075281d757SMark McLoughlin fprintf(stderr, "%s: could not launch network script\n", setup_script); 6085281d757SMark McLoughlin return -1; 6095281d757SMark McLoughlin } 6105281d757SMark McLoughlin 6115281d757SMark McLoughlin static int net_tap_init(QemuOpts *opts, int *vnet_hdr) 6125281d757SMark McLoughlin { 6135281d757SMark McLoughlin int fd, vnet_hdr_required; 6145281d757SMark McLoughlin char ifname[128] = {0,}; 6155281d757SMark McLoughlin const char *setup_script; 6165281d757SMark McLoughlin 6175281d757SMark McLoughlin if (qemu_opt_get(opts, "ifname")) { 6185281d757SMark McLoughlin pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname")); 6195281d757SMark McLoughlin } 6205281d757SMark McLoughlin 6215281d757SMark McLoughlin *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1); 6225281d757SMark McLoughlin if (qemu_opt_get(opts, "vnet_hdr")) { 6235281d757SMark McLoughlin vnet_hdr_required = *vnet_hdr; 6245281d757SMark McLoughlin } else { 6255281d757SMark McLoughlin vnet_hdr_required = 0; 6265281d757SMark McLoughlin } 6275281d757SMark McLoughlin 6285281d757SMark McLoughlin TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required)); 6295281d757SMark McLoughlin if (fd < 0) { 6305281d757SMark McLoughlin return -1; 6315281d757SMark McLoughlin } 6325281d757SMark McLoughlin 6335281d757SMark McLoughlin setup_script = qemu_opt_get(opts, "script"); 6345281d757SMark McLoughlin if (setup_script && 6355281d757SMark McLoughlin setup_script[0] != '\0' && 6365281d757SMark McLoughlin strcmp(setup_script, "no") != 0 && 6375281d757SMark McLoughlin launch_script(setup_script, ifname, fd)) { 6385281d757SMark McLoughlin close(fd); 6395281d757SMark McLoughlin return -1; 6405281d757SMark McLoughlin } 6415281d757SMark McLoughlin 6425281d757SMark McLoughlin qemu_opt_set(opts, "ifname", ifname); 6435281d757SMark McLoughlin 6445281d757SMark McLoughlin return fd; 6455281d757SMark McLoughlin } 6465281d757SMark McLoughlin 6475281d757SMark McLoughlin int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan) 6485281d757SMark McLoughlin { 6495281d757SMark McLoughlin TAPState *s; 6505281d757SMark McLoughlin int fd, vnet_hdr; 6515281d757SMark McLoughlin 6525281d757SMark McLoughlin if (qemu_opt_get(opts, "fd")) { 6535281d757SMark McLoughlin if (qemu_opt_get(opts, "ifname") || 6545281d757SMark McLoughlin qemu_opt_get(opts, "script") || 6555281d757SMark McLoughlin qemu_opt_get(opts, "downscript") || 6565281d757SMark McLoughlin qemu_opt_get(opts, "vnet_hdr")) { 6575281d757SMark McLoughlin qemu_error("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=\n"); 6585281d757SMark McLoughlin return -1; 6595281d757SMark McLoughlin } 6605281d757SMark McLoughlin 6615281d757SMark McLoughlin fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd")); 6625281d757SMark McLoughlin if (fd == -1) { 6635281d757SMark McLoughlin return -1; 6645281d757SMark McLoughlin } 6655281d757SMark McLoughlin 6665281d757SMark McLoughlin fcntl(fd, F_SETFL, O_NONBLOCK); 6675281d757SMark McLoughlin 6685281d757SMark McLoughlin vnet_hdr = tap_probe_vnet_hdr(fd); 6695281d757SMark McLoughlin } else { 6705281d757SMark McLoughlin if (!qemu_opt_get(opts, "script")) { 6715281d757SMark McLoughlin qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT); 6725281d757SMark McLoughlin } 6735281d757SMark McLoughlin 6745281d757SMark McLoughlin if (!qemu_opt_get(opts, "downscript")) { 6755281d757SMark McLoughlin qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT); 6765281d757SMark McLoughlin } 6775281d757SMark McLoughlin 6785281d757SMark McLoughlin fd = net_tap_init(opts, &vnet_hdr); 6795281d757SMark McLoughlin } 6805281d757SMark McLoughlin 6815281d757SMark McLoughlin s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr); 6825281d757SMark McLoughlin if (!s) { 6835281d757SMark McLoughlin close(fd); 6845281d757SMark McLoughlin return -1; 6855281d757SMark McLoughlin } 6865281d757SMark McLoughlin 6875281d757SMark McLoughlin if (tap_set_sndbuf(s, opts) < 0) { 6885281d757SMark McLoughlin return -1; 6895281d757SMark McLoughlin } 6905281d757SMark McLoughlin 6915281d757SMark McLoughlin if (qemu_opt_get(opts, "fd")) { 6925281d757SMark McLoughlin snprintf(s->vc->info_str, sizeof(s->vc->info_str), "fd=%d", fd); 6935281d757SMark McLoughlin } else { 6945281d757SMark McLoughlin const char *ifname, *script, *downscript; 6955281d757SMark McLoughlin 6965281d757SMark McLoughlin ifname = qemu_opt_get(opts, "ifname"); 6975281d757SMark McLoughlin script = qemu_opt_get(opts, "script"); 6985281d757SMark McLoughlin downscript = qemu_opt_get(opts, "downscript"); 6995281d757SMark McLoughlin 7005281d757SMark McLoughlin snprintf(s->vc->info_str, sizeof(s->vc->info_str), 7015281d757SMark McLoughlin "ifname=%s,script=%s,downscript=%s", 7025281d757SMark McLoughlin ifname, script, downscript); 7035281d757SMark McLoughlin 7045281d757SMark McLoughlin if (strcmp(downscript, "no") != 0) { 7055281d757SMark McLoughlin snprintf(s->down_script, sizeof(s->down_script), "%s", downscript); 7065281d757SMark McLoughlin snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname); 7075281d757SMark McLoughlin } 7085281d757SMark McLoughlin } 7095281d757SMark McLoughlin 7105281d757SMark McLoughlin if (vlan) { 7115281d757SMark McLoughlin vlan->nb_host_devs++; 7125281d757SMark McLoughlin } 7135281d757SMark McLoughlin 7145281d757SMark McLoughlin return 0; 7155281d757SMark McLoughlin } 7165281d757SMark McLoughlin 7175281d757SMark McLoughlin #endif /* !defined(_AIX) */ 718