xref: /qemu/net/tap.c (revision e7e92325d92843aaa022000aacb9602d3a6ad986)
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2009 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
255281d757SMark McLoughlin 
265281d757SMark McLoughlin #include "net/tap.h"
275281d757SMark McLoughlin 
285281d757SMark McLoughlin #include "config-host.h"
295281d757SMark McLoughlin 
305281d757SMark McLoughlin #include <signal.h>
315281d757SMark McLoughlin #include <sys/ioctl.h>
325281d757SMark McLoughlin #include <sys/stat.h>
335281d757SMark McLoughlin #include <sys/wait.h>
345281d757SMark McLoughlin #include <net/if.h>
355281d757SMark McLoughlin 
365281d757SMark McLoughlin #include "net.h"
375281d757SMark McLoughlin #include "sysemu.h"
385281d757SMark McLoughlin #include "qemu-char.h"
395281d757SMark McLoughlin #include "qemu-common.h"
405281d757SMark McLoughlin 
415281d757SMark McLoughlin #ifdef __linux__
425281d757SMark McLoughlin #include "net/tap-linux.h"
435281d757SMark McLoughlin #endif
445281d757SMark McLoughlin 
455281d757SMark McLoughlin #ifdef __sun__
465281d757SMark McLoughlin #include <sys/stat.h>
475281d757SMark McLoughlin #include <sys/ethernet.h>
485281d757SMark McLoughlin #include <sys/sockio.h>
495281d757SMark McLoughlin #include <netinet/arp.h>
505281d757SMark McLoughlin #include <netinet/in.h>
515281d757SMark McLoughlin #include <netinet/in_systm.h>
525281d757SMark McLoughlin #include <netinet/ip.h>
535281d757SMark McLoughlin #include <netinet/ip_icmp.h> // must come after ip.h
545281d757SMark McLoughlin #include <netinet/udp.h>
555281d757SMark McLoughlin #include <netinet/tcp.h>
565281d757SMark McLoughlin #include <net/if.h>
575281d757SMark McLoughlin #include <syslog.h>
585281d757SMark McLoughlin #include <stropts.h>
595281d757SMark McLoughlin #endif
605281d757SMark McLoughlin 
615281d757SMark McLoughlin #if !defined(_AIX)
625281d757SMark McLoughlin 
635281d757SMark McLoughlin /* Maximum GSO packet size (64k) plus plenty of room for
645281d757SMark McLoughlin  * the ethernet and virtio_net headers
655281d757SMark McLoughlin  */
665281d757SMark McLoughlin #define TAP_BUFSIZE (4096 + 65536)
675281d757SMark McLoughlin 
685281d757SMark McLoughlin typedef struct TAPState {
695281d757SMark McLoughlin     VLANClientState *vc;
705281d757SMark McLoughlin     int fd;
715281d757SMark McLoughlin     char down_script[1024];
725281d757SMark McLoughlin     char down_script_arg[128];
735281d757SMark McLoughlin     uint8_t buf[TAP_BUFSIZE];
745281d757SMark McLoughlin     unsigned int read_poll : 1;
755281d757SMark McLoughlin     unsigned int write_poll : 1;
765281d757SMark McLoughlin     unsigned int has_vnet_hdr : 1;
775281d757SMark McLoughlin     unsigned int using_vnet_hdr : 1;
785281d757SMark McLoughlin     unsigned int has_ufo: 1;
795281d757SMark McLoughlin } TAPState;
805281d757SMark McLoughlin 
/* Script runner used by tap_cleanup(); defined later in this file. */
static int launch_script(const char *setup_script, const char *ifname, int fd);

/* fd callbacks installed by tap_update_fd_handler(). */
static int tap_can_send(void *opaque);
static void tap_send(void *opaque);
static void tap_writable(void *opaque);
865281d757SMark McLoughlin 
875281d757SMark McLoughlin static void tap_update_fd_handler(TAPState *s)
885281d757SMark McLoughlin {
895281d757SMark McLoughlin     qemu_set_fd_handler2(s->fd,
905281d757SMark McLoughlin                          s->read_poll  ? tap_can_send : NULL,
915281d757SMark McLoughlin                          s->read_poll  ? tap_send     : NULL,
925281d757SMark McLoughlin                          s->write_poll ? tap_writable : NULL,
935281d757SMark McLoughlin                          s);
945281d757SMark McLoughlin }
955281d757SMark McLoughlin 
965281d757SMark McLoughlin static void tap_read_poll(TAPState *s, int enable)
975281d757SMark McLoughlin {
985281d757SMark McLoughlin     s->read_poll = !!enable;
995281d757SMark McLoughlin     tap_update_fd_handler(s);
1005281d757SMark McLoughlin }
1015281d757SMark McLoughlin 
1025281d757SMark McLoughlin static void tap_write_poll(TAPState *s, int enable)
1035281d757SMark McLoughlin {
1045281d757SMark McLoughlin     s->write_poll = !!enable;
1055281d757SMark McLoughlin     tap_update_fd_handler(s);
1065281d757SMark McLoughlin }
1075281d757SMark McLoughlin 
1085281d757SMark McLoughlin static void tap_writable(void *opaque)
1095281d757SMark McLoughlin {
1105281d757SMark McLoughlin     TAPState *s = opaque;
1115281d757SMark McLoughlin 
1125281d757SMark McLoughlin     tap_write_poll(s, 0);
1135281d757SMark McLoughlin 
1145281d757SMark McLoughlin     qemu_flush_queued_packets(s->vc);
1155281d757SMark McLoughlin }
1165281d757SMark McLoughlin 
1175281d757SMark McLoughlin static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
1185281d757SMark McLoughlin {
1195281d757SMark McLoughlin     ssize_t len;
1205281d757SMark McLoughlin 
1215281d757SMark McLoughlin     do {
1225281d757SMark McLoughlin         len = writev(s->fd, iov, iovcnt);
1235281d757SMark McLoughlin     } while (len == -1 && errno == EINTR);
1245281d757SMark McLoughlin 
1255281d757SMark McLoughlin     if (len == -1 && errno == EAGAIN) {
1265281d757SMark McLoughlin         tap_write_poll(s, 1);
1275281d757SMark McLoughlin         return 0;
1285281d757SMark McLoughlin     }
1295281d757SMark McLoughlin 
1305281d757SMark McLoughlin     return len;
1315281d757SMark McLoughlin }
1325281d757SMark McLoughlin 
1335281d757SMark McLoughlin static ssize_t tap_receive_iov(VLANClientState *vc, const struct iovec *iov,
1345281d757SMark McLoughlin                                int iovcnt)
1355281d757SMark McLoughlin {
1365281d757SMark McLoughlin     TAPState *s = vc->opaque;
1375281d757SMark McLoughlin     const struct iovec *iovp = iov;
1385281d757SMark McLoughlin     struct iovec iov_copy[iovcnt + 1];
1395281d757SMark McLoughlin     struct virtio_net_hdr hdr = { 0, };
1405281d757SMark McLoughlin 
1415281d757SMark McLoughlin     if (s->has_vnet_hdr && !s->using_vnet_hdr) {
1425281d757SMark McLoughlin         iov_copy[0].iov_base = &hdr;
1435281d757SMark McLoughlin         iov_copy[0].iov_len =  sizeof(hdr);
1445281d757SMark McLoughlin         memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
1455281d757SMark McLoughlin         iovp = iov_copy;
1465281d757SMark McLoughlin         iovcnt++;
1475281d757SMark McLoughlin     }
1485281d757SMark McLoughlin 
1495281d757SMark McLoughlin     return tap_write_packet(s, iovp, iovcnt);
1505281d757SMark McLoughlin }
1515281d757SMark McLoughlin 
1525281d757SMark McLoughlin static ssize_t tap_receive_raw(VLANClientState *vc, const uint8_t *buf, size_t size)
1535281d757SMark McLoughlin {
1545281d757SMark McLoughlin     TAPState *s = vc->opaque;
1555281d757SMark McLoughlin     struct iovec iov[2];
1565281d757SMark McLoughlin     int iovcnt = 0;
1575281d757SMark McLoughlin     struct virtio_net_hdr hdr = { 0, };
1585281d757SMark McLoughlin 
1595281d757SMark McLoughlin     if (s->has_vnet_hdr) {
1605281d757SMark McLoughlin         iov[iovcnt].iov_base = &hdr;
1615281d757SMark McLoughlin         iov[iovcnt].iov_len  = sizeof(hdr);
1625281d757SMark McLoughlin         iovcnt++;
1635281d757SMark McLoughlin     }
1645281d757SMark McLoughlin 
1655281d757SMark McLoughlin     iov[iovcnt].iov_base = (char *)buf;
1665281d757SMark McLoughlin     iov[iovcnt].iov_len  = size;
1675281d757SMark McLoughlin     iovcnt++;
1685281d757SMark McLoughlin 
1695281d757SMark McLoughlin     return tap_write_packet(s, iov, iovcnt);
1705281d757SMark McLoughlin }
1715281d757SMark McLoughlin 
1725281d757SMark McLoughlin static ssize_t tap_receive(VLANClientState *vc, const uint8_t *buf, size_t size)
1735281d757SMark McLoughlin {
1745281d757SMark McLoughlin     TAPState *s = vc->opaque;
1755281d757SMark McLoughlin     struct iovec iov[1];
1765281d757SMark McLoughlin 
1775281d757SMark McLoughlin     if (s->has_vnet_hdr && !s->using_vnet_hdr) {
1785281d757SMark McLoughlin         return tap_receive_raw(vc, buf, size);
1795281d757SMark McLoughlin     }
1805281d757SMark McLoughlin 
1815281d757SMark McLoughlin     iov[0].iov_base = (char *)buf;
1825281d757SMark McLoughlin     iov[0].iov_len  = size;
1835281d757SMark McLoughlin 
1845281d757SMark McLoughlin     return tap_write_packet(s, iov, 1);
1855281d757SMark McLoughlin }
1865281d757SMark McLoughlin 
1875281d757SMark McLoughlin static int tap_can_send(void *opaque)
1885281d757SMark McLoughlin {
1895281d757SMark McLoughlin     TAPState *s = opaque;
1905281d757SMark McLoughlin 
1915281d757SMark McLoughlin     return qemu_can_send_packet(s->vc);
1925281d757SMark McLoughlin }
1935281d757SMark McLoughlin 
/*
 * Read one packet from tapfd into buf (at most maxlen bytes).
 *
 * With __sun__ defined the data is fetched with getmsg() and the length
 * reported in the strbuf is returned, or -1 if getmsg() fails.  Otherwise
 * this is a plain read() and its result is returned unchanged.
 */
static ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
#ifdef __sun__
    struct strbuf data;
    int flags = 0;

    data.maxlen = maxlen;
    data.buf = (char *)buf;

    if (getmsg(tapfd, NULL, &data, &flags) < 0) {
        return -1;
    }
    return data.len;
#else
    return read(tapfd, buf, maxlen);
#endif
}
2115281d757SMark McLoughlin 
2125281d757SMark McLoughlin static void tap_send_completed(VLANClientState *vc, ssize_t len)
2135281d757SMark McLoughlin {
2145281d757SMark McLoughlin     TAPState *s = vc->opaque;
2155281d757SMark McLoughlin     tap_read_poll(s, 1);
2165281d757SMark McLoughlin }
2175281d757SMark McLoughlin 
2185281d757SMark McLoughlin static void tap_send(void *opaque)
2195281d757SMark McLoughlin {
2205281d757SMark McLoughlin     TAPState *s = opaque;
2215281d757SMark McLoughlin     int size;
2225281d757SMark McLoughlin 
2235281d757SMark McLoughlin     do {
2245281d757SMark McLoughlin         uint8_t *buf = s->buf;
2255281d757SMark McLoughlin 
2265281d757SMark McLoughlin         size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
2275281d757SMark McLoughlin         if (size <= 0) {
2285281d757SMark McLoughlin             break;
2295281d757SMark McLoughlin         }
2305281d757SMark McLoughlin 
2315281d757SMark McLoughlin         if (s->has_vnet_hdr && !s->using_vnet_hdr) {
2325281d757SMark McLoughlin             buf  += sizeof(struct virtio_net_hdr);
2335281d757SMark McLoughlin             size -= sizeof(struct virtio_net_hdr);
2345281d757SMark McLoughlin         }
2355281d757SMark McLoughlin 
2365281d757SMark McLoughlin         size = qemu_send_packet_async(s->vc, buf, size, tap_send_completed);
2375281d757SMark McLoughlin         if (size == 0) {
2385281d757SMark McLoughlin             tap_read_poll(s, 0);
2395281d757SMark McLoughlin         }
2405281d757SMark McLoughlin     } while (size > 0);
2415281d757SMark McLoughlin }
2425281d757SMark McLoughlin 
2435281d757SMark McLoughlin /* sndbuf should be set to a value lower than the tx queue
2445281d757SMark McLoughlin  * capacity of any destination network interface.
2455281d757SMark McLoughlin  * Ethernet NICs generally have txqueuelen=1000, so 1Mb is
2465281d757SMark McLoughlin  * a good default, given a 1500 byte MTU.
2475281d757SMark McLoughlin  */
2485281d757SMark McLoughlin #define TAP_DEFAULT_SNDBUF 1024*1024
2495281d757SMark McLoughlin 
2505281d757SMark McLoughlin static int tap_set_sndbuf(TAPState *s, QemuOpts *opts)
2515281d757SMark McLoughlin {
2525281d757SMark McLoughlin     int sndbuf;
2535281d757SMark McLoughlin 
2545281d757SMark McLoughlin     sndbuf = qemu_opt_get_size(opts, "sndbuf", TAP_DEFAULT_SNDBUF);
2555281d757SMark McLoughlin     if (!sndbuf) {
2565281d757SMark McLoughlin         sndbuf = INT_MAX;
2575281d757SMark McLoughlin     }
2585281d757SMark McLoughlin 
2595281d757SMark McLoughlin     if (ioctl(s->fd, TUNSETSNDBUF, &sndbuf) == -1 && qemu_opt_get(opts, "sndbuf")) {
2605281d757SMark McLoughlin         qemu_error("TUNSETSNDBUF ioctl failed: %s\n", strerror(errno));
2615281d757SMark McLoughlin         return -1;
2625281d757SMark McLoughlin     }
2635281d757SMark McLoughlin     return 0;
2645281d757SMark McLoughlin }
2655281d757SMark McLoughlin 
2665281d757SMark McLoughlin int tap_has_ufo(VLANClientState *vc)
2675281d757SMark McLoughlin {
2685281d757SMark McLoughlin     TAPState *s = vc->opaque;
2695281d757SMark McLoughlin 
2705281d757SMark McLoughlin     assert(vc->type == NET_CLIENT_TYPE_TAP);
2715281d757SMark McLoughlin 
2725281d757SMark McLoughlin     return s->has_ufo;
2735281d757SMark McLoughlin }
2745281d757SMark McLoughlin 
2755281d757SMark McLoughlin int tap_has_vnet_hdr(VLANClientState *vc)
2765281d757SMark McLoughlin {
2775281d757SMark McLoughlin     TAPState *s = vc->opaque;
2785281d757SMark McLoughlin 
2795281d757SMark McLoughlin     assert(vc->type == NET_CLIENT_TYPE_TAP);
2805281d757SMark McLoughlin 
2815281d757SMark McLoughlin     return s->has_vnet_hdr;
2825281d757SMark McLoughlin }
2835281d757SMark McLoughlin 
2845281d757SMark McLoughlin void tap_using_vnet_hdr(VLANClientState *vc, int using_vnet_hdr)
2855281d757SMark McLoughlin {
2865281d757SMark McLoughlin     TAPState *s = vc->opaque;
2875281d757SMark McLoughlin 
2885281d757SMark McLoughlin     using_vnet_hdr = using_vnet_hdr != 0;
2895281d757SMark McLoughlin 
2905281d757SMark McLoughlin     assert(vc->type == NET_CLIENT_TYPE_TAP);
2915281d757SMark McLoughlin     assert(s->has_vnet_hdr == using_vnet_hdr);
2925281d757SMark McLoughlin 
2935281d757SMark McLoughlin     s->using_vnet_hdr = using_vnet_hdr;
2945281d757SMark McLoughlin }
2955281d757SMark McLoughlin 
2965281d757SMark McLoughlin static int tap_probe_vnet_hdr(int fd)
2975281d757SMark McLoughlin {
2985281d757SMark McLoughlin     struct ifreq ifr;
2995281d757SMark McLoughlin 
3005281d757SMark McLoughlin     if (ioctl(fd, TUNGETIFF, &ifr) != 0) {
3015281d757SMark McLoughlin         qemu_error("TUNGETIFF ioctl() failed: %s\n", strerror(errno));
3025281d757SMark McLoughlin         return 0;
3035281d757SMark McLoughlin     }
3045281d757SMark McLoughlin 
3055281d757SMark McLoughlin     return ifr.ifr_flags & IFF_VNET_HDR;
3065281d757SMark McLoughlin }
3075281d757SMark McLoughlin 
3085281d757SMark McLoughlin void tap_set_offload(VLANClientState *vc, int csum, int tso4,
3095281d757SMark McLoughlin                      int tso6, int ecn, int ufo)
3105281d757SMark McLoughlin {
3115281d757SMark McLoughlin     TAPState *s = vc->opaque;
3125281d757SMark McLoughlin     unsigned int offload = 0;
3135281d757SMark McLoughlin 
3145281d757SMark McLoughlin     if (csum) {
3155281d757SMark McLoughlin         offload |= TUN_F_CSUM;
3165281d757SMark McLoughlin         if (tso4)
3175281d757SMark McLoughlin             offload |= TUN_F_TSO4;
3185281d757SMark McLoughlin         if (tso6)
3195281d757SMark McLoughlin             offload |= TUN_F_TSO6;
3205281d757SMark McLoughlin         if ((tso4 || tso6) && ecn)
3215281d757SMark McLoughlin             offload |= TUN_F_TSO_ECN;
3225281d757SMark McLoughlin         if (ufo)
3235281d757SMark McLoughlin             offload |= TUN_F_UFO;
3245281d757SMark McLoughlin     }
3255281d757SMark McLoughlin 
3265281d757SMark McLoughlin     if (ioctl(s->fd, TUNSETOFFLOAD, offload) != 0) {
3275281d757SMark McLoughlin         offload &= ~TUN_F_UFO;
3285281d757SMark McLoughlin         if (ioctl(s->fd, TUNSETOFFLOAD, offload) != 0) {
3295281d757SMark McLoughlin             fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n",
3305281d757SMark McLoughlin                     strerror(errno));
3315281d757SMark McLoughlin         }
3325281d757SMark McLoughlin     }
3335281d757SMark McLoughlin }
3345281d757SMark McLoughlin 
3355281d757SMark McLoughlin static void tap_cleanup(VLANClientState *vc)
3365281d757SMark McLoughlin {
3375281d757SMark McLoughlin     TAPState *s = vc->opaque;
3385281d757SMark McLoughlin 
3395281d757SMark McLoughlin     qemu_purge_queued_packets(vc);
3405281d757SMark McLoughlin 
3415281d757SMark McLoughlin     if (s->down_script[0])
3425281d757SMark McLoughlin         launch_script(s->down_script, s->down_script_arg, s->fd);
3435281d757SMark McLoughlin 
3445281d757SMark McLoughlin     tap_read_poll(s, 0);
3455281d757SMark McLoughlin     tap_write_poll(s, 0);
3465281d757SMark McLoughlin     close(s->fd);
3475281d757SMark McLoughlin     qemu_free(s);
3485281d757SMark McLoughlin }
3495281d757SMark McLoughlin 
3505281d757SMark McLoughlin /* fd support */
3515281d757SMark McLoughlin 
3525281d757SMark McLoughlin static TAPState *net_tap_fd_init(VLANState *vlan,
3535281d757SMark McLoughlin                                  const char *model,
3545281d757SMark McLoughlin                                  const char *name,
3555281d757SMark McLoughlin                                  int fd,
3565281d757SMark McLoughlin                                  int vnet_hdr)
3575281d757SMark McLoughlin {
3585281d757SMark McLoughlin     TAPState *s;
3595281d757SMark McLoughlin     unsigned int offload;
3605281d757SMark McLoughlin 
3615281d757SMark McLoughlin     s = qemu_mallocz(sizeof(TAPState));
3625281d757SMark McLoughlin     s->fd = fd;
3635281d757SMark McLoughlin     s->has_vnet_hdr = vnet_hdr != 0;
3645281d757SMark McLoughlin     s->using_vnet_hdr = 0;
3655281d757SMark McLoughlin     s->vc = qemu_new_vlan_client(NET_CLIENT_TYPE_TAP,
3665281d757SMark McLoughlin                                  vlan, NULL, model, name, NULL,
3675281d757SMark McLoughlin                                  tap_receive, tap_receive_raw,
3685281d757SMark McLoughlin                                  tap_receive_iov, tap_cleanup, s);
3695281d757SMark McLoughlin     s->has_ufo = 0;
3705281d757SMark McLoughlin     /* Check if tap supports UFO */
3715281d757SMark McLoughlin     offload = TUN_F_CSUM | TUN_F_UFO;
3725281d757SMark McLoughlin     if (ioctl(s->fd, TUNSETOFFLOAD, offload) == 0)
3735281d757SMark McLoughlin        s->has_ufo = 1;
3745281d757SMark McLoughlin     tap_set_offload(s->vc, 0, 0, 0, 0, 0);
3755281d757SMark McLoughlin     tap_read_poll(s, 1);
3765281d757SMark McLoughlin     return s;
3775281d757SMark McLoughlin }
3785281d757SMark McLoughlin 
379*e7e92325SMark McLoughlin #ifdef __sun__
3805281d757SMark McLoughlin #define TUNNEWPPA       (('T'<<16) | 0x0001)
3815281d757SMark McLoughlin /*
3825281d757SMark McLoughlin  * Allocate TAP device, returns opened fd.
3835281d757SMark McLoughlin  * Stores dev name in the first arg(must be large enough).
3845281d757SMark McLoughlin  */
3855281d757SMark McLoughlin static int tap_alloc(char *dev, size_t dev_size)
3865281d757SMark McLoughlin {
3875281d757SMark McLoughlin     int tap_fd, if_fd, ppa = -1;
3885281d757SMark McLoughlin     static int ip_fd = 0;
3895281d757SMark McLoughlin     char *ptr;
3905281d757SMark McLoughlin 
3915281d757SMark McLoughlin     static int arp_fd = 0;
3925281d757SMark McLoughlin     int ip_muxid, arp_muxid;
3935281d757SMark McLoughlin     struct strioctl  strioc_if, strioc_ppa;
3945281d757SMark McLoughlin     int link_type = I_PLINK;;
3955281d757SMark McLoughlin     struct lifreq ifr;
3965281d757SMark McLoughlin     char actual_name[32] = "";
3975281d757SMark McLoughlin 
3985281d757SMark McLoughlin     memset(&ifr, 0x0, sizeof(ifr));
3995281d757SMark McLoughlin 
4005281d757SMark McLoughlin     if( *dev ){
4015281d757SMark McLoughlin        ptr = dev;
4025281d757SMark McLoughlin        while( *ptr && !qemu_isdigit((int)*ptr) ) ptr++;
4035281d757SMark McLoughlin        ppa = atoi(ptr);
4045281d757SMark McLoughlin     }
4055281d757SMark McLoughlin 
4065281d757SMark McLoughlin     /* Check if IP device was opened */
4075281d757SMark McLoughlin     if( ip_fd )
4085281d757SMark McLoughlin        close(ip_fd);
4095281d757SMark McLoughlin 
4105281d757SMark McLoughlin     TFR(ip_fd = open("/dev/udp", O_RDWR, 0));
4115281d757SMark McLoughlin     if (ip_fd < 0) {
4125281d757SMark McLoughlin        syslog(LOG_ERR, "Can't open /dev/ip (actually /dev/udp)");
4135281d757SMark McLoughlin        return -1;
4145281d757SMark McLoughlin     }
4155281d757SMark McLoughlin 
4165281d757SMark McLoughlin     TFR(tap_fd = open("/dev/tap", O_RDWR, 0));
4175281d757SMark McLoughlin     if (tap_fd < 0) {
4185281d757SMark McLoughlin        syslog(LOG_ERR, "Can't open /dev/tap");
4195281d757SMark McLoughlin        return -1;
4205281d757SMark McLoughlin     }
4215281d757SMark McLoughlin 
4225281d757SMark McLoughlin     /* Assign a new PPA and get its unit number. */
4235281d757SMark McLoughlin     strioc_ppa.ic_cmd = TUNNEWPPA;
4245281d757SMark McLoughlin     strioc_ppa.ic_timout = 0;
4255281d757SMark McLoughlin     strioc_ppa.ic_len = sizeof(ppa);
4265281d757SMark McLoughlin     strioc_ppa.ic_dp = (char *)&ppa;
4275281d757SMark McLoughlin     if ((ppa = ioctl (tap_fd, I_STR, &strioc_ppa)) < 0)
4285281d757SMark McLoughlin        syslog (LOG_ERR, "Can't assign new interface");
4295281d757SMark McLoughlin 
4305281d757SMark McLoughlin     TFR(if_fd = open("/dev/tap", O_RDWR, 0));
4315281d757SMark McLoughlin     if (if_fd < 0) {
4325281d757SMark McLoughlin        syslog(LOG_ERR, "Can't open /dev/tap (2)");
4335281d757SMark McLoughlin        return -1;
4345281d757SMark McLoughlin     }
4355281d757SMark McLoughlin     if(ioctl(if_fd, I_PUSH, "ip") < 0){
4365281d757SMark McLoughlin        syslog(LOG_ERR, "Can't push IP module");
4375281d757SMark McLoughlin        return -1;
4385281d757SMark McLoughlin     }
4395281d757SMark McLoughlin 
4405281d757SMark McLoughlin     if (ioctl(if_fd, SIOCGLIFFLAGS, &ifr) < 0)
4415281d757SMark McLoughlin 	syslog(LOG_ERR, "Can't get flags\n");
4425281d757SMark McLoughlin 
4435281d757SMark McLoughlin     snprintf (actual_name, 32, "tap%d", ppa);
4445281d757SMark McLoughlin     pstrcpy(ifr.lifr_name, sizeof(ifr.lifr_name), actual_name);
4455281d757SMark McLoughlin 
4465281d757SMark McLoughlin     ifr.lifr_ppa = ppa;
4475281d757SMark McLoughlin     /* Assign ppa according to the unit number returned by tun device */
4485281d757SMark McLoughlin 
4495281d757SMark McLoughlin     if (ioctl (if_fd, SIOCSLIFNAME, &ifr) < 0)
4505281d757SMark McLoughlin         syslog (LOG_ERR, "Can't set PPA %d", ppa);
4515281d757SMark McLoughlin     if (ioctl(if_fd, SIOCGLIFFLAGS, &ifr) <0)
4525281d757SMark McLoughlin         syslog (LOG_ERR, "Can't get flags\n");
4535281d757SMark McLoughlin     /* Push arp module to if_fd */
4545281d757SMark McLoughlin     if (ioctl (if_fd, I_PUSH, "arp") < 0)
4555281d757SMark McLoughlin         syslog (LOG_ERR, "Can't push ARP module (2)");
4565281d757SMark McLoughlin 
4575281d757SMark McLoughlin     /* Push arp module to ip_fd */
4585281d757SMark McLoughlin     if (ioctl (ip_fd, I_POP, NULL) < 0)
4595281d757SMark McLoughlin         syslog (LOG_ERR, "I_POP failed\n");
4605281d757SMark McLoughlin     if (ioctl (ip_fd, I_PUSH, "arp") < 0)
4615281d757SMark McLoughlin         syslog (LOG_ERR, "Can't push ARP module (3)\n");
4625281d757SMark McLoughlin     /* Open arp_fd */
4635281d757SMark McLoughlin     TFR(arp_fd = open ("/dev/tap", O_RDWR, 0));
4645281d757SMark McLoughlin     if (arp_fd < 0)
4655281d757SMark McLoughlin        syslog (LOG_ERR, "Can't open %s\n", "/dev/tap");
4665281d757SMark McLoughlin 
4675281d757SMark McLoughlin     /* Set ifname to arp */
4685281d757SMark McLoughlin     strioc_if.ic_cmd = SIOCSLIFNAME;
4695281d757SMark McLoughlin     strioc_if.ic_timout = 0;
4705281d757SMark McLoughlin     strioc_if.ic_len = sizeof(ifr);
4715281d757SMark McLoughlin     strioc_if.ic_dp = (char *)&ifr;
4725281d757SMark McLoughlin     if (ioctl(arp_fd, I_STR, &strioc_if) < 0){
4735281d757SMark McLoughlin         syslog (LOG_ERR, "Can't set ifname to arp\n");
4745281d757SMark McLoughlin     }
4755281d757SMark McLoughlin 
4765281d757SMark McLoughlin     if((ip_muxid = ioctl(ip_fd, I_LINK, if_fd)) < 0){
4775281d757SMark McLoughlin        syslog(LOG_ERR, "Can't link TAP device to IP");
4785281d757SMark McLoughlin        return -1;
4795281d757SMark McLoughlin     }
4805281d757SMark McLoughlin 
4815281d757SMark McLoughlin     if ((arp_muxid = ioctl (ip_fd, link_type, arp_fd)) < 0)
4825281d757SMark McLoughlin         syslog (LOG_ERR, "Can't link TAP device to ARP");
4835281d757SMark McLoughlin 
4845281d757SMark McLoughlin     close (if_fd);
4855281d757SMark McLoughlin 
4865281d757SMark McLoughlin     memset(&ifr, 0x0, sizeof(ifr));
4875281d757SMark McLoughlin     pstrcpy(ifr.lifr_name, sizeof(ifr.lifr_name), actual_name);
4885281d757SMark McLoughlin     ifr.lifr_ip_muxid  = ip_muxid;
4895281d757SMark McLoughlin     ifr.lifr_arp_muxid = arp_muxid;
4905281d757SMark McLoughlin 
4915281d757SMark McLoughlin     if (ioctl (ip_fd, SIOCSLIFMUXID, &ifr) < 0)
4925281d757SMark McLoughlin     {
4935281d757SMark McLoughlin       ioctl (ip_fd, I_PUNLINK , arp_muxid);
4945281d757SMark McLoughlin       ioctl (ip_fd, I_PUNLINK, ip_muxid);
4955281d757SMark McLoughlin       syslog (LOG_ERR, "Can't set multiplexor id");
4965281d757SMark McLoughlin     }
4975281d757SMark McLoughlin 
4985281d757SMark McLoughlin     snprintf(dev, dev_size, "tap%d", ppa);
4995281d757SMark McLoughlin     return tap_fd;
5005281d757SMark McLoughlin }
5015281d757SMark McLoughlin 
/*
 * Open a tap device via tap_alloc() and put the fd in non-blocking mode.
 *
 * ifname:            out, receives the allocated device name
 *                    (at most ifname_size bytes).
 * vnet_hdr:          in/out.  tap_alloc() never enables IFF_VNET_HDR, so a
 *                    non-zero request is reset to 0 to tell the caller no
 *                    vnet header is in effect.
 * vnet_hdr_required: if non-zero while *vnet_hdr was requested, the open
 *                    fails instead of silently dropping the header.
 *
 * Returns the tap fd, or -1 on failure.
 */
int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
{
    char dev[10] = "";
    int fd;

    fd = tap_alloc(dev, sizeof(dev));
    if (fd < 0) {
        fprintf(stderr, "Cannot allocate TAP device\n");
        return -1;
    }

    if (*vnet_hdr) {
        /* No vnet header on this path: never leave the caller's flag set. */
        *vnet_hdr = 0;

        if (vnet_hdr_required) {
            qemu_error("vnet_hdr=1 requested, but no kernel "
                       "support for IFF_VNET_HDR available");
            close(fd);
            return -1;
        }
    }

    pstrcpy(ifname, ifname_size, dev);
    fcntl(fd, F_SETFL, O_NONBLOCK);
    return fd;
}
5145281d757SMark McLoughlin #elif defined (_AIX)
/* AIX stub: prints a notice on stderr and always fails with -1. */
int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
{
    fputs("no tap on AIX\n", stderr);
    return -1;
}
5205281d757SMark McLoughlin #else
521*e7e92325SMark McLoughlin int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
5225281d757SMark McLoughlin {
5235281d757SMark McLoughlin     struct ifreq ifr;
5245281d757SMark McLoughlin     int fd, ret;
5255281d757SMark McLoughlin 
5265281d757SMark McLoughlin     TFR(fd = open("/dev/net/tun", O_RDWR));
5275281d757SMark McLoughlin     if (fd < 0) {
5285281d757SMark McLoughlin         fprintf(stderr, "warning: could not open /dev/net/tun: no virtual network emulation\n");
5295281d757SMark McLoughlin         return -1;
5305281d757SMark McLoughlin     }
5315281d757SMark McLoughlin     memset(&ifr, 0, sizeof(ifr));
5325281d757SMark McLoughlin     ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
5335281d757SMark McLoughlin 
5345281d757SMark McLoughlin     if (*vnet_hdr) {
5355281d757SMark McLoughlin         unsigned int features;
5365281d757SMark McLoughlin 
5375281d757SMark McLoughlin         if (ioctl(fd, TUNGETFEATURES, &features) == 0 &&
5385281d757SMark McLoughlin             features & IFF_VNET_HDR) {
5395281d757SMark McLoughlin             *vnet_hdr = 1;
5405281d757SMark McLoughlin             ifr.ifr_flags |= IFF_VNET_HDR;
5415281d757SMark McLoughlin         }
5425281d757SMark McLoughlin 
5435281d757SMark McLoughlin         if (vnet_hdr_required && !*vnet_hdr) {
5445281d757SMark McLoughlin             qemu_error("vnet_hdr=1 requested, but no kernel "
5455281d757SMark McLoughlin                        "support for IFF_VNET_HDR available");
5465281d757SMark McLoughlin             close(fd);
5475281d757SMark McLoughlin             return -1;
5485281d757SMark McLoughlin         }
5495281d757SMark McLoughlin     }
5505281d757SMark McLoughlin 
5515281d757SMark McLoughlin     if (ifname[0] != '\0')
5525281d757SMark McLoughlin         pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname);
5535281d757SMark McLoughlin     else
5545281d757SMark McLoughlin         pstrcpy(ifr.ifr_name, IFNAMSIZ, "tap%d");
5555281d757SMark McLoughlin     ret = ioctl(fd, TUNSETIFF, (void *) &ifr);
5565281d757SMark McLoughlin     if (ret != 0) {
5575281d757SMark McLoughlin         fprintf(stderr, "warning: could not configure /dev/net/tun: no virtual network emulation\n");
5585281d757SMark McLoughlin         close(fd);
5595281d757SMark McLoughlin         return -1;
5605281d757SMark McLoughlin     }
5615281d757SMark McLoughlin     pstrcpy(ifname, ifname_size, ifr.ifr_name);
5625281d757SMark McLoughlin     fcntl(fd, F_SETFL, O_NONBLOCK);
5635281d757SMark McLoughlin     return fd;
5645281d757SMark McLoughlin }
5655281d757SMark McLoughlin #endif
5665281d757SMark McLoughlin 
/*
 * Run setup_script with ifname as its only argument and wait for it.
 *
 * SIGCHLD is blocked while the child runs.  The previous signal mask is
 * restored before returning on every parent path, including when fork()
 * itself fails.
 *
 * In the child, every descriptor except stdin, stdout, stderr and fd is
 * closed before the exec, so the script inherits only those.
 *
 * Returns 0 if the script exited normally with status 0.  Otherwise a
 * message is printed on stderr and -1 is returned.
 */
static int launch_script(const char *setup_script, const char *ifname, int fd)
{
    sigset_t oldmask, mask;
    int pid, status = 0;
    char *args[3];

    sigemptyset(&mask);
    sigaddset(&mask, SIGCHLD);
    sigprocmask(SIG_BLOCK, &mask, &oldmask);

    /* try to launch network script */
    pid = fork();
    if (pid == 0) {
        long open_max = sysconf(_SC_OPEN_MAX);
        long i;

        for (i = 0; i < open_max; i++) {
            if (i != STDIN_FILENO &&
                i != STDOUT_FILENO &&
                i != STDERR_FILENO &&
                i != fd) {
                close((int)i);
            }
        }
        args[0] = (char *)setup_script;
        args[1] = (char *)ifname;
        args[2] = NULL;
        execv(setup_script, args);
        /* Only reached when the exec failed. */
        _exit(1);
    }

    if (pid > 0) {
        while (waitpid(pid, &status, 0) != pid) {
            /* loop */
        }
    }

    /* Undo the SIGCHLD block whether or not a child was started. */
    sigprocmask(SIG_SETMASK, &oldmask, NULL);

    if (pid > 0 && WIFEXITED(status) && WEXITSTATUS(status) == 0) {
        return 0;
    }

    fprintf(stderr, "%s: could not launch network script\n", setup_script);
    return -1;
}
6105281d757SMark McLoughlin 
6115281d757SMark McLoughlin static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
6125281d757SMark McLoughlin {
6135281d757SMark McLoughlin     int fd, vnet_hdr_required;
6145281d757SMark McLoughlin     char ifname[128] = {0,};
6155281d757SMark McLoughlin     const char *setup_script;
6165281d757SMark McLoughlin 
6175281d757SMark McLoughlin     if (qemu_opt_get(opts, "ifname")) {
6185281d757SMark McLoughlin         pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname"));
6195281d757SMark McLoughlin     }
6205281d757SMark McLoughlin 
6215281d757SMark McLoughlin     *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1);
6225281d757SMark McLoughlin     if (qemu_opt_get(opts, "vnet_hdr")) {
6235281d757SMark McLoughlin         vnet_hdr_required = *vnet_hdr;
6245281d757SMark McLoughlin     } else {
6255281d757SMark McLoughlin         vnet_hdr_required = 0;
6265281d757SMark McLoughlin     }
6275281d757SMark McLoughlin 
6285281d757SMark McLoughlin     TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
6295281d757SMark McLoughlin     if (fd < 0) {
6305281d757SMark McLoughlin         return -1;
6315281d757SMark McLoughlin     }
6325281d757SMark McLoughlin 
6335281d757SMark McLoughlin     setup_script = qemu_opt_get(opts, "script");
6345281d757SMark McLoughlin     if (setup_script &&
6355281d757SMark McLoughlin         setup_script[0] != '\0' &&
6365281d757SMark McLoughlin         strcmp(setup_script, "no") != 0 &&
6375281d757SMark McLoughlin         launch_script(setup_script, ifname, fd)) {
6385281d757SMark McLoughlin         close(fd);
6395281d757SMark McLoughlin         return -1;
6405281d757SMark McLoughlin     }
6415281d757SMark McLoughlin 
6425281d757SMark McLoughlin     qemu_opt_set(opts, "ifname", ifname);
6435281d757SMark McLoughlin 
6445281d757SMark McLoughlin     return fd;
6455281d757SMark McLoughlin }
6465281d757SMark McLoughlin 
6475281d757SMark McLoughlin int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
6485281d757SMark McLoughlin {
6495281d757SMark McLoughlin     TAPState *s;
6505281d757SMark McLoughlin     int fd, vnet_hdr;
6515281d757SMark McLoughlin 
6525281d757SMark McLoughlin     if (qemu_opt_get(opts, "fd")) {
6535281d757SMark McLoughlin         if (qemu_opt_get(opts, "ifname") ||
6545281d757SMark McLoughlin             qemu_opt_get(opts, "script") ||
6555281d757SMark McLoughlin             qemu_opt_get(opts, "downscript") ||
6565281d757SMark McLoughlin             qemu_opt_get(opts, "vnet_hdr")) {
6575281d757SMark McLoughlin             qemu_error("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=\n");
6585281d757SMark McLoughlin             return -1;
6595281d757SMark McLoughlin         }
6605281d757SMark McLoughlin 
6615281d757SMark McLoughlin         fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
6625281d757SMark McLoughlin         if (fd == -1) {
6635281d757SMark McLoughlin             return -1;
6645281d757SMark McLoughlin         }
6655281d757SMark McLoughlin 
6665281d757SMark McLoughlin         fcntl(fd, F_SETFL, O_NONBLOCK);
6675281d757SMark McLoughlin 
6685281d757SMark McLoughlin         vnet_hdr = tap_probe_vnet_hdr(fd);
6695281d757SMark McLoughlin     } else {
6705281d757SMark McLoughlin         if (!qemu_opt_get(opts, "script")) {
6715281d757SMark McLoughlin             qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
6725281d757SMark McLoughlin         }
6735281d757SMark McLoughlin 
6745281d757SMark McLoughlin         if (!qemu_opt_get(opts, "downscript")) {
6755281d757SMark McLoughlin             qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
6765281d757SMark McLoughlin         }
6775281d757SMark McLoughlin 
6785281d757SMark McLoughlin         fd = net_tap_init(opts, &vnet_hdr);
6795281d757SMark McLoughlin     }
6805281d757SMark McLoughlin 
6815281d757SMark McLoughlin     s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
6825281d757SMark McLoughlin     if (!s) {
6835281d757SMark McLoughlin         close(fd);
6845281d757SMark McLoughlin         return -1;
6855281d757SMark McLoughlin     }
6865281d757SMark McLoughlin 
6875281d757SMark McLoughlin     if (tap_set_sndbuf(s, opts) < 0) {
6885281d757SMark McLoughlin         return -1;
6895281d757SMark McLoughlin     }
6905281d757SMark McLoughlin 
6915281d757SMark McLoughlin     if (qemu_opt_get(opts, "fd")) {
6925281d757SMark McLoughlin         snprintf(s->vc->info_str, sizeof(s->vc->info_str), "fd=%d", fd);
6935281d757SMark McLoughlin     } else {
6945281d757SMark McLoughlin         const char *ifname, *script, *downscript;
6955281d757SMark McLoughlin 
6965281d757SMark McLoughlin         ifname     = qemu_opt_get(opts, "ifname");
6975281d757SMark McLoughlin         script     = qemu_opt_get(opts, "script");
6985281d757SMark McLoughlin         downscript = qemu_opt_get(opts, "downscript");
6995281d757SMark McLoughlin 
7005281d757SMark McLoughlin         snprintf(s->vc->info_str, sizeof(s->vc->info_str),
7015281d757SMark McLoughlin                  "ifname=%s,script=%s,downscript=%s",
7025281d757SMark McLoughlin                  ifname, script, downscript);
7035281d757SMark McLoughlin 
7045281d757SMark McLoughlin         if (strcmp(downscript, "no") != 0) {
7055281d757SMark McLoughlin             snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
7065281d757SMark McLoughlin             snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
7075281d757SMark McLoughlin         }
7085281d757SMark McLoughlin     }
7095281d757SMark McLoughlin 
7105281d757SMark McLoughlin     if (vlan) {
7115281d757SMark McLoughlin         vlan->nb_host_devs++;
7125281d757SMark McLoughlin     }
7135281d757SMark McLoughlin 
7145281d757SMark McLoughlin     return 0;
7155281d757SMark McLoughlin }
7165281d757SMark McLoughlin 
7175281d757SMark McLoughlin #endif /* !defined(_AIX) */
718