14f56d42cSAsias He #include "kvm/virtio-net.h" 24f56d42cSAsias He #include "kvm/virtio-pci.h" 34f56d42cSAsias He #include "kvm/virtio.h" 44f56d42cSAsias He #include "kvm/ioport.h" 54f56d42cSAsias He #include "kvm/types.h" 64f56d42cSAsias He #include "kvm/mutex.h" 74f56d42cSAsias He #include "kvm/util.h" 84f56d42cSAsias He #include "kvm/kvm.h" 94f56d42cSAsias He #include "kvm/pci.h" 104f56d42cSAsias He 114f56d42cSAsias He #include <linux/virtio_net.h> 124f56d42cSAsias He #include <linux/if_tun.h> 134f56d42cSAsias He #include <net/if.h> 144f56d42cSAsias He #include <sys/ioctl.h> 154f56d42cSAsias He #include <assert.h> 164f56d42cSAsias He #include <fcntl.h> 17cb7202c1SSasha Levin #include <arpa/inet.h> 18cb7202c1SSasha Levin #include <sys/types.h> 19cb7202c1SSasha Levin #include <sys/socket.h> 20*73b7d038SAmos Kong #include <unistd.h> 21*73b7d038SAmos Kong #include <sys/wait.h> 224f56d42cSAsias He 234f56d42cSAsias He #define VIRTIO_NET_IRQ 14 244f56d42cSAsias He #define VIRTIO_NET_QUEUE_SIZE 128 254f56d42cSAsias He #define VIRTIO_NET_NUM_QUEUES 2 264f56d42cSAsias He #define VIRTIO_NET_RX_QUEUE 0 274f56d42cSAsias He #define VIRTIO_NET_TX_QUEUE 1 284f56d42cSAsias He #define PCI_VIRTIO_NET_DEVNUM 3 294f56d42cSAsias He 304f56d42cSAsias He struct net_device { 314f56d42cSAsias He pthread_mutex_t mutex; 324f56d42cSAsias He 334f56d42cSAsias He struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 344f56d42cSAsias He struct virtio_net_config net_config; 354f56d42cSAsias He uint32_t host_features; 364f56d42cSAsias He uint32_t guest_features; 374f56d42cSAsias He uint16_t config_vector; 384f56d42cSAsias He uint8_t status; 394f56d42cSAsias He uint16_t queue_selector; 404f56d42cSAsias He 414f56d42cSAsias He pthread_t io_rx_thread; 424f56d42cSAsias He pthread_mutex_t io_rx_mutex; 434f56d42cSAsias He pthread_cond_t io_rx_cond; 444f56d42cSAsias He 454f56d42cSAsias He pthread_t io_tx_thread; 464f56d42cSAsias He pthread_mutex_t io_tx_mutex; 474f56d42cSAsias He pthread_cond_t io_tx_cond; 484f56d42cSAsias He 494f56d42cSAsias He int tap_fd; 504f56d42cSAsias He char tap_name[IFNAMSIZ]; 514f56d42cSAsias He }; 524f56d42cSAsias He 534f56d42cSAsias He static struct net_device net_device = { 544f56d42cSAsias He .mutex = PTHREAD_MUTEX_INITIALIZER, 554f56d42cSAsias He 564f56d42cSAsias He .net_config = { 574f56d42cSAsias He .mac = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}, 584f56d42cSAsias He .status = VIRTIO_NET_S_LINK_UP, 594f56d42cSAsias He }, 604f56d42cSAsias He 614f56d42cSAsias He .host_features = 1UL << VIRTIO_NET_F_MAC, 624f56d42cSAsias He }; 634f56d42cSAsias He 644f56d42cSAsias He static void *virtio_net_rx_thread(void *p) 654f56d42cSAsias He { 664f56d42cSAsias He struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 674f56d42cSAsias He struct virt_queue *vq; 684f56d42cSAsias He struct kvm *self; 694f56d42cSAsias He uint16_t out, in; 704f56d42cSAsias He uint16_t head; 714f56d42cSAsias He int len; 724f56d42cSAsias He 734f56d42cSAsias He self = p; 744f56d42cSAsias He vq = &net_device.vqs[VIRTIO_NET_RX_QUEUE]; 754f56d42cSAsias He 764f56d42cSAsias He while (1) { 774f56d42cSAsias He mutex_lock(&net_device.io_rx_mutex); 784f56d42cSAsias He if (!virt_queue__available(vq)) 794f56d42cSAsias He pthread_cond_wait(&net_device.io_rx_cond, &net_device.io_rx_mutex); 804f56d42cSAsias He mutex_unlock(&net_device.io_rx_mutex); 814f56d42cSAsias He 824f56d42cSAsias He while (virt_queue__available(vq)) { 834f56d42cSAsias He head = virt_queue__get_iov(vq, iov, &out, &in, self); 844f56d42cSAsias He 854f56d42cSAsias He /* We do not specify GSO or CSUM features, So we can ignore virtio_net_hdr */ 864f56d42cSAsias He len = readv(net_device.tap_fd, iov + 1, in - 1); 874f56d42cSAsias He 884f56d42cSAsias He /* However, We have to tell guest we have write the virtio_net_hdr */ 894f56d42cSAsias He virt_queue__set_used_elem(vq, head, sizeof(struct virtio_net_hdr) + len); 904f56d42cSAsias He 914f56d42cSAsias He /* We should interrupt guest right now, otherwise latency is huge. */ 924f56d42cSAsias He kvm__irq_line(self, VIRTIO_NET_IRQ, 1); 934f56d42cSAsias He } 944f56d42cSAsias He 954f56d42cSAsias He } 964f56d42cSAsias He 974f56d42cSAsias He pthread_exit(NULL); 984f56d42cSAsias He return NULL; 994f56d42cSAsias He 1004f56d42cSAsias He } 1014f56d42cSAsias He 1024f56d42cSAsias He static void *virtio_net_tx_thread(void *p) 1034f56d42cSAsias He { 1044f56d42cSAsias He struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 1054f56d42cSAsias He struct virt_queue *vq; 1064f56d42cSAsias He struct kvm *self; 1074f56d42cSAsias He uint16_t out, in; 1084f56d42cSAsias He uint16_t head; 1094f56d42cSAsias He int len; 1104f56d42cSAsias He 1114f56d42cSAsias He self = p; 1124f56d42cSAsias He vq = &net_device.vqs[VIRTIO_NET_TX_QUEUE]; 1134f56d42cSAsias He 1144f56d42cSAsias He while (1) { 1154f56d42cSAsias He mutex_lock(&net_device.io_tx_mutex); 1164f56d42cSAsias He if (!virt_queue__available(vq)) 1174f56d42cSAsias He pthread_cond_wait(&net_device.io_tx_cond, &net_device.io_tx_mutex); 1184f56d42cSAsias He mutex_unlock(&net_device.io_tx_mutex); 1194f56d42cSAsias He 1204f56d42cSAsias He while (virt_queue__available(vq)) { 1214f56d42cSAsias He head = virt_queue__get_iov(vq, iov, &out, &in, self); 1224f56d42cSAsias He len = writev(net_device.tap_fd, iov + 1, out - 1); 1234f56d42cSAsias He virt_queue__set_used_elem(vq, head, len); 1244f56d42cSAsias He } 1254f56d42cSAsias He 1264f56d42cSAsias He kvm__irq_line(self, VIRTIO_NET_IRQ, 1); 1274f56d42cSAsias He } 1284f56d42cSAsias He 1294f56d42cSAsias He pthread_exit(NULL); 1304f56d42cSAsias He return NULL; 1314f56d42cSAsias He 1324f56d42cSAsias He } 1334f56d42cSAsias He static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, uint32_t count) 1344f56d42cSAsias He { 1354f56d42cSAsias He uint8_t *config_space = (uint8_t *) &net_device.net_config; 1364f56d42cSAsias He 1374f56d42cSAsias He if (size != 1 || count != 1) 1384f56d42cSAsias He return false; 1394f56d42cSAsias He 1404f56d42cSAsias He if ((offset - VIRTIO_PCI_CONFIG_NOMSI) > sizeof(struct virtio_net_config)) 1414f56d42cSAsias He error("config offset is too big: %li", offset - VIRTIO_PCI_CONFIG_NOMSI); 1424f56d42cSAsias He 1434f56d42cSAsias He ioport__write8(data, config_space[offset - VIRTIO_PCI_CONFIG_NOMSI]); 1444f56d42cSAsias He 1454f56d42cSAsias He return true; 1464f56d42cSAsias He } 1474f56d42cSAsias He 1484f56d42cSAsias He static bool virtio_net_pci_io_in(struct kvm *self, uint16_t port, void *data, int size, uint32_t count) 1494f56d42cSAsias He { 1504f56d42cSAsias He unsigned long offset = port - IOPORT_VIRTIO_NET; 1514f56d42cSAsias He bool ret = true; 1524f56d42cSAsias He 1534f56d42cSAsias He mutex_lock(&net_device.mutex); 1544f56d42cSAsias He 1554f56d42cSAsias He switch (offset) { 1564f56d42cSAsias He case VIRTIO_PCI_HOST_FEATURES: 1574f56d42cSAsias He ioport__write32(data, net_device.host_features); 1584f56d42cSAsias He break; 1594f56d42cSAsias He case VIRTIO_PCI_GUEST_FEATURES: 1604f56d42cSAsias He ret = false; 1614f56d42cSAsias He break; 1624f56d42cSAsias He case VIRTIO_PCI_QUEUE_PFN: 1634f56d42cSAsias He ioport__write32(data, net_device.vqs[net_device.queue_selector].pfn); 1644f56d42cSAsias He break; 1654f56d42cSAsias He case VIRTIO_PCI_QUEUE_NUM: 1664f56d42cSAsias He ioport__write16(data, VIRTIO_NET_QUEUE_SIZE); 1674f56d42cSAsias He break; 1684f56d42cSAsias He case VIRTIO_PCI_QUEUE_SEL: 1694f56d42cSAsias He case VIRTIO_PCI_QUEUE_NOTIFY: 1704f56d42cSAsias He ret = false; 1714f56d42cSAsias He break; 1724f56d42cSAsias He case VIRTIO_PCI_STATUS: 1734f56d42cSAsias He ioport__write8(data, net_device.status); 1744f56d42cSAsias He break; 1754f56d42cSAsias He case VIRTIO_PCI_ISR: 1764f56d42cSAsias He ioport__write8(data, 0x1); 1774f56d42cSAsias He kvm__irq_line(self, VIRTIO_NET_IRQ, 0); 1784f56d42cSAsias He break; 1794f56d42cSAsias He case VIRTIO_MSI_CONFIG_VECTOR: 1804f56d42cSAsias He ioport__write16(data, net_device.config_vector); 1814f56d42cSAsias He break; 1824f56d42cSAsias He default: 1834f56d42cSAsias He ret = virtio_net_pci_io_device_specific_in(data, offset, size, count); 1844f56d42cSAsias He }; 1854f56d42cSAsias He 1864f56d42cSAsias He mutex_unlock(&net_device.mutex); 1874f56d42cSAsias He 1884f56d42cSAsias He return ret; 1894f56d42cSAsias He } 1904f56d42cSAsias He 1914f56d42cSAsias He static void virtio_net_handle_callback(struct kvm *self, uint16_t queue_index) 1924f56d42cSAsias He { 1934f56d42cSAsias He if (queue_index == VIRTIO_NET_TX_QUEUE) { 1944f56d42cSAsias He 1954f56d42cSAsias He mutex_lock(&net_device.io_tx_mutex); 1964f56d42cSAsias He pthread_cond_signal(&net_device.io_tx_cond); 1974f56d42cSAsias He mutex_unlock(&net_device.io_tx_mutex); 1984f56d42cSAsias He 1994f56d42cSAsias He } else if (queue_index == VIRTIO_NET_RX_QUEUE) { 2004f56d42cSAsias He 2014f56d42cSAsias He mutex_lock(&net_device.io_rx_mutex); 2024f56d42cSAsias He pthread_cond_signal(&net_device.io_rx_cond); 2034f56d42cSAsias He mutex_unlock(&net_device.io_rx_mutex); 2044f56d42cSAsias He 2054f56d42cSAsias He } 2064f56d42cSAsias He } 2074f56d42cSAsias He 2084f56d42cSAsias He static bool virtio_net_pci_io_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count) 2094f56d42cSAsias He { 2104f56d42cSAsias He unsigned long offset = port - IOPORT_VIRTIO_NET; 2114f56d42cSAsias He bool ret = true; 2124f56d42cSAsias He 2134f56d42cSAsias He mutex_lock(&net_device.mutex); 2144f56d42cSAsias He 2154f56d42cSAsias He switch (offset) { 2164f56d42cSAsias He case VIRTIO_PCI_GUEST_FEATURES: 2174f56d42cSAsias He net_device.guest_features = ioport__read32(data); 2184f56d42cSAsias He break; 2194f56d42cSAsias He case VIRTIO_PCI_QUEUE_PFN: { 2204f56d42cSAsias He struct virt_queue *queue; 2214f56d42cSAsias He void *p; 2224f56d42cSAsias He 2234f56d42cSAsias He assert(net_device.queue_selector < VIRTIO_NET_NUM_QUEUES); 2244f56d42cSAsias He 2254f56d42cSAsias He queue = &net_device.vqs[net_device.queue_selector]; 2264f56d42cSAsias He queue->pfn = ioport__read32(data); 2274f56d42cSAsias He p = guest_flat_to_host(self, queue->pfn << 12); 2284f56d42cSAsias He 2294f56d42cSAsias He vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, 4096); 2304f56d42cSAsias He 2314f56d42cSAsias He break; 2324f56d42cSAsias He } 2334f56d42cSAsias He case VIRTIO_PCI_QUEUE_SEL: 2344f56d42cSAsias He net_device.queue_selector = ioport__read16(data); 2354f56d42cSAsias He break; 2364f56d42cSAsias He case VIRTIO_PCI_QUEUE_NOTIFY: { 2374f56d42cSAsias He uint16_t queue_index; 2384f56d42cSAsias He queue_index = ioport__read16(data); 2394f56d42cSAsias He virtio_net_handle_callback(self, queue_index); 2404f56d42cSAsias He break; 2414f56d42cSAsias He } 2424f56d42cSAsias He case VIRTIO_PCI_STATUS: 2434f56d42cSAsias He net_device.status = ioport__read8(data); 2444f56d42cSAsias He break; 2454f56d42cSAsias He case VIRTIO_MSI_CONFIG_VECTOR: 2464f56d42cSAsias He net_device.config_vector = VIRTIO_MSI_NO_VECTOR; 2474f56d42cSAsias He break; 2484f56d42cSAsias He case VIRTIO_MSI_QUEUE_VECTOR: 2494f56d42cSAsias He break; 2504f56d42cSAsias He default: 2514f56d42cSAsias He ret = false; 2524f56d42cSAsias He }; 2534f56d42cSAsias He 2544f56d42cSAsias He mutex_unlock(&net_device.mutex); 2554f56d42cSAsias He return ret; 2564f56d42cSAsias He } 2574f56d42cSAsias He 2584f56d42cSAsias He static struct ioport_operations virtio_net_io_ops = { 2594f56d42cSAsias He .io_in = virtio_net_pci_io_in, 2604f56d42cSAsias He .io_out = virtio_net_pci_io_out, 2614f56d42cSAsias He }; 2624f56d42cSAsias He 2634f56d42cSAsias He #define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4 2644f56d42cSAsias He #define PCI_DEVICE_ID_VIRTIO_NET 0x1000 2654f56d42cSAsias He #define PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET 0x1af4 2664f56d42cSAsias He #define PCI_SUBSYSTEM_ID_VIRTIO_NET 0x0001 2674f56d42cSAsias He 2684f56d42cSAsias He static struct pci_device_header virtio_net_pci_device = { 2694f56d42cSAsias He .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 2704f56d42cSAsias He .device_id = PCI_DEVICE_ID_VIRTIO_NET, 2714f56d42cSAsias He .header_type = PCI_HEADER_TYPE_NORMAL, 2724f56d42cSAsias He .revision_id = 0, 2734f56d42cSAsias He .class = 0x020000, 2744f56d42cSAsias He .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 2754f56d42cSAsias He .subsys_id = PCI_SUBSYSTEM_ID_VIRTIO_NET, 2764f56d42cSAsias He .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO, 2774f56d42cSAsias He .irq_pin = 3, 2784f56d42cSAsias He .irq_line = VIRTIO_NET_IRQ, 2794f56d42cSAsias He }; 2804f56d42cSAsias He 2813b02f580SSasha Levin static bool virtio_net__tap_init(const struct virtio_net_parameters *params) 2824f56d42cSAsias He { 2834f56d42cSAsias He struct ifreq ifr; 284cb7202c1SSasha Levin int sock = socket(AF_INET, SOCK_STREAM, 0); 285*73b7d038SAmos Kong int i, pid, status; 286cb7202c1SSasha Levin struct sockaddr_in sin = {0}; 2874f56d42cSAsias He 288a4e724ddSSasha Levin for (i = 0 ; i < 6 ; i++) 289a4e724ddSSasha Levin net_device.net_config.mac[i] = params->guest_mac[i]; 290a4e724ddSSasha Levin 2914f56d42cSAsias He net_device.tap_fd = open("/dev/net/tun", O_RDWR); 2923b02f580SSasha Levin if (net_device.tap_fd < 0) { 2933b02f580SSasha Levin warning("Unable to open /dev/net/tun\n"); 2943b02f580SSasha Levin goto fail; 2953b02f580SSasha Levin } 2964f56d42cSAsias He 2974f56d42cSAsias He memset(&ifr, 0, sizeof(ifr)); 2984f56d42cSAsias He ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 2994f56d42cSAsias He 3003b02f580SSasha Levin if (ioctl(net_device.tap_fd, TUNSETIFF, &ifr) < 0) { 3013b02f580SSasha Levin warning("Config tap device error. Are you root?"); 3023b02f580SSasha Levin goto fail; 3033b02f580SSasha Levin } 3044f56d42cSAsias He 3054f56d42cSAsias He strncpy(net_device.tap_name, ifr.ifr_name, sizeof(net_device.tap_name)); 3064f56d42cSAsias He 3074f56d42cSAsias He ioctl(net_device.tap_fd, TUNSETNOCSUM, 1); 3084f56d42cSAsias He 309*73b7d038SAmos Kong if (strcmp(params->script, "none")) { 310*73b7d038SAmos Kong pid = fork(); 311*73b7d038SAmos Kong if (pid == 0) { 312*73b7d038SAmos Kong execl(params->script, params->script, net_device.tap_name, NULL); 313*73b7d038SAmos Kong _exit(1); 314*73b7d038SAmos Kong } else { 315*73b7d038SAmos Kong waitpid(pid, &status, 0); 316*73b7d038SAmos Kong if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 317*73b7d038SAmos Kong warning("Fail to setup tap by %s", params->script); 318*73b7d038SAmos Kong goto fail; 319*73b7d038SAmos Kong } 320*73b7d038SAmos Kong } 321*73b7d038SAmos Kong } else { 322cb7202c1SSasha Levin memset(&ifr, 0, sizeof(ifr)); 323cb7202c1SSasha Levin 324cb7202c1SSasha Levin strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 325cb7202c1SSasha Levin 326bdfcfca6SSasha Levin sin.sin_addr.s_addr = inet_addr(params->host_ip); 327cb7202c1SSasha Levin memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 328cb7202c1SSasha Levin ifr.ifr_addr.sa_family = AF_INET; 329cb7202c1SSasha Levin 3303b02f580SSasha Levin if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 331cb7202c1SSasha Levin warning("Can not set ip address on tap device"); 3323b02f580SSasha Levin goto fail; 3333b02f580SSasha Levin } 334*73b7d038SAmos Kong } 335cb7202c1SSasha Levin 336cb7202c1SSasha Levin memset(&ifr, 0, sizeof(ifr)); 337cb7202c1SSasha Levin strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 338cb7202c1SSasha Levin ioctl(sock, SIOCGIFFLAGS, &ifr); 339cb7202c1SSasha Levin ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 340cb7202c1SSasha Levin if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 341cb7202c1SSasha Levin warning("Could not bring tap device up"); 342cb7202c1SSasha Levin 343cb7202c1SSasha Levin close(sock); 3443b02f580SSasha Levin 3453b02f580SSasha Levin return 1; 3463b02f580SSasha Levin 3473b02f580SSasha Levin fail: 3483b02f580SSasha Levin if (sock >= 0) 3493b02f580SSasha Levin close(sock); 3503b02f580SSasha Levin if (net_device.tap_fd >= 0) 3513b02f580SSasha Levin close(net_device.tap_fd); 3523b02f580SSasha Levin 3533b02f580SSasha Levin return 0; 3544f56d42cSAsias He } 3554f56d42cSAsias He 3564f56d42cSAsias He static void virtio_net__io_thread_init(struct kvm *self) 3574f56d42cSAsias He { 3584f56d42cSAsias He pthread_mutex_init(&net_device.io_rx_mutex, NULL); 3594f56d42cSAsias He pthread_cond_init(&net_device.io_tx_cond, NULL); 3604f56d42cSAsias He 3614f56d42cSAsias He pthread_mutex_init(&net_device.io_rx_mutex, NULL); 3624f56d42cSAsias He pthread_cond_init(&net_device.io_tx_cond, NULL); 3634f56d42cSAsias He 3644f56d42cSAsias He pthread_create(&net_device.io_rx_thread, NULL, virtio_net_rx_thread, (void *)self); 3654f56d42cSAsias He pthread_create(&net_device.io_tx_thread, NULL, virtio_net_tx_thread, (void *)self); 3664f56d42cSAsias He } 3674f56d42cSAsias He 368bdfcfca6SSasha Levin void virtio_net__init(const struct virtio_net_parameters *params) 3694f56d42cSAsias He { 3703b02f580SSasha Levin if (virtio_net__tap_init(params)) { 3714f56d42cSAsias He pci__register(&virtio_net_pci_device, PCI_VIRTIO_NET_DEVNUM); 3724f56d42cSAsias He ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE); 3734f56d42cSAsias He 374bdfcfca6SSasha Levin virtio_net__io_thread_init(params->self); 3754f56d42cSAsias He } 3763b02f580SSasha Levin } 377