1*4f56d42cSAsias He #include "kvm/virtio-net.h" 2*4f56d42cSAsias He #include "kvm/virtio-pci.h" 3*4f56d42cSAsias He #include "kvm/virtio.h" 4*4f56d42cSAsias He #include "kvm/ioport.h" 5*4f56d42cSAsias He #include "kvm/types.h" 6*4f56d42cSAsias He #include "kvm/mutex.h" 7*4f56d42cSAsias He #include "kvm/util.h" 8*4f56d42cSAsias He #include "kvm/kvm.h" 9*4f56d42cSAsias He #include "kvm/pci.h" 10*4f56d42cSAsias He 11*4f56d42cSAsias He #include <linux/virtio_net.h> 12*4f56d42cSAsias He #include <linux/if_tun.h> 13*4f56d42cSAsias He #include <net/if.h> 14*4f56d42cSAsias He #include <sys/ioctl.h> 15*4f56d42cSAsias He #include <assert.h> 16*4f56d42cSAsias He #include <fcntl.h> 17*4f56d42cSAsias He 18*4f56d42cSAsias He #define VIRTIO_NET_IRQ 14 19*4f56d42cSAsias He #define VIRTIO_NET_QUEUE_SIZE 128 20*4f56d42cSAsias He #define VIRTIO_NET_NUM_QUEUES 2 21*4f56d42cSAsias He #define VIRTIO_NET_RX_QUEUE 0 22*4f56d42cSAsias He #define VIRTIO_NET_TX_QUEUE 1 23*4f56d42cSAsias He #define PCI_VIRTIO_NET_DEVNUM 3 24*4f56d42cSAsias He 25*4f56d42cSAsias He struct net_device { 26*4f56d42cSAsias He pthread_mutex_t mutex; 27*4f56d42cSAsias He 28*4f56d42cSAsias He struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 29*4f56d42cSAsias He struct virtio_net_config net_config; 30*4f56d42cSAsias He uint32_t host_features; 31*4f56d42cSAsias He uint32_t guest_features; 32*4f56d42cSAsias He uint16_t config_vector; 33*4f56d42cSAsias He uint8_t status; 34*4f56d42cSAsias He uint16_t queue_selector; 35*4f56d42cSAsias He 36*4f56d42cSAsias He pthread_t io_rx_thread; 37*4f56d42cSAsias He pthread_mutex_t io_rx_mutex; 38*4f56d42cSAsias He pthread_cond_t io_rx_cond; 39*4f56d42cSAsias He 40*4f56d42cSAsias He pthread_t io_tx_thread; 41*4f56d42cSAsias He pthread_mutex_t io_tx_mutex; 42*4f56d42cSAsias He pthread_cond_t io_tx_cond; 43*4f56d42cSAsias He 44*4f56d42cSAsias He int tap_fd; 45*4f56d42cSAsias He char tap_name[IFNAMSIZ]; 46*4f56d42cSAsias He }; 47*4f56d42cSAsias He 48*4f56d42cSAsias He static struct net_device net_device = { 49*4f56d42cSAsias He .mutex = PTHREAD_MUTEX_INITIALIZER, 50*4f56d42cSAsias He 51*4f56d42cSAsias He .net_config = { 52*4f56d42cSAsias He .mac = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}, 53*4f56d42cSAsias He .status = VIRTIO_NET_S_LINK_UP, 54*4f56d42cSAsias He }, 55*4f56d42cSAsias He 56*4f56d42cSAsias He .host_features = 1UL << VIRTIO_NET_F_MAC, 57*4f56d42cSAsias He }; 58*4f56d42cSAsias He 59*4f56d42cSAsias He static void *virtio_net_rx_thread(void *p) 60*4f56d42cSAsias He { 61*4f56d42cSAsias He struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 62*4f56d42cSAsias He struct virt_queue *vq; 63*4f56d42cSAsias He struct kvm *self; 64*4f56d42cSAsias He uint16_t out, in; 65*4f56d42cSAsias He uint16_t head; 66*4f56d42cSAsias He int len; 67*4f56d42cSAsias He 68*4f56d42cSAsias He self = p; 69*4f56d42cSAsias He vq = &net_device.vqs[VIRTIO_NET_RX_QUEUE]; 70*4f56d42cSAsias He 71*4f56d42cSAsias He while (1) { 72*4f56d42cSAsias He mutex_lock(&net_device.io_rx_mutex); 73*4f56d42cSAsias He if (!virt_queue__available(vq)) 74*4f56d42cSAsias He pthread_cond_wait(&net_device.io_rx_cond, &net_device.io_rx_mutex); 75*4f56d42cSAsias He mutex_unlock(&net_device.io_rx_mutex); 76*4f56d42cSAsias He 77*4f56d42cSAsias He while (virt_queue__available(vq)) { 78*4f56d42cSAsias He head = virt_queue__get_iov(vq, iov, &out, &in, self); 79*4f56d42cSAsias He 80*4f56d42cSAsias He /* We do not specify GSO or CSUM features, So we can ignore virtio_net_hdr */ 81*4f56d42cSAsias He len = readv(net_device.tap_fd, iov + 1, in - 1); 82*4f56d42cSAsias He 83*4f56d42cSAsias He /* However, We have to tell guest we have write the virtio_net_hdr */ 84*4f56d42cSAsias He virt_queue__set_used_elem(vq, head, sizeof(struct virtio_net_hdr) + len); 85*4f56d42cSAsias He 86*4f56d42cSAsias He /* We should interrupt guest right now, otherwise latency is huge. */ 87*4f56d42cSAsias He kvm__irq_line(self, VIRTIO_NET_IRQ, 1); 88*4f56d42cSAsias He } 89*4f56d42cSAsias He 90*4f56d42cSAsias He } 91*4f56d42cSAsias He 92*4f56d42cSAsias He pthread_exit(NULL); 93*4f56d42cSAsias He return NULL; 94*4f56d42cSAsias He 95*4f56d42cSAsias He } 96*4f56d42cSAsias He 97*4f56d42cSAsias He static void *virtio_net_tx_thread(void *p) 98*4f56d42cSAsias He { 99*4f56d42cSAsias He struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 100*4f56d42cSAsias He struct virt_queue *vq; 101*4f56d42cSAsias He struct kvm *self; 102*4f56d42cSAsias He uint16_t out, in; 103*4f56d42cSAsias He uint16_t head; 104*4f56d42cSAsias He int len; 105*4f56d42cSAsias He 106*4f56d42cSAsias He self = p; 107*4f56d42cSAsias He vq = &net_device.vqs[VIRTIO_NET_TX_QUEUE]; 108*4f56d42cSAsias He 109*4f56d42cSAsias He while (1) { 110*4f56d42cSAsias He mutex_lock(&net_device.io_tx_mutex); 111*4f56d42cSAsias He if (!virt_queue__available(vq)) 112*4f56d42cSAsias He pthread_cond_wait(&net_device.io_tx_cond, &net_device.io_tx_mutex); 113*4f56d42cSAsias He mutex_unlock(&net_device.io_tx_mutex); 114*4f56d42cSAsias He 115*4f56d42cSAsias He while (virt_queue__available(vq)) { 116*4f56d42cSAsias He head = virt_queue__get_iov(vq, iov, &out, &in, self); 117*4f56d42cSAsias He len = writev(net_device.tap_fd, iov + 1, out - 1); 118*4f56d42cSAsias He virt_queue__set_used_elem(vq, head, len); 119*4f56d42cSAsias He } 120*4f56d42cSAsias He 121*4f56d42cSAsias He kvm__irq_line(self, VIRTIO_NET_IRQ, 1); 122*4f56d42cSAsias He } 123*4f56d42cSAsias He 124*4f56d42cSAsias He pthread_exit(NULL); 125*4f56d42cSAsias He return NULL; 126*4f56d42cSAsias He 127*4f56d42cSAsias He } 128*4f56d42cSAsias He static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, uint32_t count) 129*4f56d42cSAsias He { 130*4f56d42cSAsias He uint8_t *config_space = (uint8_t *) &net_device.net_config; 131*4f56d42cSAsias He 132*4f56d42cSAsias He if (size != 1 || count != 1) 133*4f56d42cSAsias He return false; 134*4f56d42cSAsias He 135*4f56d42cSAsias He if ((offset - VIRTIO_PCI_CONFIG_NOMSI) > sizeof(struct virtio_net_config)) 136*4f56d42cSAsias He error("config offset is too big: %li", offset - VIRTIO_PCI_CONFIG_NOMSI); 137*4f56d42cSAsias He 138*4f56d42cSAsias He ioport__write8(data, config_space[offset - VIRTIO_PCI_CONFIG_NOMSI]); 139*4f56d42cSAsias He 140*4f56d42cSAsias He return true; 141*4f56d42cSAsias He } 142*4f56d42cSAsias He 143*4f56d42cSAsias He static bool virtio_net_pci_io_in(struct kvm *self, uint16_t port, void *data, int size, uint32_t count) 144*4f56d42cSAsias He { 145*4f56d42cSAsias He unsigned long offset = port - IOPORT_VIRTIO_NET; 146*4f56d42cSAsias He bool ret = true; 147*4f56d42cSAsias He 148*4f56d42cSAsias He mutex_lock(&net_device.mutex); 149*4f56d42cSAsias He 150*4f56d42cSAsias He switch (offset) { 151*4f56d42cSAsias He case VIRTIO_PCI_HOST_FEATURES: 152*4f56d42cSAsias He ioport__write32(data, net_device.host_features); 153*4f56d42cSAsias He break; 154*4f56d42cSAsias He case VIRTIO_PCI_GUEST_FEATURES: 155*4f56d42cSAsias He ret = false; 156*4f56d42cSAsias He break; 157*4f56d42cSAsias He case VIRTIO_PCI_QUEUE_PFN: 158*4f56d42cSAsias He ioport__write32(data, net_device.vqs[net_device.queue_selector].pfn); 159*4f56d42cSAsias He break; 160*4f56d42cSAsias He case VIRTIO_PCI_QUEUE_NUM: 161*4f56d42cSAsias He ioport__write16(data, VIRTIO_NET_QUEUE_SIZE); 162*4f56d42cSAsias He break; 163*4f56d42cSAsias He case VIRTIO_PCI_QUEUE_SEL: 164*4f56d42cSAsias He case VIRTIO_PCI_QUEUE_NOTIFY: 165*4f56d42cSAsias He ret = false; 166*4f56d42cSAsias He break; 167*4f56d42cSAsias He case VIRTIO_PCI_STATUS: 168*4f56d42cSAsias He ioport__write8(data, net_device.status); 169*4f56d42cSAsias He break; 170*4f56d42cSAsias He case VIRTIO_PCI_ISR: 171*4f56d42cSAsias He ioport__write8(data, 0x1); 172*4f56d42cSAsias He kvm__irq_line(self, VIRTIO_NET_IRQ, 0); 173*4f56d42cSAsias He break; 174*4f56d42cSAsias He case VIRTIO_MSI_CONFIG_VECTOR: 175*4f56d42cSAsias He ioport__write16(data, net_device.config_vector); 176*4f56d42cSAsias He break; 177*4f56d42cSAsias He default: 178*4f56d42cSAsias He ret = virtio_net_pci_io_device_specific_in(data, offset, size, count); 179*4f56d42cSAsias He }; 180*4f56d42cSAsias He 181*4f56d42cSAsias He mutex_unlock(&net_device.mutex); 182*4f56d42cSAsias He 183*4f56d42cSAsias He return ret; 184*4f56d42cSAsias He } 185*4f56d42cSAsias He 186*4f56d42cSAsias He static void virtio_net_handle_callback(struct kvm *self, uint16_t queue_index) 187*4f56d42cSAsias He { 188*4f56d42cSAsias He if (queue_index == VIRTIO_NET_TX_QUEUE) { 189*4f56d42cSAsias He 190*4f56d42cSAsias He mutex_lock(&net_device.io_tx_mutex); 191*4f56d42cSAsias He pthread_cond_signal(&net_device.io_tx_cond); 192*4f56d42cSAsias He mutex_unlock(&net_device.io_tx_mutex); 193*4f56d42cSAsias He 194*4f56d42cSAsias He } else if (queue_index == VIRTIO_NET_RX_QUEUE) { 195*4f56d42cSAsias He 196*4f56d42cSAsias He mutex_lock(&net_device.io_rx_mutex); 197*4f56d42cSAsias He pthread_cond_signal(&net_device.io_rx_cond); 198*4f56d42cSAsias He mutex_unlock(&net_device.io_rx_mutex); 199*4f56d42cSAsias He 200*4f56d42cSAsias He } 201*4f56d42cSAsias He } 202*4f56d42cSAsias He 203*4f56d42cSAsias He static bool virtio_net_pci_io_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count) 204*4f56d42cSAsias He { 205*4f56d42cSAsias He unsigned long offset = port - IOPORT_VIRTIO_NET; 206*4f56d42cSAsias He bool ret = true; 207*4f56d42cSAsias He 208*4f56d42cSAsias He mutex_lock(&net_device.mutex); 209*4f56d42cSAsias He 210*4f56d42cSAsias He switch (offset) { 211*4f56d42cSAsias He case VIRTIO_PCI_GUEST_FEATURES: 212*4f56d42cSAsias He net_device.guest_features = ioport__read32(data); 213*4f56d42cSAsias He break; 214*4f56d42cSAsias He case VIRTIO_PCI_QUEUE_PFN: { 215*4f56d42cSAsias He struct virt_queue *queue; 216*4f56d42cSAsias He void *p; 217*4f56d42cSAsias He 218*4f56d42cSAsias He assert(net_device.queue_selector < VIRTIO_NET_NUM_QUEUES); 219*4f56d42cSAsias He 220*4f56d42cSAsias He queue = &net_device.vqs[net_device.queue_selector]; 221*4f56d42cSAsias He queue->pfn = ioport__read32(data); 222*4f56d42cSAsias He p = guest_flat_to_host(self, queue->pfn << 12); 223*4f56d42cSAsias He 224*4f56d42cSAsias He vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, 4096); 225*4f56d42cSAsias He 226*4f56d42cSAsias He break; 227*4f56d42cSAsias He } 228*4f56d42cSAsias He case VIRTIO_PCI_QUEUE_SEL: 229*4f56d42cSAsias He net_device.queue_selector = ioport__read16(data); 230*4f56d42cSAsias He break; 231*4f56d42cSAsias He case VIRTIO_PCI_QUEUE_NOTIFY: { 232*4f56d42cSAsias He uint16_t queue_index; 233*4f56d42cSAsias He queue_index = ioport__read16(data); 234*4f56d42cSAsias He virtio_net_handle_callback(self, queue_index); 235*4f56d42cSAsias He break; 236*4f56d42cSAsias He } 237*4f56d42cSAsias He case VIRTIO_PCI_STATUS: 238*4f56d42cSAsias He net_device.status = ioport__read8(data); 239*4f56d42cSAsias He break; 240*4f56d42cSAsias He case VIRTIO_MSI_CONFIG_VECTOR: 241*4f56d42cSAsias He net_device.config_vector = VIRTIO_MSI_NO_VECTOR; 242*4f56d42cSAsias He break; 243*4f56d42cSAsias He case VIRTIO_MSI_QUEUE_VECTOR: 244*4f56d42cSAsias He break; 245*4f56d42cSAsias He default: 246*4f56d42cSAsias He ret = false; 247*4f56d42cSAsias He }; 248*4f56d42cSAsias He 249*4f56d42cSAsias He mutex_unlock(&net_device.mutex); 250*4f56d42cSAsias He return ret; 251*4f56d42cSAsias He } 252*4f56d42cSAsias He 253*4f56d42cSAsias He static struct ioport_operations virtio_net_io_ops = { 254*4f56d42cSAsias He .io_in = virtio_net_pci_io_in, 255*4f56d42cSAsias He .io_out = virtio_net_pci_io_out, 256*4f56d42cSAsias He }; 257*4f56d42cSAsias He 258*4f56d42cSAsias He #define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4 259*4f56d42cSAsias He #define PCI_DEVICE_ID_VIRTIO_NET 0x1000 260*4f56d42cSAsias He #define PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET 0x1af4 261*4f56d42cSAsias He #define PCI_SUBSYSTEM_ID_VIRTIO_NET 0x0001 262*4f56d42cSAsias He 263*4f56d42cSAsias He static struct pci_device_header virtio_net_pci_device = { 264*4f56d42cSAsias He .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 265*4f56d42cSAsias He .device_id = PCI_DEVICE_ID_VIRTIO_NET, 266*4f56d42cSAsias He .header_type = PCI_HEADER_TYPE_NORMAL, 267*4f56d42cSAsias He .revision_id = 0, 268*4f56d42cSAsias He .class = 0x020000, 269*4f56d42cSAsias He .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 270*4f56d42cSAsias He .subsys_id = PCI_SUBSYSTEM_ID_VIRTIO_NET, 271*4f56d42cSAsias He .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO, 272*4f56d42cSAsias He .irq_pin = 3, 273*4f56d42cSAsias He .irq_line = VIRTIO_NET_IRQ, 274*4f56d42cSAsias He }; 275*4f56d42cSAsias He 276*4f56d42cSAsias He static void virtio_net__tap_init(void) 277*4f56d42cSAsias He { 278*4f56d42cSAsias He struct ifreq ifr; 279*4f56d42cSAsias He 280*4f56d42cSAsias He net_device.tap_fd = open("/dev/net/tun", O_RDWR); 281*4f56d42cSAsias He if (net_device.tap_fd < 0) 282*4f56d42cSAsias He die("Unable to open /dev/net/tun\n"); 283*4f56d42cSAsias He 284*4f56d42cSAsias He memset(&ifr, 0, sizeof(ifr)); 285*4f56d42cSAsias He ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 286*4f56d42cSAsias He 287*4f56d42cSAsias He if (ioctl(net_device.tap_fd, TUNSETIFF, &ifr) < 0) 288*4f56d42cSAsias He die("Config tap device error. Are you root?"); 289*4f56d42cSAsias He 290*4f56d42cSAsias He strncpy(net_device.tap_name, ifr.ifr_name, sizeof(net_device.tap_name)); 291*4f56d42cSAsias He 292*4f56d42cSAsias He ioctl(net_device.tap_fd, TUNSETNOCSUM, 1); 293*4f56d42cSAsias He 294*4f56d42cSAsias He /*FIXME: Remove this after user can specify ip address and netmask*/ 295*4f56d42cSAsias He if (system("ifconfig tap0 192.168.33.2") < 0) 296*4f56d42cSAsias He warning("Can not set ip address on tap0"); 297*4f56d42cSAsias He } 298*4f56d42cSAsias He 299*4f56d42cSAsias He static void virtio_net__io_thread_init(struct kvm *self) 300*4f56d42cSAsias He { 301*4f56d42cSAsias He pthread_mutex_init(&net_device.io_rx_mutex, NULL); 302*4f56d42cSAsias He pthread_cond_init(&net_device.io_tx_cond, NULL); 303*4f56d42cSAsias He 304*4f56d42cSAsias He pthread_mutex_init(&net_device.io_rx_mutex, NULL); 305*4f56d42cSAsias He pthread_cond_init(&net_device.io_tx_cond, NULL); 306*4f56d42cSAsias He 307*4f56d42cSAsias He pthread_create(&net_device.io_rx_thread, NULL, virtio_net_rx_thread, (void *)self); 308*4f56d42cSAsias He pthread_create(&net_device.io_tx_thread, NULL, virtio_net_tx_thread, (void *)self); 309*4f56d42cSAsias He } 310*4f56d42cSAsias He 311*4f56d42cSAsias He void virtio_net__init(struct kvm *self) 312*4f56d42cSAsias He { 313*4f56d42cSAsias He pci__register(&virtio_net_pci_device, PCI_VIRTIO_NET_DEVNUM); 314*4f56d42cSAsias He ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE); 315*4f56d42cSAsias He 316*4f56d42cSAsias He virtio_net__tap_init(); 317*4f56d42cSAsias He virtio_net__io_thread_init(self); 318*4f56d42cSAsias He } 319