1 #include "kvm/virtio-net.h" 2 #include "kvm/virtio-pci-dev.h" 3 #include "kvm/virtio.h" 4 #include "kvm/ioport.h" 5 #include "kvm/types.h" 6 #include "kvm/mutex.h" 7 #include "kvm/util.h" 8 #include "kvm/kvm.h" 9 #include "kvm/pci.h" 10 #include "kvm/irq.h" 11 12 #include <linux/virtio_net.h> 13 #include <linux/if_tun.h> 14 15 #include <arpa/inet.h> 16 #include <net/if.h> 17 18 #include <unistd.h> 19 #include <assert.h> 20 #include <fcntl.h> 21 22 #include <sys/socket.h> 23 #include <sys/ioctl.h> 24 #include <sys/types.h> 25 #include <sys/wait.h> 26 27 #define VIRTIO_NET_QUEUE_SIZE 128 28 #define VIRTIO_NET_NUM_QUEUES 2 29 #define VIRTIO_NET_RX_QUEUE 0 30 #define VIRTIO_NET_TX_QUEUE 1 31 32 static struct pci_device_header pci_header = { 33 .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 34 .device_id = PCI_DEVICE_ID_VIRTIO_NET, 35 .header_type = PCI_HEADER_TYPE_NORMAL, 36 .revision_id = 0, 37 .class = 0x020000, 38 .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 39 .subsys_id = VIRTIO_ID_NET, 40 .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO, 41 }; 42 43 struct net_device { 44 pthread_mutex_t mutex; 45 46 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 47 struct virtio_net_config config; 48 u32 host_features; 49 u32 guest_features; 50 u16 config_vector; 51 u8 status; 52 u8 isr; 53 u16 queue_selector; 54 55 pthread_t io_rx_thread; 56 pthread_mutex_t io_rx_lock; 57 pthread_cond_t io_rx_cond; 58 59 pthread_t io_tx_thread; 60 pthread_mutex_t io_tx_lock; 61 pthread_cond_t io_tx_cond; 62 63 int tap_fd; 64 char tap_name[IFNAMSIZ]; 65 }; 66 67 static struct net_device ndev = { 68 .mutex = PTHREAD_MUTEX_INITIALIZER, 69 70 .config = { 71 .mac = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 }, 72 .status = VIRTIO_NET_S_LINK_UP, 73 }, 74 .host_features = 1UL << VIRTIO_NET_F_MAC 75 | 1UL << VIRTIO_NET_F_CSUM 76 | 1UL << VIRTIO_NET_F_HOST_UFO 77 | 1UL << VIRTIO_NET_F_HOST_TSO4 78 | 1UL << VIRTIO_NET_F_HOST_TSO6 79 | 1UL << VIRTIO_NET_F_GUEST_UFO 80 | 1UL << VIRTIO_NET_F_GUEST_TSO4 81 | 1UL << VIRTIO_NET_F_GUEST_TSO6, 82 }; 83 84 static void *virtio_net_rx_thread(void *p) 85 { 86 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 87 struct virt_queue *vq; 88 struct kvm *kvm; 89 u16 out, in; 90 u16 head; 91 int len; 92 93 kvm = p; 94 vq = &ndev.vqs[VIRTIO_NET_RX_QUEUE]; 95 96 while (1) { 97 mutex_lock(&ndev.io_rx_lock); 98 if (!virt_queue__available(vq)) 99 pthread_cond_wait(&ndev.io_rx_cond, &ndev.io_rx_lock); 100 mutex_unlock(&ndev.io_rx_lock); 101 102 while (virt_queue__available(vq)) { 103 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 104 len = readv(ndev.tap_fd, iov, in); 105 virt_queue__set_used_elem(vq, head, len); 106 107 /* We should interrupt guest right now, otherwise latency is huge. */ 108 virt_queue__trigger_irq(vq, pci_header.irq_line, &ndev.isr, kvm); 109 } 110 111 } 112 113 pthread_exit(NULL); 114 return NULL; 115 116 } 117 118 static void *virtio_net_tx_thread(void *p) 119 { 120 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 121 struct virt_queue *vq; 122 struct kvm *kvm; 123 u16 out, in; 124 u16 head; 125 int len; 126 127 kvm = p; 128 vq = &ndev.vqs[VIRTIO_NET_TX_QUEUE]; 129 130 while (1) { 131 mutex_lock(&ndev.io_tx_lock); 132 if (!virt_queue__available(vq)) 133 pthread_cond_wait(&ndev.io_tx_cond, &ndev.io_tx_lock); 134 mutex_unlock(&ndev.io_tx_lock); 135 136 while (virt_queue__available(vq)) { 137 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 138 len = writev(ndev.tap_fd, iov, out); 139 virt_queue__set_used_elem(vq, head, len); 140 } 141 142 virt_queue__trigger_irq(vq, pci_header.irq_line, &ndev.isr, kvm); 143 144 } 145 146 pthread_exit(NULL); 147 148 return NULL; 149 150 } 151 152 static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, u32 count) 153 { 154 u8 *config_space = (u8 *)&ndev.config; 155 156 if (size != 1 || count != 1) 157 return false; 158 159 if ((offset - VIRTIO_MSI_CONFIG_VECTOR) > sizeof(struct virtio_net_config)) 160 error("config offset is too big: %li", offset - VIRTIO_MSI_CONFIG_VECTOR); 161 162 ioport__write8(data, config_space[offset - VIRTIO_MSI_CONFIG_VECTOR]); 163 164 return true; 165 } 166 167 static bool virtio_net_pci_io_in(struct kvm *kvm, u16 port, void *data, int size, u32 count) 168 { 169 unsigned long offset = port - IOPORT_VIRTIO_NET; 170 bool ret = true; 171 172 mutex_lock(&ndev.mutex); 173 174 switch (offset) { 175 case VIRTIO_PCI_HOST_FEATURES: 176 ioport__write32(data, ndev.host_features); 177 break; 178 case VIRTIO_PCI_GUEST_FEATURES: 179 ret = false; 180 break; 181 case VIRTIO_PCI_QUEUE_PFN: 182 ioport__write32(data, ndev.vqs[ndev.queue_selector].pfn); 183 break; 184 case VIRTIO_PCI_QUEUE_NUM: 185 ioport__write16(data, VIRTIO_NET_QUEUE_SIZE); 186 break; 187 case VIRTIO_PCI_QUEUE_SEL: 188 case VIRTIO_PCI_QUEUE_NOTIFY: 189 ret = false; 190 break; 191 case VIRTIO_PCI_STATUS: 192 ioport__write8(data, ndev.status); 193 break; 194 case VIRTIO_PCI_ISR: 195 ioport__write8(data, ndev.isr); 196 kvm__irq_line(kvm, pci_header.irq_line, VIRTIO_IRQ_LOW); 197 ndev.isr = VIRTIO_IRQ_LOW; 198 break; 199 case VIRTIO_MSI_CONFIG_VECTOR: 200 ioport__write16(data, ndev.config_vector); 201 break; 202 default: 203 ret = virtio_net_pci_io_device_specific_in(data, offset, size, count); 204 }; 205 206 mutex_unlock(&ndev.mutex); 207 208 return ret; 209 } 210 211 static void virtio_net_handle_callback(struct kvm *kvm, u16 queue_index) 212 { 213 switch (queue_index) { 214 case VIRTIO_NET_TX_QUEUE: { 215 mutex_lock(&ndev.io_tx_lock); 216 pthread_cond_signal(&ndev.io_tx_cond); 217 mutex_unlock(&ndev.io_tx_lock); 218 break; 219 } 220 case VIRTIO_NET_RX_QUEUE: { 221 mutex_lock(&ndev.io_rx_lock); 222 pthread_cond_signal(&ndev.io_rx_cond); 223 mutex_unlock(&ndev.io_rx_lock); 224 break; 225 } 226 default: 227 warning("Unknown queue index %u", queue_index); 228 } 229 } 230 231 static bool virtio_net_pci_io_out(struct kvm *kvm, u16 port, void *data, int size, u32 count) 232 { 233 unsigned long offset = port - IOPORT_VIRTIO_NET; 234 bool ret = true; 235 236 mutex_lock(&ndev.mutex); 237 238 switch (offset) { 239 case VIRTIO_PCI_GUEST_FEATURES: 240 ndev.guest_features = ioport__read32(data); 241 break; 242 case VIRTIO_PCI_QUEUE_PFN: { 243 struct virt_queue *queue; 244 void *p; 245 246 assert(ndev.queue_selector < VIRTIO_NET_NUM_QUEUES); 247 248 queue = &ndev.vqs[ndev.queue_selector]; 249 queue->pfn = ioport__read32(data); 250 p = guest_pfn_to_host(kvm, queue->pfn); 251 252 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN); 253 254 break; 255 } 256 case VIRTIO_PCI_QUEUE_SEL: 257 ndev.queue_selector = ioport__read16(data); 258 break; 259 case VIRTIO_PCI_QUEUE_NOTIFY: { 260 u16 queue_index; 261 262 queue_index = ioport__read16(data); 263 virtio_net_handle_callback(kvm, queue_index); 264 break; 265 } 266 case VIRTIO_PCI_STATUS: 267 ndev.status = ioport__read8(data); 268 break; 269 case VIRTIO_MSI_CONFIG_VECTOR: 270 ndev.config_vector = VIRTIO_MSI_NO_VECTOR; 271 break; 272 case VIRTIO_MSI_QUEUE_VECTOR: 273 break; 274 default: 275 ret = false; 276 }; 277 278 mutex_unlock(&ndev.mutex); 279 280 return ret; 281 } 282 283 static struct ioport_operations virtio_net_io_ops = { 284 .io_in = virtio_net_pci_io_in, 285 .io_out = virtio_net_pci_io_out, 286 }; 287 288 static bool virtio_net__tap_init(const struct virtio_net_parameters *params) 289 { 290 int sock = socket(AF_INET, SOCK_STREAM, 0); 291 int i, pid, status, offload, hdr_len; 292 struct sockaddr_in sin = {0}; 293 struct ifreq ifr; 294 295 for (i = 0 ; i < 6 ; i++) 296 ndev.config.mac[i] = params->guest_mac[i]; 297 298 ndev.tap_fd = open("/dev/net/tun", O_RDWR); 299 if (ndev.tap_fd < 0) { 300 warning("Unable to open /dev/net/tun"); 301 goto fail; 302 } 303 304 memset(&ifr, 0, sizeof(ifr)); 305 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 306 if (ioctl(ndev.tap_fd, TUNSETIFF, &ifr) < 0) { 307 warning("Config tap device error. Are you root?"); 308 goto fail; 309 } 310 311 strncpy(ndev.tap_name, ifr.ifr_name, sizeof(ndev.tap_name)); 312 313 if (ioctl(ndev.tap_fd, TUNSETNOCSUM, 1) < 0) { 314 warning("Config tap device TUNSETNOCSUM error"); 315 goto fail; 316 } 317 318 hdr_len = sizeof(struct virtio_net_hdr); 319 if (ioctl(ndev.tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) { 320 warning("Config tap device TUNSETVNETHDRSZ error"); 321 goto fail; 322 } 323 324 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 325 if (ioctl(ndev.tap_fd, TUNSETOFFLOAD, offload) < 0) { 326 warning("Config tap device TUNSETOFFLOAD error"); 327 goto fail; 328 } 329 330 if (strcmp(params->script, "none")) { 331 pid = fork(); 332 if (pid == 0) { 333 execl(params->script, params->script, ndev.tap_name, NULL); 334 _exit(1); 335 } else { 336 waitpid(pid, &status, 0); 337 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 338 warning("Fail to setup tap by %s", params->script); 339 goto fail; 340 } 341 } 342 } else { 343 memset(&ifr, 0, sizeof(ifr)); 344 strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name)); 345 sin.sin_addr.s_addr = inet_addr(params->host_ip); 346 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 347 ifr.ifr_addr.sa_family = AF_INET; 348 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 349 warning("Could not set ip address on tap device"); 350 goto fail; 351 } 352 } 353 354 memset(&ifr, 0, sizeof(ifr)); 355 strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name)); 356 ioctl(sock, SIOCGIFFLAGS, &ifr); 357 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 358 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 359 warning("Could not bring tap device up"); 360 361 close(sock); 362 363 return 1; 364 365 fail: 366 if (sock >= 0) 367 close(sock); 368 if (ndev.tap_fd >= 0) 369 close(ndev.tap_fd); 370 371 return 0; 372 } 373 374 static void virtio_net__io_thread_init(struct kvm *kvm) 375 { 376 pthread_mutex_init(&ndev.io_rx_lock, NULL); 377 pthread_cond_init(&ndev.io_tx_cond, NULL); 378 379 pthread_mutex_init(&ndev.io_rx_lock, NULL); 380 pthread_cond_init(&ndev.io_tx_cond, NULL); 381 382 pthread_create(&ndev.io_rx_thread, NULL, virtio_net_rx_thread, (void *)kvm); 383 pthread_create(&ndev.io_tx_thread, NULL, virtio_net_tx_thread, (void *)kvm); 384 } 385 386 void virtio_net__init(const struct virtio_net_parameters *params) 387 { 388 if (virtio_net__tap_init(params)) { 389 u8 dev, line, pin; 390 391 if (irq__register_device(VIRTIO_ID_NET, &dev, &pin, &line) < 0) 392 return; 393 394 pci_header.irq_pin = pin; 395 pci_header.irq_line = line; 396 pci__register(&pci_header, dev); 397 ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE); 398 399 virtio_net__io_thread_init(params->kvm); 400 } 401 } 402