1 #include "kvm/virtio-pci-dev.h" 2 #include "kvm/virtio-net.h" 3 #include "kvm/virtio.h" 4 #include "kvm/ioport.h" 5 #include "kvm/types.h" 6 #include "kvm/mutex.h" 7 #include "kvm/util.h" 8 #include "kvm/kvm.h" 9 #include "kvm/pci.h" 10 #include "kvm/irq.h" 11 #include "kvm/uip.h" 12 #include "kvm/ioeventfd.h" 13 #include "kvm/guest_compat.h" 14 15 #include <linux/virtio_net.h> 16 #include <linux/if_tun.h> 17 18 #include <arpa/inet.h> 19 #include <net/if.h> 20 21 #include <unistd.h> 22 #include <assert.h> 23 #include <fcntl.h> 24 25 #include <sys/socket.h> 26 #include <sys/ioctl.h> 27 #include <sys/types.h> 28 #include <sys/wait.h> 29 30 #define VIRTIO_NET_QUEUE_SIZE 128 31 #define VIRTIO_NET_NUM_QUEUES 2 32 #define VIRTIO_NET_RX_QUEUE 0 33 #define VIRTIO_NET_TX_QUEUE 1 34 35 static struct pci_device_header pci_header = { 36 .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 37 .device_id = PCI_DEVICE_ID_VIRTIO_NET, 38 .header_type = PCI_HEADER_TYPE_NORMAL, 39 .revision_id = 0, 40 .class = 0x020000, 41 .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 42 .subsys_id = VIRTIO_ID_NET, 43 }; 44 45 struct net_dev; 46 47 struct net_dev_operations { 48 int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); 49 int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); 50 }; 51 52 struct net_dev { 53 pthread_mutex_t mutex; 54 55 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 56 struct virtio_net_config config; 57 u32 host_features; 58 u32 guest_features; 59 u16 config_vector; 60 u8 status; 61 u8 isr; 62 u16 queue_selector; 63 u16 base_addr; 64 u32 vq_vector[VIRTIO_NET_NUM_QUEUES]; 65 u32 gsis[VIRTIO_NET_NUM_QUEUES]; 66 u32 msix_io_block; 67 int compat_id; 68 bool msix_enabled; 69 70 pthread_t io_rx_thread; 71 pthread_mutex_t io_rx_lock; 72 pthread_cond_t io_rx_cond; 73 74 pthread_t io_tx_thread; 75 pthread_mutex_t io_tx_lock; 76 pthread_cond_t io_tx_cond; 77 78 int tap_fd; 79 char tap_name[IFNAMSIZ]; 80 81 int mode; 82 83 struct uip_info info; 84 struct net_dev_operations *ops; 85 }; 86 87 static struct net_dev ndev = { 88 .mutex = PTHREAD_MUTEX_INITIALIZER, 89 90 .config = { 91 .status = VIRTIO_NET_S_LINK_UP, 92 }, 93 .host_features = 1UL << VIRTIO_NET_F_MAC 94 | 1UL << VIRTIO_NET_F_CSUM 95 | 1UL << VIRTIO_NET_F_HOST_UFO 96 | 1UL << VIRTIO_NET_F_HOST_TSO4 97 | 1UL << VIRTIO_NET_F_HOST_TSO6 98 | 1UL << VIRTIO_NET_F_GUEST_UFO 99 | 1UL << VIRTIO_NET_F_GUEST_TSO4 100 | 1UL << VIRTIO_NET_F_GUEST_TSO6, 101 .info = { 102 .buf_nr = 20, 103 } 104 }; 105 106 static void *virtio_net_rx_thread(void *p) 107 { 108 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 109 struct virt_queue *vq; 110 struct kvm *kvm; 111 u16 out, in; 112 u16 head; 113 int len; 114 115 kvm = p; 116 vq = &ndev.vqs[VIRTIO_NET_RX_QUEUE]; 117 118 while (1) { 119 120 mutex_lock(&ndev.io_rx_lock); 121 if (!virt_queue__available(vq)) 122 pthread_cond_wait(&ndev.io_rx_cond, &ndev.io_rx_lock); 123 mutex_unlock(&ndev.io_rx_lock); 124 125 while (virt_queue__available(vq)) { 126 127 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 128 129 len = ndev.ops->rx(iov, in, &ndev); 130 131 virt_queue__set_used_elem(vq, head, len); 132 133 /* We should interrupt guest right now, otherwise latency is huge. 
			kvm__irq_trigger(kvm, ndev.gsis[VIRTIO_NET_RX_QUEUE]);
		}

	}

	pthread_exit(NULL);
	return NULL;

}

static void *virtio_net_tx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	u16 out, in;
	u16 head;
	int len;

	kvm = p;
	vq = &ndev.vqs[VIRTIO_NET_TX_QUEUE];

	while (1) {
		mutex_lock(&ndev.io_tx_lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev.io_tx_cond, &ndev.io_tx_lock);
		mutex_unlock(&ndev.io_tx_lock);

		while (virt_queue__available(vq)) {

			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);

			len = ndev.ops->tx(iov, out, &ndev);

			virt_queue__set_used_elem(vq, head, len);
		}

		kvm__irq_trigger(kvm, ndev.gsis[VIRTIO_NET_TX_QUEUE]);
	}

	pthread_exit(NULL);

	return NULL;

}

static bool virtio_net_pci_io_device_specific_out(struct kvm *kvm, void *data,
						  unsigned long offset, int size)
{
	u8 *config_space = (u8 *)&ndev.config;
	int type;
	u32 config_offset;

	type = virtio__get_dev_specific_field(offset - 20, ndev.msix_enabled, 0, &config_offset);
	if (type == VIRTIO_PCI_O_MSIX) {
		if (offset == VIRTIO_MSI_CONFIG_VECTOR) {
			ndev.config_vector = ioport__read16(data);
		} else {
			u32 gsi;
			u32 vec;

			vec = ndev.vq_vector[ndev.queue_selector] = ioport__read16(data);

			gsi = irq__add_msix_route(kvm,
						  pci_header.msix.table[vec].low,
						  pci_header.msix.table[vec].high,
						  pci_header.msix.table[vec].data);

			ndev.gsis[ndev.queue_selector] = gsi;
		}
		return true;
	}

	if (size != 1)
		return false;

	if (config_offset >= sizeof(struct virtio_net_config))
		pr_error("config offset is too big: %u", config_offset);

	config_space[config_offset] = *(u8 *)data;

	return true;
}

static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size)
{
	u8 *config_space = (u8 *)&ndev.config;
	int type;
	u32 config_offset;

	type = virtio__get_dev_specific_field(offset - 20, ndev.msix_enabled, 0, &config_offset);
	if (type == VIRTIO_PCI_O_MSIX) {
		if (offset == VIRTIO_MSI_CONFIG_VECTOR)
			ioport__write16(data, ndev.config_vector);
		else
			ioport__write16(data, ndev.vq_vector[ndev.queue_selector]);

		return true;
	}

	if (size != 1)
		return false;

	if (config_offset >= sizeof(struct virtio_net_config))
		pr_error("config offset is too big: %u", config_offset);

	ioport__write8(data, config_space[config_offset]);

	return true;
}

static bool virtio_net_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size)
{
	unsigned long offset = port - ndev.base_addr;
	bool ret = true;

	mutex_lock(&ndev.mutex);

	switch (offset) {
	case VIRTIO_PCI_HOST_FEATURES:
		ioport__write32(data, ndev.host_features);
		break;
	case VIRTIO_PCI_GUEST_FEATURES:
		ret = false;
		break;
	case VIRTIO_PCI_QUEUE_PFN:
		ioport__write32(data, ndev.vqs[ndev.queue_selector].pfn);
		break;
	case VIRTIO_PCI_QUEUE_NUM:
		ioport__write16(data, VIRTIO_NET_QUEUE_SIZE);
		break;
	case VIRTIO_PCI_QUEUE_SEL:
	case VIRTIO_PCI_QUEUE_NOTIFY:
		ret = false;
		break;
	case VIRTIO_PCI_STATUS:
		ioport__write8(data, ndev.status);
		break;
	case VIRTIO_PCI_ISR:
		ioport__write8(data, ndev.isr);
		kvm__irq_line(kvm, pci_header.irq_line, VIRTIO_IRQ_LOW);
		ndev.isr = VIRTIO_IRQ_LOW;
		break;
	default:
		ret = virtio_net_pci_io_device_specific_in(data, offset, size);
	}

	mutex_unlock(&ndev.mutex);

	return ret;
}

static void virtio_net_handle_callback(struct kvm *kvm, u16 queue_index)
{
	switch (queue_index) {
	case VIRTIO_NET_TX_QUEUE:
		mutex_lock(&ndev.io_tx_lock);
		pthread_cond_signal(&ndev.io_tx_cond);
		mutex_unlock(&ndev.io_tx_lock);
		break;
	case VIRTIO_NET_RX_QUEUE:
		mutex_lock(&ndev.io_rx_lock);
		pthread_cond_signal(&ndev.io_rx_cond);
		mutex_unlock(&ndev.io_rx_lock);
		break;
	default:
		pr_warning("Unknown queue index %u", queue_index);
	}
}

static bool virtio_net_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size)
{
	unsigned long offset = port - ndev.base_addr;
	bool ret = true;

	mutex_lock(&ndev.mutex);

	switch (offset) {
	case VIRTIO_PCI_GUEST_FEATURES:
		ndev.guest_features = ioport__read32(data);
		break;
	case VIRTIO_PCI_QUEUE_PFN: {
		struct virt_queue *queue;
		void *p;

		assert(ndev.queue_selector < VIRTIO_NET_NUM_QUEUES);

		compat__remove_message(ndev.compat_id);

		queue = &ndev.vqs[ndev.queue_selector];
		queue->pfn = ioport__read32(data);
		p = guest_pfn_to_host(kvm, queue->pfn);

		vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN);

		break;
	}
	case VIRTIO_PCI_QUEUE_SEL:
		ndev.queue_selector = ioport__read16(data);
		break;
	case VIRTIO_PCI_QUEUE_NOTIFY: {
		u16 queue_index;

		queue_index = ioport__read16(data);
		virtio_net_handle_callback(kvm, queue_index);
		break;
	}
	case VIRTIO_PCI_STATUS:
		ndev.status = ioport__read8(data);
		break;
	default:
		ret = virtio_net_pci_io_device_specific_out(kvm, data, offset, size);
	}

	mutex_unlock(&ndev.mutex);

	return ret;
}

static void ioevent_callback(struct kvm *kvm, void *param)
{
	virtio_net_handle_callback(kvm, (u64)(long)param);
}

static struct ioport_operations virtio_net_io_ops = {
	.io_in = virtio_net_pci_io_in,
	.io_out = virtio_net_pci_io_out,
};

static void callback_mmio(u64 addr, u8 *data, u32 len, u8 is_write, void *ptr)
{
	void *table = pci_header.msix.table;
	if (is_write)
		memcpy(table + addr - ndev.msix_io_block, data, len);
	else
		memcpy(data, table + addr - ndev.msix_io_block, len);

	ndev.msix_enabled = 1;
}

static bool virtio_net__tap_init(const struct virtio_net_parameters *params)
{
	int sock = socket(AF_INET, SOCK_STREAM, 0);
	int pid, status, offload, hdr_len;
	struct sockaddr_in sin = {0};
	struct ifreq ifr;

	ndev.tap_fd = open("/dev/net/tun", O_RDWR);
	if (ndev.tap_fd < 0) {
		pr_warning("Unable to open /dev/net/tun");
		goto fail;
	}

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
	if (ioctl(ndev.tap_fd, TUNSETIFF, &ifr) < 0) {
		pr_warning("Config tap device error. Are you root?");
Are you root?"); 391 goto fail; 392 } 393 394 strncpy(ndev.tap_name, ifr.ifr_name, sizeof(ndev.tap_name)); 395 396 if (ioctl(ndev.tap_fd, TUNSETNOCSUM, 1) < 0) { 397 pr_warning("Config tap device TUNSETNOCSUM error"); 398 goto fail; 399 } 400 401 hdr_len = sizeof(struct virtio_net_hdr); 402 if (ioctl(ndev.tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) { 403 pr_warning("Config tap device TUNSETVNETHDRSZ error"); 404 } 405 406 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 407 if (ioctl(ndev.tap_fd, TUNSETOFFLOAD, offload) < 0) { 408 pr_warning("Config tap device TUNSETOFFLOAD error"); 409 goto fail; 410 } 411 412 if (strcmp(params->script, "none")) { 413 pid = fork(); 414 if (pid == 0) { 415 execl(params->script, params->script, ndev.tap_name, NULL); 416 _exit(1); 417 } else { 418 waitpid(pid, &status, 0); 419 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 420 pr_warning("Fail to setup tap by %s", params->script); 421 goto fail; 422 } 423 } 424 } else { 425 memset(&ifr, 0, sizeof(ifr)); 426 strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name)); 427 sin.sin_addr.s_addr = inet_addr(params->host_ip); 428 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 429 ifr.ifr_addr.sa_family = AF_INET; 430 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 431 pr_warning("Could not set ip address on tap device"); 432 goto fail; 433 } 434 } 435 436 memset(&ifr, 0, sizeof(ifr)); 437 strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name)); 438 ioctl(sock, SIOCGIFFLAGS, &ifr); 439 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 440 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 441 pr_warning("Could not bring tap device up"); 442 443 close(sock); 444 445 return 1; 446 447 fail: 448 if (sock >= 0) 449 close(sock); 450 if (ndev.tap_fd >= 0) 451 close(ndev.tap_fd); 452 453 return 0; 454 } 455 456 static void virtio_net__io_thread_init(struct kvm *kvm) 457 { 458 pthread_mutex_init(&ndev.io_rx_lock, NULL); 459 pthread_cond_init(&ndev.io_tx_cond, NULL); 460 461 pthread_mutex_init(&ndev.io_rx_lock, NULL); 462 pthread_cond_init(&ndev.io_tx_cond, NULL); 463 464 pthread_create(&ndev.io_rx_thread, NULL, virtio_net_rx_thread, (void *)kvm); 465 pthread_create(&ndev.io_tx_thread, NULL, virtio_net_tx_thread, (void *)kvm); 466 } 467 468 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 469 { 470 return writev(ndev->tap_fd, iov, out); 471 } 472 473 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 474 { 475 return readv(ndev->tap_fd, iov, in); 476 } 477 478 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 479 { 480 return uip_tx(iov, out, &ndev->info); 481 } 482 483 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 484 { 485 return uip_rx(iov, in, &ndev->info); 486 } 487 488 static struct net_dev_operations tap_ops = { 489 .rx = tap_ops_rx, 490 .tx = tap_ops_tx, 491 }; 492 493 static struct net_dev_operations uip_ops = { 494 .rx = uip_ops_rx, 495 .tx = uip_ops_tx, 496 }; 497 498 void virtio_net__init(const struct virtio_net_parameters *params) 499 { 500 struct ioevent ioevent; 501 u8 dev, line, pin; 502 u16 net_base_addr; 503 int i; 504 505 if (irq__register_device(VIRTIO_ID_NET, &dev, &pin, &line) < 0) 506 return; 507 508 pci_header.irq_pin = pin; 509 pci_header.irq_line = line; 510 net_base_addr = ioport__register(IOPORT_EMPTY, &virtio_net_io_ops, IOPORT_SIZE, NULL); 511 pci_header.bar[0] = net_base_addr | PCI_BASE_ADDRESS_SPACE_IO; 512 ndev.base_addr = net_base_addr; 513 pci__register(&pci_header, dev); 514 
	for (i = 0; i < 6; i++) {
		ndev.config.mac[i] = params->guest_mac[i];
		ndev.info.guest_mac.addr[i] = params->guest_mac[i];
		ndev.info.host_mac.addr[i] = params->host_mac[i];
	}

	ndev.mode = params->mode;
	if (ndev.mode == NET_MODE_TAP) {
		virtio_net__tap_init(params);
		ndev.ops = &tap_ops;
	} else {
		ndev.info.host_ip = ntohl(inet_addr(params->host_ip));
		ndev.info.guest_ip = ntohl(inet_addr(params->guest_ip));
		ndev.info.guest_netmask = ntohl(inet_addr("255.255.255.0"));
		uip_init(&ndev.info);
		ndev.ops = &uip_ops;
	}

	ndev.msix_io_block = pci_get_io_space_block();
	kvm__register_mmio(params->kvm, ndev.msix_io_block, 0x100, callback_mmio, NULL);
	pci_header.bar[1] = ndev.msix_io_block |
			    PCI_BASE_ADDRESS_SPACE_MEMORY |
			    PCI_BASE_ADDRESS_MEM_TYPE_64;
	/* bar[2] is the continuation of bar[1] for 64bit addressing */
	pci_header.bar[2] = 0;
	pci_header.status = PCI_STATUS_CAP_LIST;
	pci_header.capabilities = (void *)&pci_header.msix - (void *)&pci_header;

	pci_header.msix.cap = PCI_CAP_ID_MSIX;
	pci_header.msix.next = 0;
	pci_header.msix.table_size = (VIRTIO_NET_NUM_QUEUES + 1) | PCI_MSIX_FLAGS_ENABLE;
	pci_header.msix.table_offset = 1; /* Use BAR 1 */

	virtio_net__io_thread_init(params->kvm);

	for (i = 0; i < VIRTIO_NET_NUM_QUEUES; i++) {
		ioevent = (struct ioevent) {
			.io_addr = net_base_addr + VIRTIO_PCI_QUEUE_NOTIFY,
			.io_len = sizeof(u16),
			.fn = ioevent_callback,
			.datamatch = i,
			.fn_ptr = (void *)(long)i,
			.fn_kvm = params->kvm,
			.fd = eventfd(0, 0),
		};

		ioeventfd__add_event(&ioevent);
	}

	ndev.compat_id = compat__add_message("virtio-net device was not detected",
					     "While you have requested a virtio-net device, "
					     "the guest kernel didn't seem to detect it.\n"
					     "Please make sure that the kernel was compiled "
					     "with CONFIG_VIRTIO_NET.");
}