1 #include "kvm/virtio-pci-dev.h" 2 #include "kvm/virtio-net.h" 3 #include "kvm/virtio.h" 4 #include "kvm/ioport.h" 5 #include "kvm/types.h" 6 #include "kvm/mutex.h" 7 #include "kvm/util.h" 8 #include "kvm/kvm.h" 9 #include "kvm/pci.h" 10 #include "kvm/irq.h" 11 #include "kvm/uip.h" 12 #include "kvm/ioeventfd.h" 13 #include "kvm/guest_compat.h" 14 15 #include <linux/virtio_net.h> 16 #include <linux/if_tun.h> 17 18 #include <arpa/inet.h> 19 #include <net/if.h> 20 21 #include <unistd.h> 22 #include <assert.h> 23 #include <fcntl.h> 24 25 #include <sys/socket.h> 26 #include <sys/ioctl.h> 27 #include <sys/types.h> 28 #include <sys/wait.h> 29 30 #define VIRTIO_NET_QUEUE_SIZE 128 31 #define VIRTIO_NET_NUM_QUEUES 2 32 #define VIRTIO_NET_RX_QUEUE 0 33 #define VIRTIO_NET_TX_QUEUE 1 34 35 static struct pci_device_header pci_header = { 36 .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 37 .device_id = PCI_DEVICE_ID_VIRTIO_NET, 38 .header_type = PCI_HEADER_TYPE_NORMAL, 39 .revision_id = 0, 40 .class = 0x020000, 41 .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 42 .subsys_id = VIRTIO_ID_NET, 43 }; 44 45 struct net_dev; 46 47 struct net_dev_operations { 48 int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); 49 int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); 50 }; 51 52 struct net_dev { 53 pthread_mutex_t mutex; 54 55 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 56 struct virtio_net_config config; 57 u32 host_features; 58 u32 guest_features; 59 u16 config_vector; 60 u8 status; 61 u8 isr; 62 u16 queue_selector; 63 u16 base_addr; 64 u32 vq_vector[VIRTIO_NET_NUM_QUEUES]; 65 u32 gsis[VIRTIO_NET_NUM_QUEUES]; 66 u32 msix_io_block; 67 int compat_id; 68 69 pthread_t io_rx_thread; 70 pthread_mutex_t io_rx_lock; 71 pthread_cond_t io_rx_cond; 72 73 pthread_t io_tx_thread; 74 pthread_mutex_t io_tx_lock; 75 pthread_cond_t io_tx_cond; 76 77 int tap_fd; 78 char tap_name[IFNAMSIZ]; 79 80 int mode; 81 82 struct uip_info info; 83 struct net_dev_operations *ops; 84 }; 85 86 static struct net_dev ndev = { 87 .mutex = PTHREAD_MUTEX_INITIALIZER, 88 89 .config = { 90 .status = VIRTIO_NET_S_LINK_UP, 91 }, 92 .host_features = 1UL << VIRTIO_NET_F_MAC 93 | 1UL << VIRTIO_NET_F_CSUM 94 | 1UL << VIRTIO_NET_F_HOST_UFO 95 | 1UL << VIRTIO_NET_F_HOST_TSO4 96 | 1UL << VIRTIO_NET_F_HOST_TSO6 97 | 1UL << VIRTIO_NET_F_GUEST_UFO 98 | 1UL << VIRTIO_NET_F_GUEST_TSO4 99 | 1UL << VIRTIO_NET_F_GUEST_TSO6, 100 .info = { 101 .buf_nr = 20, 102 } 103 }; 104 105 static void *virtio_net_rx_thread(void *p) 106 { 107 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 108 struct virt_queue *vq; 109 struct kvm *kvm; 110 u16 out, in; 111 u16 head; 112 int len; 113 114 kvm = p; 115 vq = &ndev.vqs[VIRTIO_NET_RX_QUEUE]; 116 117 while (1) { 118 119 mutex_lock(&ndev.io_rx_lock); 120 if (!virt_queue__available(vq)) 121 pthread_cond_wait(&ndev.io_rx_cond, &ndev.io_rx_lock); 122 mutex_unlock(&ndev.io_rx_lock); 123 124 while (virt_queue__available(vq)) { 125 126 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 127 128 len = ndev.ops->rx(iov, in, &ndev); 129 130 virt_queue__set_used_elem(vq, head, len); 131 132 /* We should interrupt guest right now, otherwise latency is huge. */ 133 kvm__irq_trigger(kvm, ndev.gsis[VIRTIO_NET_RX_QUEUE]); 134 } 135 136 } 137 138 pthread_exit(NULL); 139 return NULL; 140 141 } 142 143 static void *virtio_net_tx_thread(void *p) 144 { 145 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 146 struct virt_queue *vq; 147 struct kvm *kvm; 148 u16 out, in; 149 u16 head; 150 int len; 151 152 kvm = p; 153 vq = &ndev.vqs[VIRTIO_NET_TX_QUEUE]; 154 155 while (1) { 156 mutex_lock(&ndev.io_tx_lock); 157 if (!virt_queue__available(vq)) 158 pthread_cond_wait(&ndev.io_tx_cond, &ndev.io_tx_lock); 159 mutex_unlock(&ndev.io_tx_lock); 160 161 while (virt_queue__available(vq)) { 162 163 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 164 165 len = ndev.ops->tx(iov, out, &ndev); 166 167 virt_queue__set_used_elem(vq, head, len); 168 } 169 170 kvm__irq_trigger(kvm, ndev.gsis[VIRTIO_NET_TX_QUEUE]); 171 } 172 173 pthread_exit(NULL); 174 175 return NULL; 176 177 } 178 179 static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size) 180 { 181 u8 *config_space = (u8 *)&ndev.config; 182 183 if (size != 1) 184 return false; 185 186 if ((offset - VIRTIO_MSI_CONFIG_VECTOR) > sizeof(struct virtio_net_config)) 187 pr_error("config offset is too big: %li", offset - VIRTIO_MSI_CONFIG_VECTOR); 188 189 ioport__write8(data, config_space[offset - VIRTIO_MSI_CONFIG_VECTOR]); 190 191 return true; 192 } 193 194 static bool virtio_net_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) 195 { 196 unsigned long offset = port - ndev.base_addr; 197 bool ret = true; 198 199 mutex_lock(&ndev.mutex); 200 201 switch (offset) { 202 case VIRTIO_PCI_HOST_FEATURES: 203 ioport__write32(data, ndev.host_features); 204 break; 205 case VIRTIO_PCI_GUEST_FEATURES: 206 ret = false; 207 break; 208 case VIRTIO_PCI_QUEUE_PFN: 209 ioport__write32(data, ndev.vqs[ndev.queue_selector].pfn); 210 break; 211 case VIRTIO_PCI_QUEUE_NUM: 212 ioport__write16(data, VIRTIO_NET_QUEUE_SIZE); 213 break; 214 case VIRTIO_PCI_QUEUE_SEL: 215 case VIRTIO_PCI_QUEUE_NOTIFY: 216 ret = false; 217 break; 218 case VIRTIO_PCI_STATUS: 219 ioport__write8(data, ndev.status); 220 break; 221 case VIRTIO_PCI_ISR: 222 ioport__write8(data, ndev.isr); 223 kvm__irq_line(kvm, pci_header.irq_line, VIRTIO_IRQ_LOW); 224 ndev.isr = VIRTIO_IRQ_LOW; 225 break; 226 default: 227 ret = virtio_net_pci_io_device_specific_in(data, offset, size); 228 }; 229 230 mutex_unlock(&ndev.mutex); 231 232 return ret; 233 } 234 235 static void virtio_net_handle_callback(struct kvm *kvm, u16 queue_index) 236 { 237 switch (queue_index) { 238 case VIRTIO_NET_TX_QUEUE: 239 mutex_lock(&ndev.io_tx_lock); 240 pthread_cond_signal(&ndev.io_tx_cond); 241 mutex_unlock(&ndev.io_tx_lock); 242 break; 243 case VIRTIO_NET_RX_QUEUE: 244 mutex_lock(&ndev.io_rx_lock); 245 pthread_cond_signal(&ndev.io_rx_cond); 246 mutex_unlock(&ndev.io_rx_lock); 247 break; 248 default: 249 pr_warning("Unknown queue index %u", queue_index); 250 } 251 } 252 253 static bool virtio_net_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) 254 { 255 unsigned long offset = port - ndev.base_addr; 256 bool ret = true; 257 258 mutex_lock(&ndev.mutex); 259 260 switch (offset) { 261 case VIRTIO_PCI_GUEST_FEATURES: 262 ndev.guest_features = ioport__read32(data); 263 break; 264 case VIRTIO_PCI_QUEUE_PFN: { 265 struct virt_queue *queue; 266 void *p; 267 268 assert(ndev.queue_selector < VIRTIO_NET_NUM_QUEUES); 269 270 compat__remove_message(ndev.compat_id); 271 272 queue = &ndev.vqs[ndev.queue_selector]; 273 queue->pfn = ioport__read32(data); 274 p = guest_pfn_to_host(kvm, queue->pfn); 275 276 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN); 277 278 break; 279 } 280 case VIRTIO_PCI_QUEUE_SEL: 281 ndev.queue_selector = ioport__read16(data); 282 break; 283 case VIRTIO_PCI_QUEUE_NOTIFY: { 284 u16 queue_index; 285 286 queue_index = ioport__read16(data); 287 virtio_net_handle_callback(kvm, queue_index); 288 break; 289 } 290 case VIRTIO_PCI_STATUS: 291 ndev.status = ioport__read8(data); 292 break; 293 case VIRTIO_MSI_CONFIG_VECTOR: 294 ndev.config_vector = ioport__read16(data); 295 break; 296 case VIRTIO_MSI_QUEUE_VECTOR: { 297 u32 gsi; 298 u32 vec; 299 300 vec = ndev.vq_vector[ndev.queue_selector] = ioport__read16(data); 301 302 gsi = irq__add_msix_route(kvm, 303 pci_header.msix.table[vec].low, 304 pci_header.msix.table[vec].high, 305 pci_header.msix.table[vec].data); 306 307 ndev.gsis[ndev.queue_selector] = gsi; 308 break; 309 } 310 default: 311 ret = false; 312 }; 313 314 mutex_unlock(&ndev.mutex); 315 316 return ret; 317 } 318 319 static void ioevent_callback(struct kvm *kvm, void *param) 320 { 321 virtio_net_handle_callback(kvm, (u64)(long)param); 322 } 323 324 static struct ioport_operations virtio_net_io_ops = { 325 .io_in = virtio_net_pci_io_in, 326 .io_out = virtio_net_pci_io_out, 327 }; 328 329 static void callback_mmio(u64 addr, u8 *data, u32 len, u8 is_write, void *ptr) 330 { 331 void *table = pci_header.msix.table; 332 if (is_write) 333 memcpy(table + addr - ndev.msix_io_block, data, len); 334 else 335 memcpy(data, table + addr - ndev.msix_io_block, len); 336 } 337 338 static bool virtio_net__tap_init(const struct virtio_net_parameters *params) 339 { 340 int sock = socket(AF_INET, SOCK_STREAM, 0); 341 int pid, status, offload, hdr_len; 342 struct sockaddr_in sin = {0}; 343 struct ifreq ifr; 344 345 ndev.tap_fd = open("/dev/net/tun", O_RDWR); 346 if (ndev.tap_fd < 0) { 347 pr_warning("Unable to open /dev/net/tun"); 348 goto fail; 349 } 350 351 memset(&ifr, 0, sizeof(ifr)); 352 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 353 if (ioctl(ndev.tap_fd, TUNSETIFF, &ifr) < 0) { 354 pr_warning("Config tap device error. Are you root?"); 355 goto fail; 356 } 357 358 strncpy(ndev.tap_name, ifr.ifr_name, sizeof(ndev.tap_name)); 359 360 if (ioctl(ndev.tap_fd, TUNSETNOCSUM, 1) < 0) { 361 pr_warning("Config tap device TUNSETNOCSUM error"); 362 goto fail; 363 } 364 365 hdr_len = sizeof(struct virtio_net_hdr); 366 if (ioctl(ndev.tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) { 367 pr_warning("Config tap device TUNSETVNETHDRSZ error"); 368 } 369 370 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 371 if (ioctl(ndev.tap_fd, TUNSETOFFLOAD, offload) < 0) { 372 pr_warning("Config tap device TUNSETOFFLOAD error"); 373 goto fail; 374 } 375 376 if (strcmp(params->script, "none")) { 377 pid = fork(); 378 if (pid == 0) { 379 execl(params->script, params->script, ndev.tap_name, NULL); 380 _exit(1); 381 } else { 382 waitpid(pid, &status, 0); 383 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 384 pr_warning("Fail to setup tap by %s", params->script); 385 goto fail; 386 } 387 } 388 } else { 389 memset(&ifr, 0, sizeof(ifr)); 390 strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name)); 391 sin.sin_addr.s_addr = inet_addr(params->host_ip); 392 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 393 ifr.ifr_addr.sa_family = AF_INET; 394 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 395 pr_warning("Could not set ip address on tap device"); 396 goto fail; 397 } 398 } 399 400 memset(&ifr, 0, sizeof(ifr)); 401 strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name)); 402 ioctl(sock, SIOCGIFFLAGS, &ifr); 403 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 404 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 405 pr_warning("Could not bring tap device up"); 406 407 close(sock); 408 409 return 1; 410 411 fail: 412 if (sock >= 0) 413 close(sock); 414 if (ndev.tap_fd >= 0) 415 close(ndev.tap_fd); 416 417 return 0; 418 } 419 420 static void virtio_net__io_thread_init(struct kvm *kvm) 421 { 422 pthread_mutex_init(&ndev.io_rx_lock, NULL); 423 pthread_cond_init(&ndev.io_tx_cond, NULL); 424 425 pthread_mutex_init(&ndev.io_rx_lock, NULL); 426 pthread_cond_init(&ndev.io_tx_cond, NULL); 427 428 pthread_create(&ndev.io_rx_thread, NULL, virtio_net_rx_thread, (void *)kvm); 429 pthread_create(&ndev.io_tx_thread, NULL, virtio_net_tx_thread, (void *)kvm); 430 } 431 432 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 433 { 434 return writev(ndev->tap_fd, iov, out); 435 } 436 437 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 438 { 439 return readv(ndev->tap_fd, iov, in); 440 } 441 442 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 443 { 444 return uip_tx(iov, out, &ndev->info); 445 } 446 447 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 448 { 449 return uip_rx(iov, in, &ndev->info); 450 } 451 452 static struct net_dev_operations tap_ops = { 453 .rx = tap_ops_rx, 454 .tx = tap_ops_tx, 455 }; 456 457 static struct net_dev_operations uip_ops = { 458 .rx = uip_ops_rx, 459 .tx = uip_ops_tx, 460 }; 461 462 void virtio_net__init(const struct virtio_net_parameters *params) 463 { 464 struct ioevent ioevent; 465 u8 dev, line, pin; 466 u16 net_base_addr; 467 int i; 468 469 if (irq__register_device(VIRTIO_ID_NET, &dev, &pin, &line) < 0) 470 return; 471 472 pci_header.irq_pin = pin; 473 pci_header.irq_line = line; 474 net_base_addr = ioport__register(IOPORT_EMPTY, &virtio_net_io_ops, IOPORT_SIZE, NULL); 475 pci_header.bar[0] = net_base_addr | PCI_BASE_ADDRESS_SPACE_IO; 476 ndev.base_addr = net_base_addr; 477 pci__register(&pci_header, dev); 478 479 for (i = 0 ; i < 6 ; i++) { 480 ndev.config.mac[i] = params->guest_mac[i]; 481 ndev.info.guest_mac.addr[i] = params->guest_mac[i]; 482 ndev.info.host_mac.addr[i] = params->host_mac[i]; 483 } 484 485 ndev.mode = params->mode; 486 if (ndev.mode == NET_MODE_TAP) { 487 virtio_net__tap_init(params); 488 ndev.ops = &tap_ops; 489 } else { 490 ndev.info.host_ip = ntohl(inet_addr(params->host_ip)); 491 ndev.info.guest_ip = ntohl(inet_addr(params->guest_ip)); 492 ndev.info.guest_netmask = ntohl(inet_addr("255.255.255.0")); 493 uip_init(&ndev.info); 494 ndev.ops = &uip_ops; 495 } 496 497 ndev.msix_io_block = pci_get_io_space_block(); 498 kvm__register_mmio(params->kvm, ndev.msix_io_block, 0x100, callback_mmio, NULL); 499 pci_header.bar[1] = ndev.msix_io_block | 500 PCI_BASE_ADDRESS_SPACE_MEMORY | 501 PCI_BASE_ADDRESS_MEM_TYPE_64; 502 /* bar[2] is the continuation of bar[1] for 64bit addressing */ 503 pci_header.bar[2] = 0; 504 pci_header.status = PCI_STATUS_CAP_LIST; 505 pci_header.capabilities = (void *)&pci_header.msix - (void *)&pci_header; 506 507 pci_header.msix.cap = PCI_CAP_ID_MSIX; 508 pci_header.msix.next = 0; 509 pci_header.msix.table_size = (VIRTIO_NET_NUM_QUEUES + 1) | PCI_MSIX_FLAGS_ENABLE; 510 pci_header.msix.table_offset = 1; /* Use BAR 1 */ 511 512 virtio_net__io_thread_init(params->kvm); 513 514 for (i = 0; i < VIRTIO_NET_NUM_QUEUES; i++) { 515 ioevent = (struct ioevent) { 516 .io_addr = net_base_addr + VIRTIO_PCI_QUEUE_NOTIFY, 517 .io_len = sizeof(u16), 518 .fn = ioevent_callback, 519 .datamatch = i, 520 .fn_ptr = (void *)(long)i, 521 .fn_kvm = params->kvm, 522 .fd = eventfd(0, 0), 523 }; 524 525 ioeventfd__add_event(&ioevent); 526 } 527 528 ndev.compat_id = compat__add_message("virtio-net device was not detected", 529 "While you have requested a virtio-net device, " 530 "the guest kernel didn't seem to detect it.\n" 531 "Please make sure that the kernel was compiled" 532 "with CONFIG_VIRTIO_NET."); 533 } 534