1 #include "kvm/virtio-pci-dev.h" 2 #include "kvm/virtio-net.h" 3 #include "kvm/virtio.h" 4 #include "kvm/mutex.h" 5 #include "kvm/util.h" 6 #include "kvm/kvm.h" 7 #include "kvm/irq.h" 8 #include "kvm/uip.h" 9 #include "kvm/guest_compat.h" 10 #include "kvm/iovec.h" 11 #include "kvm/strbuf.h" 12 13 #include <linux/vhost.h> 14 #include <linux/virtio_net.h> 15 #include <linux/if_tun.h> 16 #include <linux/types.h> 17 18 #include <arpa/inet.h> 19 #include <net/if.h> 20 21 #include <unistd.h> 22 #include <fcntl.h> 23 24 #include <sys/socket.h> 25 #include <sys/ioctl.h> 26 #include <sys/types.h> 27 #include <sys/wait.h> 28 #include <sys/eventfd.h> 29 30 #define VIRTIO_NET_QUEUE_SIZE 256 31 #define VIRTIO_NET_NUM_QUEUES 8 32 33 struct net_dev; 34 35 struct net_dev_operations { 36 int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); 37 int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); 38 }; 39 40 struct net_dev_queue { 41 int id; 42 struct net_dev *ndev; 43 struct virt_queue vq; 44 pthread_t thread; 45 struct mutex lock; 46 pthread_cond_t cond; 47 int gsi; 48 int irqfd; 49 }; 50 51 struct net_dev { 52 struct mutex mutex; 53 struct virtio_device vdev; 54 struct list_head list; 55 56 struct net_dev_queue queues[VIRTIO_NET_NUM_QUEUES * 2 + 1]; 57 struct virtio_net_config config; 58 u32 queue_pairs; 59 60 int vhost_fd; 61 int tap_fd; 62 char tap_name[IFNAMSIZ]; 63 bool tap_ufo; 64 65 int mode; 66 67 struct uip_info info; 68 struct net_dev_operations *ops; 69 struct kvm *kvm; 70 71 struct virtio_net_params *params; 72 }; 73 74 static LIST_HEAD(ndevs); 75 static int compat_id = -1; 76 77 #define MAX_PACKET_SIZE 65550 78 79 static bool has_virtio_feature(struct net_dev *ndev, u32 feature) 80 { 81 return ndev->vdev.features & (1 << feature); 82 } 83 84 static void *virtio_net_rx_thread(void *p) 85 { 86 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 87 struct net_dev_queue *queue = p; 88 struct virt_queue *vq = &queue->vq; 89 struct net_dev *ndev = queue->ndev; 90 struct kvm *kvm; 91 u16 out, in; 92 u16 head; 93 int len, copied; 94 95 kvm__set_thread_name("virtio-net-rx"); 96 97 kvm = ndev->kvm; 98 while (1) { 99 mutex_lock(&queue->lock); 100 if (!virt_queue__available(vq)) 101 pthread_cond_wait(&queue->cond, &queue->lock.mutex); 102 mutex_unlock(&queue->lock); 103 104 while (virt_queue__available(vq)) { 105 unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)]; 106 struct iovec dummy_iov = { 107 .iov_base = buffer, 108 .iov_len = sizeof(buffer), 109 }; 110 struct virtio_net_hdr_mrg_rxbuf *hdr; 111 u16 num_buffers; 112 113 len = ndev->ops->rx(&dummy_iov, 1, ndev); 114 if (len < 0) { 115 pr_warning("%s: rx on vq %u failed (%d), exiting thread\n", 116 __func__, queue->id, len); 117 goto out_err; 118 } 119 120 copied = num_buffers = 0; 121 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 122 hdr = iov[0].iov_base; 123 while (copied < len) { 124 size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in)); 125 126 memcpy_toiovec(iov, buffer + copied, iovsize); 127 copied += iovsize; 128 virt_queue__set_used_elem_no_update(vq, head, iovsize, num_buffers++); 129 if (copied == len) 130 break; 131 while (!virt_queue__available(vq)) 132 sleep(0); 133 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 134 } 135 136 if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF)) 137 hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers); 138 139 virt_queue__used_idx_advance(vq, num_buffers); 140 141 /* We should interrupt guest right now, otherwise latency is huge. */ 142 if (virtio_queue__should_signal(vq)) 143 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id); 144 } 145 } 146 147 out_err: 148 pthread_exit(NULL); 149 return NULL; 150 151 } 152 153 static void *virtio_net_tx_thread(void *p) 154 { 155 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 156 struct net_dev_queue *queue = p; 157 struct virt_queue *vq = &queue->vq; 158 struct net_dev *ndev = queue->ndev; 159 struct kvm *kvm; 160 u16 out, in; 161 u16 head; 162 int len; 163 164 kvm__set_thread_name("virtio-net-tx"); 165 166 kvm = ndev->kvm; 167 168 while (1) { 169 mutex_lock(&queue->lock); 170 if (!virt_queue__available(vq)) 171 pthread_cond_wait(&queue->cond, &queue->lock.mutex); 172 mutex_unlock(&queue->lock); 173 174 while (virt_queue__available(vq)) { 175 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 176 len = ndev->ops->tx(iov, out, ndev); 177 if (len < 0) { 178 pr_warning("%s: tx on vq %u failed (%d)\n", 179 __func__, queue->id, errno); 180 goto out_err; 181 } 182 183 virt_queue__set_used_elem(vq, head, len); 184 } 185 186 if (virtio_queue__should_signal(vq)) 187 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id); 188 } 189 190 out_err: 191 pthread_exit(NULL); 192 return NULL; 193 } 194 195 static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm* kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl) 196 { 197 /* Not much to do here */ 198 return VIRTIO_NET_OK; 199 } 200 201 static void *virtio_net_ctrl_thread(void *p) 202 { 203 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 204 struct net_dev_queue *queue = p; 205 struct virt_queue *vq = &queue->vq; 206 struct net_dev *ndev = queue->ndev; 207 u16 out, in, head; 208 struct kvm *kvm = ndev->kvm; 209 struct virtio_net_ctrl_hdr *ctrl; 210 virtio_net_ctrl_ack *ack; 211 212 kvm__set_thread_name("virtio-net-ctrl"); 213 214 while (1) { 215 mutex_lock(&queue->lock); 216 if (!virt_queue__available(vq)) 217 pthread_cond_wait(&queue->cond, &queue->lock.mutex); 218 mutex_unlock(&queue->lock); 219 220 while (virt_queue__available(vq)) { 221 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 222 ctrl = iov[0].iov_base; 223 ack = iov[out].iov_base; 224 225 switch (ctrl->class) { 226 case VIRTIO_NET_CTRL_MQ: 227 *ack = virtio_net_handle_mq(kvm, ndev, ctrl); 228 break; 229 default: 230 *ack = VIRTIO_NET_ERR; 231 break; 232 } 233 virt_queue__set_used_elem(vq, head, iov[out].iov_len); 234 } 235 236 if (virtio_queue__should_signal(vq)) 237 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id); 238 } 239 240 pthread_exit(NULL); 241 242 return NULL; 243 } 244 245 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue) 246 { 247 struct net_dev_queue *net_queue = &ndev->queues[queue]; 248 249 if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) { 250 pr_warning("Unknown queue index %u", queue); 251 return; 252 } 253 254 mutex_lock(&net_queue->lock); 255 pthread_cond_signal(&net_queue->cond); 256 mutex_unlock(&net_queue->lock); 257 } 258 259 static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr, 260 const char *tapname) 261 { 262 int ret; 263 264 memset(ifr, 0, sizeof(*ifr)); 265 ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 266 if (tapname) 267 strlcpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name)); 268 269 ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr); 270 271 if (ret >= 0) 272 strlcpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name)); 273 return ret; 274 } 275 276 static int virtio_net_exec_script(const char* script, const char *tap_name) 277 { 278 pid_t pid; 279 int status; 280 281 pid = fork(); 282 if (pid == 0) { 283 execl(script, script, tap_name, NULL); 284 _exit(1); 285 } else { 286 waitpid(pid, &status, 0); 287 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 288 pr_warning("Fail to setup tap by %s", script); 289 return -1; 290 } 291 } 292 return 0; 293 } 294 295 static bool virtio_net__tap_init(struct net_dev *ndev) 296 { 297 int sock = socket(AF_INET, SOCK_STREAM, 0); 298 int hdr_len; 299 struct sockaddr_in sin = {0}; 300 struct ifreq ifr; 301 const struct virtio_net_params *params = ndev->params; 302 bool skipconf = !!params->tapif; 303 304 hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ? 305 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 306 sizeof(struct virtio_net_hdr); 307 if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) 308 pr_warning("Config tap device TUNSETVNETHDRSZ error"); 309 310 if (strcmp(params->script, "none")) { 311 if (virtio_net_exec_script(params->script, ndev->tap_name) < 0) 312 goto fail; 313 } else if (!skipconf) { 314 memset(&ifr, 0, sizeof(ifr)); 315 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name)); 316 sin.sin_addr.s_addr = inet_addr(params->host_ip); 317 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 318 ifr.ifr_addr.sa_family = AF_INET; 319 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 320 pr_warning("Could not set ip address on tap device"); 321 goto fail; 322 } 323 } 324 325 if (!skipconf) { 326 memset(&ifr, 0, sizeof(ifr)); 327 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name)); 328 ioctl(sock, SIOCGIFFLAGS, &ifr); 329 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 330 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 331 pr_warning("Could not bring tap device up"); 332 } 333 334 close(sock); 335 336 return 1; 337 338 fail: 339 if (sock >= 0) 340 close(sock); 341 if (ndev->tap_fd >= 0) 342 close(ndev->tap_fd); 343 344 return 0; 345 } 346 347 static void virtio_net__tap_exit(struct net_dev *ndev) 348 { 349 int sock; 350 struct ifreq ifr; 351 352 if (ndev->params->tapif) 353 return; 354 355 sock = socket(AF_INET, SOCK_STREAM, 0); 356 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name)); 357 ioctl(sock, SIOCGIFFLAGS, &ifr); 358 ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING); 359 if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0) 360 pr_warning("Count not bring tap device down"); 361 close(sock); 362 } 363 364 static bool virtio_net__tap_create(struct net_dev *ndev) 365 { 366 int offload; 367 struct ifreq ifr; 368 const struct virtio_net_params *params = ndev->params; 369 bool macvtap = (!!params->tapif) && (params->tapif[0] == '/'); 370 371 /* Did the user already gave us the FD? */ 372 if (params->fd) 373 ndev->tap_fd = params->fd; 374 else { 375 const char *tap_file = "/dev/net/tun"; 376 377 /* Did the user ask us to use macvtap? */ 378 if (macvtap) 379 tap_file = params->tapif; 380 381 ndev->tap_fd = open(tap_file, O_RDWR); 382 if (ndev->tap_fd < 0) { 383 pr_warning("Unable to open %s", tap_file); 384 return 0; 385 } 386 } 387 388 if (!macvtap && 389 virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) { 390 pr_warning("Config tap device error. Are you root?"); 391 goto fail; 392 } 393 394 /* 395 * The UFO support had been removed from kernel in commit: 396 * ID: fb652fdfe83710da0ca13448a41b7ed027d0a984 397 * https://www.spinics.net/lists/netdev/msg443562.html 398 * In oder to support the older kernels without this commit, 399 * we set the TUN_F_UFO to offload by default to test the status of 400 * UFO kernel support. 401 */ 402 ndev->tap_ufo = true; 403 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 404 if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) { 405 /* 406 * Is this failure caused by kernel remove the UFO support? 407 * Try TUNSETOFFLOAD without TUN_F_UFO. 408 */ 409 offload &= ~TUN_F_UFO; 410 if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) { 411 pr_warning("Config tap device TUNSETOFFLOAD error"); 412 goto fail; 413 } 414 ndev->tap_ufo = false; 415 } 416 417 return 1; 418 419 fail: 420 if ((ndev->tap_fd >= 0) || (!params->fd) ) 421 close(ndev->tap_fd); 422 423 return 0; 424 } 425 426 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 427 { 428 return writev(ndev->tap_fd, iov, out); 429 } 430 431 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 432 { 433 return readv(ndev->tap_fd, iov, in); 434 } 435 436 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 437 { 438 return uip_tx(iov, out, &ndev->info); 439 } 440 441 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 442 { 443 return uip_rx(iov, in, &ndev->info); 444 } 445 446 static struct net_dev_operations tap_ops = { 447 .rx = tap_ops_rx, 448 .tx = tap_ops_tx, 449 }; 450 451 static struct net_dev_operations uip_ops = { 452 .rx = uip_ops_rx, 453 .tx = uip_ops_tx, 454 }; 455 456 static u8 *get_config(struct kvm *kvm, void *dev) 457 { 458 struct net_dev *ndev = dev; 459 460 return ((u8 *)(&ndev->config)); 461 } 462 463 static size_t get_config_size(struct kvm *kvm, void *dev) 464 { 465 struct net_dev *ndev = dev; 466 467 return sizeof(ndev->config); 468 } 469 470 static u32 get_host_features(struct kvm *kvm, void *dev) 471 { 472 u32 features; 473 struct net_dev *ndev = dev; 474 475 features = 1UL << VIRTIO_NET_F_MAC 476 | 1UL << VIRTIO_NET_F_CSUM 477 | 1UL << VIRTIO_NET_F_HOST_TSO4 478 | 1UL << VIRTIO_NET_F_HOST_TSO6 479 | 1UL << VIRTIO_NET_F_GUEST_TSO4 480 | 1UL << VIRTIO_NET_F_GUEST_TSO6 481 | 1UL << VIRTIO_RING_F_EVENT_IDX 482 | 1UL << VIRTIO_RING_F_INDIRECT_DESC 483 | 1UL << VIRTIO_NET_F_CTRL_VQ 484 | 1UL << VIRTIO_NET_F_MRG_RXBUF 485 | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0); 486 487 /* 488 * The UFO feature for host and guest only can be enabled when the 489 * kernel has TAP UFO support. 490 */ 491 if (ndev->tap_ufo) 492 features |= (1UL << VIRTIO_NET_F_HOST_UFO 493 | 1UL << VIRTIO_NET_F_GUEST_UFO); 494 495 return features; 496 } 497 498 static int virtio_net__vhost_set_features(struct net_dev *ndev) 499 { 500 u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX; 501 u64 vhost_features; 502 503 if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0) 504 die_perror("VHOST_GET_FEATURES failed"); 505 506 /* make sure both side support mergable rx buffers */ 507 if (vhost_features & 1UL << VIRTIO_NET_F_MRG_RXBUF && 508 has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF)) 509 features |= 1UL << VIRTIO_NET_F_MRG_RXBUF; 510 511 return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features); 512 } 513 514 static void virtio_net_start(struct net_dev *ndev) 515 { 516 if (ndev->mode == NET_MODE_TAP) { 517 if (!virtio_net__tap_init(ndev)) 518 die_perror("TAP device initialized failed because"); 519 520 if (ndev->vhost_fd && 521 virtio_net__vhost_set_features(ndev) != 0) 522 die_perror("VHOST_SET_FEATURES failed"); 523 } else { 524 ndev->info.vnet_hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ? 525 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 526 sizeof(struct virtio_net_hdr); 527 uip_init(&ndev->info); 528 } 529 } 530 531 static void virtio_net_stop(struct net_dev *ndev) 532 { 533 /* Undo whatever start() did */ 534 if (ndev->mode == NET_MODE_TAP) 535 virtio_net__tap_exit(ndev); 536 else 537 uip_exit(&ndev->info); 538 } 539 540 static void virtio_net_update_endian(struct net_dev *ndev) 541 { 542 struct virtio_net_config *conf = &ndev->config; 543 544 conf->status = virtio_host_to_guest_u16(&ndev->vdev, 545 VIRTIO_NET_S_LINK_UP); 546 conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev, 547 ndev->queue_pairs); 548 549 /* Let TAP know about vnet header endianness */ 550 if (ndev->mode == NET_MODE_TAP && 551 ndev->vdev.endian != VIRTIO_ENDIAN_HOST) { 552 int enable_val = 1, disable_val = 0; 553 int enable_req, disable_req; 554 555 if (ndev->vdev.endian == VIRTIO_ENDIAN_LE) { 556 enable_req = TUNSETVNETLE; 557 disable_req = TUNSETVNETBE; 558 } else { 559 enable_req = TUNSETVNETBE; 560 disable_req = TUNSETVNETLE; 561 } 562 563 ioctl(ndev->tap_fd, disable_req, &disable_val); 564 if (ioctl(ndev->tap_fd, enable_req, &enable_val) < 0) 565 pr_err("Config tap device TUNSETVNETLE/BE error"); 566 } 567 } 568 569 static void notify_status(struct kvm *kvm, void *dev, u32 status) 570 { 571 struct net_dev *ndev = dev; 572 573 if (status & VIRTIO__STATUS_CONFIG) 574 virtio_net_update_endian(ndev); 575 576 if (status & VIRTIO__STATUS_START) 577 virtio_net_start(dev); 578 else if (status & VIRTIO__STATUS_STOP) 579 virtio_net_stop(dev); 580 } 581 582 static bool is_ctrl_vq(struct net_dev *ndev, u32 vq) 583 { 584 return vq == (u32)(ndev->queue_pairs * 2); 585 } 586 587 static int init_vq(struct kvm *kvm, void *dev, u32 vq) 588 { 589 struct vhost_vring_state state = { .index = vq }; 590 struct net_dev_queue *net_queue; 591 struct vhost_vring_addr addr; 592 struct net_dev *ndev = dev; 593 struct virt_queue *queue; 594 int r; 595 596 compat__remove_message(compat_id); 597 598 net_queue = &ndev->queues[vq]; 599 net_queue->id = vq; 600 net_queue->ndev = ndev; 601 queue = &net_queue->vq; 602 virtio_init_device_vq(kvm, &ndev->vdev, queue, VIRTIO_NET_QUEUE_SIZE); 603 604 mutex_init(&net_queue->lock); 605 pthread_cond_init(&net_queue->cond, NULL); 606 if (is_ctrl_vq(ndev, vq)) { 607 pthread_create(&net_queue->thread, NULL, virtio_net_ctrl_thread, 608 net_queue); 609 610 return 0; 611 } else if (ndev->vhost_fd == 0 ) { 612 if (vq & 1) 613 pthread_create(&net_queue->thread, NULL, 614 virtio_net_tx_thread, net_queue); 615 else 616 pthread_create(&net_queue->thread, NULL, 617 virtio_net_rx_thread, net_queue); 618 619 return 0; 620 } 621 622 if (queue->endian != VIRTIO_ENDIAN_HOST) 623 die_perror("VHOST requires the same endianness in guest and host"); 624 625 state.num = queue->vring.num; 626 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state); 627 if (r < 0) 628 die_perror("VHOST_SET_VRING_NUM failed"); 629 state.num = 0; 630 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state); 631 if (r < 0) 632 die_perror("VHOST_SET_VRING_BASE failed"); 633 634 addr = (struct vhost_vring_addr) { 635 .index = vq, 636 .desc_user_addr = (u64)(unsigned long)queue->vring.desc, 637 .avail_user_addr = (u64)(unsigned long)queue->vring.avail, 638 .used_user_addr = (u64)(unsigned long)queue->vring.used, 639 }; 640 641 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr); 642 if (r < 0) 643 die_perror("VHOST_SET_VRING_ADDR failed"); 644 645 return 0; 646 } 647 648 static void exit_vq(struct kvm *kvm, void *dev, u32 vq) 649 { 650 struct net_dev *ndev = dev; 651 struct net_dev_queue *queue = &ndev->queues[vq]; 652 653 if (!is_ctrl_vq(ndev, vq) && queue->gsi) { 654 irq__del_irqfd(kvm, queue->gsi, queue->irqfd); 655 close(queue->irqfd); 656 queue->gsi = queue->irqfd = 0; 657 } 658 659 /* 660 * TODO: vhost reset owner. It's the only way to cleanly stop vhost, but 661 * we can't restart it at the moment. 662 */ 663 if (ndev->vhost_fd && !is_ctrl_vq(ndev, vq)) { 664 pr_warning("Cannot reset VHOST queue"); 665 ioctl(ndev->vhost_fd, VHOST_RESET_OWNER); 666 return; 667 } 668 669 /* 670 * Threads are waiting on cancellation points (readv or 671 * pthread_cond_wait) and should stop gracefully. 672 */ 673 pthread_cancel(queue->thread); 674 pthread_join(queue->thread, NULL); 675 } 676 677 static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi) 678 { 679 struct net_dev *ndev = dev; 680 struct net_dev_queue *queue = &ndev->queues[vq]; 681 struct vhost_vring_file file; 682 int r; 683 684 if (ndev->vhost_fd == 0) 685 return; 686 687 file = (struct vhost_vring_file) { 688 .index = vq, 689 .fd = eventfd(0, 0), 690 }; 691 692 r = irq__add_irqfd(kvm, gsi, file.fd, -1); 693 if (r < 0) 694 die_perror("KVM_IRQFD failed"); 695 696 queue->irqfd = file.fd; 697 queue->gsi = gsi; 698 699 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file); 700 if (r < 0) 701 die_perror("VHOST_SET_VRING_CALL failed"); 702 file.fd = ndev->tap_fd; 703 r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file); 704 if (r != 0) 705 die("VHOST_NET_SET_BACKEND failed %d", errno); 706 707 } 708 709 static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd) 710 { 711 struct net_dev *ndev = dev; 712 struct vhost_vring_file file = { 713 .index = vq, 714 .fd = efd, 715 }; 716 int r; 717 718 if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq)) 719 return; 720 721 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file); 722 if (r < 0) 723 die_perror("VHOST_SET_VRING_KICK failed"); 724 } 725 726 static int notify_vq(struct kvm *kvm, void *dev, u32 vq) 727 { 728 struct net_dev *ndev = dev; 729 730 virtio_net_handle_callback(kvm, ndev, vq); 731 732 return 0; 733 } 734 735 static struct virt_queue *get_vq(struct kvm *kvm, void *dev, u32 vq) 736 { 737 struct net_dev *ndev = dev; 738 739 return &ndev->queues[vq].vq; 740 } 741 742 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq) 743 { 744 /* FIXME: dynamic */ 745 return VIRTIO_NET_QUEUE_SIZE; 746 } 747 748 static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size) 749 { 750 /* FIXME: dynamic */ 751 return size; 752 } 753 754 static unsigned int get_vq_count(struct kvm *kvm, void *dev) 755 { 756 struct net_dev *ndev = dev; 757 758 return ndev->queue_pairs * 2 + 1; 759 } 760 761 static struct virtio_ops net_dev_virtio_ops = { 762 .get_config = get_config, 763 .get_config_size = get_config_size, 764 .get_host_features = get_host_features, 765 .get_vq_count = get_vq_count, 766 .init_vq = init_vq, 767 .exit_vq = exit_vq, 768 .get_vq = get_vq, 769 .get_size_vq = get_size_vq, 770 .set_size_vq = set_size_vq, 771 .notify_vq = notify_vq, 772 .notify_vq_gsi = notify_vq_gsi, 773 .notify_vq_eventfd = notify_vq_eventfd, 774 .notify_status = notify_status, 775 }; 776 777 static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev) 778 { 779 struct kvm_mem_bank *bank; 780 struct vhost_memory *mem; 781 int r, i; 782 783 ndev->vhost_fd = open("/dev/vhost-net", O_RDWR); 784 if (ndev->vhost_fd < 0) 785 die_perror("Failed openning vhost-net device"); 786 787 mem = calloc(1, sizeof(*mem) + kvm->mem_slots * sizeof(struct vhost_memory_region)); 788 if (mem == NULL) 789 die("Failed allocating memory for vhost memory map"); 790 791 i = 0; 792 list_for_each_entry(bank, &kvm->mem_banks, list) { 793 mem->regions[i] = (struct vhost_memory_region) { 794 .guest_phys_addr = bank->guest_phys_addr, 795 .memory_size = bank->size, 796 .userspace_addr = (unsigned long)bank->host_addr, 797 }; 798 i++; 799 } 800 mem->nregions = i; 801 802 r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER); 803 if (r != 0) 804 die_perror("VHOST_SET_OWNER failed"); 805 806 r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem); 807 if (r != 0) 808 die_perror("VHOST_SET_MEM_TABLE failed"); 809 810 ndev->vdev.use_vhost = true; 811 812 free(mem); 813 } 814 815 static inline void str_to_mac(const char *str, char *mac) 816 { 817 sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 818 mac, mac+1, mac+2, mac+3, mac+4, mac+5); 819 } 820 static int set_net_param(struct kvm *kvm, struct virtio_net_params *p, 821 const char *param, const char *val) 822 { 823 if (strcmp(param, "guest_mac") == 0) { 824 str_to_mac(val, p->guest_mac); 825 } else if (strcmp(param, "mode") == 0) { 826 if (!strncmp(val, "user", 4)) { 827 int i; 828 829 for (i = 0; i < kvm->cfg.num_net_devices; i++) 830 if (kvm->cfg.net_params[i].mode == NET_MODE_USER) 831 die("Only one usermode network device allowed at a time"); 832 p->mode = NET_MODE_USER; 833 } else if (!strncmp(val, "tap", 3)) { 834 p->mode = NET_MODE_TAP; 835 } else if (!strncmp(val, "none", 4)) { 836 kvm->cfg.no_net = 1; 837 return -1; 838 } else 839 die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network); 840 } else if (strcmp(param, "script") == 0) { 841 p->script = strdup(val); 842 } else if (strcmp(param, "downscript") == 0) { 843 p->downscript = strdup(val); 844 } else if (strcmp(param, "guest_ip") == 0) { 845 p->guest_ip = strdup(val); 846 } else if (strcmp(param, "host_ip") == 0) { 847 p->host_ip = strdup(val); 848 } else if (strcmp(param, "trans") == 0) { 849 p->trans = strdup(val); 850 } else if (strcmp(param, "tapif") == 0) { 851 p->tapif = strdup(val); 852 } else if (strcmp(param, "vhost") == 0) { 853 p->vhost = atoi(val); 854 } else if (strcmp(param, "fd") == 0) { 855 p->fd = atoi(val); 856 } else if (strcmp(param, "mq") == 0) { 857 p->mq = atoi(val); 858 } else 859 die("Unknown network parameter %s", param); 860 861 return 0; 862 } 863 864 int netdev_parser(const struct option *opt, const char *arg, int unset) 865 { 866 struct virtio_net_params p; 867 char *buf = NULL, *cmd = NULL, *cur = NULL; 868 bool on_cmd = true; 869 struct kvm *kvm = opt->ptr; 870 871 if (arg) { 872 buf = strdup(arg); 873 if (buf == NULL) 874 die("Failed allocating new net buffer"); 875 cur = strtok(buf, ",="); 876 } 877 878 p = (struct virtio_net_params) { 879 .guest_ip = DEFAULT_GUEST_ADDR, 880 .host_ip = DEFAULT_HOST_ADDR, 881 .script = DEFAULT_SCRIPT, 882 .downscript = DEFAULT_SCRIPT, 883 .mode = NET_MODE_TAP, 884 }; 885 886 str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac); 887 p.guest_mac[5] += kvm->cfg.num_net_devices; 888 889 while (cur) { 890 if (on_cmd) { 891 cmd = cur; 892 } else { 893 if (set_net_param(kvm, &p, cmd, cur) < 0) 894 goto done; 895 } 896 on_cmd = !on_cmd; 897 898 cur = strtok(NULL, ",="); 899 }; 900 901 kvm->cfg.num_net_devices++; 902 903 kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params)); 904 if (kvm->cfg.net_params == NULL) 905 die("Failed adding new network device"); 906 907 kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p; 908 909 done: 910 free(buf); 911 return 0; 912 } 913 914 static int virtio_net__init_one(struct virtio_net_params *params) 915 { 916 int i, r; 917 struct net_dev *ndev; 918 struct virtio_ops *ops; 919 enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm); 920 921 ndev = calloc(1, sizeof(struct net_dev)); 922 if (ndev == NULL) 923 return -ENOMEM; 924 925 list_add_tail(&ndev->list, &ndevs); 926 927 ops = malloc(sizeof(*ops)); 928 if (ops == NULL) 929 return -ENOMEM; 930 931 ndev->kvm = params->kvm; 932 ndev->params = params; 933 934 mutex_init(&ndev->mutex); 935 ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq)); 936 937 for (i = 0 ; i < 6 ; i++) { 938 ndev->config.mac[i] = params->guest_mac[i]; 939 ndev->info.guest_mac.addr[i] = params->guest_mac[i]; 940 ndev->info.host_mac.addr[i] = params->host_mac[i]; 941 } 942 943 ndev->mode = params->mode; 944 if (ndev->mode == NET_MODE_TAP) { 945 ndev->ops = &tap_ops; 946 if (!virtio_net__tap_create(ndev)) 947 die_perror("You have requested a TAP device, but creation of one has failed because"); 948 } else { 949 ndev->info.host_ip = ntohl(inet_addr(params->host_ip)); 950 ndev->info.guest_ip = ntohl(inet_addr(params->guest_ip)); 951 ndev->info.guest_netmask = ntohl(inet_addr("255.255.255.0")); 952 ndev->info.buf_nr = 20, 953 ndev->ops = &uip_ops; 954 uip_static_init(&ndev->info); 955 } 956 957 *ops = net_dev_virtio_ops; 958 959 if (params->trans) { 960 if (strcmp(params->trans, "mmio") == 0) 961 trans = VIRTIO_MMIO; 962 else if (strcmp(params->trans, "pci") == 0) 963 trans = VIRTIO_PCI; 964 else 965 pr_warning("virtio-net: Unknown transport method : %s, " 966 "falling back to %s.", params->trans, 967 virtio_trans_name(trans)); 968 } 969 970 r = virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans, 971 PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET); 972 if (r < 0) { 973 free(ops); 974 return r; 975 } 976 977 if (params->vhost) 978 virtio_net__vhost_init(params->kvm, ndev); 979 980 if (compat_id == -1) 981 compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET"); 982 983 return 0; 984 } 985 986 int virtio_net__init(struct kvm *kvm) 987 { 988 int i, r; 989 990 for (i = 0; i < kvm->cfg.num_net_devices; i++) { 991 kvm->cfg.net_params[i].kvm = kvm; 992 r = virtio_net__init_one(&kvm->cfg.net_params[i]); 993 if (r < 0) 994 goto cleanup; 995 } 996 997 if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) { 998 static struct virtio_net_params net_params; 999 1000 net_params = (struct virtio_net_params) { 1001 .guest_ip = kvm->cfg.guest_ip, 1002 .host_ip = kvm->cfg.host_ip, 1003 .kvm = kvm, 1004 .script = kvm->cfg.script, 1005 .mode = NET_MODE_USER, 1006 }; 1007 str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac); 1008 str_to_mac(kvm->cfg.host_mac, net_params.host_mac); 1009 1010 r = virtio_net__init_one(&net_params); 1011 if (r < 0) 1012 goto cleanup; 1013 } 1014 1015 return 0; 1016 1017 cleanup: 1018 virtio_net__exit(kvm); 1019 return r; 1020 } 1021 virtio_dev_init(virtio_net__init); 1022 1023 int virtio_net__exit(struct kvm *kvm) 1024 { 1025 struct virtio_net_params *params; 1026 struct net_dev *ndev; 1027 struct list_head *ptr, *n; 1028 1029 list_for_each_safe(ptr, n, &ndevs) { 1030 ndev = list_entry(ptr, struct net_dev, list); 1031 params = ndev->params; 1032 /* Cleanup any tap device which attached to bridge */ 1033 if (ndev->mode == NET_MODE_TAP && 1034 strcmp(params->downscript, "none")) 1035 virtio_net_exec_script(params->downscript, ndev->tap_name); 1036 1037 list_del(&ndev->list); 1038 free(ndev); 1039 } 1040 return 0; 1041 } 1042 virtio_dev_exit(virtio_net__exit); 1043