1 #include "kvm/virtio-pci-dev.h" 2 #include "kvm/virtio-net.h" 3 #include "kvm/virtio.h" 4 #include "kvm/mutex.h" 5 #include "kvm/util.h" 6 #include "kvm/kvm.h" 7 #include "kvm/irq.h" 8 #include "kvm/uip.h" 9 #include "kvm/guest_compat.h" 10 #include "kvm/iovec.h" 11 #include "kvm/strbuf.h" 12 13 #include <linux/vhost.h> 14 #include <linux/virtio_net.h> 15 #include <linux/if_tun.h> 16 #include <linux/types.h> 17 18 #include <arpa/inet.h> 19 #include <net/if.h> 20 21 #include <unistd.h> 22 #include <fcntl.h> 23 24 #include <sys/socket.h> 25 #include <sys/ioctl.h> 26 #include <sys/types.h> 27 #include <sys/wait.h> 28 #include <sys/eventfd.h> 29 30 #define VIRTIO_NET_QUEUE_SIZE 256 31 #define VIRTIO_NET_NUM_QUEUES 8 32 33 struct net_dev; 34 35 struct net_dev_operations { 36 int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); 37 int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); 38 }; 39 40 struct net_dev_queue { 41 int id; 42 struct net_dev *ndev; 43 struct virt_queue vq; 44 pthread_t thread; 45 struct mutex lock; 46 pthread_cond_t cond; 47 int gsi; 48 int irqfd; 49 }; 50 51 struct net_dev { 52 struct mutex mutex; 53 struct virtio_device vdev; 54 struct list_head list; 55 56 struct net_dev_queue queues[VIRTIO_NET_NUM_QUEUES * 2 + 1]; 57 struct virtio_net_config config; 58 u32 queue_pairs; 59 60 int vhost_fd; 61 int tap_fd; 62 char tap_name[IFNAMSIZ]; 63 bool tap_ufo; 64 65 int mode; 66 67 struct uip_info info; 68 struct net_dev_operations *ops; 69 struct kvm *kvm; 70 71 struct virtio_net_params *params; 72 }; 73 74 static LIST_HEAD(ndevs); 75 static int compat_id = -1; 76 77 #define MAX_PACKET_SIZE 65550 78 79 static bool has_virtio_feature(struct net_dev *ndev, u32 feature) 80 { 81 return ndev->vdev.features & (1 << feature); 82 } 83 84 static int virtio_net_hdr_len(struct net_dev *ndev) 85 { 86 if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) || 87 !ndev->vdev.legacy) 88 return sizeof(struct virtio_net_hdr_mrg_rxbuf); 89 90 return sizeof(struct virtio_net_hdr); 91 } 92 93 static void *virtio_net_rx_thread(void *p) 94 { 95 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 96 struct net_dev_queue *queue = p; 97 struct virt_queue *vq = &queue->vq; 98 struct net_dev *ndev = queue->ndev; 99 struct kvm *kvm; 100 u16 out, in; 101 u16 head; 102 int len, copied; 103 104 kvm__set_thread_name("virtio-net-rx"); 105 106 kvm = ndev->kvm; 107 while (1) { 108 mutex_lock(&queue->lock); 109 if (!virt_queue__available(vq)) 110 pthread_cond_wait(&queue->cond, &queue->lock.mutex); 111 mutex_unlock(&queue->lock); 112 113 while (virt_queue__available(vq)) { 114 unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)]; 115 struct iovec dummy_iov = { 116 .iov_base = buffer, 117 .iov_len = sizeof(buffer), 118 }; 119 struct virtio_net_hdr_mrg_rxbuf *hdr; 120 u16 num_buffers; 121 122 len = ndev->ops->rx(&dummy_iov, 1, ndev); 123 if (len < 0) { 124 pr_warning("%s: rx on vq %u failed (%d), exiting thread\n", 125 __func__, queue->id, len); 126 goto out_err; 127 } 128 129 copied = num_buffers = 0; 130 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 131 hdr = iov[0].iov_base; 132 while (copied < len) { 133 size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in)); 134 135 memcpy_toiovec(iov, buffer + copied, iovsize); 136 copied += iovsize; 137 virt_queue__set_used_elem_no_update(vq, head, iovsize, num_buffers++); 138 if (copied == len) 139 break; 140 while (!virt_queue__available(vq)) 141 sleep(0); 142 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 143 } 144 145 /* 146 * The device MUST set num_buffers, except in the case 147 * where the legacy driver did not negotiate 148 * VIRTIO_NET_F_MRG_RXBUF and the field does not exist. 149 */ 150 if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) || 151 !ndev->vdev.legacy) 152 hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers); 153 154 virt_queue__used_idx_advance(vq, num_buffers); 155 156 /* We should interrupt guest right now, otherwise latency is huge. */ 157 if (virtio_queue__should_signal(vq)) 158 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id); 159 } 160 } 161 162 out_err: 163 pthread_exit(NULL); 164 return NULL; 165 166 } 167 168 static void *virtio_net_tx_thread(void *p) 169 { 170 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 171 struct net_dev_queue *queue = p; 172 struct virt_queue *vq = &queue->vq; 173 struct net_dev *ndev = queue->ndev; 174 struct kvm *kvm; 175 u16 out, in; 176 u16 head; 177 int len; 178 179 kvm__set_thread_name("virtio-net-tx"); 180 181 kvm = ndev->kvm; 182 183 while (1) { 184 mutex_lock(&queue->lock); 185 if (!virt_queue__available(vq)) 186 pthread_cond_wait(&queue->cond, &queue->lock.mutex); 187 mutex_unlock(&queue->lock); 188 189 while (virt_queue__available(vq)) { 190 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 191 len = ndev->ops->tx(iov, out, ndev); 192 if (len < 0) { 193 pr_warning("%s: tx on vq %u failed (%d)\n", 194 __func__, queue->id, errno); 195 goto out_err; 196 } 197 198 virt_queue__set_used_elem(vq, head, len); 199 } 200 201 if (virtio_queue__should_signal(vq)) 202 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id); 203 } 204 205 out_err: 206 pthread_exit(NULL); 207 return NULL; 208 } 209 210 static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm* kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl) 211 { 212 /* Not much to do here */ 213 return VIRTIO_NET_OK; 214 } 215 216 static void *virtio_net_ctrl_thread(void *p) 217 { 218 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 219 struct net_dev_queue *queue = p; 220 struct virt_queue *vq = &queue->vq; 221 struct net_dev *ndev = queue->ndev; 222 u16 out, in, head; 223 struct kvm *kvm = ndev->kvm; 224 struct virtio_net_ctrl_hdr ctrl; 225 virtio_net_ctrl_ack ack; 226 size_t len; 227 228 kvm__set_thread_name("virtio-net-ctrl"); 229 230 while (1) { 231 mutex_lock(&queue->lock); 232 if (!virt_queue__available(vq)) 233 pthread_cond_wait(&queue->cond, &queue->lock.mutex); 234 mutex_unlock(&queue->lock); 235 236 while (virt_queue__available(vq)) { 237 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 238 len = min(iov_size(iov, in), sizeof(ctrl)); 239 memcpy_fromiovec((void *)&ctrl, iov, len); 240 241 switch (ctrl.class) { 242 case VIRTIO_NET_CTRL_MQ: 243 ack = virtio_net_handle_mq(kvm, ndev, &ctrl); 244 break; 245 default: 246 ack = VIRTIO_NET_ERR; 247 break; 248 } 249 memcpy_toiovec(iov + in, &ack, sizeof(ack)); 250 virt_queue__set_used_elem(vq, head, sizeof(ack)); 251 } 252 253 if (virtio_queue__should_signal(vq)) 254 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id); 255 } 256 257 pthread_exit(NULL); 258 259 return NULL; 260 } 261 262 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue) 263 { 264 struct net_dev_queue *net_queue = &ndev->queues[queue]; 265 266 if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) { 267 pr_warning("Unknown queue index %u", queue); 268 return; 269 } 270 271 mutex_lock(&net_queue->lock); 272 pthread_cond_signal(&net_queue->cond); 273 mutex_unlock(&net_queue->lock); 274 } 275 276 static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr, 277 const char *tapname) 278 { 279 int ret; 280 281 memset(ifr, 0, sizeof(*ifr)); 282 ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 283 if (tapname) 284 strlcpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name)); 285 286 ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr); 287 288 if (ret >= 0) 289 strlcpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name)); 290 return ret; 291 } 292 293 static int virtio_net_exec_script(const char* script, const char *tap_name) 294 { 295 pid_t pid; 296 int status; 297 298 pid = vfork(); 299 if (pid == 0) { 300 execl(script, script, tap_name, NULL); 301 _exit(1); 302 } else { 303 waitpid(pid, &status, 0); 304 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 305 pr_warning("Fail to setup tap by %s", script); 306 return -1; 307 } 308 } 309 return 0; 310 } 311 312 static bool virtio_net__tap_init(struct net_dev *ndev) 313 { 314 int sock = socket(AF_INET, SOCK_STREAM, 0); 315 int hdr_len; 316 struct sockaddr_in sin = {0}; 317 struct ifreq ifr; 318 const struct virtio_net_params *params = ndev->params; 319 bool skipconf = !!params->tapif; 320 321 hdr_len = virtio_net_hdr_len(ndev); 322 if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) 323 pr_warning("Config tap device TUNSETVNETHDRSZ error"); 324 325 if (strcmp(params->script, "none")) { 326 if (virtio_net_exec_script(params->script, ndev->tap_name) < 0) 327 goto fail; 328 } else if (!skipconf) { 329 memset(&ifr, 0, sizeof(ifr)); 330 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name)); 331 sin.sin_addr.s_addr = inet_addr(params->host_ip); 332 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 333 ifr.ifr_addr.sa_family = AF_INET; 334 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 335 pr_warning("Could not set ip address on tap device"); 336 goto fail; 337 } 338 } 339 340 if (!skipconf) { 341 memset(&ifr, 0, sizeof(ifr)); 342 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name)); 343 ioctl(sock, SIOCGIFFLAGS, &ifr); 344 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 345 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 346 pr_warning("Could not bring tap device up"); 347 } 348 349 close(sock); 350 351 return 1; 352 353 fail: 354 if (sock >= 0) 355 close(sock); 356 if (ndev->tap_fd >= 0) 357 close(ndev->tap_fd); 358 359 return 0; 360 } 361 362 static void virtio_net__tap_exit(struct net_dev *ndev) 363 { 364 int sock; 365 struct ifreq ifr; 366 367 if (ndev->params->tapif) 368 return; 369 370 sock = socket(AF_INET, SOCK_STREAM, 0); 371 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name)); 372 ioctl(sock, SIOCGIFFLAGS, &ifr); 373 ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING); 374 if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0) 375 pr_warning("Count not bring tap device down"); 376 close(sock); 377 } 378 379 static bool virtio_net__tap_create(struct net_dev *ndev) 380 { 381 int offload; 382 struct ifreq ifr; 383 const struct virtio_net_params *params = ndev->params; 384 bool macvtap = (!!params->tapif) && (params->tapif[0] == '/'); 385 386 /* Did the user already gave us the FD? */ 387 if (params->fd) 388 ndev->tap_fd = params->fd; 389 else { 390 const char *tap_file = "/dev/net/tun"; 391 392 /* Did the user ask us to use macvtap? */ 393 if (macvtap) 394 tap_file = params->tapif; 395 396 ndev->tap_fd = open(tap_file, O_RDWR); 397 if (ndev->tap_fd < 0) { 398 pr_warning("Unable to open %s", tap_file); 399 return 0; 400 } 401 } 402 403 if (!macvtap && 404 virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) { 405 pr_warning("Config tap device error. Are you root?"); 406 goto fail; 407 } 408 409 /* 410 * The UFO support had been removed from kernel in commit: 411 * ID: fb652fdfe83710da0ca13448a41b7ed027d0a984 412 * https://www.spinics.net/lists/netdev/msg443562.html 413 * In oder to support the older kernels without this commit, 414 * we set the TUN_F_UFO to offload by default to test the status of 415 * UFO kernel support. 416 */ 417 ndev->tap_ufo = true; 418 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 419 if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) { 420 /* 421 * Is this failure caused by kernel remove the UFO support? 422 * Try TUNSETOFFLOAD without TUN_F_UFO. 423 */ 424 offload &= ~TUN_F_UFO; 425 if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) { 426 pr_warning("Config tap device TUNSETOFFLOAD error"); 427 goto fail; 428 } 429 ndev->tap_ufo = false; 430 } 431 432 return 1; 433 434 fail: 435 if ((ndev->tap_fd >= 0) || (!params->fd) ) 436 close(ndev->tap_fd); 437 438 return 0; 439 } 440 441 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 442 { 443 return writev(ndev->tap_fd, iov, out); 444 } 445 446 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 447 { 448 return readv(ndev->tap_fd, iov, in); 449 } 450 451 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 452 { 453 return uip_tx(iov, out, &ndev->info); 454 } 455 456 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 457 { 458 return uip_rx(iov, in, &ndev->info); 459 } 460 461 static struct net_dev_operations tap_ops = { 462 .rx = tap_ops_rx, 463 .tx = tap_ops_tx, 464 }; 465 466 static struct net_dev_operations uip_ops = { 467 .rx = uip_ops_rx, 468 .tx = uip_ops_tx, 469 }; 470 471 static u8 *get_config(struct kvm *kvm, void *dev) 472 { 473 struct net_dev *ndev = dev; 474 475 return ((u8 *)(&ndev->config)); 476 } 477 478 static size_t get_config_size(struct kvm *kvm, void *dev) 479 { 480 struct net_dev *ndev = dev; 481 482 return sizeof(ndev->config); 483 } 484 485 static u64 get_host_features(struct kvm *kvm, void *dev) 486 { 487 u64 features; 488 struct net_dev *ndev = dev; 489 490 features = 1UL << VIRTIO_NET_F_MAC 491 | 1UL << VIRTIO_NET_F_CSUM 492 | 1UL << VIRTIO_NET_F_HOST_TSO4 493 | 1UL << VIRTIO_NET_F_HOST_TSO6 494 | 1UL << VIRTIO_NET_F_GUEST_TSO4 495 | 1UL << VIRTIO_NET_F_GUEST_TSO6 496 | 1UL << VIRTIO_RING_F_EVENT_IDX 497 | 1UL << VIRTIO_RING_F_INDIRECT_DESC 498 | 1UL << VIRTIO_NET_F_CTRL_VQ 499 | 1UL << VIRTIO_NET_F_MRG_RXBUF 500 | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0) 501 | 1UL << VIRTIO_F_ANY_LAYOUT; 502 503 /* 504 * The UFO feature for host and guest only can be enabled when the 505 * kernel has TAP UFO support. 506 */ 507 if (ndev->tap_ufo) 508 features |= (1UL << VIRTIO_NET_F_HOST_UFO 509 | 1UL << VIRTIO_NET_F_GUEST_UFO); 510 511 return features; 512 } 513 514 static int virtio_net__vhost_set_features(struct net_dev *ndev) 515 { 516 u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX; 517 u64 vhost_features; 518 519 if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0) 520 die_perror("VHOST_GET_FEATURES failed"); 521 522 /* make sure both side support mergable rx buffers */ 523 if (vhost_features & 1UL << VIRTIO_NET_F_MRG_RXBUF && 524 has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF)) 525 features |= 1UL << VIRTIO_NET_F_MRG_RXBUF; 526 527 return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features); 528 } 529 530 static void virtio_net_start(struct net_dev *ndev) 531 { 532 if (ndev->mode == NET_MODE_TAP) { 533 if (!virtio_net__tap_init(ndev)) 534 die_perror("TAP device initialized failed because"); 535 536 if (ndev->vhost_fd && 537 virtio_net__vhost_set_features(ndev) != 0) 538 die_perror("VHOST_SET_FEATURES failed"); 539 } else { 540 ndev->info.vnet_hdr_len = virtio_net_hdr_len(ndev); 541 uip_init(&ndev->info); 542 } 543 } 544 545 static void virtio_net_stop(struct net_dev *ndev) 546 { 547 /* Undo whatever start() did */ 548 if (ndev->mode == NET_MODE_TAP) 549 virtio_net__tap_exit(ndev); 550 else 551 uip_exit(&ndev->info); 552 } 553 554 static void virtio_net_update_endian(struct net_dev *ndev) 555 { 556 struct virtio_net_config *conf = &ndev->config; 557 558 conf->status = virtio_host_to_guest_u16(&ndev->vdev, 559 VIRTIO_NET_S_LINK_UP); 560 conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev, 561 ndev->queue_pairs); 562 563 /* Let TAP know about vnet header endianness */ 564 if (ndev->mode == NET_MODE_TAP && 565 ndev->vdev.endian != VIRTIO_ENDIAN_HOST) { 566 int enable_val = 1, disable_val = 0; 567 int enable_req, disable_req; 568 569 if (ndev->vdev.endian == VIRTIO_ENDIAN_LE) { 570 enable_req = TUNSETVNETLE; 571 disable_req = TUNSETVNETBE; 572 } else { 573 enable_req = TUNSETVNETBE; 574 disable_req = TUNSETVNETLE; 575 } 576 577 ioctl(ndev->tap_fd, disable_req, &disable_val); 578 if (ioctl(ndev->tap_fd, enable_req, &enable_val) < 0) 579 pr_err("Config tap device TUNSETVNETLE/BE error"); 580 } 581 } 582 583 static void notify_status(struct kvm *kvm, void *dev, u32 status) 584 { 585 struct net_dev *ndev = dev; 586 587 if (status & VIRTIO__STATUS_CONFIG) 588 virtio_net_update_endian(ndev); 589 590 if (status & VIRTIO__STATUS_START) 591 virtio_net_start(dev); 592 else if (status & VIRTIO__STATUS_STOP) 593 virtio_net_stop(dev); 594 } 595 596 static bool is_ctrl_vq(struct net_dev *ndev, u32 vq) 597 { 598 return vq == (u32)(ndev->queue_pairs * 2); 599 } 600 601 static int init_vq(struct kvm *kvm, void *dev, u32 vq) 602 { 603 struct vhost_vring_state state = { .index = vq }; 604 struct vhost_vring_file file = { .index = vq }; 605 struct net_dev_queue *net_queue; 606 struct vhost_vring_addr addr; 607 struct net_dev *ndev = dev; 608 struct virt_queue *queue; 609 int r; 610 611 compat__remove_message(compat_id); 612 613 net_queue = &ndev->queues[vq]; 614 net_queue->id = vq; 615 net_queue->ndev = ndev; 616 queue = &net_queue->vq; 617 virtio_init_device_vq(kvm, &ndev->vdev, queue, VIRTIO_NET_QUEUE_SIZE); 618 619 mutex_init(&net_queue->lock); 620 pthread_cond_init(&net_queue->cond, NULL); 621 if (is_ctrl_vq(ndev, vq)) { 622 pthread_create(&net_queue->thread, NULL, virtio_net_ctrl_thread, 623 net_queue); 624 625 return 0; 626 } else if (ndev->vhost_fd == 0 ) { 627 if (vq & 1) 628 pthread_create(&net_queue->thread, NULL, 629 virtio_net_tx_thread, net_queue); 630 else 631 pthread_create(&net_queue->thread, NULL, 632 virtio_net_rx_thread, net_queue); 633 634 return 0; 635 } 636 637 if (queue->endian != VIRTIO_ENDIAN_HOST) 638 die_perror("VHOST requires the same endianness in guest and host"); 639 640 state.num = queue->vring.num; 641 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state); 642 if (r < 0) 643 die_perror("VHOST_SET_VRING_NUM failed"); 644 state.num = 0; 645 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state); 646 if (r < 0) 647 die_perror("VHOST_SET_VRING_BASE failed"); 648 649 addr = (struct vhost_vring_addr) { 650 .index = vq, 651 .desc_user_addr = (u64)(unsigned long)queue->vring.desc, 652 .avail_user_addr = (u64)(unsigned long)queue->vring.avail, 653 .used_user_addr = (u64)(unsigned long)queue->vring.used, 654 }; 655 656 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr); 657 if (r < 0) 658 die_perror("VHOST_SET_VRING_ADDR failed"); 659 660 file.fd = ndev->tap_fd; 661 r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file); 662 if (r < 0) 663 die_perror("VHOST_NET_SET_BACKEND failed"); 664 665 return 0; 666 } 667 668 static void exit_vq(struct kvm *kvm, void *dev, u32 vq) 669 { 670 struct net_dev *ndev = dev; 671 struct net_dev_queue *queue = &ndev->queues[vq]; 672 673 if (!is_ctrl_vq(ndev, vq) && queue->gsi) { 674 irq__del_irqfd(kvm, queue->gsi, queue->irqfd); 675 close(queue->irqfd); 676 queue->gsi = queue->irqfd = 0; 677 } 678 679 /* 680 * TODO: vhost reset owner. It's the only way to cleanly stop vhost, but 681 * we can't restart it at the moment. 682 */ 683 if (ndev->vhost_fd && !is_ctrl_vq(ndev, vq)) { 684 pr_warning("Cannot reset VHOST queue"); 685 ioctl(ndev->vhost_fd, VHOST_RESET_OWNER); 686 return; 687 } 688 689 /* 690 * Threads are waiting on cancellation points (readv or 691 * pthread_cond_wait) and should stop gracefully. 692 */ 693 pthread_cancel(queue->thread); 694 pthread_join(queue->thread, NULL); 695 } 696 697 static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi) 698 { 699 struct net_dev *ndev = dev; 700 struct net_dev_queue *queue = &ndev->queues[vq]; 701 struct vhost_vring_file file; 702 int r; 703 704 if (ndev->vhost_fd == 0) 705 return; 706 707 file = (struct vhost_vring_file) { 708 .index = vq, 709 .fd = eventfd(0, 0), 710 }; 711 712 r = irq__add_irqfd(kvm, gsi, file.fd, -1); 713 if (r < 0) 714 die_perror("KVM_IRQFD failed"); 715 716 queue->irqfd = file.fd; 717 queue->gsi = gsi; 718 719 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file); 720 if (r < 0) 721 die_perror("VHOST_SET_VRING_CALL failed"); 722 } 723 724 static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd) 725 { 726 struct net_dev *ndev = dev; 727 struct vhost_vring_file file = { 728 .index = vq, 729 .fd = efd, 730 }; 731 int r; 732 733 if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq)) 734 return; 735 736 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file); 737 if (r < 0) 738 die_perror("VHOST_SET_VRING_KICK failed"); 739 } 740 741 static int notify_vq(struct kvm *kvm, void *dev, u32 vq) 742 { 743 struct net_dev *ndev = dev; 744 745 virtio_net_handle_callback(kvm, ndev, vq); 746 747 return 0; 748 } 749 750 static struct virt_queue *get_vq(struct kvm *kvm, void *dev, u32 vq) 751 { 752 struct net_dev *ndev = dev; 753 754 return &ndev->queues[vq].vq; 755 } 756 757 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq) 758 { 759 /* FIXME: dynamic */ 760 return VIRTIO_NET_QUEUE_SIZE; 761 } 762 763 static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size) 764 { 765 /* FIXME: dynamic */ 766 return size; 767 } 768 769 static unsigned int get_vq_count(struct kvm *kvm, void *dev) 770 { 771 struct net_dev *ndev = dev; 772 773 return ndev->queue_pairs * 2 + 1; 774 } 775 776 static struct virtio_ops net_dev_virtio_ops = { 777 .get_config = get_config, 778 .get_config_size = get_config_size, 779 .get_host_features = get_host_features, 780 .get_vq_count = get_vq_count, 781 .init_vq = init_vq, 782 .exit_vq = exit_vq, 783 .get_vq = get_vq, 784 .get_size_vq = get_size_vq, 785 .set_size_vq = set_size_vq, 786 .notify_vq = notify_vq, 787 .notify_vq_gsi = notify_vq_gsi, 788 .notify_vq_eventfd = notify_vq_eventfd, 789 .notify_status = notify_status, 790 }; 791 792 static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev) 793 { 794 struct kvm_mem_bank *bank; 795 struct vhost_memory *mem; 796 int r, i; 797 798 ndev->vhost_fd = open("/dev/vhost-net", O_RDWR); 799 if (ndev->vhost_fd < 0) 800 die_perror("Failed openning vhost-net device"); 801 802 mem = calloc(1, sizeof(*mem) + kvm->mem_slots * sizeof(struct vhost_memory_region)); 803 if (mem == NULL) 804 die("Failed allocating memory for vhost memory map"); 805 806 i = 0; 807 list_for_each_entry(bank, &kvm->mem_banks, list) { 808 mem->regions[i] = (struct vhost_memory_region) { 809 .guest_phys_addr = bank->guest_phys_addr, 810 .memory_size = bank->size, 811 .userspace_addr = (unsigned long)bank->host_addr, 812 }; 813 i++; 814 } 815 mem->nregions = i; 816 817 r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER); 818 if (r != 0) 819 die_perror("VHOST_SET_OWNER failed"); 820 821 r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem); 822 if (r != 0) 823 die_perror("VHOST_SET_MEM_TABLE failed"); 824 825 ndev->vdev.use_vhost = true; 826 827 free(mem); 828 } 829 830 static inline void str_to_mac(const char *str, char *mac) 831 { 832 sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 833 mac, mac+1, mac+2, mac+3, mac+4, mac+5); 834 } 835 static int set_net_param(struct kvm *kvm, struct virtio_net_params *p, 836 const char *param, const char *val) 837 { 838 if (strcmp(param, "guest_mac") == 0) { 839 str_to_mac(val, p->guest_mac); 840 } else if (strcmp(param, "mode") == 0) { 841 if (!strncmp(val, "user", 4)) { 842 int i; 843 844 for (i = 0; i < kvm->cfg.num_net_devices; i++) 845 if (kvm->cfg.net_params[i].mode == NET_MODE_USER) 846 die("Only one usermode network device allowed at a time"); 847 p->mode = NET_MODE_USER; 848 } else if (!strncmp(val, "tap", 3)) { 849 p->mode = NET_MODE_TAP; 850 } else if (!strncmp(val, "none", 4)) { 851 kvm->cfg.no_net = 1; 852 return -1; 853 } else 854 die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network); 855 } else if (strcmp(param, "script") == 0) { 856 p->script = strdup(val); 857 } else if (strcmp(param, "downscript") == 0) { 858 p->downscript = strdup(val); 859 } else if (strcmp(param, "guest_ip") == 0) { 860 p->guest_ip = strdup(val); 861 } else if (strcmp(param, "host_ip") == 0) { 862 p->host_ip = strdup(val); 863 } else if (strcmp(param, "trans") == 0) { 864 p->trans = strdup(val); 865 } else if (strcmp(param, "tapif") == 0) { 866 p->tapif = strdup(val); 867 } else if (strcmp(param, "vhost") == 0) { 868 p->vhost = atoi(val); 869 } else if (strcmp(param, "fd") == 0) { 870 p->fd = atoi(val); 871 } else if (strcmp(param, "mq") == 0) { 872 p->mq = atoi(val); 873 } else 874 die("Unknown network parameter %s", param); 875 876 return 0; 877 } 878 879 int netdev_parser(const struct option *opt, const char *arg, int unset) 880 { 881 struct virtio_net_params p; 882 char *buf = NULL, *cmd = NULL, *cur = NULL; 883 bool on_cmd = true; 884 struct kvm *kvm = opt->ptr; 885 886 if (arg) { 887 buf = strdup(arg); 888 if (buf == NULL) 889 die("Failed allocating new net buffer"); 890 cur = strtok(buf, ",="); 891 } 892 893 p = (struct virtio_net_params) { 894 .guest_ip = DEFAULT_GUEST_ADDR, 895 .host_ip = DEFAULT_HOST_ADDR, 896 .script = DEFAULT_SCRIPT, 897 .downscript = DEFAULT_SCRIPT, 898 .mode = NET_MODE_TAP, 899 }; 900 901 str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac); 902 p.guest_mac[5] += kvm->cfg.num_net_devices; 903 904 while (cur) { 905 if (on_cmd) { 906 cmd = cur; 907 } else { 908 if (set_net_param(kvm, &p, cmd, cur) < 0) 909 goto done; 910 } 911 on_cmd = !on_cmd; 912 913 cur = strtok(NULL, ",="); 914 }; 915 916 kvm->cfg.num_net_devices++; 917 918 kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params)); 919 if (kvm->cfg.net_params == NULL) 920 die("Failed adding new network device"); 921 922 kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p; 923 924 done: 925 free(buf); 926 return 0; 927 } 928 929 static int virtio_net__init_one(struct virtio_net_params *params) 930 { 931 int i, r; 932 struct net_dev *ndev; 933 struct virtio_ops *ops; 934 enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm); 935 936 ndev = calloc(1, sizeof(struct net_dev)); 937 if (ndev == NULL) 938 return -ENOMEM; 939 940 list_add_tail(&ndev->list, &ndevs); 941 942 ops = malloc(sizeof(*ops)); 943 if (ops == NULL) 944 return -ENOMEM; 945 946 ndev->kvm = params->kvm; 947 ndev->params = params; 948 949 mutex_init(&ndev->mutex); 950 ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq)); 951 952 for (i = 0 ; i < 6 ; i++) { 953 ndev->config.mac[i] = params->guest_mac[i]; 954 ndev->info.guest_mac.addr[i] = params->guest_mac[i]; 955 ndev->info.host_mac.addr[i] = params->host_mac[i]; 956 } 957 958 ndev->mode = params->mode; 959 if (ndev->mode == NET_MODE_TAP) { 960 ndev->ops = &tap_ops; 961 if (!virtio_net__tap_create(ndev)) 962 die_perror("You have requested a TAP device, but creation of one has failed because"); 963 } else { 964 ndev->info.host_ip = ntohl(inet_addr(params->host_ip)); 965 ndev->info.guest_ip = ntohl(inet_addr(params->guest_ip)); 966 ndev->info.guest_netmask = ntohl(inet_addr("255.255.255.0")); 967 ndev->info.buf_nr = 20, 968 ndev->ops = &uip_ops; 969 uip_static_init(&ndev->info); 970 } 971 972 *ops = net_dev_virtio_ops; 973 974 if (params->trans) { 975 if (strcmp(params->trans, "mmio") == 0) 976 trans = VIRTIO_MMIO; 977 else if (strcmp(params->trans, "pci") == 0) 978 trans = VIRTIO_PCI; 979 else 980 pr_warning("virtio-net: Unknown transport method : %s, " 981 "falling back to %s.", params->trans, 982 virtio_trans_name(trans)); 983 } 984 985 r = virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans, 986 PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET); 987 if (r < 0) { 988 free(ops); 989 return r; 990 } 991 992 if (params->vhost) 993 virtio_net__vhost_init(params->kvm, ndev); 994 995 if (compat_id == -1) 996 compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET"); 997 998 return 0; 999 } 1000 1001 int virtio_net__init(struct kvm *kvm) 1002 { 1003 int i, r; 1004 1005 for (i = 0; i < kvm->cfg.num_net_devices; i++) { 1006 kvm->cfg.net_params[i].kvm = kvm; 1007 r = virtio_net__init_one(&kvm->cfg.net_params[i]); 1008 if (r < 0) 1009 goto cleanup; 1010 } 1011 1012 if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) { 1013 static struct virtio_net_params net_params; 1014 1015 net_params = (struct virtio_net_params) { 1016 .guest_ip = kvm->cfg.guest_ip, 1017 .host_ip = kvm->cfg.host_ip, 1018 .kvm = kvm, 1019 .script = kvm->cfg.script, 1020 .mode = NET_MODE_USER, 1021 }; 1022 str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac); 1023 str_to_mac(kvm->cfg.host_mac, net_params.host_mac); 1024 1025 r = virtio_net__init_one(&net_params); 1026 if (r < 0) 1027 goto cleanup; 1028 } 1029 1030 return 0; 1031 1032 cleanup: 1033 virtio_net__exit(kvm); 1034 return r; 1035 } 1036 virtio_dev_init(virtio_net__init); 1037 1038 int virtio_net__exit(struct kvm *kvm) 1039 { 1040 struct virtio_net_params *params; 1041 struct net_dev *ndev; 1042 struct list_head *ptr, *n; 1043 1044 list_for_each_safe(ptr, n, &ndevs) { 1045 ndev = list_entry(ptr, struct net_dev, list); 1046 params = ndev->params; 1047 /* Cleanup any tap device which attached to bridge */ 1048 if (ndev->mode == NET_MODE_TAP && 1049 strcmp(params->downscript, "none")) 1050 virtio_net_exec_script(params->downscript, ndev->tap_name); 1051 1052 list_del(&ndev->list); 1053 free(ndev); 1054 } 1055 return 0; 1056 } 1057 virtio_dev_exit(virtio_net__exit); 1058