1 #include "kvm/virtio-pci-dev.h" 2 #include "kvm/virtio-net.h" 3 #include "kvm/virtio.h" 4 #include "kvm/mutex.h" 5 #include "kvm/util.h" 6 #include "kvm/kvm.h" 7 #include "kvm/irq.h" 8 #include "kvm/uip.h" 9 #include "kvm/guest_compat.h" 10 #include "kvm/iovec.h" 11 #include "kvm/strbuf.h" 12 13 #include <linux/vhost.h> 14 #include <linux/virtio_net.h> 15 #include <linux/if_tun.h> 16 #include <linux/types.h> 17 18 #include <arpa/inet.h> 19 #include <net/if.h> 20 21 #include <unistd.h> 22 #include <fcntl.h> 23 24 #include <sys/socket.h> 25 #include <sys/ioctl.h> 26 #include <sys/types.h> 27 #include <sys/wait.h> 28 #include <sys/eventfd.h> 29 30 #define VIRTIO_NET_QUEUE_SIZE 256 31 #define VIRTIO_NET_NUM_QUEUES 8 32 33 struct net_dev; 34 35 struct net_dev_operations { 36 int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); 37 int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); 38 }; 39 40 struct net_dev_queue { 41 int id; 42 struct net_dev *ndev; 43 struct virt_queue vq; 44 pthread_t thread; 45 struct mutex lock; 46 pthread_cond_t cond; 47 int gsi; 48 int irqfd; 49 }; 50 51 struct net_dev { 52 struct mutex mutex; 53 struct virtio_device vdev; 54 struct list_head list; 55 56 struct net_dev_queue queues[VIRTIO_NET_NUM_QUEUES * 2 + 1]; 57 struct virtio_net_config config; 58 u32 features, queue_pairs; 59 60 int vhost_fd; 61 int tap_fd; 62 char tap_name[IFNAMSIZ]; 63 bool tap_ufo; 64 65 int mode; 66 67 struct uip_info info; 68 struct net_dev_operations *ops; 69 struct kvm *kvm; 70 71 struct virtio_net_params *params; 72 }; 73 74 static LIST_HEAD(ndevs); 75 static int compat_id = -1; 76 77 #define MAX_PACKET_SIZE 65550 78 79 static bool has_virtio_feature(struct net_dev *ndev, u32 feature) 80 { 81 return ndev->features & (1 << feature); 82 } 83 84 static void virtio_net_fix_tx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev) 85 { 86 hdr->hdr_len = virtio_guest_to_host_u16(&ndev->vdev, hdr->hdr_len); 87 hdr->gso_size = virtio_guest_to_host_u16(&ndev->vdev, hdr->gso_size); 88 hdr->csum_start = virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_start); 89 hdr->csum_offset = virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_offset); 90 } 91 92 static void virtio_net_fix_rx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev) 93 { 94 hdr->hdr_len = virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr_len); 95 hdr->gso_size = virtio_host_to_guest_u16(&ndev->vdev, hdr->gso_size); 96 hdr->csum_start = virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_start); 97 hdr->csum_offset = virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_offset); 98 } 99 100 static void *virtio_net_rx_thread(void *p) 101 { 102 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 103 struct net_dev_queue *queue = p; 104 struct virt_queue *vq = &queue->vq; 105 struct net_dev *ndev = queue->ndev; 106 struct kvm *kvm; 107 u16 out, in; 108 u16 head; 109 int len, copied; 110 111 kvm__set_thread_name("virtio-net-rx"); 112 113 kvm = ndev->kvm; 114 while (1) { 115 mutex_lock(&queue->lock); 116 if (!virt_queue__available(vq)) 117 pthread_cond_wait(&queue->cond, &queue->lock.mutex); 118 mutex_unlock(&queue->lock); 119 120 while (virt_queue__available(vq)) { 121 unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)]; 122 struct iovec dummy_iov = { 123 .iov_base = buffer, 124 .iov_len = sizeof(buffer), 125 }; 126 struct virtio_net_hdr_mrg_rxbuf *hdr; 127 u16 num_buffers; 128 129 len = ndev->ops->rx(&dummy_iov, 1, ndev); 130 if (len < 0) { 131 pr_warning("%s: rx on vq %u failed (%d), exiting thread\n", 132 __func__, queue->id, len); 133 goto out_err; 134 } 135 136 copied = num_buffers = 0; 137 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 138 hdr = iov[0].iov_base; 139 while (copied < len) { 140 size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in)); 141 142 memcpy_toiovec(iov, buffer + copied, iovsize); 143 copied += iovsize; 144 virt_queue__set_used_elem_no_update(vq, head, iovsize, num_buffers++); 145 if (copied == len) 146 break; 147 while (!virt_queue__available(vq)) 148 sleep(0); 149 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 150 } 151 152 virtio_net_fix_rx_hdr(&hdr->hdr, ndev); 153 if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF)) 154 hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers); 155 156 virt_queue__used_idx_advance(vq, num_buffers); 157 158 /* We should interrupt guest right now, otherwise latency is huge. */ 159 if (virtio_queue__should_signal(vq)) 160 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id); 161 } 162 } 163 164 out_err: 165 pthread_exit(NULL); 166 return NULL; 167 168 } 169 170 static void *virtio_net_tx_thread(void *p) 171 { 172 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 173 struct net_dev_queue *queue = p; 174 struct virt_queue *vq = &queue->vq; 175 struct net_dev *ndev = queue->ndev; 176 struct kvm *kvm; 177 u16 out, in; 178 u16 head; 179 int len; 180 181 kvm__set_thread_name("virtio-net-tx"); 182 183 kvm = ndev->kvm; 184 185 while (1) { 186 mutex_lock(&queue->lock); 187 if (!virt_queue__available(vq)) 188 pthread_cond_wait(&queue->cond, &queue->lock.mutex); 189 mutex_unlock(&queue->lock); 190 191 while (virt_queue__available(vq)) { 192 struct virtio_net_hdr *hdr; 193 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 194 hdr = iov[0].iov_base; 195 virtio_net_fix_tx_hdr(hdr, ndev); 196 len = ndev->ops->tx(iov, out, ndev); 197 if (len < 0) { 198 pr_warning("%s: tx on vq %u failed (%d)\n", 199 __func__, queue->id, errno); 200 goto out_err; 201 } 202 203 virt_queue__set_used_elem(vq, head, len); 204 } 205 206 if (virtio_queue__should_signal(vq)) 207 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id); 208 } 209 210 out_err: 211 pthread_exit(NULL); 212 return NULL; 213 } 214 215 static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm* kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl) 216 { 217 /* Not much to do here */ 218 return VIRTIO_NET_OK; 219 } 220 221 static void *virtio_net_ctrl_thread(void *p) 222 { 223 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 224 struct net_dev_queue *queue = p; 225 struct virt_queue *vq = &queue->vq; 226 struct net_dev *ndev = queue->ndev; 227 u16 out, in, head; 228 struct kvm *kvm = ndev->kvm; 229 struct virtio_net_ctrl_hdr *ctrl; 230 virtio_net_ctrl_ack *ack; 231 232 kvm__set_thread_name("virtio-net-ctrl"); 233 234 while (1) { 235 mutex_lock(&queue->lock); 236 if (!virt_queue__available(vq)) 237 pthread_cond_wait(&queue->cond, &queue->lock.mutex); 238 mutex_unlock(&queue->lock); 239 240 while (virt_queue__available(vq)) { 241 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 242 ctrl = iov[0].iov_base; 243 ack = iov[out].iov_base; 244 245 switch (ctrl->class) { 246 case VIRTIO_NET_CTRL_MQ: 247 *ack = virtio_net_handle_mq(kvm, ndev, ctrl); 248 break; 249 default: 250 *ack = VIRTIO_NET_ERR; 251 break; 252 } 253 virt_queue__set_used_elem(vq, head, iov[out].iov_len); 254 } 255 256 if (virtio_queue__should_signal(vq)) 257 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id); 258 } 259 260 pthread_exit(NULL); 261 262 return NULL; 263 } 264 265 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue) 266 { 267 struct net_dev_queue *net_queue = &ndev->queues[queue]; 268 269 if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) { 270 pr_warning("Unknown queue index %u", queue); 271 return; 272 } 273 274 mutex_lock(&net_queue->lock); 275 pthread_cond_signal(&net_queue->cond); 276 mutex_unlock(&net_queue->lock); 277 } 278 279 static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr, 280 const char *tapname) 281 { 282 int ret; 283 284 memset(ifr, 0, sizeof(*ifr)); 285 ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 286 if (tapname) 287 strlcpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name)); 288 289 ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr); 290 291 if (ret >= 0) 292 strlcpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name)); 293 return ret; 294 } 295 296 static int virtio_net_exec_script(const char* script, const char *tap_name) 297 { 298 pid_t pid; 299 int status; 300 301 pid = fork(); 302 if (pid == 0) { 303 execl(script, script, tap_name, NULL); 304 _exit(1); 305 } else { 306 waitpid(pid, &status, 0); 307 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 308 pr_warning("Fail to setup tap by %s", script); 309 return -1; 310 } 311 } 312 return 0; 313 } 314 315 static bool virtio_net__tap_init(struct net_dev *ndev) 316 { 317 int sock = socket(AF_INET, SOCK_STREAM, 0); 318 int hdr_len; 319 struct sockaddr_in sin = {0}; 320 struct ifreq ifr; 321 const struct virtio_net_params *params = ndev->params; 322 bool skipconf = !!params->tapif; 323 324 hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ? 325 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 326 sizeof(struct virtio_net_hdr); 327 if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) 328 pr_warning("Config tap device TUNSETVNETHDRSZ error"); 329 330 if (strcmp(params->script, "none")) { 331 if (virtio_net_exec_script(params->script, ndev->tap_name) < 0) 332 goto fail; 333 } else if (!skipconf) { 334 memset(&ifr, 0, sizeof(ifr)); 335 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name)); 336 sin.sin_addr.s_addr = inet_addr(params->host_ip); 337 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 338 ifr.ifr_addr.sa_family = AF_INET; 339 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 340 pr_warning("Could not set ip address on tap device"); 341 goto fail; 342 } 343 } 344 345 if (!skipconf) { 346 memset(&ifr, 0, sizeof(ifr)); 347 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name)); 348 ioctl(sock, SIOCGIFFLAGS, &ifr); 349 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 350 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 351 pr_warning("Could not bring tap device up"); 352 } 353 354 close(sock); 355 356 return 1; 357 358 fail: 359 if (sock >= 0) 360 close(sock); 361 if (ndev->tap_fd >= 0) 362 close(ndev->tap_fd); 363 364 return 0; 365 } 366 367 static void virtio_net__tap_exit(struct net_dev *ndev) 368 { 369 int sock; 370 struct ifreq ifr; 371 372 if (ndev->params->tapif) 373 return; 374 375 sock = socket(AF_INET, SOCK_STREAM, 0); 376 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name)); 377 ioctl(sock, SIOCGIFFLAGS, &ifr); 378 ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING); 379 if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0) 380 pr_warning("Count not bring tap device down"); 381 close(sock); 382 } 383 384 static bool virtio_net__tap_create(struct net_dev *ndev) 385 { 386 int offload; 387 struct ifreq ifr; 388 const struct virtio_net_params *params = ndev->params; 389 bool macvtap = (!!params->tapif) && (params->tapif[0] == '/'); 390 391 /* Did the user already gave us the FD? */ 392 if (params->fd) 393 ndev->tap_fd = params->fd; 394 else { 395 const char *tap_file = "/dev/net/tun"; 396 397 /* Did the user ask us to use macvtap? */ 398 if (macvtap) 399 tap_file = params->tapif; 400 401 ndev->tap_fd = open(tap_file, O_RDWR); 402 if (ndev->tap_fd < 0) { 403 pr_warning("Unable to open %s", tap_file); 404 return 0; 405 } 406 } 407 408 if (!macvtap && 409 virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) { 410 pr_warning("Config tap device error. Are you root?"); 411 goto fail; 412 } 413 414 /* 415 * The UFO support had been removed from kernel in commit: 416 * ID: fb652fdfe83710da0ca13448a41b7ed027d0a984 417 * https://www.spinics.net/lists/netdev/msg443562.html 418 * In oder to support the older kernels without this commit, 419 * we set the TUN_F_UFO to offload by default to test the status of 420 * UFO kernel support. 421 */ 422 ndev->tap_ufo = true; 423 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 424 if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) { 425 /* 426 * Is this failure caused by kernel remove the UFO support? 427 * Try TUNSETOFFLOAD without TUN_F_UFO. 428 */ 429 offload &= ~TUN_F_UFO; 430 if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) { 431 pr_warning("Config tap device TUNSETOFFLOAD error"); 432 goto fail; 433 } 434 ndev->tap_ufo = false; 435 } 436 437 return 1; 438 439 fail: 440 if ((ndev->tap_fd >= 0) || (!params->fd) ) 441 close(ndev->tap_fd); 442 443 return 0; 444 } 445 446 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 447 { 448 return writev(ndev->tap_fd, iov, out); 449 } 450 451 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 452 { 453 return readv(ndev->tap_fd, iov, in); 454 } 455 456 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 457 { 458 return uip_tx(iov, out, &ndev->info); 459 } 460 461 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 462 { 463 return uip_rx(iov, in, &ndev->info); 464 } 465 466 static struct net_dev_operations tap_ops = { 467 .rx = tap_ops_rx, 468 .tx = tap_ops_tx, 469 }; 470 471 static struct net_dev_operations uip_ops = { 472 .rx = uip_ops_rx, 473 .tx = uip_ops_tx, 474 }; 475 476 static u8 *get_config(struct kvm *kvm, void *dev) 477 { 478 struct net_dev *ndev = dev; 479 480 return ((u8 *)(&ndev->config)); 481 } 482 483 static size_t get_config_size(struct kvm *kvm, void *dev) 484 { 485 struct net_dev *ndev = dev; 486 487 return sizeof(ndev->config); 488 } 489 490 static u32 get_host_features(struct kvm *kvm, void *dev) 491 { 492 u32 features; 493 struct net_dev *ndev = dev; 494 495 features = 1UL << VIRTIO_NET_F_MAC 496 | 1UL << VIRTIO_NET_F_CSUM 497 | 1UL << VIRTIO_NET_F_HOST_TSO4 498 | 1UL << VIRTIO_NET_F_HOST_TSO6 499 | 1UL << VIRTIO_NET_F_GUEST_TSO4 500 | 1UL << VIRTIO_NET_F_GUEST_TSO6 501 | 1UL << VIRTIO_RING_F_EVENT_IDX 502 | 1UL << VIRTIO_RING_F_INDIRECT_DESC 503 | 1UL << VIRTIO_NET_F_CTRL_VQ 504 | 1UL << VIRTIO_NET_F_MRG_RXBUF 505 | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0); 506 507 /* 508 * The UFO feature for host and guest only can be enabled when the 509 * kernel has TAP UFO support. 510 */ 511 if (ndev->tap_ufo) 512 features |= (1UL << VIRTIO_NET_F_HOST_UFO 513 | 1UL << VIRTIO_NET_F_GUEST_UFO); 514 515 return features; 516 } 517 518 static int virtio_net__vhost_set_features(struct net_dev *ndev) 519 { 520 u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX; 521 u64 vhost_features; 522 523 if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0) 524 die_perror("VHOST_GET_FEATURES failed"); 525 526 /* make sure both side support mergable rx buffers */ 527 if (vhost_features & 1UL << VIRTIO_NET_F_MRG_RXBUF && 528 has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF)) 529 features |= 1UL << VIRTIO_NET_F_MRG_RXBUF; 530 531 return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features); 532 } 533 534 static void set_guest_features(struct kvm *kvm, void *dev, u32 features) 535 { 536 struct net_dev *ndev = dev; 537 struct virtio_net_config *conf = &ndev->config; 538 539 ndev->features = features; 540 541 conf->status = virtio_host_to_guest_u16(&ndev->vdev, conf->status); 542 conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev, 543 conf->max_virtqueue_pairs); 544 } 545 546 static void virtio_net_start(struct net_dev *ndev) 547 { 548 if (ndev->mode == NET_MODE_TAP) { 549 if (!virtio_net__tap_init(ndev)) 550 die_perror("TAP device initialized failed because"); 551 552 if (ndev->vhost_fd && 553 virtio_net__vhost_set_features(ndev) != 0) 554 die_perror("VHOST_SET_FEATURES failed"); 555 } else { 556 ndev->info.vnet_hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ? 557 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 558 sizeof(struct virtio_net_hdr); 559 uip_init(&ndev->info); 560 } 561 } 562 563 static void virtio_net_stop(struct net_dev *ndev) 564 { 565 /* Undo whatever start() did */ 566 if (ndev->mode == NET_MODE_TAP) 567 virtio_net__tap_exit(ndev); 568 else 569 uip_exit(&ndev->info); 570 } 571 572 static void notify_status(struct kvm *kvm, void *dev, u32 status) 573 { 574 if (status & VIRTIO__STATUS_START) 575 virtio_net_start(dev); 576 else if (status & VIRTIO__STATUS_STOP) 577 virtio_net_stop(dev); 578 } 579 580 static bool is_ctrl_vq(struct net_dev *ndev, u32 vq) 581 { 582 return vq == (u32)(ndev->queue_pairs * 2); 583 } 584 585 static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align, 586 u32 pfn) 587 { 588 struct vhost_vring_state state = { .index = vq }; 589 struct net_dev_queue *net_queue; 590 struct vhost_vring_addr addr; 591 struct net_dev *ndev = dev; 592 struct virt_queue *queue; 593 void *p; 594 int r; 595 596 compat__remove_message(compat_id); 597 598 net_queue = &ndev->queues[vq]; 599 net_queue->id = vq; 600 net_queue->ndev = ndev; 601 queue = &net_queue->vq; 602 queue->pfn = pfn; 603 p = virtio_get_vq(kvm, queue->pfn, page_size); 604 605 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, align); 606 virtio_init_device_vq(&ndev->vdev, queue); 607 608 mutex_init(&net_queue->lock); 609 pthread_cond_init(&net_queue->cond, NULL); 610 if (is_ctrl_vq(ndev, vq)) { 611 pthread_create(&net_queue->thread, NULL, virtio_net_ctrl_thread, 612 net_queue); 613 614 return 0; 615 } else if (ndev->vhost_fd == 0 ) { 616 if (vq & 1) 617 pthread_create(&net_queue->thread, NULL, 618 virtio_net_tx_thread, net_queue); 619 else 620 pthread_create(&net_queue->thread, NULL, 621 virtio_net_rx_thread, net_queue); 622 623 return 0; 624 } 625 626 if (queue->endian != VIRTIO_ENDIAN_HOST) 627 die_perror("VHOST requires the same endianness in guest and host"); 628 629 state.num = queue->vring.num; 630 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state); 631 if (r < 0) 632 die_perror("VHOST_SET_VRING_NUM failed"); 633 state.num = 0; 634 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state); 635 if (r < 0) 636 die_perror("VHOST_SET_VRING_BASE failed"); 637 638 addr = (struct vhost_vring_addr) { 639 .index = vq, 640 .desc_user_addr = (u64)(unsigned long)queue->vring.desc, 641 .avail_user_addr = (u64)(unsigned long)queue->vring.avail, 642 .used_user_addr = (u64)(unsigned long)queue->vring.used, 643 }; 644 645 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr); 646 if (r < 0) 647 die_perror("VHOST_SET_VRING_ADDR failed"); 648 649 return 0; 650 } 651 652 static void exit_vq(struct kvm *kvm, void *dev, u32 vq) 653 { 654 struct net_dev *ndev = dev; 655 struct net_dev_queue *queue = &ndev->queues[vq]; 656 657 if (!is_ctrl_vq(ndev, vq) && queue->gsi) { 658 irq__del_irqfd(kvm, queue->gsi, queue->irqfd); 659 close(queue->irqfd); 660 queue->gsi = queue->irqfd = 0; 661 } 662 663 /* 664 * TODO: vhost reset owner. It's the only way to cleanly stop vhost, but 665 * we can't restart it at the moment. 666 */ 667 if (ndev->vhost_fd && !is_ctrl_vq(ndev, vq)) { 668 pr_warning("Cannot reset VHOST queue"); 669 ioctl(ndev->vhost_fd, VHOST_RESET_OWNER); 670 return; 671 } 672 673 /* 674 * Threads are waiting on cancellation points (readv or 675 * pthread_cond_wait) and should stop gracefully. 676 */ 677 pthread_cancel(queue->thread); 678 pthread_join(queue->thread, NULL); 679 } 680 681 static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi) 682 { 683 struct net_dev *ndev = dev; 684 struct net_dev_queue *queue = &ndev->queues[vq]; 685 struct vhost_vring_file file; 686 int r; 687 688 if (ndev->vhost_fd == 0) 689 return; 690 691 file = (struct vhost_vring_file) { 692 .index = vq, 693 .fd = eventfd(0, 0), 694 }; 695 696 r = irq__add_irqfd(kvm, gsi, file.fd, -1); 697 if (r < 0) 698 die_perror("KVM_IRQFD failed"); 699 700 queue->irqfd = file.fd; 701 queue->gsi = gsi; 702 703 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file); 704 if (r < 0) 705 die_perror("VHOST_SET_VRING_CALL failed"); 706 file.fd = ndev->tap_fd; 707 r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file); 708 if (r != 0) 709 die("VHOST_NET_SET_BACKEND failed %d", errno); 710 711 } 712 713 static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd) 714 { 715 struct net_dev *ndev = dev; 716 struct vhost_vring_file file = { 717 .index = vq, 718 .fd = efd, 719 }; 720 int r; 721 722 if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq)) 723 return; 724 725 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file); 726 if (r < 0) 727 die_perror("VHOST_SET_VRING_KICK failed"); 728 } 729 730 static int notify_vq(struct kvm *kvm, void *dev, u32 vq) 731 { 732 struct net_dev *ndev = dev; 733 734 virtio_net_handle_callback(kvm, ndev, vq); 735 736 return 0; 737 } 738 739 static struct virt_queue *get_vq(struct kvm *kvm, void *dev, u32 vq) 740 { 741 struct net_dev *ndev = dev; 742 743 return &ndev->queues[vq].vq; 744 } 745 746 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq) 747 { 748 /* FIXME: dynamic */ 749 return VIRTIO_NET_QUEUE_SIZE; 750 } 751 752 static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size) 753 { 754 /* FIXME: dynamic */ 755 return size; 756 } 757 758 static unsigned int get_vq_count(struct kvm *kvm, void *dev) 759 { 760 struct net_dev *ndev = dev; 761 762 return ndev->queue_pairs * 2 + 1; 763 } 764 765 static struct virtio_ops net_dev_virtio_ops = { 766 .get_config = get_config, 767 .get_config_size = get_config_size, 768 .get_host_features = get_host_features, 769 .set_guest_features = set_guest_features, 770 .get_vq_count = get_vq_count, 771 .init_vq = init_vq, 772 .exit_vq = exit_vq, 773 .get_vq = get_vq, 774 .get_size_vq = get_size_vq, 775 .set_size_vq = set_size_vq, 776 .notify_vq = notify_vq, 777 .notify_vq_gsi = notify_vq_gsi, 778 .notify_vq_eventfd = notify_vq_eventfd, 779 .notify_status = notify_status, 780 }; 781 782 static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev) 783 { 784 struct kvm_mem_bank *bank; 785 struct vhost_memory *mem; 786 int r, i; 787 788 ndev->vhost_fd = open("/dev/vhost-net", O_RDWR); 789 if (ndev->vhost_fd < 0) 790 die_perror("Failed openning vhost-net device"); 791 792 mem = calloc(1, sizeof(*mem) + kvm->mem_slots * sizeof(struct vhost_memory_region)); 793 if (mem == NULL) 794 die("Failed allocating memory for vhost memory map"); 795 796 i = 0; 797 list_for_each_entry(bank, &kvm->mem_banks, list) { 798 mem->regions[i] = (struct vhost_memory_region) { 799 .guest_phys_addr = bank->guest_phys_addr, 800 .memory_size = bank->size, 801 .userspace_addr = (unsigned long)bank->host_addr, 802 }; 803 i++; 804 } 805 mem->nregions = i; 806 807 r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER); 808 if (r != 0) 809 die_perror("VHOST_SET_OWNER failed"); 810 811 r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem); 812 if (r != 0) 813 die_perror("VHOST_SET_MEM_TABLE failed"); 814 815 ndev->vdev.use_vhost = true; 816 817 free(mem); 818 } 819 820 static inline void str_to_mac(const char *str, char *mac) 821 { 822 sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 823 mac, mac+1, mac+2, mac+3, mac+4, mac+5); 824 } 825 static int set_net_param(struct kvm *kvm, struct virtio_net_params *p, 826 const char *param, const char *val) 827 { 828 if (strcmp(param, "guest_mac") == 0) { 829 str_to_mac(val, p->guest_mac); 830 } else if (strcmp(param, "mode") == 0) { 831 if (!strncmp(val, "user", 4)) { 832 int i; 833 834 for (i = 0; i < kvm->cfg.num_net_devices; i++) 835 if (kvm->cfg.net_params[i].mode == NET_MODE_USER) 836 die("Only one usermode network device allowed at a time"); 837 p->mode = NET_MODE_USER; 838 } else if (!strncmp(val, "tap", 3)) { 839 p->mode = NET_MODE_TAP; 840 } else if (!strncmp(val, "none", 4)) { 841 kvm->cfg.no_net = 1; 842 return -1; 843 } else 844 die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network); 845 } else if (strcmp(param, "script") == 0) { 846 p->script = strdup(val); 847 } else if (strcmp(param, "downscript") == 0) { 848 p->downscript = strdup(val); 849 } else if (strcmp(param, "guest_ip") == 0) { 850 p->guest_ip = strdup(val); 851 } else if (strcmp(param, "host_ip") == 0) { 852 p->host_ip = strdup(val); 853 } else if (strcmp(param, "trans") == 0) { 854 p->trans = strdup(val); 855 } else if (strcmp(param, "tapif") == 0) { 856 p->tapif = strdup(val); 857 } else if (strcmp(param, "vhost") == 0) { 858 p->vhost = atoi(val); 859 } else if (strcmp(param, "fd") == 0) { 860 p->fd = atoi(val); 861 } else if (strcmp(param, "mq") == 0) { 862 p->mq = atoi(val); 863 } else 864 die("Unknown network parameter %s", param); 865 866 return 0; 867 } 868 869 int netdev_parser(const struct option *opt, const char *arg, int unset) 870 { 871 struct virtio_net_params p; 872 char *buf = NULL, *cmd = NULL, *cur = NULL; 873 bool on_cmd = true; 874 struct kvm *kvm = opt->ptr; 875 876 if (arg) { 877 buf = strdup(arg); 878 if (buf == NULL) 879 die("Failed allocating new net buffer"); 880 cur = strtok(buf, ",="); 881 } 882 883 p = (struct virtio_net_params) { 884 .guest_ip = DEFAULT_GUEST_ADDR, 885 .host_ip = DEFAULT_HOST_ADDR, 886 .script = DEFAULT_SCRIPT, 887 .downscript = DEFAULT_SCRIPT, 888 .mode = NET_MODE_TAP, 889 }; 890 891 str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac); 892 p.guest_mac[5] += kvm->cfg.num_net_devices; 893 894 while (cur) { 895 if (on_cmd) { 896 cmd = cur; 897 } else { 898 if (set_net_param(kvm, &p, cmd, cur) < 0) 899 goto done; 900 } 901 on_cmd = !on_cmd; 902 903 cur = strtok(NULL, ",="); 904 }; 905 906 kvm->cfg.num_net_devices++; 907 908 kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params)); 909 if (kvm->cfg.net_params == NULL) 910 die("Failed adding new network device"); 911 912 kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p; 913 914 done: 915 free(buf); 916 return 0; 917 } 918 919 static int virtio_net__init_one(struct virtio_net_params *params) 920 { 921 int i, r; 922 struct net_dev *ndev; 923 struct virtio_ops *ops; 924 enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm); 925 926 ndev = calloc(1, sizeof(struct net_dev)); 927 if (ndev == NULL) 928 return -ENOMEM; 929 930 list_add_tail(&ndev->list, &ndevs); 931 932 ops = malloc(sizeof(*ops)); 933 if (ops == NULL) 934 return -ENOMEM; 935 936 ndev->kvm = params->kvm; 937 ndev->params = params; 938 939 mutex_init(&ndev->mutex); 940 ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq)); 941 ndev->config.status = VIRTIO_NET_S_LINK_UP; 942 if (ndev->queue_pairs > 1) 943 ndev->config.max_virtqueue_pairs = ndev->queue_pairs; 944 945 for (i = 0 ; i < 6 ; i++) { 946 ndev->config.mac[i] = params->guest_mac[i]; 947 ndev->info.guest_mac.addr[i] = params->guest_mac[i]; 948 ndev->info.host_mac.addr[i] = params->host_mac[i]; 949 } 950 951 ndev->mode = params->mode; 952 if (ndev->mode == NET_MODE_TAP) { 953 ndev->ops = &tap_ops; 954 if (!virtio_net__tap_create(ndev)) 955 die_perror("You have requested a TAP device, but creation of one has failed because"); 956 } else { 957 ndev->info.host_ip = ntohl(inet_addr(params->host_ip)); 958 ndev->info.guest_ip = ntohl(inet_addr(params->guest_ip)); 959 ndev->info.guest_netmask = ntohl(inet_addr("255.255.255.0")); 960 ndev->info.buf_nr = 20, 961 ndev->ops = &uip_ops; 962 uip_static_init(&ndev->info); 963 } 964 965 *ops = net_dev_virtio_ops; 966 967 if (params->trans) { 968 if (strcmp(params->trans, "mmio") == 0) 969 trans = VIRTIO_MMIO; 970 else if (strcmp(params->trans, "pci") == 0) 971 trans = VIRTIO_PCI; 972 else 973 pr_warning("virtio-net: Unknown transport method : %s, " 974 "falling back to %s.", params->trans, 975 virtio_trans_name(trans)); 976 } 977 978 r = virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans, 979 PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET); 980 if (r < 0) { 981 free(ops); 982 return r; 983 } 984 985 if (params->vhost) 986 virtio_net__vhost_init(params->kvm, ndev); 987 988 if (compat_id == -1) 989 compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET"); 990 991 return 0; 992 } 993 994 int virtio_net__init(struct kvm *kvm) 995 { 996 int i, r; 997 998 for (i = 0; i < kvm->cfg.num_net_devices; i++) { 999 kvm->cfg.net_params[i].kvm = kvm; 1000 r = virtio_net__init_one(&kvm->cfg.net_params[i]); 1001 if (r < 0) 1002 goto cleanup; 1003 } 1004 1005 if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) { 1006 static struct virtio_net_params net_params; 1007 1008 net_params = (struct virtio_net_params) { 1009 .guest_ip = kvm->cfg.guest_ip, 1010 .host_ip = kvm->cfg.host_ip, 1011 .kvm = kvm, 1012 .script = kvm->cfg.script, 1013 .mode = NET_MODE_USER, 1014 }; 1015 str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac); 1016 str_to_mac(kvm->cfg.host_mac, net_params.host_mac); 1017 1018 r = virtio_net__init_one(&net_params); 1019 if (r < 0) 1020 goto cleanup; 1021 } 1022 1023 return 0; 1024 1025 cleanup: 1026 virtio_net__exit(kvm); 1027 return r; 1028 } 1029 virtio_dev_init(virtio_net__init); 1030 1031 int virtio_net__exit(struct kvm *kvm) 1032 { 1033 struct virtio_net_params *params; 1034 struct net_dev *ndev; 1035 struct list_head *ptr, *n; 1036 1037 list_for_each_safe(ptr, n, &ndevs) { 1038 ndev = list_entry(ptr, struct net_dev, list); 1039 params = ndev->params; 1040 /* Cleanup any tap device which attached to bridge */ 1041 if (ndev->mode == NET_MODE_TAP && 1042 strcmp(params->downscript, "none")) 1043 virtio_net_exec_script(params->downscript, ndev->tap_name); 1044 1045 list_del(&ndev->list); 1046 free(ndev); 1047 } 1048 return 0; 1049 } 1050 virtio_dev_exit(virtio_net__exit); 1051