1 #include "kvm/virtio-pci-dev.h" 2 #include "kvm/virtio-net.h" 3 #include "kvm/virtio.h" 4 #include "kvm/mutex.h" 5 #include "kvm/util.h" 6 #include "kvm/kvm.h" 7 #include "kvm/irq.h" 8 #include "kvm/uip.h" 9 #include "kvm/guest_compat.h" 10 #include "kvm/iovec.h" 11 12 #include <linux/vhost.h> 13 #include <linux/virtio_net.h> 14 #include <linux/if_tun.h> 15 #include <linux/types.h> 16 17 #include <arpa/inet.h> 18 #include <net/if.h> 19 20 #include <unistd.h> 21 #include <fcntl.h> 22 23 #include <sys/socket.h> 24 #include <sys/ioctl.h> 25 #include <sys/types.h> 26 #include <sys/wait.h> 27 #include <sys/eventfd.h> 28 29 #define VIRTIO_NET_QUEUE_SIZE 256 30 #define VIRTIO_NET_NUM_QUEUES 8 31 32 struct net_dev; 33 34 struct net_dev_operations { 35 int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); 36 int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); 37 }; 38 39 struct net_dev_queue { 40 int id; 41 struct net_dev *ndev; 42 struct virt_queue vq; 43 pthread_t thread; 44 struct mutex lock; 45 pthread_cond_t cond; 46 int gsi; 47 int irqfd; 48 }; 49 50 struct net_dev { 51 struct mutex mutex; 52 struct virtio_device vdev; 53 struct list_head list; 54 55 struct net_dev_queue queues[VIRTIO_NET_NUM_QUEUES * 2 + 1]; 56 struct virtio_net_config config; 57 u32 features, queue_pairs; 58 59 int vhost_fd; 60 int tap_fd; 61 char tap_name[IFNAMSIZ]; 62 bool tap_ufo; 63 64 int mode; 65 66 struct uip_info info; 67 struct net_dev_operations *ops; 68 struct kvm *kvm; 69 70 struct virtio_net_params *params; 71 }; 72 73 static LIST_HEAD(ndevs); 74 static int compat_id = -1; 75 76 #define MAX_PACKET_SIZE 65550 77 78 static bool has_virtio_feature(struct net_dev *ndev, u32 feature) 79 { 80 return ndev->features & (1 << feature); 81 } 82 83 static void virtio_net_fix_tx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev) 84 { 85 hdr->hdr_len = virtio_guest_to_host_u16(&ndev->vdev, hdr->hdr_len); 86 hdr->gso_size = virtio_guest_to_host_u16(&ndev->vdev, hdr->gso_size); 87 hdr->csum_start = virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_start); 88 hdr->csum_offset = virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_offset); 89 } 90 91 static void virtio_net_fix_rx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev) 92 { 93 hdr->hdr_len = virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr_len); 94 hdr->gso_size = virtio_host_to_guest_u16(&ndev->vdev, hdr->gso_size); 95 hdr->csum_start = virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_start); 96 hdr->csum_offset = virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_offset); 97 } 98 99 static void *virtio_net_rx_thread(void *p) 100 { 101 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 102 struct net_dev_queue *queue = p; 103 struct virt_queue *vq = &queue->vq; 104 struct net_dev *ndev = queue->ndev; 105 struct kvm *kvm; 106 u16 out, in; 107 u16 head; 108 int len, copied; 109 110 kvm__set_thread_name("virtio-net-rx"); 111 112 kvm = ndev->kvm; 113 while (1) { 114 mutex_lock(&queue->lock); 115 if (!virt_queue__available(vq)) 116 pthread_cond_wait(&queue->cond, &queue->lock.mutex); 117 mutex_unlock(&queue->lock); 118 119 while (virt_queue__available(vq)) { 120 unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)]; 121 struct iovec dummy_iov = { 122 .iov_base = buffer, 123 .iov_len = sizeof(buffer), 124 }; 125 struct virtio_net_hdr_mrg_rxbuf *hdr; 126 u16 num_buffers; 127 128 len = ndev->ops->rx(&dummy_iov, 1, ndev); 129 if (len < 0) { 130 pr_warning("%s: rx on vq %u failed (%d), exiting thread\n", 131 __func__, queue->id, len); 132 goto out_err; 
			}

			copied = num_buffers = 0;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			hdr = iov[0].iov_base;
			while (copied < len) {
				size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in));

				memcpy_toiovec(iov, buffer + copied, iovsize);
				copied += iovsize;
				virt_queue__set_used_elem_no_update(vq, head, iovsize, num_buffers++);
				if (copied == len)
					break;
				while (!virt_queue__available(vq))
					sleep(0);
				head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			}

			virtio_net_fix_rx_hdr(&hdr->hdr, ndev);
			if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
				hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers);

			virt_queue__used_idx_advance(vq, num_buffers);

			/* We should interrupt guest right now, otherwise latency is huge. */
			if (virtio_queue__should_signal(vq))
				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
		}
	}

out_err:
	pthread_exit(NULL);
	return NULL;

}

static void *virtio_net_tx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct net_dev_queue *queue = p;
	struct virt_queue *vq = &queue->vq;
	struct net_dev *ndev = queue->ndev;
	struct kvm *kvm;
	u16 out, in;
	u16 head;
	int len;

	kvm__set_thread_name("virtio-net-tx");

	kvm = ndev->kvm;

	while (1) {
		mutex_lock(&queue->lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
		mutex_unlock(&queue->lock);

		while (virt_queue__available(vq)) {
			struct virtio_net_hdr *hdr;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			hdr = iov[0].iov_base;
			virtio_net_fix_tx_hdr(hdr, ndev);
			len = ndev->ops->tx(iov, out, ndev);
			if (len < 0) {
				pr_warning("%s: tx on vq %u failed (%d)\n",
						__func__, queue->id, errno);
				goto out_err;
			}

			virt_queue__set_used_elem(vq, head, len);
		}

		if (virtio_queue__should_signal(vq))
			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
	}

out_err:
	pthread_exit(NULL);
	return NULL;
}

static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm *kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl)
{
	/* Not much to do here */
	return VIRTIO_NET_OK;
}

static void *virtio_net_ctrl_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct net_dev_queue *queue = p;
	struct virt_queue *vq = &queue->vq;
	struct net_dev *ndev = queue->ndev;
	u16 out, in, head;
	struct kvm *kvm = ndev->kvm;
	struct virtio_net_ctrl_hdr *ctrl;
	virtio_net_ctrl_ack *ack;

	kvm__set_thread_name("virtio-net-ctrl");

	while (1) {
		mutex_lock(&queue->lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
		mutex_unlock(&queue->lock);

		while (virt_queue__available(vq)) {
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			ctrl = iov[0].iov_base;
			ack = iov[out].iov_base;

			switch (ctrl->class) {
			case VIRTIO_NET_CTRL_MQ:
				*ack = virtio_net_handle_mq(kvm, ndev, ctrl);
				break;
			default:
				*ack = VIRTIO_NET_ERR;
				break;
			}
			virt_queue__set_used_elem(vq, head, iov[out].iov_len);
		}

		if (virtio_queue__should_signal(vq))
			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
	}

	pthread_exit(NULL);

	return NULL;
}

static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev,
				       int queue)
{
	struct net_dev_queue *net_queue = &ndev->queues[queue];

	if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) {
		pr_warning("Unknown queue index %u", queue);
		return;
	}

	mutex_lock(&net_queue->lock);
	pthread_cond_signal(&net_queue->cond);
	mutex_unlock(&net_queue->lock);
}

static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr,
				  const char *tapname)
{
	int ret;

	memset(ifr, 0, sizeof(*ifr));
	ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
	if (tapname)
		strncpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name));

	ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr);

	if (ret >= 0)
		strncpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name));
	return ret;
}

static int virtio_net_exec_script(const char *script, const char *tap_name)
{
	pid_t pid;
	int status;

	pid = fork();
	if (pid == 0) {
		execl(script, script, tap_name, NULL);
		_exit(1);
	} else {
		waitpid(pid, &status, 0);
		if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
			pr_warning("Failed to set up tap by %s", script);
			return -1;
		}
	}
	return 0;
}

static bool virtio_net__tap_init(struct net_dev *ndev)
{
	int sock = socket(AF_INET, SOCK_STREAM, 0);
	int hdr_len;
	struct sockaddr_in sin = {0};
	struct ifreq ifr;
	const struct virtio_net_params *params = ndev->params;
	bool skipconf = !!params->tapif;

	hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
			sizeof(struct virtio_net_hdr_mrg_rxbuf) :
			sizeof(struct virtio_net_hdr);
	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
		pr_warning("Config tap device TUNSETVNETHDRSZ error");

	if (strcmp(params->script, "none")) {
		if (virtio_net_exec_script(params->script, ndev->tap_name) < 0)
			goto fail;
	} else if (!skipconf) {
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
		sin.sin_addr.s_addr = inet_addr(params->host_ip);
		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
		ifr.ifr_addr.sa_family = AF_INET;
		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
			pr_warning("Could not set ip address on tap device");
			goto fail;
		}
	}

	if (!skipconf) {
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
		ioctl(sock, SIOCGIFFLAGS, &ifr);
		ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
		if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
			pr_warning("Could not bring tap device up");
	}

	close(sock);

	return 1;

fail:
	if (sock >= 0)
		close(sock);
	if (ndev->tap_fd >= 0)
		close(ndev->tap_fd);

	return 0;
}

static void virtio_net__tap_exit(struct net_dev *ndev)
{
	int sock;
	struct ifreq ifr;

	if (ndev->params->tapif)
		return;

	sock = socket(AF_INET, SOCK_STREAM, 0);
	strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
	ioctl(sock, SIOCGIFFLAGS, &ifr);
	ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
		pr_warning("Could not bring tap device down");
	close(sock);
}

static bool virtio_net__tap_create(struct net_dev *ndev)
{
	int offload;
	struct ifreq ifr;
	const struct virtio_net_params *params = ndev->params;
	bool macvtap = (!!params->tapif) && (params->tapif[0] == '/');

	/* Did the user already give us the FD? */
	if (params->fd)
		ndev->tap_fd = params->fd;
	else {
		const char *tap_file = "/dev/net/tun";

		/* Did the user ask us to use macvtap? */
		if (macvtap)
			tap_file = params->tapif;

		ndev->tap_fd = open(tap_file, O_RDWR);
		if (ndev->tap_fd < 0) {
			pr_warning("Unable to open %s", tap_file);
			return 0;
		}
	}

	if (!macvtap &&
	    virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) {
		pr_warning("Config tap device error. Are you root?");
		goto fail;
	}

	/*
	 * UFO support was removed from the kernel in commit
	 * fb652fdfe83710da0ca13448a41b7ed027d0a984
	 * (https://www.spinics.net/lists/netdev/msg443562.html).
	 * In order to support older kernels that predate this commit,
	 * request TUN_F_UFO offload by default and use the result to probe
	 * whether the kernel still supports UFO.
	 */
	ndev->tap_ufo = true;
	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
		/*
		 * Was this failure caused by the kernel having removed UFO
		 * support? Retry TUNSETOFFLOAD without TUN_F_UFO.
		 */
		offload &= ~TUN_F_UFO;
		if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
			pr_warning("Config tap device TUNSETOFFLOAD error");
			goto fail;
		}
		ndev->tap_ufo = false;
	}

	return 1;

fail:
	if ((ndev->tap_fd >= 0) || (!params->fd))
		close(ndev->tap_fd);

	return 0;
}

static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return writev(ndev->tap_fd, iov, out);
}

static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return readv(ndev->tap_fd, iov, in);
}

static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return uip_tx(iov, out, &ndev->info);
}

static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return uip_rx(iov, in, &ndev->info);
}

static struct net_dev_operations tap_ops = {
	.rx = tap_ops_rx,
	.tx = tap_ops_tx,
};

static struct net_dev_operations uip_ops = {
	.rx = uip_ops_rx,
	.tx = uip_ops_tx,
};

static u8 *get_config(struct kvm *kvm, void *dev)
{
	struct net_dev *ndev = dev;

	return ((u8 *)(&ndev->config));
}

static u32 get_host_features(struct kvm *kvm, void *dev)
{
	u32 features;
	struct net_dev *ndev = dev;

	features = 1UL << VIRTIO_NET_F_MAC
		| 1UL << VIRTIO_NET_F_CSUM
		| 1UL << VIRTIO_NET_F_HOST_TSO4
		| 1UL << VIRTIO_NET_F_HOST_TSO6
		| 1UL << VIRTIO_NET_F_GUEST_TSO4
		| 1UL << VIRTIO_NET_F_GUEST_TSO6
		| 1UL << VIRTIO_RING_F_EVENT_IDX
		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
		| 1UL << VIRTIO_NET_F_CTRL_VQ
		| 1UL << VIRTIO_NET_F_MRG_RXBUF
		| 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);

	/*
	 * The host and guest UFO features can only be offered when the
	 * kernel has TAP UFO support.
	 */
	if (ndev->tap_ufo)
		features |= (1UL << VIRTIO_NET_F_HOST_UFO
				| 1UL << VIRTIO_NET_F_GUEST_UFO);

	return features;
}

static int virtio_net__vhost_set_features(struct net_dev *ndev)
{
	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
	u64 vhost_features;

	if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0)
		die_perror("VHOST_GET_FEATURES failed");

	/* make sure both sides support mergeable rx buffers */
	if (vhost_features & 1UL << VIRTIO_NET_F_MRG_RXBUF &&
	    has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
		features |= 1UL << VIRTIO_NET_F_MRG_RXBUF;

	return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
}

static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
{
	struct net_dev *ndev = dev;
	struct virtio_net_config *conf = &ndev->config;

	ndev->features = features;

	conf->status = virtio_host_to_guest_u16(&ndev->vdev, conf->status);
	conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev,
							     conf->max_virtqueue_pairs);
}

static void virtio_net_start(struct net_dev *ndev)
{
	if (ndev->mode == NET_MODE_TAP) {
		if (!virtio_net__tap_init(ndev))
			die_perror("TAP device initialization failed because");

		if (ndev->vhost_fd &&
		    virtio_net__vhost_set_features(ndev) != 0)
			die_perror("VHOST_SET_FEATURES failed");
	} else {
		ndev->info.vnet_hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
						sizeof(struct virtio_net_hdr_mrg_rxbuf) :
						sizeof(struct virtio_net_hdr);
		uip_init(&ndev->info);
	}
}

static void virtio_net_stop(struct net_dev *ndev)
{
	/* Undo whatever start() did */
	if (ndev->mode == NET_MODE_TAP)
		virtio_net__tap_exit(ndev);
	else
		uip_exit(&ndev->info);
}

static void notify_status(struct kvm *kvm, void *dev, u32 status)
{
	if (status & VIRTIO__STATUS_START)
		virtio_net_start(dev);
	else if (status & VIRTIO__STATUS_STOP)
		virtio_net_stop(dev);
}

static bool is_ctrl_vq(struct net_dev *ndev, u32 vq)
{
	return vq == (u32)(ndev->queue_pairs * 2);
}

static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align,
		   u32 pfn)
{
	struct vhost_vring_state state = { .index = vq };
	struct net_dev_queue *net_queue;
	struct vhost_vring_addr addr;
	struct net_dev *ndev = dev;
	struct virt_queue *queue;
	void *p;
	int r;

	compat__remove_message(compat_id);

	net_queue	= &ndev->queues[vq];
	net_queue->id	= vq;
	net_queue->ndev	= ndev;
	queue		= &net_queue->vq;
	queue->pfn	= pfn;
	p		= virtio_get_vq(kvm, queue->pfn, page_size);

	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, align);
	virtio_init_device_vq(&ndev->vdev, queue);

	mutex_init(&net_queue->lock);
	pthread_cond_init(&net_queue->cond, NULL);
	if (is_ctrl_vq(ndev, vq)) {
		pthread_create(&net_queue->thread, NULL, virtio_net_ctrl_thread,
			       net_queue);

		return 0;
	} else if (ndev->vhost_fd == 0) {
		if (vq & 1)
			pthread_create(&net_queue->thread, NULL,
				       virtio_net_tx_thread, net_queue);
		else
			pthread_create(&net_queue->thread, NULL,
				       virtio_net_rx_thread, net_queue);

		return 0;
	}

	if (queue->endian != VIRTIO_ENDIAN_HOST)
		die_perror("VHOST requires the same endianness in guest and host");

	state.num = queue->vring.num;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_NUM failed");
	state.num = 0;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_BASE failed");

	addr = (struct vhost_vring_addr) {
		.index = vq,
		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
		.used_user_addr = (u64)(unsigned long)queue->vring.used,
	};

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
	if (r < 0)
		die_perror("VHOST_SET_VRING_ADDR failed");

	return 0;
}

static void exit_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;
	struct net_dev_queue *queue = &ndev->queues[vq];

	if (!is_ctrl_vq(ndev, vq) && queue->gsi) {
		irq__del_irqfd(kvm, queue->gsi, queue->irqfd);
		close(queue->irqfd);
		queue->gsi = queue->irqfd = 0;
	}

	/*
	 * TODO: vhost reset owner. It's the only way to cleanly stop vhost, but
	 * we can't restart it at the moment.
	 */
	if (ndev->vhost_fd && !is_ctrl_vq(ndev, vq)) {
		pr_warning("Cannot reset VHOST queue");
		ioctl(ndev->vhost_fd, VHOST_RESET_OWNER);
		return;
	}

	/*
	 * Threads are waiting on cancellation points (readv or
	 * pthread_cond_wait) and should stop gracefully.
	 */
	pthread_cancel(queue->thread);
	pthread_join(queue->thread, NULL);
}

static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
{
	struct net_dev *ndev = dev;
	struct net_dev_queue *queue = &ndev->queues[vq];
	struct vhost_vring_file file;
	int r;

	if (ndev->vhost_fd == 0)
		return;

	file = (struct vhost_vring_file) {
		.index = vq,
		.fd = eventfd(0, 0),
	};

	r = irq__add_irqfd(kvm, gsi, file.fd, -1);
	if (r < 0)
		die_perror("KVM_IRQFD failed");

	queue->irqfd = file.fd;
	queue->gsi = gsi;

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
	if (r < 0)
		die_perror("VHOST_SET_VRING_CALL failed");
	file.fd = ndev->tap_fd;
	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
	if (r != 0)
		die("VHOST_NET_SET_BACKEND failed %d", errno);

}

static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
{
	struct net_dev *ndev = dev;
	struct vhost_vring_file file = {
		.index = vq,
		.fd = efd,
	};
	int r;

	if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
		return;

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
	if (r < 0)
		die_perror("VHOST_SET_VRING_KICK failed");
}

static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	virtio_net_handle_callback(kvm, ndev, vq);

	return 0;
}

static struct virt_queue *get_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	return &ndev->queues[vq].vq;
}

static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
{
	/* FIXME: dynamic */
	return VIRTIO_NET_QUEUE_SIZE;
}

static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
{
	/* FIXME: dynamic */
	return size;
}

static int get_vq_count(struct kvm *kvm, void *dev)
{
	struct net_dev *ndev = dev;

	return ndev->queue_pairs * 2 + 1;
}

static struct virtio_ops net_dev_virtio_ops = {
	.get_config		= get_config,
	.get_host_features	= get_host_features,
	.set_guest_features	= set_guest_features,
	.get_vq_count		= get_vq_count,
	.init_vq		= init_vq,
	.exit_vq		= exit_vq,
	.get_vq			= get_vq,
	.get_size_vq		= get_size_vq,
	.set_size_vq		= set_size_vq,
	.notify_vq		= notify_vq,
	.notify_vq_gsi		= notify_vq_gsi,
	.notify_vq_eventfd	= notify_vq_eventfd,
	.notify_status		= notify_status,
};

static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
{
	struct kvm_mem_bank *bank;
	struct vhost_memory *mem;
	int r, i;

	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
	if (ndev->vhost_fd < 0)
		die_perror("Failed opening vhost-net device");

	mem = calloc(1, sizeof(*mem) + kvm->mem_slots * sizeof(struct vhost_memory_region));
	if (mem == NULL)
		die("Failed allocating memory for vhost memory map");

	i = 0;
	list_for_each_entry(bank, &kvm->mem_banks, list) {
		mem->regions[i] = (struct vhost_memory_region) {
			.guest_phys_addr	= bank->guest_phys_addr,
			.memory_size		= bank->size,
			.userspace_addr		= (unsigned long)bank->host_addr,
		};
		i++;
	}
	mem->nregions = i;

	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
	if (r != 0)
		die_perror("VHOST_SET_OWNER failed");

	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
	if (r != 0)
		die_perror("VHOST_SET_MEM_TABLE failed");

	ndev->vdev.use_vhost = true;

	free(mem);
}

static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
		mac, mac+1, mac+2, mac+3, mac+4, mac+5);
}

static int set_net_param(struct kvm *kvm, struct virtio_net_params *p,
			 const char *param, const char *val)
{
	if (strcmp(param, "guest_mac") == 0) {
		str_to_mac(val, p->guest_mac);
	} else if (strcmp(param, "mode") == 0) {
		if (!strncmp(val, "user", 4)) {
			int i;

			for (i = 0; i < kvm->cfg.num_net_devices; i++)
				if (kvm->cfg.net_params[i].mode == NET_MODE_USER)
					die("Only one usermode network device allowed at a time");
			p->mode = NET_MODE_USER;
		} else if (!strncmp(val, "tap", 3)) {
			p->mode = NET_MODE_TAP;
		} else if (!strncmp(val, "none", 4)) {
			kvm->cfg.no_net = 1;
			return -1;
		} else
			die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network);
	} else if (strcmp(param, "script") == 0) {
		p->script = strdup(val);
	} else if (strcmp(param, "downscript") == 0) {
		p->downscript = strdup(val);
	} else if (strcmp(param, "guest_ip") == 0) {
		p->guest_ip = strdup(val);
	} else if (strcmp(param, "host_ip") == 0) {
		p->host_ip = strdup(val);
	} else if (strcmp(param, "trans") == 0) {
		p->trans = strdup(val);
	} else if (strcmp(param, "tapif") == 0) {
		p->tapif = strdup(val);
	} else if (strcmp(param, "vhost") == 0) {
		p->vhost = atoi(val);
	} else if (strcmp(param, "fd") == 0) {
		p->fd = atoi(val);
	} else if (strcmp(param, "mq") == 0) {
		p->mq = atoi(val);
	} else
		die("Unknown network parameter %s", param);

	return 0;
}

int netdev_parser(const struct option *opt, const char *arg, int unset)
{
	struct virtio_net_params p;
	char *buf = NULL, *cmd = NULL, *cur = NULL;
	bool on_cmd = true;
	struct kvm *kvm = opt->ptr;

	if (arg) {
		buf = strdup(arg);
		if (buf == NULL)
			die("Failed allocating new net buffer");
		cur = strtok(buf, ",=");
	}

	p = (struct virtio_net_params) {
		.guest_ip	= DEFAULT_GUEST_ADDR,
		.host_ip	= DEFAULT_HOST_ADDR,
		.script		= DEFAULT_SCRIPT,
		.downscript	= DEFAULT_SCRIPT,
		.mode		= NET_MODE_TAP,
	};

	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
	p.guest_mac[5] += kvm->cfg.num_net_devices;

	while (cur) {
		if (on_cmd) {
			cmd = cur;
		} else {
			if (set_net_param(kvm, &p, cmd, cur) < 0)
				goto done;
		}
		on_cmd = !on_cmd;

		cur = strtok(NULL, ",=");
	}

	kvm->cfg.num_net_devices++;

	kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params));
	if (kvm->cfg.net_params == NULL)
		die("Failed adding new network device");

	kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p;

done:
	free(buf);
	return 0;
}

static int virtio_net__init_one(struct virtio_net_params *params)
{
	int i, err;
	struct net_dev *ndev;
	struct virtio_ops *ops;
	enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm);

	ndev = calloc(1, sizeof(struct net_dev));
	if (ndev == NULL)
		return -ENOMEM;

	ops = malloc(sizeof(*ops));
	if (ops == NULL) {
		err = -ENOMEM;
		goto err_free_ndev;
	}

	list_add_tail(&ndev->list, &ndevs);

	ndev->kvm = params->kvm;
	ndev->params = params;

	mutex_init(&ndev->mutex);
	ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq));
	ndev->config.status = VIRTIO_NET_S_LINK_UP;
	if (ndev->queue_pairs > 1)
		ndev->config.max_virtqueue_pairs = ndev->queue_pairs;

	for (i = 0 ; i < 6 ; i++) {
		ndev->config.mac[i]		= params->guest_mac[i];
		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
		ndev->info.host_mac.addr[i]	= params->host_mac[i];
	}

	ndev->mode = params->mode;
	if (ndev->mode == NET_MODE_TAP) {
		ndev->ops = &tap_ops;
		if (!virtio_net__tap_create(ndev))
			die_perror("You have requested a TAP device, but creation of one has failed because");
	} else {
		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
		ndev->info.buf_nr		= 20;
		ndev->ops = &uip_ops;
		uip_static_init(&ndev->info);
	}

	*ops = net_dev_virtio_ops;

	if (params->trans) {
		if (strcmp(params->trans, "mmio") == 0)
			trans = VIRTIO_MMIO;
		else if (strcmp(params->trans, "pci") == 0)
			trans = VIRTIO_PCI;
		else
			pr_warning("virtio-net: Unknown transport method: %s, "
				   "falling back to %s.", params->trans,
				   virtio_trans_name(trans));
	}

	virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans,
		    PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);

	if (params->vhost)
		virtio_net__vhost_init(params->kvm, ndev);

	if (compat_id == -1)
		compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");

	return 0;

err_free_ndev:
	free(ndev);
	return err;
}

int virtio_net__init(struct kvm *kvm)
{
	int i;

	for (i = 0; i < kvm->cfg.num_net_devices; i++) {
		kvm->cfg.net_params[i].kvm = kvm;
		virtio_net__init_one(&kvm->cfg.net_params[i]);
	}

	if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) {
		static struct virtio_net_params net_params;

		net_params = (struct virtio_net_params) {
			.guest_ip	= kvm->cfg.guest_ip,
			.host_ip	= kvm->cfg.host_ip,
			.kvm		= kvm,
			.script		= kvm->cfg.script,
			.mode		= NET_MODE_USER,
		};
		str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac);
		str_to_mac(kvm->cfg.host_mac, net_params.host_mac);

		virtio_net__init_one(&net_params);
	}

	return 0;
}
virtio_dev_init(virtio_net__init);

int virtio_net__exit(struct kvm *kvm)
{
	struct virtio_net_params *params;
	struct net_dev *ndev;
	struct list_head *ptr;

	list_for_each(ptr, &ndevs) {
		ndev = list_entry(ptr, struct net_dev, list);
		params = ndev->params;
		/* Clean up any tap device that was attached to a bridge */
		if (ndev->mode == NET_MODE_TAP &&
		    strcmp(params->downscript, "none"))
			virtio_net_exec_script(params->downscript, ndev->tap_name);
	}
	return 0;
}
virtio_dev_exit(virtio_net__exit);