1 #include "kvm/virtio-pci-dev.h" 2 #include "kvm/virtio-net.h" 3 #include "kvm/virtio.h" 4 #include "kvm/mutex.h" 5 #include "kvm/util.h" 6 #include "kvm/kvm.h" 7 #include "kvm/irq.h" 8 #include "kvm/uip.h" 9 #include "kvm/guest_compat.h" 10 #include "kvm/iovec.h" 11 12 #include <linux/vhost.h> 13 #include <linux/virtio_net.h> 14 #include <linux/if_tun.h> 15 #include <linux/types.h> 16 17 #include <arpa/inet.h> 18 #include <net/if.h> 19 20 #include <unistd.h> 21 #include <fcntl.h> 22 23 #include <sys/socket.h> 24 #include <sys/ioctl.h> 25 #include <sys/types.h> 26 #include <sys/wait.h> 27 #include <sys/eventfd.h> 28 29 #define VIRTIO_NET_QUEUE_SIZE 256 30 #define VIRTIO_NET_NUM_QUEUES 8 31 32 struct net_dev; 33 34 struct net_dev_operations { 35 int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); 36 int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); 37 }; 38 39 struct net_dev_queue { 40 int id; 41 struct net_dev *ndev; 42 struct virt_queue vq; 43 pthread_t thread; 44 struct mutex lock; 45 pthread_cond_t cond; 46 }; 47 48 struct net_dev { 49 struct mutex mutex; 50 struct virtio_device vdev; 51 struct list_head list; 52 53 struct net_dev_queue queues[VIRTIO_NET_NUM_QUEUES * 2 + 1]; 54 struct virtio_net_config config; 55 u32 features, queue_pairs; 56 57 int vhost_fd; 58 int tap_fd; 59 char tap_name[IFNAMSIZ]; 60 bool tap_ufo; 61 62 int mode; 63 64 struct uip_info info; 65 struct net_dev_operations *ops; 66 struct kvm *kvm; 67 68 struct virtio_net_params *params; 69 }; 70 71 static LIST_HEAD(ndevs); 72 static int compat_id = -1; 73 74 #define MAX_PACKET_SIZE 65550 75 76 static bool has_virtio_feature(struct net_dev *ndev, u32 feature) 77 { 78 return ndev->features & (1 << feature); 79 } 80 81 static void virtio_net_fix_tx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev) 82 { 83 hdr->hdr_len = virtio_guest_to_host_u16(&ndev->vdev, hdr->hdr_len); 84 hdr->gso_size = virtio_guest_to_host_u16(&ndev->vdev, hdr->gso_size); 85 hdr->csum_start = virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_start); 86 hdr->csum_offset = virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_offset); 87 } 88 89 static void virtio_net_fix_rx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev) 90 { 91 hdr->hdr_len = virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr_len); 92 hdr->gso_size = virtio_host_to_guest_u16(&ndev->vdev, hdr->gso_size); 93 hdr->csum_start = virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_start); 94 hdr->csum_offset = virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_offset); 95 } 96 97 static void *virtio_net_rx_thread(void *p) 98 { 99 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 100 struct net_dev_queue *queue = p; 101 struct virt_queue *vq = &queue->vq; 102 struct net_dev *ndev = queue->ndev; 103 struct kvm *kvm; 104 u16 out, in; 105 u16 head; 106 int len, copied; 107 108 kvm__set_thread_name("virtio-net-rx"); 109 110 kvm = ndev->kvm; 111 while (1) { 112 mutex_lock(&queue->lock); 113 if (!virt_queue__available(vq)) 114 pthread_cond_wait(&queue->cond, &queue->lock.mutex); 115 mutex_unlock(&queue->lock); 116 117 while (virt_queue__available(vq)) { 118 unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)]; 119 struct iovec dummy_iov = { 120 .iov_base = buffer, 121 .iov_len = sizeof(buffer), 122 }; 123 struct virtio_net_hdr_mrg_rxbuf *hdr; 124 u16 num_buffers; 125 126 len = ndev->ops->rx(&dummy_iov, 1, ndev); 127 if (len < 0) { 128 pr_warning("%s: rx on vq %u failed (%d), exiting thread\n", 129 __func__, queue->id, len); 130 goto out_err; 131 } 132 133 copied = num_buffers = 0; 134 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 135 hdr = iov[0].iov_base; 136 while (copied < len) { 137 size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in)); 138 139 memcpy_toiovec(iov, buffer + copied, iovsize); 140 copied += iovsize; 141 virt_queue__set_used_elem_no_update(vq, head, iovsize, num_buffers++); 142 if (copied == len) 143 break; 144 while (!virt_queue__available(vq)) 145 sleep(0); 146 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 147 } 148 149 virtio_net_fix_rx_hdr(&hdr->hdr, ndev); 150 if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF)) 151 hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers); 152 153 virt_queue__used_idx_advance(vq, num_buffers); 154 155 /* We should interrupt guest right now, otherwise latency is huge. */ 156 if (virtio_queue__should_signal(vq)) 157 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id); 158 } 159 } 160 161 out_err: 162 pthread_exit(NULL); 163 return NULL; 164 165 } 166 167 static void *virtio_net_tx_thread(void *p) 168 { 169 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 170 struct net_dev_queue *queue = p; 171 struct virt_queue *vq = &queue->vq; 172 struct net_dev *ndev = queue->ndev; 173 struct kvm *kvm; 174 u16 out, in; 175 u16 head; 176 int len; 177 178 kvm__set_thread_name("virtio-net-tx"); 179 180 kvm = ndev->kvm; 181 182 while (1) { 183 mutex_lock(&queue->lock); 184 if (!virt_queue__available(vq)) 185 pthread_cond_wait(&queue->cond, &queue->lock.mutex); 186 mutex_unlock(&queue->lock); 187 188 while (virt_queue__available(vq)) { 189 struct virtio_net_hdr *hdr; 190 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 191 hdr = iov[0].iov_base; 192 virtio_net_fix_tx_hdr(hdr, ndev); 193 len = ndev->ops->tx(iov, out, ndev); 194 if (len < 0) { 195 pr_warning("%s: tx on vq %u failed (%d)\n", 196 __func__, queue->id, errno); 197 goto out_err; 198 } 199 200 virt_queue__set_used_elem(vq, head, len); 201 } 202 203 if (virtio_queue__should_signal(vq)) 204 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id); 205 } 206 207 out_err: 208 pthread_exit(NULL); 209 return NULL; 210 } 211 212 static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm* kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl) 213 { 214 /* Not much to do here */ 215 return VIRTIO_NET_OK; 216 } 217 218 static void *virtio_net_ctrl_thread(void *p) 219 { 220 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 221 struct net_dev_queue *queue = p; 222 struct virt_queue *vq = &queue->vq; 223 struct net_dev *ndev = queue->ndev; 224 u16 out, in, head; 225 struct kvm *kvm = ndev->kvm; 226 struct virtio_net_ctrl_hdr *ctrl; 227 virtio_net_ctrl_ack *ack; 228 229 kvm__set_thread_name("virtio-net-ctrl"); 230 231 while (1) { 232 mutex_lock(&queue->lock); 233 if (!virt_queue__available(vq)) 234 pthread_cond_wait(&queue->cond, &queue->lock.mutex); 235 mutex_unlock(&queue->lock); 236 237 while (virt_queue__available(vq)) { 238 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 239 ctrl = iov[0].iov_base; 240 ack = iov[out].iov_base; 241 242 switch (ctrl->class) { 243 case VIRTIO_NET_CTRL_MQ: 244 *ack = virtio_net_handle_mq(kvm, ndev, ctrl); 245 break; 246 default: 247 *ack = VIRTIO_NET_ERR; 248 break; 249 } 250 virt_queue__set_used_elem(vq, head, iov[out].iov_len); 251 } 252 253 if (virtio_queue__should_signal(vq)) 254 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id); 255 } 256 257 pthread_exit(NULL); 258 259 return NULL; 260 } 261 262 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue) 263 { 264 struct net_dev_queue *net_queue = &ndev->queues[queue]; 265 266 if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) { 267 pr_warning("Unknown queue index %u", queue); 268 return; 269 } 270 271 mutex_lock(&net_queue->lock); 272 pthread_cond_signal(&net_queue->cond); 273 mutex_unlock(&net_queue->lock); 274 } 275 276 static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr, 277 const char *tapname) 278 { 279 int ret; 280 281 memset(ifr, 0, sizeof(*ifr)); 282 ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 283 if (tapname) 284 strncpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name)); 285 286 ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr); 287 288 if (ret >= 0) 289 strncpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name)); 290 return ret; 291 } 292 293 static int virtio_net_exec_script(const char* script, const char *tap_name) 294 { 295 pid_t pid; 296 int status; 297 298 pid = fork(); 299 if (pid == 0) { 300 execl(script, script, tap_name, NULL); 301 _exit(1); 302 } else { 303 waitpid(pid, &status, 0); 304 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 305 pr_warning("Fail to setup tap by %s", script); 306 return -1; 307 } 308 } 309 return 0; 310 } 311 312 static bool virtio_net__tap_init(struct net_dev *ndev) 313 { 314 int sock = socket(AF_INET, SOCK_STREAM, 0); 315 int hdr_len; 316 struct sockaddr_in sin = {0}; 317 struct ifreq ifr; 318 const struct virtio_net_params *params = ndev->params; 319 bool skipconf = !!params->tapif; 320 321 hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ? 322 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 323 sizeof(struct virtio_net_hdr); 324 if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) 325 pr_warning("Config tap device TUNSETVNETHDRSZ error"); 326 327 if (strcmp(params->script, "none")) { 328 if (virtio_net_exec_script(params->script, ndev->tap_name) < 0) 329 goto fail; 330 } else if (!skipconf) { 331 memset(&ifr, 0, sizeof(ifr)); 332 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name)); 333 sin.sin_addr.s_addr = inet_addr(params->host_ip); 334 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 335 ifr.ifr_addr.sa_family = AF_INET; 336 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 337 pr_warning("Could not set ip address on tap device"); 338 goto fail; 339 } 340 } 341 342 if (!skipconf) { 343 memset(&ifr, 0, sizeof(ifr)); 344 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name)); 345 ioctl(sock, SIOCGIFFLAGS, &ifr); 346 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 347 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 348 pr_warning("Could not bring tap device up"); 349 } 350 351 close(sock); 352 353 return 1; 354 355 fail: 356 if (sock >= 0) 357 close(sock); 358 if (ndev->tap_fd >= 0) 359 close(ndev->tap_fd); 360 361 return 0; 362 } 363 364 static bool virtio_net__tap_create(struct net_dev *ndev) 365 { 366 int offload; 367 struct ifreq ifr; 368 const struct virtio_net_params *params = ndev->params; 369 bool macvtap = (!!params->tapif) && (params->tapif[0] == '/'); 370 371 /* Did the user already gave us the FD? */ 372 if (params->fd) 373 ndev->tap_fd = params->fd; 374 else { 375 const char *tap_file = "/dev/net/tun"; 376 377 /* Did the user ask us to use macvtap? */ 378 if (macvtap) 379 tap_file = params->tapif; 380 381 ndev->tap_fd = open(tap_file, O_RDWR); 382 if (ndev->tap_fd < 0) { 383 pr_warning("Unable to open %s", tap_file); 384 return 0; 385 } 386 } 387 388 if (!macvtap && 389 virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) { 390 pr_warning("Config tap device error. Are you root?"); 391 goto fail; 392 } 393 394 /* 395 * The UFO support had been removed from kernel in commit: 396 * ID: fb652fdfe83710da0ca13448a41b7ed027d0a984 397 * https://www.spinics.net/lists/netdev/msg443562.html 398 * In oder to support the older kernels without this commit, 399 * we set the TUN_F_UFO to offload by default to test the status of 400 * UFO kernel support. 401 */ 402 ndev->tap_ufo = true; 403 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 404 if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) { 405 /* 406 * Is this failure caused by kernel remove the UFO support? 407 * Try TUNSETOFFLOAD without TUN_F_UFO. 408 */ 409 offload &= ~TUN_F_UFO; 410 if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) { 411 pr_warning("Config tap device TUNSETOFFLOAD error"); 412 goto fail; 413 } 414 ndev->tap_ufo = false; 415 } 416 417 return 1; 418 419 fail: 420 if ((ndev->tap_fd >= 0) || (!params->fd) ) 421 close(ndev->tap_fd); 422 423 return 0; 424 } 425 426 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 427 { 428 return writev(ndev->tap_fd, iov, out); 429 } 430 431 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 432 { 433 return readv(ndev->tap_fd, iov, in); 434 } 435 436 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 437 { 438 return uip_tx(iov, out, &ndev->info); 439 } 440 441 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 442 { 443 return uip_rx(iov, in, &ndev->info); 444 } 445 446 static struct net_dev_operations tap_ops = { 447 .rx = tap_ops_rx, 448 .tx = tap_ops_tx, 449 }; 450 451 static struct net_dev_operations uip_ops = { 452 .rx = uip_ops_rx, 453 .tx = uip_ops_tx, 454 }; 455 456 static u8 *get_config(struct kvm *kvm, void *dev) 457 { 458 struct net_dev *ndev = dev; 459 460 return ((u8 *)(&ndev->config)); 461 } 462 463 static u32 get_host_features(struct kvm *kvm, void *dev) 464 { 465 u32 features; 466 struct net_dev *ndev = dev; 467 468 features = 1UL << VIRTIO_NET_F_MAC 469 | 1UL << VIRTIO_NET_F_CSUM 470 | 1UL << VIRTIO_NET_F_HOST_TSO4 471 | 1UL << VIRTIO_NET_F_HOST_TSO6 472 | 1UL << VIRTIO_NET_F_GUEST_TSO4 473 | 1UL << VIRTIO_NET_F_GUEST_TSO6 474 | 1UL << VIRTIO_RING_F_EVENT_IDX 475 | 1UL << VIRTIO_RING_F_INDIRECT_DESC 476 | 1UL << VIRTIO_NET_F_CTRL_VQ 477 | 1UL << VIRTIO_NET_F_MRG_RXBUF 478 | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0); 479 480 /* 481 * The UFO feature for host and guest only can be enabled when the 482 * kernel has TAP UFO support. 483 */ 484 if (ndev->tap_ufo) 485 features |= (1UL << VIRTIO_NET_F_HOST_UFO 486 | 1UL << VIRTIO_NET_F_GUEST_UFO); 487 488 return features; 489 } 490 491 static int virtio_net__vhost_set_features(struct net_dev *ndev) 492 { 493 u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX; 494 u64 vhost_features; 495 496 if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0) 497 die_perror("VHOST_GET_FEATURES failed"); 498 499 /* make sure both side support mergable rx buffers */ 500 if (vhost_features & 1UL << VIRTIO_NET_F_MRG_RXBUF && 501 has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF)) 502 features |= 1UL << VIRTIO_NET_F_MRG_RXBUF; 503 504 return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features); 505 } 506 507 static void set_guest_features(struct kvm *kvm, void *dev, u32 features) 508 { 509 struct net_dev *ndev = dev; 510 struct virtio_net_config *conf = &ndev->config; 511 512 ndev->features = features; 513 514 conf->status = virtio_host_to_guest_u16(&ndev->vdev, conf->status); 515 conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev, 516 conf->max_virtqueue_pairs); 517 } 518 519 static void virtio_net_start(struct net_dev *ndev) 520 { 521 if (ndev->mode == NET_MODE_TAP) { 522 if (!virtio_net__tap_init(ndev)) 523 die_perror("TAP device initialized failed because"); 524 525 if (ndev->vhost_fd && 526 virtio_net__vhost_set_features(ndev) != 0) 527 die_perror("VHOST_SET_FEATURES failed"); 528 } else { 529 ndev->info.vnet_hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ? 530 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 531 sizeof(struct virtio_net_hdr); 532 uip_init(&ndev->info); 533 } 534 } 535 536 static void notify_status(struct kvm *kvm, void *dev, u32 status) 537 { 538 if (status & VIRTIO__STATUS_START) 539 virtio_net_start(dev); 540 } 541 542 static bool is_ctrl_vq(struct net_dev *ndev, u32 vq) 543 { 544 return vq == (u32)(ndev->queue_pairs * 2); 545 } 546 547 static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align, 548 u32 pfn) 549 { 550 struct vhost_vring_state state = { .index = vq }; 551 struct net_dev_queue *net_queue; 552 struct vhost_vring_addr addr; 553 struct net_dev *ndev = dev; 554 struct virt_queue *queue; 555 void *p; 556 int r; 557 558 compat__remove_message(compat_id); 559 560 net_queue = &ndev->queues[vq]; 561 net_queue->id = vq; 562 net_queue->ndev = ndev; 563 queue = &net_queue->vq; 564 queue->pfn = pfn; 565 p = virtio_get_vq(kvm, queue->pfn, page_size); 566 567 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, align); 568 virtio_init_device_vq(&ndev->vdev, queue); 569 570 mutex_init(&net_queue->lock); 571 pthread_cond_init(&net_queue->cond, NULL); 572 if (is_ctrl_vq(ndev, vq)) { 573 pthread_create(&net_queue->thread, NULL, virtio_net_ctrl_thread, 574 net_queue); 575 576 return 0; 577 } else if (ndev->vhost_fd == 0 ) { 578 if (vq & 1) 579 pthread_create(&net_queue->thread, NULL, 580 virtio_net_tx_thread, net_queue); 581 else 582 pthread_create(&net_queue->thread, NULL, 583 virtio_net_rx_thread, net_queue); 584 585 return 0; 586 } 587 588 if (queue->endian != VIRTIO_ENDIAN_HOST) 589 die_perror("VHOST requires the same endianness in guest and host"); 590 591 state.num = queue->vring.num; 592 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state); 593 if (r < 0) 594 die_perror("VHOST_SET_VRING_NUM failed"); 595 state.num = 0; 596 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state); 597 if (r < 0) 598 die_perror("VHOST_SET_VRING_BASE failed"); 599 600 addr = (struct vhost_vring_addr) { 601 .index = vq, 602 .desc_user_addr = (u64)(unsigned long)queue->vring.desc, 603 .avail_user_addr = (u64)(unsigned long)queue->vring.avail, 604 .used_user_addr = (u64)(unsigned long)queue->vring.used, 605 }; 606 607 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr); 608 if (r < 0) 609 die_perror("VHOST_SET_VRING_ADDR failed"); 610 611 return 0; 612 } 613 614 static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi) 615 { 616 struct net_dev *ndev = dev; 617 struct net_dev_queue *queue = &ndev->queues[vq]; 618 struct vhost_vring_file file; 619 int r; 620 621 if (ndev->vhost_fd == 0) 622 return; 623 624 file = (struct vhost_vring_file) { 625 .index = vq, 626 .fd = eventfd(0, 0), 627 }; 628 629 r = irq__add_irqfd(kvm, gsi, file.fd, -1); 630 if (r < 0) 631 die_perror("KVM_IRQFD failed"); 632 633 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file); 634 if (r < 0) 635 die_perror("VHOST_SET_VRING_CALL failed"); 636 file.fd = ndev->tap_fd; 637 r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file); 638 if (r != 0) 639 die("VHOST_NET_SET_BACKEND failed %d", errno); 640 641 } 642 643 static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd) 644 { 645 struct net_dev *ndev = dev; 646 struct vhost_vring_file file = { 647 .index = vq, 648 .fd = efd, 649 }; 650 int r; 651 652 if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq)) 653 return; 654 655 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file); 656 if (r < 0) 657 die_perror("VHOST_SET_VRING_KICK failed"); 658 } 659 660 static int notify_vq(struct kvm *kvm, void *dev, u32 vq) 661 { 662 struct net_dev *ndev = dev; 663 664 virtio_net_handle_callback(kvm, ndev, vq); 665 666 return 0; 667 } 668 669 static struct virt_queue *get_vq(struct kvm *kvm, void *dev, u32 vq) 670 { 671 struct net_dev *ndev = dev; 672 673 return &ndev->queues[vq].vq; 674 } 675 676 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq) 677 { 678 /* FIXME: dynamic */ 679 return VIRTIO_NET_QUEUE_SIZE; 680 } 681 682 static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size) 683 { 684 /* FIXME: dynamic */ 685 return size; 686 } 687 688 static int get_vq_count(struct kvm *kvm, void *dev) 689 { 690 struct net_dev *ndev = dev; 691 692 return ndev->queue_pairs * 2 + 1; 693 } 694 695 static struct virtio_ops net_dev_virtio_ops = { 696 .get_config = get_config, 697 .get_host_features = get_host_features, 698 .set_guest_features = set_guest_features, 699 .get_vq_count = get_vq_count, 700 .init_vq = init_vq, 701 .get_vq = get_vq, 702 .get_size_vq = get_size_vq, 703 .set_size_vq = set_size_vq, 704 .notify_vq = notify_vq, 705 .notify_vq_gsi = notify_vq_gsi, 706 .notify_vq_eventfd = notify_vq_eventfd, 707 .notify_status = notify_status, 708 }; 709 710 static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev) 711 { 712 struct kvm_mem_bank *bank; 713 struct vhost_memory *mem; 714 int r, i; 715 716 ndev->vhost_fd = open("/dev/vhost-net", O_RDWR); 717 if (ndev->vhost_fd < 0) 718 die_perror("Failed openning vhost-net device"); 719 720 mem = calloc(1, sizeof(*mem) + kvm->mem_slots * sizeof(struct vhost_memory_region)); 721 if (mem == NULL) 722 die("Failed allocating memory for vhost memory map"); 723 724 i = 0; 725 list_for_each_entry(bank, &kvm->mem_banks, list) { 726 mem->regions[i] = (struct vhost_memory_region) { 727 .guest_phys_addr = bank->guest_phys_addr, 728 .memory_size = bank->size, 729 .userspace_addr = (unsigned long)bank->host_addr, 730 }; 731 i++; 732 } 733 mem->nregions = i; 734 735 r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER); 736 if (r != 0) 737 die_perror("VHOST_SET_OWNER failed"); 738 739 r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem); 740 if (r != 0) 741 die_perror("VHOST_SET_MEM_TABLE failed"); 742 743 ndev->vdev.use_vhost = true; 744 745 free(mem); 746 } 747 748 static inline void str_to_mac(const char *str, char *mac) 749 { 750 sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 751 mac, mac+1, mac+2, mac+3, mac+4, mac+5); 752 } 753 static int set_net_param(struct kvm *kvm, struct virtio_net_params *p, 754 const char *param, const char *val) 755 { 756 if (strcmp(param, "guest_mac") == 0) { 757 str_to_mac(val, p->guest_mac); 758 } else if (strcmp(param, "mode") == 0) { 759 if (!strncmp(val, "user", 4)) { 760 int i; 761 762 for (i = 0; i < kvm->cfg.num_net_devices; i++) 763 if (kvm->cfg.net_params[i].mode == NET_MODE_USER) 764 die("Only one usermode network device allowed at a time"); 765 p->mode = NET_MODE_USER; 766 } else if (!strncmp(val, "tap", 3)) { 767 p->mode = NET_MODE_TAP; 768 } else if (!strncmp(val, "none", 4)) { 769 kvm->cfg.no_net = 1; 770 return -1; 771 } else 772 die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network); 773 } else if (strcmp(param, "script") == 0) { 774 p->script = strdup(val); 775 } else if (strcmp(param, "downscript") == 0) { 776 p->downscript = strdup(val); 777 } else if (strcmp(param, "guest_ip") == 0) { 778 p->guest_ip = strdup(val); 779 } else if (strcmp(param, "host_ip") == 0) { 780 p->host_ip = strdup(val); 781 } else if (strcmp(param, "trans") == 0) { 782 p->trans = strdup(val); 783 } else if (strcmp(param, "tapif") == 0) { 784 p->tapif = strdup(val); 785 } else if (strcmp(param, "vhost") == 0) { 786 p->vhost = atoi(val); 787 } else if (strcmp(param, "fd") == 0) { 788 p->fd = atoi(val); 789 } else if (strcmp(param, "mq") == 0) { 790 p->mq = atoi(val); 791 } else 792 die("Unknown network parameter %s", param); 793 794 return 0; 795 } 796 797 int netdev_parser(const struct option *opt, const char *arg, int unset) 798 { 799 struct virtio_net_params p; 800 char *buf = NULL, *cmd = NULL, *cur = NULL; 801 bool on_cmd = true; 802 struct kvm *kvm = opt->ptr; 803 804 if (arg) { 805 buf = strdup(arg); 806 if (buf == NULL) 807 die("Failed allocating new net buffer"); 808 cur = strtok(buf, ",="); 809 } 810 811 p = (struct virtio_net_params) { 812 .guest_ip = DEFAULT_GUEST_ADDR, 813 .host_ip = DEFAULT_HOST_ADDR, 814 .script = DEFAULT_SCRIPT, 815 .downscript = DEFAULT_SCRIPT, 816 .mode = NET_MODE_TAP, 817 }; 818 819 str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac); 820 p.guest_mac[5] += kvm->cfg.num_net_devices; 821 822 while (cur) { 823 if (on_cmd) { 824 cmd = cur; 825 } else { 826 if (set_net_param(kvm, &p, cmd, cur) < 0) 827 goto done; 828 } 829 on_cmd = !on_cmd; 830 831 cur = strtok(NULL, ",="); 832 }; 833 834 kvm->cfg.num_net_devices++; 835 836 kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params)); 837 if (kvm->cfg.net_params == NULL) 838 die("Failed adding new network device"); 839 840 kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p; 841 842 done: 843 free(buf); 844 return 0; 845 } 846 847 static int virtio_net__init_one(struct virtio_net_params *params) 848 { 849 int i, err; 850 struct net_dev *ndev; 851 struct virtio_ops *ops; 852 enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm); 853 854 ndev = calloc(1, sizeof(struct net_dev)); 855 if (ndev == NULL) 856 return -ENOMEM; 857 858 ops = malloc(sizeof(*ops)); 859 if (ops == NULL) { 860 err = -ENOMEM; 861 goto err_free_ndev; 862 } 863 864 list_add_tail(&ndev->list, &ndevs); 865 866 ndev->kvm = params->kvm; 867 ndev->params = params; 868 869 mutex_init(&ndev->mutex); 870 ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq)); 871 ndev->config.status = VIRTIO_NET_S_LINK_UP; 872 if (ndev->queue_pairs > 1) 873 ndev->config.max_virtqueue_pairs = ndev->queue_pairs; 874 875 for (i = 0 ; i < 6 ; i++) { 876 ndev->config.mac[i] = params->guest_mac[i]; 877 ndev->info.guest_mac.addr[i] = params->guest_mac[i]; 878 ndev->info.host_mac.addr[i] = params->host_mac[i]; 879 } 880 881 ndev->mode = params->mode; 882 if (ndev->mode == NET_MODE_TAP) { 883 ndev->ops = &tap_ops; 884 if (!virtio_net__tap_create(ndev)) 885 die_perror("You have requested a TAP device, but creation of one has failed because"); 886 } else { 887 ndev->info.host_ip = ntohl(inet_addr(params->host_ip)); 888 ndev->info.guest_ip = ntohl(inet_addr(params->guest_ip)); 889 ndev->info.guest_netmask = ntohl(inet_addr("255.255.255.0")); 890 ndev->info.buf_nr = 20, 891 ndev->ops = &uip_ops; 892 uip_static_init(&ndev->info); 893 } 894 895 *ops = net_dev_virtio_ops; 896 897 if (params->trans) { 898 if (strcmp(params->trans, "mmio") == 0) 899 trans = VIRTIO_MMIO; 900 else if (strcmp(params->trans, "pci") == 0) 901 trans = VIRTIO_PCI; 902 else 903 pr_warning("virtio-net: Unknown transport method : %s, " 904 "falling back to %s.", params->trans, 905 virtio_trans_name(trans)); 906 } 907 908 virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans, 909 PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET); 910 911 if (params->vhost) 912 virtio_net__vhost_init(params->kvm, ndev); 913 914 if (compat_id == -1) 915 compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET"); 916 917 return 0; 918 919 err_free_ndev: 920 free(ndev); 921 return err; 922 } 923 924 int virtio_net__init(struct kvm *kvm) 925 { 926 int i; 927 928 for (i = 0; i < kvm->cfg.num_net_devices; i++) { 929 kvm->cfg.net_params[i].kvm = kvm; 930 virtio_net__init_one(&kvm->cfg.net_params[i]); 931 } 932 933 if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) { 934 static struct virtio_net_params net_params; 935 936 net_params = (struct virtio_net_params) { 937 .guest_ip = kvm->cfg.guest_ip, 938 .host_ip = kvm->cfg.host_ip, 939 .kvm = kvm, 940 .script = kvm->cfg.script, 941 .mode = NET_MODE_USER, 942 }; 943 str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac); 944 str_to_mac(kvm->cfg.host_mac, net_params.host_mac); 945 946 virtio_net__init_one(&net_params); 947 } 948 949 return 0; 950 } 951 virtio_dev_init(virtio_net__init); 952 953 int virtio_net__exit(struct kvm *kvm) 954 { 955 struct virtio_net_params *params; 956 struct net_dev *ndev; 957 struct list_head *ptr; 958 959 list_for_each(ptr, &ndevs) { 960 ndev = list_entry(ptr, struct net_dev, list); 961 params = ndev->params; 962 /* Cleanup any tap device which attached to bridge */ 963 if (ndev->mode == NET_MODE_TAP && 964 strcmp(params->downscript, "none")) 965 virtio_net_exec_script(params->downscript, ndev->tap_name); 966 } 967 return 0; 968 } 969 virtio_dev_exit(virtio_net__exit); 970