1 #include "kvm/virtio-pci-dev.h" 2 #include "kvm/virtio-net.h" 3 #include "kvm/virtio.h" 4 #include "kvm/mutex.h" 5 #include "kvm/util.h" 6 #include "kvm/kvm.h" 7 #include "kvm/irq.h" 8 #include "kvm/uip.h" 9 #include "kvm/guest_compat.h" 10 #include "kvm/iovec.h" 11 12 #include <linux/vhost.h> 13 #include <linux/virtio_net.h> 14 #include <linux/if_tun.h> 15 #include <linux/types.h> 16 17 #include <arpa/inet.h> 18 #include <net/if.h> 19 20 #include <unistd.h> 21 #include <fcntl.h> 22 23 #include <sys/socket.h> 24 #include <sys/ioctl.h> 25 #include <sys/types.h> 26 #include <sys/wait.h> 27 #include <sys/eventfd.h> 28 29 #define VIRTIO_NET_QUEUE_SIZE 256 30 #define VIRTIO_NET_NUM_QUEUES 8 31 32 struct net_dev; 33 34 struct net_dev_operations { 35 int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); 36 int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); 37 }; 38 39 struct net_dev { 40 struct mutex mutex; 41 struct virtio_device vdev; 42 struct list_head list; 43 44 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES * 2 + 1]; 45 struct virtio_net_config config; 46 u32 features, rx_vqs, tx_vqs, queue_pairs; 47 48 pthread_t io_thread[VIRTIO_NET_NUM_QUEUES * 2 + 1]; 49 struct mutex io_lock[VIRTIO_NET_NUM_QUEUES * 2 + 1]; 50 pthread_cond_t io_cond[VIRTIO_NET_NUM_QUEUES * 2 + 1]; 51 52 int vhost_fd; 53 int tap_fd; 54 char tap_name[IFNAMSIZ]; 55 bool tap_ufo; 56 57 int mode; 58 59 struct uip_info info; 60 struct net_dev_operations *ops; 61 struct kvm *kvm; 62 63 struct virtio_net_params *params; 64 }; 65 66 static LIST_HEAD(ndevs); 67 static int compat_id = -1; 68 69 #define MAX_PACKET_SIZE 65550 70 71 static bool has_virtio_feature(struct net_dev *ndev, u32 feature) 72 { 73 return ndev->features & (1 << feature); 74 } 75 76 static void virtio_net_fix_tx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev) 77 { 78 hdr->hdr_len = virtio_guest_to_host_u16(&ndev->vdev, hdr->hdr_len); 79 hdr->gso_size = virtio_guest_to_host_u16(&ndev->vdev, hdr->gso_size); 80 hdr->csum_start = virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_start); 81 hdr->csum_offset = virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_offset); 82 } 83 84 static void virtio_net_fix_rx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev) 85 { 86 hdr->hdr_len = virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr_len); 87 hdr->gso_size = virtio_host_to_guest_u16(&ndev->vdev, hdr->gso_size); 88 hdr->csum_start = virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_start); 89 hdr->csum_offset = virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_offset); 90 } 91 92 static void *virtio_net_rx_thread(void *p) 93 { 94 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 95 struct virt_queue *vq; 96 struct kvm *kvm; 97 struct net_dev *ndev = p; 98 u16 out, in; 99 u16 head; 100 int len, copied; 101 u32 id; 102 103 mutex_lock(&ndev->mutex); 104 id = ndev->rx_vqs++ * 2; 105 mutex_unlock(&ndev->mutex); 106 107 kvm__set_thread_name("virtio-net-rx"); 108 109 kvm = ndev->kvm; 110 vq = &ndev->vqs[id]; 111 112 while (1) { 113 mutex_lock(&ndev->io_lock[id]); 114 if (!virt_queue__available(vq)) 115 pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex); 116 mutex_unlock(&ndev->io_lock[id]); 117 118 while (virt_queue__available(vq)) { 119 unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)]; 120 struct iovec dummy_iov = { 121 .iov_base = buffer, 122 .iov_len = sizeof(buffer), 123 }; 124 struct virtio_net_hdr_mrg_rxbuf *hdr; 125 u16 num_buffers; 126 127 len = ndev->ops->rx(&dummy_iov, 1, ndev); 128 if (len < 0) { 129 pr_warning("%s: rx on vq %u failed (%d), exiting thread\n", 130 __func__, id, len); 131 goto out_err; 132 } 133 134 copied = num_buffers = 0; 135 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 136 hdr = iov[0].iov_base; 137 while (copied < len) { 138 size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in)); 139 140 memcpy_toiovec(iov, buffer + copied, iovsize); 141 copied += iovsize; 142 virt_queue__set_used_elem_no_update(vq, head, iovsize, num_buffers++); 143 if (copied == len) 144 break; 145 while (!virt_queue__available(vq)) 146 sleep(0); 147 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 148 } 149 150 virtio_net_fix_rx_hdr(&hdr->hdr, ndev); 151 if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF)) 152 hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers); 153 154 virt_queue__used_idx_advance(vq, num_buffers); 155 156 /* We should interrupt guest right now, otherwise latency is huge. */ 157 if (virtio_queue__should_signal(vq)) 158 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id); 159 } 160 } 161 162 out_err: 163 pthread_exit(NULL); 164 return NULL; 165 166 } 167 168 static void *virtio_net_tx_thread(void *p) 169 { 170 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 171 struct virt_queue *vq; 172 struct kvm *kvm; 173 struct net_dev *ndev = p; 174 u16 out, in; 175 u16 head; 176 int len; 177 u32 id; 178 179 mutex_lock(&ndev->mutex); 180 id = ndev->tx_vqs++ * 2 + 1; 181 mutex_unlock(&ndev->mutex); 182 183 kvm__set_thread_name("virtio-net-tx"); 184 185 kvm = ndev->kvm; 186 vq = &ndev->vqs[id]; 187 188 while (1) { 189 mutex_lock(&ndev->io_lock[id]); 190 if (!virt_queue__available(vq)) 191 pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex); 192 mutex_unlock(&ndev->io_lock[id]); 193 194 while (virt_queue__available(vq)) { 195 struct virtio_net_hdr *hdr; 196 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 197 hdr = iov[0].iov_base; 198 virtio_net_fix_tx_hdr(hdr, ndev); 199 len = ndev->ops->tx(iov, out, ndev); 200 if (len < 0) { 201 pr_warning("%s: tx on vq %u failed (%d)\n", 202 __func__, id, errno); 203 goto out_err; 204 } 205 206 virt_queue__set_used_elem(vq, head, len); 207 } 208 209 if (virtio_queue__should_signal(vq)) 210 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id); 211 } 212 213 out_err: 214 pthread_exit(NULL); 215 return NULL; 216 } 217 218 static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm* kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl) 219 { 220 /* Not much to do here */ 221 return VIRTIO_NET_OK; 222 } 223 224 static void *virtio_net_ctrl_thread(void *p) 225 { 226 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 227 u16 out, in, head; 228 struct net_dev *ndev = p; 229 struct kvm *kvm = ndev->kvm; 230 u32 id = ndev->queue_pairs * 2; 231 struct virt_queue *vq = &ndev->vqs[id]; 232 struct virtio_net_ctrl_hdr *ctrl; 233 virtio_net_ctrl_ack *ack; 234 235 kvm__set_thread_name("virtio-net-ctrl"); 236 237 while (1) { 238 mutex_lock(&ndev->io_lock[id]); 239 if (!virt_queue__available(vq)) 240 pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex); 241 mutex_unlock(&ndev->io_lock[id]); 242 243 while (virt_queue__available(vq)) { 244 head = virt_queue__get_iov(&ndev->vqs[id], iov, &out, &in, kvm); 245 ctrl = iov[0].iov_base; 246 ack = iov[out].iov_base; 247 248 switch (ctrl->class) { 249 case VIRTIO_NET_CTRL_MQ: 250 *ack = virtio_net_handle_mq(kvm, ndev, ctrl); 251 break; 252 default: 253 *ack = VIRTIO_NET_ERR; 254 break; 255 } 256 virt_queue__set_used_elem(&ndev->vqs[id], head, iov[out].iov_len); 257 } 258 259 if (virtio_queue__should_signal(&ndev->vqs[id])) 260 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id); 261 } 262 263 pthread_exit(NULL); 264 265 return NULL; 266 } 267 268 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue) 269 { 270 if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) { 271 pr_warning("Unknown queue index %u", queue); 272 return; 273 } 274 275 mutex_lock(&ndev->io_lock[queue]); 276 pthread_cond_signal(&ndev->io_cond[queue]); 277 mutex_unlock(&ndev->io_lock[queue]); 278 } 279 280 static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr, 281 const char *tapname) 282 { 283 int ret; 284 285 memset(ifr, 0, sizeof(*ifr)); 286 ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 287 if (tapname) 288 strncpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name)); 289 290 ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr); 291 292 if (ret >= 0) 293 strncpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name)); 294 return ret; 295 } 296 297 static int virtio_net_exec_script(const char* script, const char *tap_name) 298 { 299 pid_t pid; 300 int status; 301 302 pid = fork(); 303 if (pid == 0) { 304 execl(script, script, tap_name, NULL); 305 _exit(1); 306 } else { 307 waitpid(pid, &status, 0); 308 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 309 pr_warning("Fail to setup tap by %s", script); 310 return -1; 311 } 312 } 313 return 0; 314 } 315 316 static bool virtio_net__tap_init(struct net_dev *ndev) 317 { 318 int sock = socket(AF_INET, SOCK_STREAM, 0); 319 int hdr_len; 320 struct sockaddr_in sin = {0}; 321 struct ifreq ifr; 322 const struct virtio_net_params *params = ndev->params; 323 bool skipconf = !!params->tapif; 324 325 hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ? 326 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 327 sizeof(struct virtio_net_hdr); 328 if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) 329 pr_warning("Config tap device TUNSETVNETHDRSZ error"); 330 331 if (strcmp(params->script, "none")) { 332 if (virtio_net_exec_script(params->script, ndev->tap_name) < 0) 333 goto fail; 334 } else if (!skipconf) { 335 memset(&ifr, 0, sizeof(ifr)); 336 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name)); 337 sin.sin_addr.s_addr = inet_addr(params->host_ip); 338 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 339 ifr.ifr_addr.sa_family = AF_INET; 340 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 341 pr_warning("Could not set ip address on tap device"); 342 goto fail; 343 } 344 } 345 346 if (!skipconf) { 347 memset(&ifr, 0, sizeof(ifr)); 348 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name)); 349 ioctl(sock, SIOCGIFFLAGS, &ifr); 350 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 351 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 352 pr_warning("Could not bring tap device up"); 353 } 354 355 close(sock); 356 357 return 1; 358 359 fail: 360 if (sock >= 0) 361 close(sock); 362 if (ndev->tap_fd >= 0) 363 close(ndev->tap_fd); 364 365 return 0; 366 } 367 368 static bool virtio_net__tap_create(struct net_dev *ndev) 369 { 370 int offload; 371 struct ifreq ifr; 372 const struct virtio_net_params *params = ndev->params; 373 bool macvtap = (!!params->tapif) && (params->tapif[0] == '/'); 374 375 /* Did the user already gave us the FD? */ 376 if (params->fd) 377 ndev->tap_fd = params->fd; 378 else { 379 const char *tap_file = "/dev/net/tun"; 380 381 /* Did the user ask us to use macvtap? */ 382 if (macvtap) 383 tap_file = params->tapif; 384 385 ndev->tap_fd = open(tap_file, O_RDWR); 386 if (ndev->tap_fd < 0) { 387 pr_warning("Unable to open %s", tap_file); 388 return 0; 389 } 390 } 391 392 if (!macvtap && 393 virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) { 394 pr_warning("Config tap device error. Are you root?"); 395 goto fail; 396 } 397 398 /* 399 * The UFO support had been removed from kernel in commit: 400 * ID: fb652fdfe83710da0ca13448a41b7ed027d0a984 401 * https://www.spinics.net/lists/netdev/msg443562.html 402 * In oder to support the older kernels without this commit, 403 * we set the TUN_F_UFO to offload by default to test the status of 404 * UFO kernel support. 405 */ 406 ndev->tap_ufo = true; 407 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 408 if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) { 409 /* 410 * Is this failure caused by kernel remove the UFO support? 411 * Try TUNSETOFFLOAD without TUN_F_UFO. 412 */ 413 offload &= ~TUN_F_UFO; 414 if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) { 415 pr_warning("Config tap device TUNSETOFFLOAD error"); 416 goto fail; 417 } 418 ndev->tap_ufo = false; 419 } 420 421 return 1; 422 423 fail: 424 if ((ndev->tap_fd >= 0) || (!params->fd) ) 425 close(ndev->tap_fd); 426 427 return 0; 428 } 429 430 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 431 { 432 return writev(ndev->tap_fd, iov, out); 433 } 434 435 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 436 { 437 return readv(ndev->tap_fd, iov, in); 438 } 439 440 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 441 { 442 return uip_tx(iov, out, &ndev->info); 443 } 444 445 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 446 { 447 return uip_rx(iov, in, &ndev->info); 448 } 449 450 static struct net_dev_operations tap_ops = { 451 .rx = tap_ops_rx, 452 .tx = tap_ops_tx, 453 }; 454 455 static struct net_dev_operations uip_ops = { 456 .rx = uip_ops_rx, 457 .tx = uip_ops_tx, 458 }; 459 460 static u8 *get_config(struct kvm *kvm, void *dev) 461 { 462 struct net_dev *ndev = dev; 463 464 return ((u8 *)(&ndev->config)); 465 } 466 467 static u32 get_host_features(struct kvm *kvm, void *dev) 468 { 469 u32 features; 470 struct net_dev *ndev = dev; 471 472 features = 1UL << VIRTIO_NET_F_MAC 473 | 1UL << VIRTIO_NET_F_CSUM 474 | 1UL << VIRTIO_NET_F_HOST_TSO4 475 | 1UL << VIRTIO_NET_F_HOST_TSO6 476 | 1UL << VIRTIO_NET_F_GUEST_TSO4 477 | 1UL << VIRTIO_NET_F_GUEST_TSO6 478 | 1UL << VIRTIO_RING_F_EVENT_IDX 479 | 1UL << VIRTIO_RING_F_INDIRECT_DESC 480 | 1UL << VIRTIO_NET_F_CTRL_VQ 481 | 1UL << VIRTIO_NET_F_MRG_RXBUF 482 | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0); 483 484 /* 485 * The UFO feature for host and guest only can be enabled when the 486 * kernel has TAP UFO support. 487 */ 488 if (ndev->tap_ufo) 489 features |= (1UL << VIRTIO_NET_F_HOST_UFO 490 | 1UL << VIRTIO_NET_F_GUEST_UFO); 491 492 return features; 493 } 494 495 static int virtio_net__vhost_set_features(struct net_dev *ndev) 496 { 497 u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX; 498 u64 vhost_features; 499 500 if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0) 501 die_perror("VHOST_GET_FEATURES failed"); 502 503 /* make sure both side support mergable rx buffers */ 504 if (vhost_features & 1UL << VIRTIO_NET_F_MRG_RXBUF && 505 has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF)) 506 features |= 1UL << VIRTIO_NET_F_MRG_RXBUF; 507 508 return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features); 509 } 510 511 static void set_guest_features(struct kvm *kvm, void *dev, u32 features) 512 { 513 struct net_dev *ndev = dev; 514 struct virtio_net_config *conf = &ndev->config; 515 516 ndev->features = features; 517 518 conf->status = virtio_host_to_guest_u16(&ndev->vdev, conf->status); 519 conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev, 520 conf->max_virtqueue_pairs); 521 } 522 523 static void virtio_net_start(struct net_dev *ndev) 524 { 525 if (ndev->mode == NET_MODE_TAP) { 526 if (!virtio_net__tap_init(ndev)) 527 die_perror("TAP device initialized failed because"); 528 529 if (ndev->vhost_fd && 530 virtio_net__vhost_set_features(ndev) != 0) 531 die_perror("VHOST_SET_FEATURES failed"); 532 } else { 533 ndev->info.vnet_hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ? 534 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 535 sizeof(struct virtio_net_hdr); 536 uip_init(&ndev->info); 537 } 538 } 539 540 static void notify_status(struct kvm *kvm, void *dev, u32 status) 541 { 542 if (status & VIRTIO__STATUS_START) 543 virtio_net_start(dev); 544 } 545 546 static bool is_ctrl_vq(struct net_dev *ndev, u32 vq) 547 { 548 return vq == (u32)(ndev->queue_pairs * 2); 549 } 550 551 static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align, 552 u32 pfn) 553 { 554 struct vhost_vring_state state = { .index = vq }; 555 struct vhost_vring_addr addr; 556 struct net_dev *ndev = dev; 557 struct virt_queue *queue; 558 void *p; 559 int r; 560 561 compat__remove_message(compat_id); 562 563 queue = &ndev->vqs[vq]; 564 queue->pfn = pfn; 565 p = virtio_get_vq(kvm, queue->pfn, page_size); 566 567 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, align); 568 virtio_init_device_vq(&ndev->vdev, queue); 569 570 mutex_init(&ndev->io_lock[vq]); 571 pthread_cond_init(&ndev->io_cond[vq], NULL); 572 if (is_ctrl_vq(ndev, vq)) { 573 pthread_create(&ndev->io_thread[vq], NULL, virtio_net_ctrl_thread, ndev); 574 575 return 0; 576 } else if (ndev->vhost_fd == 0 ) { 577 if (vq & 1) 578 pthread_create(&ndev->io_thread[vq], NULL, virtio_net_tx_thread, ndev); 579 else 580 pthread_create(&ndev->io_thread[vq], NULL, virtio_net_rx_thread, ndev); 581 582 return 0; 583 } 584 585 if (queue->endian != VIRTIO_ENDIAN_HOST) 586 die_perror("VHOST requires the same endianness in guest and host"); 587 588 state.num = queue->vring.num; 589 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state); 590 if (r < 0) 591 die_perror("VHOST_SET_VRING_NUM failed"); 592 state.num = 0; 593 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state); 594 if (r < 0) 595 die_perror("VHOST_SET_VRING_BASE failed"); 596 597 addr = (struct vhost_vring_addr) { 598 .index = vq, 599 .desc_user_addr = (u64)(unsigned long)queue->vring.desc, 600 .avail_user_addr = (u64)(unsigned long)queue->vring.avail, 601 .used_user_addr = (u64)(unsigned long)queue->vring.used, 602 }; 603 604 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr); 605 if (r < 0) 606 die_perror("VHOST_SET_VRING_ADDR failed"); 607 608 return 0; 609 } 610 611 static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi) 612 { 613 struct net_dev *ndev = dev; 614 struct vhost_vring_file file; 615 int r; 616 617 if (ndev->vhost_fd == 0) 618 return; 619 620 file = (struct vhost_vring_file) { 621 .index = vq, 622 .fd = eventfd(0, 0), 623 }; 624 625 r = irq__add_irqfd(kvm, gsi, file.fd, -1); 626 if (r < 0) 627 die_perror("KVM_IRQFD failed"); 628 629 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file); 630 if (r < 0) 631 die_perror("VHOST_SET_VRING_CALL failed"); 632 file.fd = ndev->tap_fd; 633 r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file); 634 if (r != 0) 635 die("VHOST_NET_SET_BACKEND failed %d", errno); 636 637 } 638 639 static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd) 640 { 641 struct net_dev *ndev = dev; 642 struct vhost_vring_file file = { 643 .index = vq, 644 .fd = efd, 645 }; 646 int r; 647 648 if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq)) 649 return; 650 651 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file); 652 if (r < 0) 653 die_perror("VHOST_SET_VRING_KICK failed"); 654 } 655 656 static int notify_vq(struct kvm *kvm, void *dev, u32 vq) 657 { 658 struct net_dev *ndev = dev; 659 660 virtio_net_handle_callback(kvm, ndev, vq); 661 662 return 0; 663 } 664 665 static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq) 666 { 667 struct net_dev *ndev = dev; 668 669 return ndev->vqs[vq].pfn; 670 } 671 672 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq) 673 { 674 /* FIXME: dynamic */ 675 return VIRTIO_NET_QUEUE_SIZE; 676 } 677 678 static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size) 679 { 680 /* FIXME: dynamic */ 681 return size; 682 } 683 684 static int get_vq_count(struct kvm *kvm, void *dev) 685 { 686 struct net_dev *ndev = dev; 687 688 return ndev->queue_pairs * 2 + 1; 689 } 690 691 static struct virtio_ops net_dev_virtio_ops = { 692 .get_config = get_config, 693 .get_host_features = get_host_features, 694 .set_guest_features = set_guest_features, 695 .get_vq_count = get_vq_count, 696 .init_vq = init_vq, 697 .get_pfn_vq = get_pfn_vq, 698 .get_size_vq = get_size_vq, 699 .set_size_vq = set_size_vq, 700 .notify_vq = notify_vq, 701 .notify_vq_gsi = notify_vq_gsi, 702 .notify_vq_eventfd = notify_vq_eventfd, 703 .notify_status = notify_status, 704 }; 705 706 static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev) 707 { 708 struct kvm_mem_bank *bank; 709 struct vhost_memory *mem; 710 int r, i; 711 712 ndev->vhost_fd = open("/dev/vhost-net", O_RDWR); 713 if (ndev->vhost_fd < 0) 714 die_perror("Failed openning vhost-net device"); 715 716 mem = calloc(1, sizeof(*mem) + kvm->mem_slots * sizeof(struct vhost_memory_region)); 717 if (mem == NULL) 718 die("Failed allocating memory for vhost memory map"); 719 720 i = 0; 721 list_for_each_entry(bank, &kvm->mem_banks, list) { 722 mem->regions[i] = (struct vhost_memory_region) { 723 .guest_phys_addr = bank->guest_phys_addr, 724 .memory_size = bank->size, 725 .userspace_addr = (unsigned long)bank->host_addr, 726 }; 727 i++; 728 } 729 mem->nregions = i; 730 731 r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER); 732 if (r != 0) 733 die_perror("VHOST_SET_OWNER failed"); 734 735 r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem); 736 if (r != 0) 737 die_perror("VHOST_SET_MEM_TABLE failed"); 738 739 ndev->vdev.use_vhost = true; 740 741 free(mem); 742 } 743 744 static inline void str_to_mac(const char *str, char *mac) 745 { 746 sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 747 mac, mac+1, mac+2, mac+3, mac+4, mac+5); 748 } 749 static int set_net_param(struct kvm *kvm, struct virtio_net_params *p, 750 const char *param, const char *val) 751 { 752 if (strcmp(param, "guest_mac") == 0) { 753 str_to_mac(val, p->guest_mac); 754 } else if (strcmp(param, "mode") == 0) { 755 if (!strncmp(val, "user", 4)) { 756 int i; 757 758 for (i = 0; i < kvm->cfg.num_net_devices; i++) 759 if (kvm->cfg.net_params[i].mode == NET_MODE_USER) 760 die("Only one usermode network device allowed at a time"); 761 p->mode = NET_MODE_USER; 762 } else if (!strncmp(val, "tap", 3)) { 763 p->mode = NET_MODE_TAP; 764 } else if (!strncmp(val, "none", 4)) { 765 kvm->cfg.no_net = 1; 766 return -1; 767 } else 768 die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network); 769 } else if (strcmp(param, "script") == 0) { 770 p->script = strdup(val); 771 } else if (strcmp(param, "downscript") == 0) { 772 p->downscript = strdup(val); 773 } else if (strcmp(param, "guest_ip") == 0) { 774 p->guest_ip = strdup(val); 775 } else if (strcmp(param, "host_ip") == 0) { 776 p->host_ip = strdup(val); 777 } else if (strcmp(param, "trans") == 0) { 778 p->trans = strdup(val); 779 } else if (strcmp(param, "tapif") == 0) { 780 p->tapif = strdup(val); 781 } else if (strcmp(param, "vhost") == 0) { 782 p->vhost = atoi(val); 783 } else if (strcmp(param, "fd") == 0) { 784 p->fd = atoi(val); 785 } else if (strcmp(param, "mq") == 0) { 786 p->mq = atoi(val); 787 } else 788 die("Unknown network parameter %s", param); 789 790 return 0; 791 } 792 793 int netdev_parser(const struct option *opt, const char *arg, int unset) 794 { 795 struct virtio_net_params p; 796 char *buf = NULL, *cmd = NULL, *cur = NULL; 797 bool on_cmd = true; 798 struct kvm *kvm = opt->ptr; 799 800 if (arg) { 801 buf = strdup(arg); 802 if (buf == NULL) 803 die("Failed allocating new net buffer"); 804 cur = strtok(buf, ",="); 805 } 806 807 p = (struct virtio_net_params) { 808 .guest_ip = DEFAULT_GUEST_ADDR, 809 .host_ip = DEFAULT_HOST_ADDR, 810 .script = DEFAULT_SCRIPT, 811 .downscript = DEFAULT_SCRIPT, 812 .mode = NET_MODE_TAP, 813 }; 814 815 str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac); 816 p.guest_mac[5] += kvm->cfg.num_net_devices; 817 818 while (cur) { 819 if (on_cmd) { 820 cmd = cur; 821 } else { 822 if (set_net_param(kvm, &p, cmd, cur) < 0) 823 goto done; 824 } 825 on_cmd = !on_cmd; 826 827 cur = strtok(NULL, ",="); 828 }; 829 830 kvm->cfg.num_net_devices++; 831 832 kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params)); 833 if (kvm->cfg.net_params == NULL) 834 die("Failed adding new network device"); 835 836 kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p; 837 838 done: 839 free(buf); 840 return 0; 841 } 842 843 static int virtio_net__init_one(struct virtio_net_params *params) 844 { 845 int i, err; 846 struct net_dev *ndev; 847 struct virtio_ops *ops; 848 enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm); 849 850 ndev = calloc(1, sizeof(struct net_dev)); 851 if (ndev == NULL) 852 return -ENOMEM; 853 854 ops = malloc(sizeof(*ops)); 855 if (ops == NULL) { 856 err = -ENOMEM; 857 goto err_free_ndev; 858 } 859 860 list_add_tail(&ndev->list, &ndevs); 861 862 ndev->kvm = params->kvm; 863 ndev->params = params; 864 865 mutex_init(&ndev->mutex); 866 ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq)); 867 ndev->config.status = VIRTIO_NET_S_LINK_UP; 868 if (ndev->queue_pairs > 1) 869 ndev->config.max_virtqueue_pairs = ndev->queue_pairs; 870 871 for (i = 0 ; i < 6 ; i++) { 872 ndev->config.mac[i] = params->guest_mac[i]; 873 ndev->info.guest_mac.addr[i] = params->guest_mac[i]; 874 ndev->info.host_mac.addr[i] = params->host_mac[i]; 875 } 876 877 ndev->mode = params->mode; 878 if (ndev->mode == NET_MODE_TAP) { 879 ndev->ops = &tap_ops; 880 if (!virtio_net__tap_create(ndev)) 881 die_perror("You have requested a TAP device, but creation of one has failed because"); 882 } else { 883 ndev->info.host_ip = ntohl(inet_addr(params->host_ip)); 884 ndev->info.guest_ip = ntohl(inet_addr(params->guest_ip)); 885 ndev->info.guest_netmask = ntohl(inet_addr("255.255.255.0")); 886 ndev->info.buf_nr = 20, 887 ndev->ops = &uip_ops; 888 uip_static_init(&ndev->info); 889 } 890 891 *ops = net_dev_virtio_ops; 892 893 if (params->trans) { 894 if (strcmp(params->trans, "mmio") == 0) 895 trans = VIRTIO_MMIO; 896 else if (strcmp(params->trans, "pci") == 0) 897 trans = VIRTIO_PCI; 898 else 899 pr_warning("virtio-net: Unknown transport method : %s, " 900 "falling back to %s.", params->trans, 901 virtio_trans_name(trans)); 902 } 903 904 virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans, 905 PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET); 906 907 if (params->vhost) 908 virtio_net__vhost_init(params->kvm, ndev); 909 910 if (compat_id == -1) 911 compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET"); 912 913 return 0; 914 915 err_free_ndev: 916 free(ndev); 917 return err; 918 } 919 920 int virtio_net__init(struct kvm *kvm) 921 { 922 int i; 923 924 for (i = 0; i < kvm->cfg.num_net_devices; i++) { 925 kvm->cfg.net_params[i].kvm = kvm; 926 virtio_net__init_one(&kvm->cfg.net_params[i]); 927 } 928 929 if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) { 930 static struct virtio_net_params net_params; 931 932 net_params = (struct virtio_net_params) { 933 .guest_ip = kvm->cfg.guest_ip, 934 .host_ip = kvm->cfg.host_ip, 935 .kvm = kvm, 936 .script = kvm->cfg.script, 937 .mode = NET_MODE_USER, 938 }; 939 str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac); 940 str_to_mac(kvm->cfg.host_mac, net_params.host_mac); 941 942 virtio_net__init_one(&net_params); 943 } 944 945 return 0; 946 } 947 virtio_dev_init(virtio_net__init); 948 949 int virtio_net__exit(struct kvm *kvm) 950 { 951 struct virtio_net_params *params; 952 struct net_dev *ndev; 953 struct list_head *ptr; 954 955 list_for_each(ptr, &ndevs) { 956 ndev = list_entry(ptr, struct net_dev, list); 957 params = ndev->params; 958 /* Cleanup any tap device which attached to bridge */ 959 if (ndev->mode == NET_MODE_TAP && 960 strcmp(params->downscript, "none")) 961 virtio_net_exec_script(params->downscript, ndev->tap_name); 962 } 963 return 0; 964 } 965 virtio_dev_exit(virtio_net__exit); 966