1 #include "kvm/virtio-pci-dev.h" 2 #include "kvm/virtio-net.h" 3 #include "kvm/virtio.h" 4 #include "kvm/types.h" 5 #include "kvm/mutex.h" 6 #include "kvm/util.h" 7 #include "kvm/kvm.h" 8 #include "kvm/irq.h" 9 #include "kvm/uip.h" 10 #include "kvm/guest_compat.h" 11 #include "kvm/virtio-trans.h" 12 13 #include <linux/vhost.h> 14 #include <linux/virtio_net.h> 15 #include <linux/if_tun.h> 16 #include <linux/types.h> 17 18 #include <arpa/inet.h> 19 #include <net/if.h> 20 21 #include <unistd.h> 22 #include <assert.h> 23 #include <fcntl.h> 24 25 #include <sys/socket.h> 26 #include <sys/ioctl.h> 27 #include <sys/types.h> 28 #include <sys/wait.h> 29 #include <sys/eventfd.h> 30 31 #define VIRTIO_NET_QUEUE_SIZE 128 32 #define VIRTIO_NET_NUM_QUEUES 2 33 #define VIRTIO_NET_RX_QUEUE 0 34 #define VIRTIO_NET_TX_QUEUE 1 35 36 struct net_dev; 37 38 extern struct kvm *kvm; 39 40 struct net_dev_operations { 41 int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); 42 int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); 43 }; 44 45 struct net_dev { 46 pthread_mutex_t mutex; 47 struct virtio_trans vtrans; 48 struct list_head list; 49 50 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 51 struct virtio_net_config config; 52 u32 features; 53 54 pthread_t io_rx_thread; 55 pthread_mutex_t io_rx_lock; 56 pthread_cond_t io_rx_cond; 57 58 pthread_t io_tx_thread; 59 pthread_mutex_t io_tx_lock; 60 pthread_cond_t io_tx_cond; 61 62 int vhost_fd; 63 int tap_fd; 64 char tap_name[IFNAMSIZ]; 65 66 int mode; 67 68 struct uip_info info; 69 struct net_dev_operations *ops; 70 struct kvm *kvm; 71 }; 72 73 static LIST_HEAD(ndevs); 74 static int compat_id = -1; 75 76 static void *virtio_net_rx_thread(void *p) 77 { 78 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 79 struct virt_queue *vq; 80 struct kvm *kvm; 81 struct net_dev *ndev = p; 82 u16 out, in; 83 u16 head; 84 int len; 85 86 kvm = ndev->kvm; 87 vq = &ndev->vqs[VIRTIO_NET_RX_QUEUE]; 88 89 while (1) { 90 91 mutex_lock(&ndev->io_rx_lock); 92 if (!virt_queue__available(vq)) 93 pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock); 94 mutex_unlock(&ndev->io_rx_lock); 95 96 while (virt_queue__available(vq)) { 97 98 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 99 100 len = ndev->ops->rx(iov, in, ndev); 101 102 virt_queue__set_used_elem(vq, head, len); 103 104 /* We should interrupt guest right now, otherwise latency is huge. */ 105 if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_RX_QUEUE])) 106 ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, 107 VIRTIO_NET_RX_QUEUE); 108 } 109 110 } 111 112 pthread_exit(NULL); 113 return NULL; 114 115 } 116 117 static void *virtio_net_tx_thread(void *p) 118 { 119 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 120 struct virt_queue *vq; 121 struct kvm *kvm; 122 struct net_dev *ndev = p; 123 u16 out, in; 124 u16 head; 125 int len; 126 127 kvm = ndev->kvm; 128 vq = &ndev->vqs[VIRTIO_NET_TX_QUEUE]; 129 130 while (1) { 131 mutex_lock(&ndev->io_tx_lock); 132 if (!virt_queue__available(vq)) 133 pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock); 134 mutex_unlock(&ndev->io_tx_lock); 135 136 while (virt_queue__available(vq)) { 137 138 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 139 140 len = ndev->ops->tx(iov, out, ndev); 141 142 virt_queue__set_used_elem(vq, head, len); 143 } 144 145 if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_TX_QUEUE])) 146 ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, VIRTIO_NET_TX_QUEUE); 147 } 148 149 pthread_exit(NULL); 150 151 return NULL; 152 153 } 154 155 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue) 156 { 157 switch (queue) { 158 case VIRTIO_NET_TX_QUEUE: 159 mutex_lock(&ndev->io_tx_lock); 160 pthread_cond_signal(&ndev->io_tx_cond); 161 mutex_unlock(&ndev->io_tx_lock); 162 break; 163 case VIRTIO_NET_RX_QUEUE: 164 mutex_lock(&ndev->io_rx_lock); 165 pthread_cond_signal(&ndev->io_rx_cond); 166 mutex_unlock(&ndev->io_rx_lock); 167 break; 168 default: 169 pr_warning("Unknown queue index %u", queue); 170 } 171 } 172 173 static bool virtio_net__tap_init(const struct virtio_net_params *params, 174 struct net_dev *ndev) 175 { 176 int sock = socket(AF_INET, SOCK_STREAM, 0); 177 int pid, status, offload, hdr_len; 178 struct sockaddr_in sin = {0}; 179 struct ifreq ifr; 180 181 /* Did the user already gave us the FD? */ 182 if (params->fd) { 183 ndev->tap_fd = params->fd; 184 return 1; 185 } 186 187 ndev->tap_fd = open("/dev/net/tun", O_RDWR); 188 if (ndev->tap_fd < 0) { 189 pr_warning("Unable to open /dev/net/tun"); 190 goto fail; 191 } 192 193 memset(&ifr, 0, sizeof(ifr)); 194 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 195 if (ioctl(ndev->tap_fd, TUNSETIFF, &ifr) < 0) { 196 pr_warning("Config tap device error. Are you root?"); 197 goto fail; 198 } 199 200 strncpy(ndev->tap_name, ifr.ifr_name, sizeof(ndev->tap_name)); 201 202 if (ioctl(ndev->tap_fd, TUNSETNOCSUM, 1) < 0) { 203 pr_warning("Config tap device TUNSETNOCSUM error"); 204 goto fail; 205 } 206 207 hdr_len = sizeof(struct virtio_net_hdr); 208 if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) 209 pr_warning("Config tap device TUNSETVNETHDRSZ error"); 210 211 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 212 if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) { 213 pr_warning("Config tap device TUNSETOFFLOAD error"); 214 goto fail; 215 } 216 217 if (strcmp(params->script, "none")) { 218 pid = fork(); 219 if (pid == 0) { 220 execl(params->script, params->script, ndev->tap_name, NULL); 221 _exit(1); 222 } else { 223 waitpid(pid, &status, 0); 224 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 225 pr_warning("Fail to setup tap by %s", params->script); 226 goto fail; 227 } 228 } 229 } else { 230 memset(&ifr, 0, sizeof(ifr)); 231 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name)); 232 sin.sin_addr.s_addr = inet_addr(params->host_ip); 233 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 234 ifr.ifr_addr.sa_family = AF_INET; 235 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 236 pr_warning("Could not set ip address on tap device"); 237 goto fail; 238 } 239 } 240 241 memset(&ifr, 0, sizeof(ifr)); 242 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name)); 243 ioctl(sock, SIOCGIFFLAGS, &ifr); 244 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 245 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 246 pr_warning("Could not bring tap device up"); 247 248 close(sock); 249 250 return 1; 251 252 fail: 253 if (sock >= 0) 254 close(sock); 255 if (ndev->tap_fd >= 0) 256 close(ndev->tap_fd); 257 258 return 0; 259 } 260 261 static void virtio_net__io_thread_init(struct kvm *kvm, struct net_dev *ndev) 262 { 263 pthread_mutex_init(&ndev->io_tx_lock, NULL); 264 pthread_mutex_init(&ndev->io_rx_lock, NULL); 265 266 pthread_cond_init(&ndev->io_tx_cond, NULL); 267 pthread_cond_init(&ndev->io_rx_cond, NULL); 268 269 pthread_create(&ndev->io_tx_thread, NULL, virtio_net_tx_thread, ndev); 270 pthread_create(&ndev->io_rx_thread, NULL, virtio_net_rx_thread, ndev); 271 } 272 273 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 274 { 275 return writev(ndev->tap_fd, iov, out); 276 } 277 278 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 279 { 280 return readv(ndev->tap_fd, iov, in); 281 } 282 283 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 284 { 285 return uip_tx(iov, out, &ndev->info); 286 } 287 288 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 289 { 290 return uip_rx(iov, in, &ndev->info); 291 } 292 293 static struct net_dev_operations tap_ops = { 294 .rx = tap_ops_rx, 295 .tx = tap_ops_tx, 296 }; 297 298 static struct net_dev_operations uip_ops = { 299 .rx = uip_ops_rx, 300 .tx = uip_ops_tx, 301 }; 302 303 static void set_config(struct kvm *kvm, void *dev, u8 data, u32 offset) 304 { 305 struct net_dev *ndev = dev; 306 307 ((u8 *)(&ndev->config))[offset] = data; 308 } 309 310 static u8 get_config(struct kvm *kvm, void *dev, u32 offset) 311 { 312 struct net_dev *ndev = dev; 313 314 return ((u8 *)(&ndev->config))[offset]; 315 } 316 317 static u32 get_host_features(struct kvm *kvm, void *dev) 318 { 319 return 1UL << VIRTIO_NET_F_MAC 320 | 1UL << VIRTIO_NET_F_CSUM 321 | 1UL << VIRTIO_NET_F_HOST_UFO 322 | 1UL << VIRTIO_NET_F_HOST_TSO4 323 | 1UL << VIRTIO_NET_F_HOST_TSO6 324 | 1UL << VIRTIO_NET_F_GUEST_UFO 325 | 1UL << VIRTIO_NET_F_GUEST_TSO4 326 | 1UL << VIRTIO_NET_F_GUEST_TSO6 327 | 1UL << VIRTIO_RING_F_EVENT_IDX 328 | 1UL << VIRTIO_RING_F_INDIRECT_DESC; 329 } 330 331 static void set_guest_features(struct kvm *kvm, void *dev, u32 features) 332 { 333 struct net_dev *ndev = dev; 334 335 ndev->features = features; 336 } 337 338 static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn) 339 { 340 struct vhost_vring_state state = { .index = vq }; 341 struct vhost_vring_addr addr; 342 struct net_dev *ndev = dev; 343 struct virt_queue *queue; 344 void *p; 345 int r; 346 347 compat__remove_message(compat_id); 348 349 queue = &ndev->vqs[vq]; 350 queue->pfn = pfn; 351 p = guest_pfn_to_host(kvm, queue->pfn); 352 353 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN); 354 355 if (ndev->vhost_fd == 0) 356 return 0; 357 358 state.num = queue->vring.num; 359 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state); 360 if (r < 0) 361 die_perror("VHOST_SET_VRING_NUM failed"); 362 state.num = 0; 363 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state); 364 if (r < 0) 365 die_perror("VHOST_SET_VRING_BASE failed"); 366 367 addr = (struct vhost_vring_addr) { 368 .index = vq, 369 .desc_user_addr = (u64)(unsigned long)queue->vring.desc, 370 .avail_user_addr = (u64)(unsigned long)queue->vring.avail, 371 .used_user_addr = (u64)(unsigned long)queue->vring.used, 372 }; 373 374 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr); 375 if (r < 0) 376 die_perror("VHOST_SET_VRING_ADDR failed"); 377 378 return 0; 379 } 380 381 static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi) 382 { 383 struct net_dev *ndev = dev; 384 struct kvm_irqfd irq; 385 struct vhost_vring_file file; 386 int r; 387 388 if (ndev->vhost_fd == 0) 389 return; 390 391 irq = (struct kvm_irqfd) { 392 .gsi = gsi, 393 .fd = eventfd(0, 0), 394 }; 395 file = (struct vhost_vring_file) { 396 .index = vq, 397 .fd = irq.fd, 398 }; 399 400 r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq); 401 if (r < 0) 402 die_perror("KVM_IRQFD failed"); 403 404 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file); 405 if (r < 0) 406 die_perror("VHOST_SET_VRING_CALL failed"); 407 file.fd = ndev->tap_fd; 408 r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file); 409 if (r != 0) 410 die("VHOST_NET_SET_BACKEND failed %d", errno); 411 412 } 413 414 static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd) 415 { 416 struct net_dev *ndev = dev; 417 struct vhost_vring_file file = { 418 .index = vq, 419 .fd = efd, 420 }; 421 int r; 422 423 if (ndev->vhost_fd == 0) 424 return; 425 426 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file); 427 if (r < 0) 428 die_perror("VHOST_SET_VRING_KICK failed"); 429 } 430 431 static int notify_vq(struct kvm *kvm, void *dev, u32 vq) 432 { 433 struct net_dev *ndev = dev; 434 435 virtio_net_handle_callback(kvm, ndev, vq); 436 437 return 0; 438 } 439 440 static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq) 441 { 442 struct net_dev *ndev = dev; 443 444 return ndev->vqs[vq].pfn; 445 } 446 447 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq) 448 { 449 return VIRTIO_NET_QUEUE_SIZE; 450 } 451 452 static struct virtio_ops net_dev_virtio_ops = (struct virtio_ops) { 453 .set_config = set_config, 454 .get_config = get_config, 455 .get_host_features = get_host_features, 456 .set_guest_features = set_guest_features, 457 .init_vq = init_vq, 458 .notify_vq = notify_vq, 459 .get_pfn_vq = get_pfn_vq, 460 .get_size_vq = get_size_vq, 461 .notify_vq_gsi = notify_vq_gsi, 462 .notify_vq_eventfd = notify_vq_eventfd, 463 }; 464 465 static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev) 466 { 467 u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX; 468 struct vhost_memory *mem; 469 int r; 470 471 ndev->vhost_fd = open("/dev/vhost-net", O_RDWR); 472 if (ndev->vhost_fd < 0) 473 die_perror("Failed openning vhost-net device"); 474 475 mem = malloc(sizeof(*mem) + sizeof(struct vhost_memory_region)); 476 if (mem == NULL) 477 die("Failed allocating memory for vhost memory map"); 478 479 mem->nregions = 1; 480 mem->regions[0] = (struct vhost_memory_region) { 481 .guest_phys_addr = 0, 482 .memory_size = kvm->ram_size, 483 .userspace_addr = (unsigned long)kvm->ram_start, 484 }; 485 486 r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER); 487 if (r != 0) 488 die_perror("VHOST_SET_OWNER failed"); 489 490 r = ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features); 491 if (r != 0) 492 die_perror("VHOST_SET_FEATURES failed"); 493 r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem); 494 if (r != 0) 495 die_perror("VHOST_SET_MEM_TABLE failed"); 496 free(mem); 497 } 498 499 void virtio_net__init(const struct virtio_net_params *params) 500 { 501 int i; 502 struct net_dev *ndev; 503 504 if (!params) 505 return; 506 507 ndev = calloc(1, sizeof(struct net_dev)); 508 if (ndev == NULL) 509 die("Failed allocating ndev"); 510 511 list_add_tail(&ndev->list, &ndevs); 512 513 ndev->kvm = params->kvm; 514 515 mutex_init(&ndev->mutex); 516 ndev->config.status = VIRTIO_NET_S_LINK_UP; 517 518 for (i = 0 ; i < 6 ; i++) { 519 ndev->config.mac[i] = params->guest_mac[i]; 520 ndev->info.guest_mac.addr[i] = params->guest_mac[i]; 521 ndev->info.host_mac.addr[i] = params->host_mac[i]; 522 } 523 524 ndev->mode = params->mode; 525 if (ndev->mode == NET_MODE_TAP) { 526 if (!virtio_net__tap_init(params, ndev)) 527 die_perror("You have requested a TAP device, but creation of one has" 528 "failed because:"); 529 ndev->ops = &tap_ops; 530 } else { 531 ndev->info.host_ip = ntohl(inet_addr(params->host_ip)); 532 ndev->info.guest_ip = ntohl(inet_addr(params->guest_ip)); 533 ndev->info.guest_netmask = ntohl(inet_addr("255.255.255.0")); 534 ndev->info.buf_nr = 20, 535 uip_init(&ndev->info); 536 ndev->ops = &uip_ops; 537 } 538 539 virtio_trans_init(&ndev->vtrans, VIRTIO_PCI); 540 ndev->vtrans.trans_ops->init(kvm, &ndev->vtrans, ndev, PCI_DEVICE_ID_VIRTIO_NET, 541 VIRTIO_ID_NET, PCI_CLASS_NET); 542 ndev->vtrans.virtio_ops = &net_dev_virtio_ops; 543 544 if (params->vhost) 545 virtio_net__vhost_init(params->kvm, ndev); 546 else 547 virtio_net__io_thread_init(params->kvm, ndev); 548 549 if (compat_id != -1) 550 compat_id = compat__add_message("virtio-net device was not detected", 551 "While you have requested a virtio-net device, " 552 "the guest kernel did not initialize it.\n" 553 "Please make sure that the guest kernel was " 554 "compiled with CONFIG_VIRTIO_NET=y enabled " 555 "in its .config"); 556 } 557