1 #include "kvm/virtio-pci-dev.h" 2 #include "kvm/virtio-net.h" 3 #include "kvm/virtio.h" 4 #include "kvm/types.h" 5 #include "kvm/mutex.h" 6 #include "kvm/util.h" 7 #include "kvm/kvm.h" 8 #include "kvm/irq.h" 9 #include "kvm/uip.h" 10 #include "kvm/guest_compat.h" 11 12 #include <linux/vhost.h> 13 #include <linux/virtio_net.h> 14 #include <linux/if_tun.h> 15 #include <linux/types.h> 16 17 #include <arpa/inet.h> 18 #include <net/if.h> 19 20 #include <unistd.h> 21 #include <fcntl.h> 22 23 #include <sys/socket.h> 24 #include <sys/ioctl.h> 25 #include <sys/types.h> 26 #include <sys/wait.h> 27 #include <sys/eventfd.h> 28 29 #define VIRTIO_NET_QUEUE_SIZE 128 30 #define VIRTIO_NET_NUM_QUEUES 2 31 #define VIRTIO_NET_RX_QUEUE 0 32 #define VIRTIO_NET_TX_QUEUE 1 33 34 struct net_dev; 35 36 extern struct kvm *kvm; 37 38 struct net_dev_operations { 39 int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); 40 int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); 41 }; 42 43 struct net_dev { 44 pthread_mutex_t mutex; 45 struct virtio_device vdev; 46 struct list_head list; 47 48 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 49 struct virtio_net_config config; 50 u32 features; 51 52 pthread_t io_rx_thread; 53 pthread_mutex_t io_rx_lock; 54 pthread_cond_t io_rx_cond; 55 56 pthread_t io_tx_thread; 57 pthread_mutex_t io_tx_lock; 58 pthread_cond_t io_tx_cond; 59 60 int vhost_fd; 61 int tap_fd; 62 char tap_name[IFNAMSIZ]; 63 64 int mode; 65 66 struct uip_info info; 67 struct net_dev_operations *ops; 68 struct kvm *kvm; 69 }; 70 71 static LIST_HEAD(ndevs); 72 static int compat_id = -1; 73 74 static void *virtio_net_rx_thread(void *p) 75 { 76 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 77 struct virt_queue *vq; 78 struct kvm *kvm; 79 struct net_dev *ndev = p; 80 u16 out, in; 81 u16 head; 82 int len; 83 84 kvm = ndev->kvm; 85 vq = &ndev->vqs[VIRTIO_NET_RX_QUEUE]; 86 87 while (1) { 88 mutex_lock(&ndev->io_rx_lock); 89 if (!virt_queue__available(vq)) 90 pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock); 91 mutex_unlock(&ndev->io_rx_lock); 92 93 while (virt_queue__available(vq)) { 94 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 95 len = ndev->ops->rx(iov, in, ndev); 96 virt_queue__set_used_elem(vq, head, len); 97 98 /* We should interrupt guest right now, otherwise latency is huge. */ 99 if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_RX_QUEUE])) 100 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, 101 VIRTIO_NET_RX_QUEUE); 102 } 103 } 104 105 pthread_exit(NULL); 106 return NULL; 107 108 } 109 110 static void *virtio_net_tx_thread(void *p) 111 { 112 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 113 struct virt_queue *vq; 114 struct kvm *kvm; 115 struct net_dev *ndev = p; 116 u16 out, in; 117 u16 head; 118 int len; 119 120 kvm = ndev->kvm; 121 vq = &ndev->vqs[VIRTIO_NET_TX_QUEUE]; 122 123 while (1) { 124 mutex_lock(&ndev->io_tx_lock); 125 if (!virt_queue__available(vq)) 126 pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock); 127 mutex_unlock(&ndev->io_tx_lock); 128 129 while (virt_queue__available(vq)) { 130 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 131 len = ndev->ops->tx(iov, out, ndev); 132 virt_queue__set_used_elem(vq, head, len); 133 } 134 135 if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_TX_QUEUE])) 136 ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, VIRTIO_NET_TX_QUEUE); 137 } 138 139 pthread_exit(NULL); 140 141 return NULL; 142 143 } 144 145 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue) 146 { 147 switch (queue) { 148 case VIRTIO_NET_TX_QUEUE: 149 mutex_lock(&ndev->io_tx_lock); 150 pthread_cond_signal(&ndev->io_tx_cond); 151 mutex_unlock(&ndev->io_tx_lock); 152 break; 153 case VIRTIO_NET_RX_QUEUE: 154 mutex_lock(&ndev->io_rx_lock); 155 pthread_cond_signal(&ndev->io_rx_cond); 156 mutex_unlock(&ndev->io_rx_lock); 157 break; 158 default: 159 pr_warning("Unknown queue index %u", queue); 160 } 161 } 162 163 static bool virtio_net__tap_init(const struct virtio_net_params *params, 164 struct net_dev *ndev) 165 { 166 int sock = socket(AF_INET, SOCK_STREAM, 0); 167 int pid, status, offload, hdr_len; 168 struct sockaddr_in sin = {0}; 169 struct ifreq ifr; 170 171 /* Did the user already gave us the FD? */ 172 if (params->fd) { 173 ndev->tap_fd = params->fd; 174 return 1; 175 } 176 177 ndev->tap_fd = open("/dev/net/tun", O_RDWR); 178 if (ndev->tap_fd < 0) { 179 pr_warning("Unable to open /dev/net/tun"); 180 goto fail; 181 } 182 183 memset(&ifr, 0, sizeof(ifr)); 184 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 185 if (ioctl(ndev->tap_fd, TUNSETIFF, &ifr) < 0) { 186 pr_warning("Config tap device error. Are you root?"); 187 goto fail; 188 } 189 190 strncpy(ndev->tap_name, ifr.ifr_name, sizeof(ndev->tap_name)); 191 192 if (ioctl(ndev->tap_fd, TUNSETNOCSUM, 1) < 0) { 193 pr_warning("Config tap device TUNSETNOCSUM error"); 194 goto fail; 195 } 196 197 hdr_len = sizeof(struct virtio_net_hdr); 198 if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) 199 pr_warning("Config tap device TUNSETVNETHDRSZ error"); 200 201 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 202 if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) { 203 pr_warning("Config tap device TUNSETOFFLOAD error"); 204 goto fail; 205 } 206 207 if (strcmp(params->script, "none")) { 208 pid = fork(); 209 if (pid == 0) { 210 execl(params->script, params->script, ndev->tap_name, NULL); 211 _exit(1); 212 } else { 213 waitpid(pid, &status, 0); 214 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 215 pr_warning("Fail to setup tap by %s", params->script); 216 goto fail; 217 } 218 } 219 } else { 220 memset(&ifr, 0, sizeof(ifr)); 221 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name)); 222 sin.sin_addr.s_addr = inet_addr(params->host_ip); 223 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 224 ifr.ifr_addr.sa_family = AF_INET; 225 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 226 pr_warning("Could not set ip address on tap device"); 227 goto fail; 228 } 229 } 230 231 memset(&ifr, 0, sizeof(ifr)); 232 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name)); 233 ioctl(sock, SIOCGIFFLAGS, &ifr); 234 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 235 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 236 pr_warning("Could not bring tap device up"); 237 238 close(sock); 239 240 return 1; 241 242 fail: 243 if (sock >= 0) 244 close(sock); 245 if (ndev->tap_fd >= 0) 246 close(ndev->tap_fd); 247 248 return 0; 249 } 250 251 static void virtio_net__io_thread_init(struct kvm *kvm, struct net_dev *ndev) 252 { 253 pthread_mutex_init(&ndev->io_tx_lock, NULL); 254 pthread_mutex_init(&ndev->io_rx_lock, NULL); 255 256 pthread_cond_init(&ndev->io_tx_cond, NULL); 257 pthread_cond_init(&ndev->io_rx_cond, NULL); 258 259 pthread_create(&ndev->io_tx_thread, NULL, virtio_net_tx_thread, ndev); 260 pthread_create(&ndev->io_rx_thread, NULL, virtio_net_rx_thread, ndev); 261 } 262 263 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 264 { 265 return writev(ndev->tap_fd, iov, out); 266 } 267 268 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 269 { 270 return readv(ndev->tap_fd, iov, in); 271 } 272 273 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 274 { 275 return uip_tx(iov, out, &ndev->info); 276 } 277 278 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 279 { 280 return uip_rx(iov, in, &ndev->info); 281 } 282 283 static struct net_dev_operations tap_ops = { 284 .rx = tap_ops_rx, 285 .tx = tap_ops_tx, 286 }; 287 288 static struct net_dev_operations uip_ops = { 289 .rx = uip_ops_rx, 290 .tx = uip_ops_tx, 291 }; 292 293 static void set_config(struct kvm *kvm, void *dev, u8 data, u32 offset) 294 { 295 struct net_dev *ndev = dev; 296 297 ((u8 *)(&ndev->config))[offset] = data; 298 } 299 300 static u8 get_config(struct kvm *kvm, void *dev, u32 offset) 301 { 302 struct net_dev *ndev = dev; 303 304 return ((u8 *)(&ndev->config))[offset]; 305 } 306 307 static u32 get_host_features(struct kvm *kvm, void *dev) 308 { 309 return 1UL << VIRTIO_NET_F_MAC 310 | 1UL << VIRTIO_NET_F_CSUM 311 | 1UL << VIRTIO_NET_F_HOST_UFO 312 | 1UL << VIRTIO_NET_F_HOST_TSO4 313 | 1UL << VIRTIO_NET_F_HOST_TSO6 314 | 1UL << VIRTIO_NET_F_GUEST_UFO 315 | 1UL << VIRTIO_NET_F_GUEST_TSO4 316 | 1UL << VIRTIO_NET_F_GUEST_TSO6 317 | 1UL << VIRTIO_RING_F_EVENT_IDX 318 | 1UL << VIRTIO_RING_F_INDIRECT_DESC; 319 } 320 321 static void set_guest_features(struct kvm *kvm, void *dev, u32 features) 322 { 323 struct net_dev *ndev = dev; 324 325 ndev->features = features; 326 } 327 328 static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn) 329 { 330 struct vhost_vring_state state = { .index = vq }; 331 struct vhost_vring_addr addr; 332 struct net_dev *ndev = dev; 333 struct virt_queue *queue; 334 void *p; 335 int r; 336 337 compat__remove_message(compat_id); 338 339 queue = &ndev->vqs[vq]; 340 queue->pfn = pfn; 341 p = guest_pfn_to_host(kvm, queue->pfn); 342 343 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN); 344 345 if (ndev->vhost_fd == 0) 346 return 0; 347 348 state.num = queue->vring.num; 349 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state); 350 if (r < 0) 351 die_perror("VHOST_SET_VRING_NUM failed"); 352 state.num = 0; 353 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state); 354 if (r < 0) 355 die_perror("VHOST_SET_VRING_BASE failed"); 356 357 addr = (struct vhost_vring_addr) { 358 .index = vq, 359 .desc_user_addr = (u64)(unsigned long)queue->vring.desc, 360 .avail_user_addr = (u64)(unsigned long)queue->vring.avail, 361 .used_user_addr = (u64)(unsigned long)queue->vring.used, 362 }; 363 364 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr); 365 if (r < 0) 366 die_perror("VHOST_SET_VRING_ADDR failed"); 367 368 return 0; 369 } 370 371 static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi) 372 { 373 struct net_dev *ndev = dev; 374 struct kvm_irqfd irq; 375 struct vhost_vring_file file; 376 int r; 377 378 if (ndev->vhost_fd == 0) 379 return; 380 381 irq = (struct kvm_irqfd) { 382 .gsi = gsi, 383 .fd = eventfd(0, 0), 384 }; 385 file = (struct vhost_vring_file) { 386 .index = vq, 387 .fd = irq.fd, 388 }; 389 390 r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq); 391 if (r < 0) 392 die_perror("KVM_IRQFD failed"); 393 394 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file); 395 if (r < 0) 396 die_perror("VHOST_SET_VRING_CALL failed"); 397 file.fd = ndev->tap_fd; 398 r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file); 399 if (r != 0) 400 die("VHOST_NET_SET_BACKEND failed %d", errno); 401 402 } 403 404 static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd) 405 { 406 struct net_dev *ndev = dev; 407 struct vhost_vring_file file = { 408 .index = vq, 409 .fd = efd, 410 }; 411 int r; 412 413 if (ndev->vhost_fd == 0) 414 return; 415 416 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file); 417 if (r < 0) 418 die_perror("VHOST_SET_VRING_KICK failed"); 419 } 420 421 static int notify_vq(struct kvm *kvm, void *dev, u32 vq) 422 { 423 struct net_dev *ndev = dev; 424 425 virtio_net_handle_callback(kvm, ndev, vq); 426 427 return 0; 428 } 429 430 static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq) 431 { 432 struct net_dev *ndev = dev; 433 434 return ndev->vqs[vq].pfn; 435 } 436 437 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq) 438 { 439 return VIRTIO_NET_QUEUE_SIZE; 440 } 441 442 static struct virtio_ops net_dev_virtio_ops = (struct virtio_ops) { 443 .set_config = set_config, 444 .get_config = get_config, 445 .get_host_features = get_host_features, 446 .set_guest_features = set_guest_features, 447 .init_vq = init_vq, 448 .notify_vq = notify_vq, 449 .get_pfn_vq = get_pfn_vq, 450 .get_size_vq = get_size_vq, 451 .notify_vq_gsi = notify_vq_gsi, 452 .notify_vq_eventfd = notify_vq_eventfd, 453 }; 454 455 static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev) 456 { 457 u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX; 458 struct vhost_memory *mem; 459 int r; 460 461 ndev->vhost_fd = open("/dev/vhost-net", O_RDWR); 462 if (ndev->vhost_fd < 0) 463 die_perror("Failed openning vhost-net device"); 464 465 mem = malloc(sizeof(*mem) + sizeof(struct vhost_memory_region)); 466 if (mem == NULL) 467 die("Failed allocating memory for vhost memory map"); 468 469 mem->nregions = 1; 470 mem->regions[0] = (struct vhost_memory_region) { 471 .guest_phys_addr = 0, 472 .memory_size = kvm->ram_size, 473 .userspace_addr = (unsigned long)kvm->ram_start, 474 }; 475 476 r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER); 477 if (r != 0) 478 die_perror("VHOST_SET_OWNER failed"); 479 480 r = ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features); 481 if (r != 0) 482 die_perror("VHOST_SET_FEATURES failed"); 483 r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem); 484 if (r != 0) 485 die_perror("VHOST_SET_MEM_TABLE failed"); 486 free(mem); 487 } 488 489 void virtio_net__init(const struct virtio_net_params *params) 490 { 491 int i; 492 struct net_dev *ndev; 493 494 if (!params) 495 return; 496 497 ndev = calloc(1, sizeof(struct net_dev)); 498 if (ndev == NULL) 499 die("Failed allocating ndev"); 500 501 list_add_tail(&ndev->list, &ndevs); 502 503 ndev->kvm = params->kvm; 504 505 mutex_init(&ndev->mutex); 506 ndev->config.status = VIRTIO_NET_S_LINK_UP; 507 508 for (i = 0 ; i < 6 ; i++) { 509 ndev->config.mac[i] = params->guest_mac[i]; 510 ndev->info.guest_mac.addr[i] = params->guest_mac[i]; 511 ndev->info.host_mac.addr[i] = params->host_mac[i]; 512 } 513 514 ndev->mode = params->mode; 515 if (ndev->mode == NET_MODE_TAP) { 516 if (!virtio_net__tap_init(params, ndev)) 517 die_perror("You have requested a TAP device, but creation of one has" 518 "failed because:"); 519 ndev->ops = &tap_ops; 520 } else { 521 ndev->info.host_ip = ntohl(inet_addr(params->host_ip)); 522 ndev->info.guest_ip = ntohl(inet_addr(params->guest_ip)); 523 ndev->info.guest_netmask = ntohl(inet_addr("255.255.255.0")); 524 ndev->info.buf_nr = 20, 525 uip_init(&ndev->info); 526 ndev->ops = &uip_ops; 527 } 528 529 virtio_init(kvm, ndev, &ndev->vdev, &net_dev_virtio_ops, 530 VIRTIO_PCI, PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET); 531 532 if (params->vhost) 533 virtio_net__vhost_init(params->kvm, ndev); 534 else 535 virtio_net__io_thread_init(params->kvm, ndev); 536 537 if (compat_id != -1) 538 compat_id = compat__add_message("virtio-net device was not detected", 539 "While you have requested a virtio-net device, " 540 "the guest kernel did not initialize it.\n" 541 "Please make sure that the guest kernel was " 542 "compiled with CONFIG_VIRTIO_NET=y enabled " 543 "in its .config"); 544 } 545