1 #include "kvm/virtio-pci-dev.h" 2 #include "kvm/virtio-net.h" 3 #include "kvm/virtio.h" 4 #include "kvm/types.h" 5 #include "kvm/mutex.h" 6 #include "kvm/util.h" 7 #include "kvm/kvm.h" 8 #include "kvm/irq.h" 9 #include "kvm/uip.h" 10 #include "kvm/guest_compat.h" 11 #include "kvm/virtio-trans.h" 12 13 #include <linux/vhost.h> 14 #include <linux/virtio_net.h> 15 #include <linux/if_tun.h> 16 #include <linux/types.h> 17 18 #include <arpa/inet.h> 19 #include <net/if.h> 20 21 #include <unistd.h> 22 #include <fcntl.h> 23 24 #include <sys/socket.h> 25 #include <sys/ioctl.h> 26 #include <sys/types.h> 27 #include <sys/wait.h> 28 #include <sys/eventfd.h> 29 30 #define VIRTIO_NET_QUEUE_SIZE 128 31 #define VIRTIO_NET_NUM_QUEUES 2 32 #define VIRTIO_NET_RX_QUEUE 0 33 #define VIRTIO_NET_TX_QUEUE 1 34 35 struct net_dev; 36 37 extern struct kvm *kvm; 38 39 struct net_dev_operations { 40 int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); 41 int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); 42 }; 43 44 struct net_dev { 45 pthread_mutex_t mutex; 46 struct virtio_trans vtrans; 47 struct list_head list; 48 49 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 50 struct virtio_net_config config; 51 u32 features; 52 53 pthread_t io_rx_thread; 54 pthread_mutex_t io_rx_lock; 55 pthread_cond_t io_rx_cond; 56 57 pthread_t io_tx_thread; 58 pthread_mutex_t io_tx_lock; 59 pthread_cond_t io_tx_cond; 60 61 int vhost_fd; 62 int tap_fd; 63 char tap_name[IFNAMSIZ]; 64 65 int mode; 66 67 struct uip_info info; 68 struct net_dev_operations *ops; 69 struct kvm *kvm; 70 }; 71 72 static LIST_HEAD(ndevs); 73 static int compat_id = -1; 74 75 static void *virtio_net_rx_thread(void *p) 76 { 77 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 78 struct virt_queue *vq; 79 struct kvm *kvm; 80 struct net_dev *ndev = p; 81 u16 out, in; 82 u16 head; 83 int len; 84 85 kvm = ndev->kvm; 86 vq = &ndev->vqs[VIRTIO_NET_RX_QUEUE]; 87 88 while (1) { 89 mutex_lock(&ndev->io_rx_lock); 90 if (!virt_queue__available(vq)) 91 pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock); 92 mutex_unlock(&ndev->io_rx_lock); 93 94 while (virt_queue__available(vq)) { 95 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 96 len = ndev->ops->rx(iov, in, ndev); 97 virt_queue__set_used_elem(vq, head, len); 98 99 /* We should interrupt guest right now, otherwise latency is huge. */ 100 if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_RX_QUEUE])) 101 ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, 102 VIRTIO_NET_RX_QUEUE); 103 } 104 } 105 106 pthread_exit(NULL); 107 return NULL; 108 109 } 110 111 static void *virtio_net_tx_thread(void *p) 112 { 113 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 114 struct virt_queue *vq; 115 struct kvm *kvm; 116 struct net_dev *ndev = p; 117 u16 out, in; 118 u16 head; 119 int len; 120 121 kvm = ndev->kvm; 122 vq = &ndev->vqs[VIRTIO_NET_TX_QUEUE]; 123 124 while (1) { 125 mutex_lock(&ndev->io_tx_lock); 126 if (!virt_queue__available(vq)) 127 pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock); 128 mutex_unlock(&ndev->io_tx_lock); 129 130 while (virt_queue__available(vq)) { 131 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 132 len = ndev->ops->tx(iov, out, ndev); 133 virt_queue__set_used_elem(vq, head, len); 134 } 135 136 if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_TX_QUEUE])) 137 ndev->vtrans.trans_ops->signal_vq(kvm, &ndev->vtrans, VIRTIO_NET_TX_QUEUE); 138 } 139 140 pthread_exit(NULL); 141 142 return NULL; 143 144 } 145 146 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue) 147 { 148 switch (queue) { 149 case VIRTIO_NET_TX_QUEUE: 150 mutex_lock(&ndev->io_tx_lock); 151 pthread_cond_signal(&ndev->io_tx_cond); 152 mutex_unlock(&ndev->io_tx_lock); 153 break; 154 case VIRTIO_NET_RX_QUEUE: 155 mutex_lock(&ndev->io_rx_lock); 156 pthread_cond_signal(&ndev->io_rx_cond); 157 mutex_unlock(&ndev->io_rx_lock); 158 break; 159 default: 160 pr_warning("Unknown queue index %u", queue); 161 } 162 } 163 164 static bool virtio_net__tap_init(const struct virtio_net_params *params, 165 struct net_dev *ndev) 166 { 167 int sock = socket(AF_INET, SOCK_STREAM, 0); 168 int pid, status, offload, hdr_len; 169 struct sockaddr_in sin = {0}; 170 struct ifreq ifr; 171 172 /* Did the user already gave us the FD? */ 173 if (params->fd) { 174 ndev->tap_fd = params->fd; 175 return 1; 176 } 177 178 ndev->tap_fd = open("/dev/net/tun", O_RDWR); 179 if (ndev->tap_fd < 0) { 180 pr_warning("Unable to open /dev/net/tun"); 181 goto fail; 182 } 183 184 memset(&ifr, 0, sizeof(ifr)); 185 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 186 if (ioctl(ndev->tap_fd, TUNSETIFF, &ifr) < 0) { 187 pr_warning("Config tap device error. Are you root?"); 188 goto fail; 189 } 190 191 strncpy(ndev->tap_name, ifr.ifr_name, sizeof(ndev->tap_name)); 192 193 if (ioctl(ndev->tap_fd, TUNSETNOCSUM, 1) < 0) { 194 pr_warning("Config tap device TUNSETNOCSUM error"); 195 goto fail; 196 } 197 198 hdr_len = sizeof(struct virtio_net_hdr); 199 if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) 200 pr_warning("Config tap device TUNSETVNETHDRSZ error"); 201 202 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 203 if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) { 204 pr_warning("Config tap device TUNSETOFFLOAD error"); 205 goto fail; 206 } 207 208 if (strcmp(params->script, "none")) { 209 pid = fork(); 210 if (pid == 0) { 211 execl(params->script, params->script, ndev->tap_name, NULL); 212 _exit(1); 213 } else { 214 waitpid(pid, &status, 0); 215 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 216 pr_warning("Fail to setup tap by %s", params->script); 217 goto fail; 218 } 219 } 220 } else { 221 memset(&ifr, 0, sizeof(ifr)); 222 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name)); 223 sin.sin_addr.s_addr = inet_addr(params->host_ip); 224 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 225 ifr.ifr_addr.sa_family = AF_INET; 226 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 227 pr_warning("Could not set ip address on tap device"); 228 goto fail; 229 } 230 } 231 232 memset(&ifr, 0, sizeof(ifr)); 233 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name)); 234 ioctl(sock, SIOCGIFFLAGS, &ifr); 235 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 236 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 237 pr_warning("Could not bring tap device up"); 238 239 close(sock); 240 241 return 1; 242 243 fail: 244 if (sock >= 0) 245 close(sock); 246 if (ndev->tap_fd >= 0) 247 close(ndev->tap_fd); 248 249 return 0; 250 } 251 252 static void virtio_net__io_thread_init(struct kvm *kvm, struct net_dev *ndev) 253 { 254 pthread_mutex_init(&ndev->io_tx_lock, NULL); 255 pthread_mutex_init(&ndev->io_rx_lock, NULL); 256 257 pthread_cond_init(&ndev->io_tx_cond, NULL); 258 pthread_cond_init(&ndev->io_rx_cond, NULL); 259 260 pthread_create(&ndev->io_tx_thread, NULL, virtio_net_tx_thread, ndev); 261 pthread_create(&ndev->io_rx_thread, NULL, virtio_net_rx_thread, ndev); 262 } 263 264 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 265 { 266 return writev(ndev->tap_fd, iov, out); 267 } 268 269 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 270 { 271 return readv(ndev->tap_fd, iov, in); 272 } 273 274 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 275 { 276 return uip_tx(iov, out, &ndev->info); 277 } 278 279 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 280 { 281 return uip_rx(iov, in, &ndev->info); 282 } 283 284 static struct net_dev_operations tap_ops = { 285 .rx = tap_ops_rx, 286 .tx = tap_ops_tx, 287 }; 288 289 static struct net_dev_operations uip_ops = { 290 .rx = uip_ops_rx, 291 .tx = uip_ops_tx, 292 }; 293 294 static void set_config(struct kvm *kvm, void *dev, u8 data, u32 offset) 295 { 296 struct net_dev *ndev = dev; 297 298 ((u8 *)(&ndev->config))[offset] = data; 299 } 300 301 static u8 get_config(struct kvm *kvm, void *dev, u32 offset) 302 { 303 struct net_dev *ndev = dev; 304 305 return ((u8 *)(&ndev->config))[offset]; 306 } 307 308 static u32 get_host_features(struct kvm *kvm, void *dev) 309 { 310 return 1UL << VIRTIO_NET_F_MAC 311 | 1UL << VIRTIO_NET_F_CSUM 312 | 1UL << VIRTIO_NET_F_HOST_UFO 313 | 1UL << VIRTIO_NET_F_HOST_TSO4 314 | 1UL << VIRTIO_NET_F_HOST_TSO6 315 | 1UL << VIRTIO_NET_F_GUEST_UFO 316 | 1UL << VIRTIO_NET_F_GUEST_TSO4 317 | 1UL << VIRTIO_NET_F_GUEST_TSO6 318 | 1UL << VIRTIO_RING_F_EVENT_IDX 319 | 1UL << VIRTIO_RING_F_INDIRECT_DESC; 320 } 321 322 static void set_guest_features(struct kvm *kvm, void *dev, u32 features) 323 { 324 struct net_dev *ndev = dev; 325 326 ndev->features = features; 327 } 328 329 static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn) 330 { 331 struct vhost_vring_state state = { .index = vq }; 332 struct vhost_vring_addr addr; 333 struct net_dev *ndev = dev; 334 struct virt_queue *queue; 335 void *p; 336 int r; 337 338 compat__remove_message(compat_id); 339 340 queue = &ndev->vqs[vq]; 341 queue->pfn = pfn; 342 p = guest_pfn_to_host(kvm, queue->pfn); 343 344 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN); 345 346 if (ndev->vhost_fd == 0) 347 return 0; 348 349 state.num = queue->vring.num; 350 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state); 351 if (r < 0) 352 die_perror("VHOST_SET_VRING_NUM failed"); 353 state.num = 0; 354 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state); 355 if (r < 0) 356 die_perror("VHOST_SET_VRING_BASE failed"); 357 358 addr = (struct vhost_vring_addr) { 359 .index = vq, 360 .desc_user_addr = (u64)(unsigned long)queue->vring.desc, 361 .avail_user_addr = (u64)(unsigned long)queue->vring.avail, 362 .used_user_addr = (u64)(unsigned long)queue->vring.used, 363 }; 364 365 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr); 366 if (r < 0) 367 die_perror("VHOST_SET_VRING_ADDR failed"); 368 369 return 0; 370 } 371 372 static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi) 373 { 374 struct net_dev *ndev = dev; 375 struct kvm_irqfd irq; 376 struct vhost_vring_file file; 377 int r; 378 379 if (ndev->vhost_fd == 0) 380 return; 381 382 irq = (struct kvm_irqfd) { 383 .gsi = gsi, 384 .fd = eventfd(0, 0), 385 }; 386 file = (struct vhost_vring_file) { 387 .index = vq, 388 .fd = irq.fd, 389 }; 390 391 r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq); 392 if (r < 0) 393 die_perror("KVM_IRQFD failed"); 394 395 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file); 396 if (r < 0) 397 die_perror("VHOST_SET_VRING_CALL failed"); 398 file.fd = ndev->tap_fd; 399 r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file); 400 if (r != 0) 401 die("VHOST_NET_SET_BACKEND failed %d", errno); 402 403 } 404 405 static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd) 406 { 407 struct net_dev *ndev = dev; 408 struct vhost_vring_file file = { 409 .index = vq, 410 .fd = efd, 411 }; 412 int r; 413 414 if (ndev->vhost_fd == 0) 415 return; 416 417 r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file); 418 if (r < 0) 419 die_perror("VHOST_SET_VRING_KICK failed"); 420 } 421 422 static int notify_vq(struct kvm *kvm, void *dev, u32 vq) 423 { 424 struct net_dev *ndev = dev; 425 426 virtio_net_handle_callback(kvm, ndev, vq); 427 428 return 0; 429 } 430 431 static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq) 432 { 433 struct net_dev *ndev = dev; 434 435 return ndev->vqs[vq].pfn; 436 } 437 438 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq) 439 { 440 return VIRTIO_NET_QUEUE_SIZE; 441 } 442 443 static struct virtio_ops net_dev_virtio_ops = (struct virtio_ops) { 444 .set_config = set_config, 445 .get_config = get_config, 446 .get_host_features = get_host_features, 447 .set_guest_features = set_guest_features, 448 .init_vq = init_vq, 449 .notify_vq = notify_vq, 450 .get_pfn_vq = get_pfn_vq, 451 .get_size_vq = get_size_vq, 452 .notify_vq_gsi = notify_vq_gsi, 453 .notify_vq_eventfd = notify_vq_eventfd, 454 }; 455 456 static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev) 457 { 458 u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX; 459 struct vhost_memory *mem; 460 int r; 461 462 ndev->vhost_fd = open("/dev/vhost-net", O_RDWR); 463 if (ndev->vhost_fd < 0) 464 die_perror("Failed openning vhost-net device"); 465 466 mem = malloc(sizeof(*mem) + sizeof(struct vhost_memory_region)); 467 if (mem == NULL) 468 die("Failed allocating memory for vhost memory map"); 469 470 mem->nregions = 1; 471 mem->regions[0] = (struct vhost_memory_region) { 472 .guest_phys_addr = 0, 473 .memory_size = kvm->ram_size, 474 .userspace_addr = (unsigned long)kvm->ram_start, 475 }; 476 477 r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER); 478 if (r != 0) 479 die_perror("VHOST_SET_OWNER failed"); 480 481 r = ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features); 482 if (r != 0) 483 die_perror("VHOST_SET_FEATURES failed"); 484 r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem); 485 if (r != 0) 486 die_perror("VHOST_SET_MEM_TABLE failed"); 487 free(mem); 488 } 489 490 void virtio_net__init(const struct virtio_net_params *params) 491 { 492 int i; 493 struct net_dev *ndev; 494 495 if (!params) 496 return; 497 498 ndev = calloc(1, sizeof(struct net_dev)); 499 if (ndev == NULL) 500 die("Failed allocating ndev"); 501 502 list_add_tail(&ndev->list, &ndevs); 503 504 ndev->kvm = params->kvm; 505 506 mutex_init(&ndev->mutex); 507 ndev->config.status = VIRTIO_NET_S_LINK_UP; 508 509 for (i = 0 ; i < 6 ; i++) { 510 ndev->config.mac[i] = params->guest_mac[i]; 511 ndev->info.guest_mac.addr[i] = params->guest_mac[i]; 512 ndev->info.host_mac.addr[i] = params->host_mac[i]; 513 } 514 515 ndev->mode = params->mode; 516 if (ndev->mode == NET_MODE_TAP) { 517 if (!virtio_net__tap_init(params, ndev)) 518 die_perror("You have requested a TAP device, but creation of one has" 519 "failed because:"); 520 ndev->ops = &tap_ops; 521 } else { 522 ndev->info.host_ip = ntohl(inet_addr(params->host_ip)); 523 ndev->info.guest_ip = ntohl(inet_addr(params->guest_ip)); 524 ndev->info.guest_netmask = ntohl(inet_addr("255.255.255.0")); 525 ndev->info.buf_nr = 20, 526 uip_init(&ndev->info); 527 ndev->ops = &uip_ops; 528 } 529 530 virtio_trans_init(&ndev->vtrans, VIRTIO_PCI); 531 ndev->vtrans.trans_ops->init(kvm, &ndev->vtrans, ndev, PCI_DEVICE_ID_VIRTIO_NET, 532 VIRTIO_ID_NET, PCI_CLASS_NET); 533 ndev->vtrans.virtio_ops = &net_dev_virtio_ops; 534 535 if (params->vhost) 536 virtio_net__vhost_init(params->kvm, ndev); 537 else 538 virtio_net__io_thread_init(params->kvm, ndev); 539 540 if (compat_id != -1) 541 compat_id = compat__add_message("virtio-net device was not detected", 542 "While you have requested a virtio-net device, " 543 "the guest kernel did not initialize it.\n" 544 "Please make sure that the guest kernel was " 545 "compiled with CONFIG_VIRTIO_NET=y enabled " 546 "in its .config"); 547 } 548