#include "kvm/virtio-pci-dev.h"
#include "kvm/virtio-net.h"
#include "kvm/virtio.h"
#include "kvm/ioport.h"
#include "kvm/types.h"
#include "kvm/mutex.h"
#include "kvm/util.h"
#include "kvm/kvm.h"
#include "kvm/pci.h"
#include "kvm/irq.h"
#include "kvm/uip.h"
#include "kvm/ioeventfd.h"

#include <linux/virtio_net.h>
#include <linux/if_tun.h>

#include <arpa/inet.h>
#include <net/if.h>

#include <unistd.h>
#include <assert.h>
#include <fcntl.h>

#include <sys/socket.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/wait.h>

#define VIRTIO_NET_QUEUE_SIZE	128
#define VIRTIO_NET_NUM_QUEUES	2
#define VIRTIO_NET_RX_QUEUE	0
#define VIRTIO_NET_TX_QUEUE	1

static struct pci_device_header pci_header = {
	.vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET,
	.device_id = PCI_DEVICE_ID_VIRTIO_NET,
	.header_type = PCI_HEADER_TYPE_NORMAL,
	.revision_id = 0,
	.class = 0x020000,
	.subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
	.subsys_id = VIRTIO_ID_NET,
};

struct net_dev;

struct net_dev_operations {
	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
	int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev);
};

struct net_dev {
	pthread_mutex_t mutex;

	struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES];
	struct virtio_net_config config;
	u32 host_features;
	u32 guest_features;
	u16 config_vector;
	u8 status;
	u8 isr;
	u16 queue_selector;
	u16 base_addr;

	pthread_t io_rx_thread;
	pthread_mutex_t io_rx_lock;
	pthread_cond_t io_rx_cond;

	pthread_t io_tx_thread;
	pthread_mutex_t io_tx_lock;
	pthread_cond_t io_tx_cond;

	int tap_fd;
	char tap_name[IFNAMSIZ];

	int mode;

	struct uip_info info;
	struct net_dev_operations *ops;
};

static struct net_dev ndev = {
	.mutex = PTHREAD_MUTEX_INITIALIZER,

	.config = {
		.mac = {0x00, 0x15, 0x15, 0x15, 0x15, 0x15},
		.status = VIRTIO_NET_S_LINK_UP,
	},
	.host_features = 1UL << VIRTIO_NET_F_MAC
			| 1UL << VIRTIO_NET_F_CSUM
			| 1UL << VIRTIO_NET_F_HOST_UFO
			| 1UL << VIRTIO_NET_F_HOST_TSO4
			| 1UL << VIRTIO_NET_F_HOST_TSO6
			| 1UL << VIRTIO_NET_F_GUEST_UFO
			| 1UL << VIRTIO_NET_F_GUEST_TSO4
			| 1UL << VIRTIO_NET_F_GUEST_TSO6,
	.info = {
		.host_mac.addr = {0x00, 0x01, 0x01, 0x01, 0x01, 0x01},
		.guest_mac.addr = {0x00, 0x15, 0x15, 0x15, 0x15, 0x15},
		.host_ip = 0xc0a82101,
		.buf_nr = 20,
	}
};

static void *virtio_net_rx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	u16 out, in;
	u16 head;
	int len;

	kvm = p;
	vq = &ndev.vqs[VIRTIO_NET_RX_QUEUE];

	while (1) {
		mutex_lock(&ndev.io_rx_lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev.io_rx_cond, &ndev.io_rx_lock);
		mutex_unlock(&ndev.io_rx_lock);

		while (virt_queue__available(vq)) {
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
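			/*
			 * For RX the guest posts write-only buffers, so the
			 * backend fills the first `in` vectors with one frame.
			 */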
			len = ndev.ops->rx(iov, in, &ndev);

			virt_queue__set_used_elem(vq, head, len);

			/* We should interrupt the guest right away, otherwise latency is huge. */
			virt_queue__trigger_irq(vq, pci_header.irq_line, &ndev.isr, kvm);
		}
	}

	pthread_exit(NULL);
	return NULL;
}

static void *virtio_net_tx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	u16 out, in;
	u16 head;
	int len;

	kvm = p;
	vq = &ndev.vqs[VIRTIO_NET_TX_QUEUE];

	while (1) {
		mutex_lock(&ndev.io_tx_lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev.io_tx_cond, &ndev.io_tx_lock);
		mutex_unlock(&ndev.io_tx_lock);

		while (virt_queue__available(vq)) {
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);

			len = ndev.ops->tx(iov, out, &ndev);

			virt_queue__set_used_elem(vq, head, len);
		}

		/* TX is less latency-sensitive: one interrupt after draining the queue is enough. */
		virt_queue__trigger_irq(vq, pci_header.irq_line, &ndev.isr, kvm);
	}

	pthread_exit(NULL);
	return NULL;
}

static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, u32 count)
{
	u8 *config_space = (u8 *)&ndev.config;

	if (size != 1 || count != 1)
		return false;

	if ((offset - VIRTIO_MSI_CONFIG_VECTOR) >= sizeof(struct virtio_net_config))
		pr_error("config offset is too big: %lu", offset - VIRTIO_MSI_CONFIG_VECTOR);

	ioport__write8(data, config_space[offset - VIRTIO_MSI_CONFIG_VECTOR]);

	return true;
}

static bool virtio_net_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count)
{
	unsigned long offset = port - ndev.base_addr;
	bool ret = true;

	mutex_lock(&ndev.mutex);

	switch (offset) {
	case VIRTIO_PCI_HOST_FEATURES:
		ioport__write32(data, ndev.host_features);
		break;
	case VIRTIO_PCI_GUEST_FEATURES:
		ret = false;
		break;
	case VIRTIO_PCI_QUEUE_PFN:
		ioport__write32(data, ndev.vqs[ndev.queue_selector].pfn);
		break;
	case VIRTIO_PCI_QUEUE_NUM:
		ioport__write16(data, VIRTIO_NET_QUEUE_SIZE);
		break;
	case VIRTIO_PCI_QUEUE_SEL:
	case VIRTIO_PCI_QUEUE_NOTIFY:
		ret = false;
		break;
	case VIRTIO_PCI_STATUS:
		ioport__write8(data, ndev.status);
		break;
	case VIRTIO_PCI_ISR:
		/* Reading ISR acknowledges the interrupt: lower the line and clear the status. */
		ioport__write8(data, ndev.isr);
		kvm__irq_line(kvm, pci_header.irq_line, VIRTIO_IRQ_LOW);
		ndev.isr = VIRTIO_IRQ_LOW;
		break;
	case VIRTIO_MSI_CONFIG_VECTOR:
		ioport__write16(data, ndev.config_vector);
		break;
	default:
		ret = virtio_net_pci_io_device_specific_in(data, offset, size, count);
	}

	mutex_unlock(&ndev.mutex);

	return ret;
}

static void virtio_net_handle_callback(struct kvm *kvm, u16 queue_index)
{
	switch (queue_index) {
	case VIRTIO_NET_TX_QUEUE:
		mutex_lock(&ndev.io_tx_lock);
		pthread_cond_signal(&ndev.io_tx_cond);
		mutex_unlock(&ndev.io_tx_lock);
		break;
	case VIRTIO_NET_RX_QUEUE:
		mutex_lock(&ndev.io_rx_lock);
		pthread_cond_signal(&ndev.io_rx_cond);
		mutex_unlock(&ndev.io_rx_lock);
		break;
	default:
		pr_warning("Unknown queue index %u", queue_index);
	}
}

static bool virtio_net_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count)
{
	unsigned long offset = port - ndev.base_addr;
	bool ret = true;

	mutex_lock(&ndev.mutex);

	switch (offset) {
	case VIRTIO_PCI_GUEST_FEATURES:
		ndev.guest_features = ioport__read32(data);
		break;
	case VIRTIO_PCI_QUEUE_PFN: {
		struct virt_queue *queue;
		void *p;
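		/*
		 * The guest has written the page frame number of the vring it
		 * allocated; translate it to a host pointer and lay out the
		 * descriptor table, avail ring and used ring at that address.
		 */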
		assert(ndev.queue_selector < VIRTIO_NET_NUM_QUEUES);

		queue = &ndev.vqs[ndev.queue_selector];
		queue->pfn = ioport__read32(data);
		p = guest_pfn_to_host(kvm, queue->pfn);

		vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN);

		break;
	}
	case VIRTIO_PCI_QUEUE_SEL:
		ndev.queue_selector = ioport__read16(data);
		break;
	case VIRTIO_PCI_QUEUE_NOTIFY: {
		u16 queue_index;

		queue_index = ioport__read16(data);
		virtio_net_handle_callback(kvm, queue_index);
		break;
	}
	case VIRTIO_PCI_STATUS:
		ndev.status = ioport__read8(data);
		break;
	case VIRTIO_MSI_CONFIG_VECTOR:
		/* MSI-X is not supported; report no vector. */
		ndev.config_vector = VIRTIO_MSI_NO_VECTOR;
		break;
	case VIRTIO_MSI_QUEUE_VECTOR:
		break;
	default:
		ret = false;
	}

	mutex_unlock(&ndev.mutex);

	return ret;
}

static void ioevent_callback(struct kvm *kvm, void *param)
{
	virtio_net_handle_callback(kvm, (u64)(long)param);
}

static struct ioport_operations virtio_net_io_ops = {
	.io_in = virtio_net_pci_io_in,
	.io_out = virtio_net_pci_io_out,
};

static bool virtio_net__tap_init(const struct virtio_net_parameters *params)
{
	int sock = socket(AF_INET, SOCK_STREAM, 0);
	int i, pid, status, offload, hdr_len;
	struct sockaddr_in sin = {0};
	struct ifreq ifr;

	for (i = 0; i < 6; i++)
		ndev.config.mac[i] = params->guest_mac[i];

	ndev.tap_fd = open("/dev/net/tun", O_RDWR);
	if (ndev.tap_fd < 0) {
		pr_warning("Unable to open /dev/net/tun");
		goto fail;
	}

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
	if (ioctl(ndev.tap_fd, TUNSETIFF, &ifr) < 0) {
		pr_warning("Failed to configure the tap device. Are you root?");
		goto fail;
	}
	strncpy(ndev.tap_name, ifr.ifr_name, sizeof(ndev.tap_name));

	if (ioctl(ndev.tap_fd, TUNSETNOCSUM, 1) < 0) {
		pr_warning("Config tap device TUNSETNOCSUM error");
		goto fail;
	}

	hdr_len = sizeof(struct virtio_net_hdr);
	if (ioctl(ndev.tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) {
		pr_warning("Config tap device TUNSETVNETHDRSZ error");
		goto fail;
	}

	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
	if (ioctl(ndev.tap_fd, TUNSETOFFLOAD, offload) < 0) {
		pr_warning("Config tap device TUNSETOFFLOAD error");
		goto fail;
	}

	if (strcmp(params->script, "none")) {
		pid = fork();
		if (pid == 0) {
			execl(params->script, params->script, ndev.tap_name, NULL);
			_exit(1);
		} else {
			waitpid(pid, &status, 0);
			if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
				pr_warning("Failed to set up tap via %s", params->script);
				goto fail;
			}
		}
	} else {
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name));
		sin.sin_addr.s_addr = inet_addr(params->host_ip);
		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
		ifr.ifr_addr.sa_family = AF_INET;
		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
			pr_warning("Could not set ip address on tap device");
			goto fail;
		}
	}

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name));
	ioctl(sock, SIOCGIFFLAGS, &ifr);
	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
		pr_warning("Could not bring tap device up");

	close(sock);

	return true;

fail:
	if (sock >= 0)
		close(sock);
	if (ndev.tap_fd >= 0)
		close(ndev.tap_fd);

	return false;
}

static void virtio_net__io_thread_init(struct kvm *kvm)
{
	pthread_mutex_init(&ndev.io_rx_lock, NULL);
	pthread_cond_init(&ndev.io_rx_cond, NULL);

	pthread_mutex_init(&ndev.io_tx_lock, NULL);
	pthread_cond_init(&ndev.io_tx_cond, NULL);

	pthread_create(&ndev.io_rx_thread, NULL, virtio_net_rx_thread, (void *)kvm);
	pthread_create(&ndev.io_tx_thread, NULL, virtio_net_tx_thread, (void *)kvm);
}

static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return writev(ndev->tap_fd, iov, out);
}

static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return readv(ndev->tap_fd, iov, in);
}

static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return uip_tx(iov, out, &ndev->info);
}

static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return uip_rx(iov, in, &ndev->info);
}

static struct net_dev_operations tap_ops = {
	.rx = tap_ops_rx,
	.tx = tap_ops_tx,
};

static struct net_dev_operations uip_ops = {
	.rx = uip_ops_rx,
	.tx = uip_ops_tx,
};

void virtio_net__init(const struct virtio_net_parameters *params)
{
	struct ioevent ioevent;
	u8 dev, line, pin;
	u16 net_base_addr;
	int i;

	if (irq__register_device(VIRTIO_ID_NET, &dev, &pin, &line) < 0)
		return;

	pci_header.irq_pin = pin;
	pci_header.irq_line = line;
	net_base_addr = ioport__register(IOPORT_EMPTY, &virtio_net_io_ops, IOPORT_SIZE, NULL);
	pci_header.bar[0] = net_base_addr | PCI_BASE_ADDRESS_SPACE_IO;
	ndev.base_addr = net_base_addr;
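	/* Make the device visible on the PCI bus; BAR0 covers the I/O ports registered above. */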
	pci__register(&pci_header, dev);

	ndev.mode = params->mode;
	if (ndev.mode == NET_MODE_TAP) {
		virtio_net__tap_init(params);
		ndev.ops = &tap_ops;
	} else {
		uip_init(&ndev.info);
		ndev.ops = &uip_ops;
	}

	virtio_net__io_thread_init(params->kvm);

	/*
	 * One ioeventfd per queue: a guest write of the queue index to
	 * QUEUE_NOTIFY is matched by `datamatch` and signals the eventfd,
	 * which kicks the matching I/O thread via ioevent_callback.
	 */
	for (i = 0; i < VIRTIO_NET_NUM_QUEUES; i++) {
		ioevent = (struct ioevent) {
			.io_addr = net_base_addr + VIRTIO_PCI_QUEUE_NOTIFY,
			.io_len = sizeof(u16),
			.fn = ioevent_callback,
			.datamatch = i,
			.fn_ptr = (void *)(long)i,
			.fn_kvm = params->kvm,
			.fd = eventfd(0, 0),
		};

		ioeventfd__add_event(&ioevent);
	}
}