#include "kvm/virtio-pci-dev.h"
#include "kvm/virtio-net.h"
#include "kvm/virtio.h"
#include "kvm/ioport.h"
#include "kvm/types.h"
#include "kvm/mutex.h"
#include "kvm/util.h"
#include "kvm/kvm.h"
#include "kvm/pci.h"
#include "kvm/irq.h"
#include "kvm/uip.h"
#include "kvm/ioeventfd.h"

#include <linux/virtio_net.h>
#include <linux/if_tun.h>

#include <arpa/inet.h>
#include <net/if.h>

#include <unistd.h>
#include <assert.h>
#include <fcntl.h>

#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/eventfd.h>	/* eventfd() in virtio_net__init() */

#define VIRTIO_NET_QUEUE_SIZE	128
#define VIRTIO_NET_NUM_QUEUES	2
#define VIRTIO_NET_RX_QUEUE	0
#define VIRTIO_NET_TX_QUEUE	1

static struct pci_device_header pci_header = {
	.vendor_id		= PCI_VENDOR_ID_REDHAT_QUMRANET,
	.device_id		= PCI_DEVICE_ID_VIRTIO_NET,
	.header_type		= PCI_HEADER_TYPE_NORMAL,
	.revision_id		= 0,
	.class			= 0x020000,
	.subsys_vendor_id	= PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
	.subsys_id		= VIRTIO_ID_NET,
};

struct net_dev {
	pthread_mutex_t		mutex;

	struct virt_queue	vqs[VIRTIO_NET_NUM_QUEUES];
	struct virtio_net_config config;
	u32			host_features;
	u32			guest_features;
	u16			config_vector;
	u8			status;
	u8			isr;
	u16			queue_selector;
	u16			base_addr;

	pthread_t		io_rx_thread;
	pthread_mutex_t		io_rx_lock;
	pthread_cond_t		io_rx_cond;

	pthread_t		io_tx_thread;
	pthread_mutex_t		io_tx_lock;
	pthread_cond_t		io_tx_cond;

	int			tap_fd;
	char			tap_name[IFNAMSIZ];

	int			mode;

	struct uip_info		info;
};

static struct net_dev ndev = {
	.mutex	= PTHREAD_MUTEX_INITIALIZER,

	.config = {
		.mac		= {0x00, 0x15, 0x15, 0x15, 0x15, 0x15},
		.status		= VIRTIO_NET_S_LINK_UP,
	},
	.host_features		= 1UL << VIRTIO_NET_F_MAC
				| 1UL << VIRTIO_NET_F_CSUM
				| 1UL << VIRTIO_NET_F_HOST_UFO
				| 1UL << VIRTIO_NET_F_HOST_TSO4
				| 1UL << VIRTIO_NET_F_HOST_TSO6
				| 1UL << VIRTIO_NET_F_GUEST_UFO
				| 1UL << VIRTIO_NET_F_GUEST_TSO4
				| 1UL << VIRTIO_NET_F_GUEST_TSO6,
	.info = {
		.host_mac.addr	= {0x00, 0x01, 0x01, 0x01, 0x01, 0x01},
		.guest_mac.addr	= {0x00, 0x15, 0x15, 0x15, 0x15, 0x15},
		.host_ip	= 0xc0a82101,	/* 192.168.33.1 */
		.buf_nr		= 20,
	}
};
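/*
 * Each queue is serviced by a dedicated thread. A thread sleeps on its
 * condition variable until the guest kicks the matching queue (via the
 * VIRTIO_PCI_QUEUE_NOTIFY ioport or the registered ioeventfd), then
 * drains every available descriptor chain before going back to sleep.
 */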
static void *virtio_net_rx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	u16 out, in;
	u16 head;
	int len;

	kvm = p;
	vq = &ndev.vqs[VIRTIO_NET_RX_QUEUE];

	while (1) {
		mutex_lock(&ndev.io_rx_lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev.io_rx_cond, &ndev.io_rx_lock);
		mutex_unlock(&ndev.io_rx_lock);

		while (virt_queue__available(vq)) {
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);

			if (ndev.mode == NET_MODE_TAP)
				len = readv(ndev.tap_fd, iov, in);
			else
				len = uip_rx(iov, in, &ndev.info);

			virt_queue__set_used_elem(vq, head, len);

			/*
			 * We should interrupt the guest right now,
			 * otherwise latency is huge.
			 */
			virt_queue__trigger_irq(vq, pci_header.irq_line,
						&ndev.isr, kvm);
		}
	}

	pthread_exit(NULL);
	return NULL;
}

static void *virtio_net_tx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	u16 out, in;
	u16 head;
	int len;

	kvm = p;
	vq = &ndev.vqs[VIRTIO_NET_TX_QUEUE];

	while (1) {
		mutex_lock(&ndev.io_tx_lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev.io_tx_cond, &ndev.io_tx_lock);
		mutex_unlock(&ndev.io_tx_lock);

		while (virt_queue__available(vq)) {
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);

			if (ndev.mode == NET_MODE_TAP)
				len = writev(ndev.tap_fd, iov, out);
			else
				len = uip_tx(iov, out, &ndev.info);

			virt_queue__set_used_elem(vq, head, len);
		}

		/* TX completions are batched: one interrupt per drained queue. */
		virt_queue__trigger_irq(vq, pci_header.irq_line, &ndev.isr, kvm);
	}

	pthread_exit(NULL);
	return NULL;
}
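/*
 * Guest I/O to BAR0 follows the legacy virtio-pci layout: the common
 * header fields (features, queue PFN/sel/notify, status, ISR) sit at
 * fixed offsets, and reads past VIRTIO_MSI_CONFIG_VECTOR fall through
 * to byte-wise reads of struct virtio_net_config.
 */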
static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, u32 count)
{
	u8 *config_space = (u8 *)&ndev.config;

	if (size != 1 || count != 1)
		return false;

	if ((offset - VIRTIO_MSI_CONFIG_VECTOR) >= sizeof(struct virtio_net_config))
		pr_error("config offset is too big: %lu", offset - VIRTIO_MSI_CONFIG_VECTOR);

	ioport__write8(data, config_space[offset - VIRTIO_MSI_CONFIG_VECTOR]);

	return true;
}

static bool virtio_net_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count)
{
	unsigned long offset = port - ndev.base_addr;
	bool ret = true;

	mutex_lock(&ndev.mutex);

	switch (offset) {
	case VIRTIO_PCI_HOST_FEATURES:
		ioport__write32(data, ndev.host_features);
		break;
	case VIRTIO_PCI_GUEST_FEATURES:
		ret = false;
		break;
	case VIRTIO_PCI_QUEUE_PFN:
		ioport__write32(data, ndev.vqs[ndev.queue_selector].pfn);
		break;
	case VIRTIO_PCI_QUEUE_NUM:
		ioport__write16(data, VIRTIO_NET_QUEUE_SIZE);
		break;
	case VIRTIO_PCI_QUEUE_SEL:
	case VIRTIO_PCI_QUEUE_NOTIFY:
		ret = false;
		break;
	case VIRTIO_PCI_STATUS:
		ioport__write8(data, ndev.status);
		break;
	case VIRTIO_PCI_ISR:
		/* Reading ISR acknowledges and deasserts the interrupt. */
		ioport__write8(data, ndev.isr);
		kvm__irq_line(kvm, pci_header.irq_line, VIRTIO_IRQ_LOW);
		ndev.isr = VIRTIO_IRQ_LOW;
		break;
	case VIRTIO_MSI_CONFIG_VECTOR:
		ioport__write16(data, ndev.config_vector);
		break;
	default:
		ret = virtio_net_pci_io_device_specific_in(data, offset, size, count);
	}

	mutex_unlock(&ndev.mutex);

	return ret;
}

static void virtio_net_handle_callback(struct kvm *kvm, u16 queue_index)
{
	switch (queue_index) {
	case VIRTIO_NET_TX_QUEUE:
		mutex_lock(&ndev.io_tx_lock);
		pthread_cond_signal(&ndev.io_tx_cond);
		mutex_unlock(&ndev.io_tx_lock);
		break;
	case VIRTIO_NET_RX_QUEUE:
		mutex_lock(&ndev.io_rx_lock);
		pthread_cond_signal(&ndev.io_rx_cond);
		mutex_unlock(&ndev.io_rx_lock);
		break;
	default:
		pr_warning("Unknown queue index %u", queue_index);
	}
}

static bool virtio_net_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count)
{
	unsigned long offset = port - ndev.base_addr;
	bool ret = true;

	mutex_lock(&ndev.mutex);

	switch (offset) {
	case VIRTIO_PCI_GUEST_FEATURES:
		ndev.guest_features = ioport__read32(data);
		break;
	case VIRTIO_PCI_QUEUE_PFN: {
		struct virt_queue *queue;
		void *p;

		assert(ndev.queue_selector < VIRTIO_NET_NUM_QUEUES);

		queue = &ndev.vqs[ndev.queue_selector];
		queue->pfn = ioport__read32(data);
		p = guest_pfn_to_host(kvm, queue->pfn);

		vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN);

		break;
	}
	case VIRTIO_PCI_QUEUE_SEL:
		ndev.queue_selector = ioport__read16(data);
		break;
	case VIRTIO_PCI_QUEUE_NOTIFY: {
		u16 queue_index;

		queue_index = ioport__read16(data);
		virtio_net_handle_callback(kvm, queue_index);
		break;
	}
	case VIRTIO_PCI_STATUS:
		ndev.status = ioport__read8(data);
		break;
	case VIRTIO_MSI_CONFIG_VECTOR:
		ndev.config_vector = VIRTIO_MSI_NO_VECTOR;
		break;
	case VIRTIO_MSI_QUEUE_VECTOR:
		break;
	default:
		ret = false;
	}

	mutex_unlock(&ndev.mutex);

	return ret;
}

static void ioevent_callback(struct kvm *kvm, void *param)
{
	virtio_net_handle_callback(kvm, (u64)(long)param);
}

static struct ioport_operations virtio_net_io_ops = {
	.io_in	= virtio_net_pci_io_in,
	.io_out	= virtio_net_pci_io_out,
};
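/*
 * TAP mode: open /dev/net/tun, create an IFF_TAP | IFF_NO_PI |
 * IFF_VNET_HDR interface, size its vnet header, and enable the
 * offloads corresponding to the advertised host_features. The
 * interface is then configured either by the user-supplied script
 * or by assigning params->host_ip directly.
 */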
static bool virtio_net__tap_init(const struct virtio_net_parameters *params)
{
	int sock = socket(AF_INET, SOCK_STREAM, 0);
	int i, pid, status, offload, hdr_len;
	struct sockaddr_in sin = {0};
	struct ifreq ifr;

	for (i = 0; i < 6; i++)
		ndev.config.mac[i] = params->guest_mac[i];

	ndev.tap_fd = open("/dev/net/tun", O_RDWR);
	if (ndev.tap_fd < 0) {
		pr_warning("Unable to open /dev/net/tun");
		goto fail;
	}

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
	if (ioctl(ndev.tap_fd, TUNSETIFF, &ifr) < 0) {
		pr_warning("Config tap device error. Are you root?");
		goto fail;
	}

	strncpy(ndev.tap_name, ifr.ifr_name, sizeof(ndev.tap_name));

	if (ioctl(ndev.tap_fd, TUNSETNOCSUM, 1) < 0) {
		pr_warning("Config tap device TUNSETNOCSUM error");
		goto fail;
	}

	hdr_len = sizeof(struct virtio_net_hdr);
	if (ioctl(ndev.tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) {
		pr_warning("Config tap device TUNSETVNETHDRSZ error");
		goto fail;
	}

	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
	if (ioctl(ndev.tap_fd, TUNSETOFFLOAD, offload) < 0) {
		pr_warning("Config tap device TUNSETOFFLOAD error");
		goto fail;
	}

	if (strcmp(params->script, "none")) {
		pid = fork();
		if (pid == 0) {
			execl(params->script, params->script, ndev.tap_name, NULL);
			_exit(1);
		} else {
			waitpid(pid, &status, 0);
			if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
				pr_warning("Failed to set up tap via %s", params->script);
				goto fail;
			}
		}
	} else {
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ifr.ifr_name));
		sin.sin_addr.s_addr = inet_addr(params->host_ip);
		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
		ifr.ifr_addr.sa_family = AF_INET;
		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
			pr_warning("Could not set ip address on tap device");
			goto fail;
		}
	}

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ifr.ifr_name));
	ioctl(sock, SIOCGIFFLAGS, &ifr);
	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
		pr_warning("Could not bring tap device up");

	close(sock);

	return true;

fail:
	if (sock >= 0)
		close(sock);
	if (ndev.tap_fd >= 0)
		close(ndev.tap_fd);

	return false;
}

static void virtio_net__io_thread_init(struct kvm *kvm)
{
	/* Each thread gets its own lock/condvar pair. */
	pthread_mutex_init(&ndev.io_rx_lock, NULL);
	pthread_cond_init(&ndev.io_rx_cond, NULL);

	pthread_mutex_init(&ndev.io_tx_lock, NULL);
	pthread_cond_init(&ndev.io_tx_cond, NULL);

	pthread_create(&ndev.io_rx_thread, NULL, virtio_net_rx_thread, (void *)kvm);
	pthread_create(&ndev.io_tx_thread, NULL, virtio_net_tx_thread, (void *)kvm);
}

void virtio_net__init(const struct virtio_net_parameters *params)
{
	struct ioevent ioevent;
	u8 dev, line, pin;
	u16 net_base_addr;
	int i;

	if (irq__register_device(VIRTIO_ID_NET, &dev, &pin, &line) < 0)
		return;

	pci_header.irq_pin = pin;
	pci_header.irq_line = line;
	net_base_addr = ioport__register(IOPORT_EMPTY, &virtio_net_io_ops, IOPORT_SIZE, NULL);
	pci_header.bar[0] = net_base_addr | PCI_BASE_ADDRESS_SPACE_IO;
	ndev.base_addr = net_base_addr;
	pci__register(&pci_header, dev);

	ndev.mode = params->mode;
	if (ndev.mode == NET_MODE_TAP)
		virtio_net__tap_init(params);
	else
		uip_init(&ndev.info);

	virtio_net__io_thread_init(params->kvm);

	/* One ioeventfd per queue, matched on the notify port's data. */
	for (i = 0; i < VIRTIO_NET_NUM_QUEUES; i++) {
		ioevent = (struct ioevent) {
			.io_addr	= net_base_addr + VIRTIO_PCI_QUEUE_NOTIFY,
			.io_len		= sizeof(u16),
			.fn		= ioevent_callback,
			.datamatch	= i,
			.fn_ptr		= (void *)(long)i,
			.fn_kvm		= params->kvm,
			.fd		= eventfd(0, 0),
		};

		ioeventfd__add_event(&ioevent);
	}
}