1 #include "kvm/virtio-net.h" 2 #include "kvm/virtio-pci-dev.h" 3 #include "kvm/virtio.h" 4 #include "kvm/ioport.h" 5 #include "kvm/types.h" 6 #include "kvm/mutex.h" 7 #include "kvm/util.h" 8 #include "kvm/kvm.h" 9 #include "kvm/pci.h" 10 #include "kvm/irq.h" 11 #include "kvm/ioeventfd.h" 12 13 #include <linux/virtio_net.h> 14 #include <linux/if_tun.h> 15 16 #include <arpa/inet.h> 17 #include <net/if.h> 18 19 #include <unistd.h> 20 #include <assert.h> 21 #include <fcntl.h> 22 23 #include <sys/socket.h> 24 #include <sys/ioctl.h> 25 #include <sys/types.h> 26 #include <sys/wait.h> 27 28 #define VIRTIO_NET_QUEUE_SIZE 128 29 #define VIRTIO_NET_NUM_QUEUES 2 30 #define VIRTIO_NET_RX_QUEUE 0 31 #define VIRTIO_NET_TX_QUEUE 1 32 33 static struct pci_device_header pci_header = { 34 .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 35 .device_id = PCI_DEVICE_ID_VIRTIO_NET, 36 .header_type = PCI_HEADER_TYPE_NORMAL, 37 .revision_id = 0, 38 .class = 0x020000, 39 .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 40 .subsys_id = VIRTIO_ID_NET, 41 }; 42 43 struct net_dev { 44 pthread_mutex_t mutex; 45 46 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 47 struct virtio_net_config config; 48 u32 host_features; 49 u32 guest_features; 50 u16 config_vector; 51 u8 status; 52 u8 isr; 53 u16 queue_selector; 54 u16 base_addr; 55 56 pthread_t io_rx_thread; 57 pthread_mutex_t io_rx_lock; 58 pthread_cond_t io_rx_cond; 59 60 pthread_t io_tx_thread; 61 pthread_mutex_t io_tx_lock; 62 pthread_cond_t io_tx_cond; 63 64 int tap_fd; 65 char tap_name[IFNAMSIZ]; 66 67 int mode; 68 69 }; 70 71 72 static struct net_dev ndev = { 73 .mutex = PTHREAD_MUTEX_INITIALIZER, 74 75 .config = { 76 .mac = {0x00, 0x15, 0x15, 0x15, 0x15, 0x15}, 77 .status = VIRTIO_NET_S_LINK_UP, 78 }, 79 .host_features = 1UL << VIRTIO_NET_F_MAC 80 | 1UL << VIRTIO_NET_F_CSUM 81 | 1UL << VIRTIO_NET_F_HOST_UFO 82 | 1UL << VIRTIO_NET_F_HOST_TSO4 83 | 1UL << VIRTIO_NET_F_HOST_TSO6 84 | 1UL << VIRTIO_NET_F_GUEST_UFO 85 | 1UL << VIRTIO_NET_F_GUEST_TSO4 86 | 1UL << VIRTIO_NET_F_GUEST_TSO6, 87 }; 88 89 static void *virtio_net_rx_thread(void *p) 90 { 91 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 92 struct virt_queue *vq; 93 struct kvm *kvm; 94 u16 out, in; 95 u16 head; 96 int len; 97 98 kvm = p; 99 vq = &ndev.vqs[VIRTIO_NET_RX_QUEUE]; 100 101 while (1) { 102 mutex_lock(&ndev.io_rx_lock); 103 if (!virt_queue__available(vq)) 104 pthread_cond_wait(&ndev.io_rx_cond, &ndev.io_rx_lock); 105 mutex_unlock(&ndev.io_rx_lock); 106 107 while (virt_queue__available(vq)) { 108 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 109 len = readv(ndev.tap_fd, iov, in); 110 virt_queue__set_used_elem(vq, head, len); 111 112 /* We should interrupt guest right now, otherwise latency is huge. */ 113 virt_queue__trigger_irq(vq, pci_header.irq_line, &ndev.isr, kvm); 114 } 115 116 } 117 118 pthread_exit(NULL); 119 return NULL; 120 121 } 122 123 static void *virtio_net_tx_thread(void *p) 124 { 125 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 126 struct virt_queue *vq; 127 struct kvm *kvm; 128 u16 out, in; 129 u16 head; 130 int len; 131 132 kvm = p; 133 vq = &ndev.vqs[VIRTIO_NET_TX_QUEUE]; 134 135 while (1) { 136 mutex_lock(&ndev.io_tx_lock); 137 if (!virt_queue__available(vq)) 138 pthread_cond_wait(&ndev.io_tx_cond, &ndev.io_tx_lock); 139 mutex_unlock(&ndev.io_tx_lock); 140 141 while (virt_queue__available(vq)) { 142 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 143 len = writev(ndev.tap_fd, iov, out); 144 virt_queue__set_used_elem(vq, head, len); 145 } 146 147 virt_queue__trigger_irq(vq, pci_header.irq_line, &ndev.isr, kvm); 148 149 } 150 151 pthread_exit(NULL); 152 153 return NULL; 154 155 } 156 157 static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, u32 count) 158 { 159 u8 *config_space = (u8 *)&ndev.config; 160 161 if (size != 1 || count != 1) 162 return false; 163 164 if ((offset - VIRTIO_MSI_CONFIG_VECTOR) > sizeof(struct virtio_net_config)) 165 pr_error("config offset is too big: %li", offset - VIRTIO_MSI_CONFIG_VECTOR); 166 167 ioport__write8(data, config_space[offset - VIRTIO_MSI_CONFIG_VECTOR]); 168 169 return true; 170 } 171 172 static bool virtio_net_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) 173 { 174 unsigned long offset = port - ndev.base_addr; 175 bool ret = true; 176 177 mutex_lock(&ndev.mutex); 178 179 switch (offset) { 180 case VIRTIO_PCI_HOST_FEATURES: 181 ioport__write32(data, ndev.host_features); 182 break; 183 case VIRTIO_PCI_GUEST_FEATURES: 184 ret = false; 185 break; 186 case VIRTIO_PCI_QUEUE_PFN: 187 ioport__write32(data, ndev.vqs[ndev.queue_selector].pfn); 188 break; 189 case VIRTIO_PCI_QUEUE_NUM: 190 ioport__write16(data, VIRTIO_NET_QUEUE_SIZE); 191 break; 192 case VIRTIO_PCI_QUEUE_SEL: 193 case VIRTIO_PCI_QUEUE_NOTIFY: 194 ret = false; 195 break; 196 case VIRTIO_PCI_STATUS: 197 ioport__write8(data, ndev.status); 198 break; 199 case VIRTIO_PCI_ISR: 200 ioport__write8(data, ndev.isr); 201 kvm__irq_line(kvm, pci_header.irq_line, VIRTIO_IRQ_LOW); 202 ndev.isr = VIRTIO_IRQ_LOW; 203 break; 204 case VIRTIO_MSI_CONFIG_VECTOR: 205 ioport__write16(data, ndev.config_vector); 206 break; 207 default: 208 ret = virtio_net_pci_io_device_specific_in(data, offset, size, count); 209 }; 210 211 mutex_unlock(&ndev.mutex); 212 213 return ret; 214 } 215 216 static void virtio_net_handle_callback(struct kvm *kvm, u16 queue_index) 217 { 218 switch (queue_index) { 219 case VIRTIO_NET_TX_QUEUE: { 220 mutex_lock(&ndev.io_tx_lock); 221 pthread_cond_signal(&ndev.io_tx_cond); 222 mutex_unlock(&ndev.io_tx_lock); 223 break; 224 } 225 case VIRTIO_NET_RX_QUEUE: { 226 mutex_lock(&ndev.io_rx_lock); 227 pthread_cond_signal(&ndev.io_rx_cond); 228 mutex_unlock(&ndev.io_rx_lock); 229 break; 230 } 231 default: 232 pr_warning("Unknown queue index %u", queue_index); 233 } 234 } 235 236 static bool virtio_net_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) 237 { 238 unsigned long offset = port - ndev.base_addr; 239 bool ret = true; 240 241 mutex_lock(&ndev.mutex); 242 243 switch (offset) { 244 case VIRTIO_PCI_GUEST_FEATURES: 245 ndev.guest_features = ioport__read32(data); 246 break; 247 case VIRTIO_PCI_QUEUE_PFN: { 248 struct virt_queue *queue; 249 void *p; 250 251 assert(ndev.queue_selector < VIRTIO_NET_NUM_QUEUES); 252 253 queue = &ndev.vqs[ndev.queue_selector]; 254 queue->pfn = ioport__read32(data); 255 p = guest_pfn_to_host(kvm, queue->pfn); 256 257 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN); 258 259 break; 260 } 261 case VIRTIO_PCI_QUEUE_SEL: 262 ndev.queue_selector = ioport__read16(data); 263 break; 264 case VIRTIO_PCI_QUEUE_NOTIFY: { 265 u16 queue_index; 266 267 queue_index = ioport__read16(data); 268 virtio_net_handle_callback(kvm, queue_index); 269 break; 270 } 271 case VIRTIO_PCI_STATUS: 272 ndev.status = ioport__read8(data); 273 break; 274 case VIRTIO_MSI_CONFIG_VECTOR: 275 ndev.config_vector = VIRTIO_MSI_NO_VECTOR; 276 break; 277 case VIRTIO_MSI_QUEUE_VECTOR: 278 break; 279 default: 280 ret = false; 281 }; 282 283 mutex_unlock(&ndev.mutex); 284 285 return ret; 286 } 287 288 static void ioevent_callback(struct kvm *kvm, void *param) 289 { 290 virtio_net_handle_callback(kvm, (u64)(long)param); 291 } 292 293 static struct ioport_operations virtio_net_io_ops = { 294 .io_in = virtio_net_pci_io_in, 295 .io_out = virtio_net_pci_io_out, 296 }; 297 298 static bool virtio_net__tap_init(const struct virtio_net_parameters *params) 299 { 300 int sock = socket(AF_INET, SOCK_STREAM, 0); 301 int i, pid, status, offload, hdr_len; 302 struct sockaddr_in sin = {0}; 303 struct ifreq ifr; 304 305 for (i = 0 ; i < 6 ; i++) 306 ndev.config.mac[i] = params->guest_mac[i]; 307 308 ndev.tap_fd = open("/dev/net/tun", O_RDWR); 309 if (ndev.tap_fd < 0) { 310 pr_warning("Unable to open /dev/net/tun"); 311 goto fail; 312 } 313 314 memset(&ifr, 0, sizeof(ifr)); 315 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 316 if (ioctl(ndev.tap_fd, TUNSETIFF, &ifr) < 0) { 317 pr_warning("Config tap device error. Are you root?"); 318 goto fail; 319 } 320 321 strncpy(ndev.tap_name, ifr.ifr_name, sizeof(ndev.tap_name)); 322 323 if (ioctl(ndev.tap_fd, TUNSETNOCSUM, 1) < 0) { 324 pr_warning("Config tap device TUNSETNOCSUM error"); 325 goto fail; 326 } 327 328 hdr_len = sizeof(struct virtio_net_hdr); 329 if (ioctl(ndev.tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) { 330 pr_warning("Config tap device TUNSETVNETHDRSZ error"); 331 goto fail; 332 } 333 334 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 335 if (ioctl(ndev.tap_fd, TUNSETOFFLOAD, offload) < 0) { 336 pr_warning("Config tap device TUNSETOFFLOAD error"); 337 goto fail; 338 } 339 340 if (strcmp(params->script, "none")) { 341 pid = fork(); 342 if (pid == 0) { 343 execl(params->script, params->script, ndev.tap_name, NULL); 344 _exit(1); 345 } else { 346 waitpid(pid, &status, 0); 347 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 348 pr_warning("Fail to setup tap by %s", params->script); 349 goto fail; 350 } 351 } 352 } else { 353 memset(&ifr, 0, sizeof(ifr)); 354 strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name)); 355 sin.sin_addr.s_addr = inet_addr(params->host_ip); 356 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 357 ifr.ifr_addr.sa_family = AF_INET; 358 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 359 pr_warning("Could not set ip address on tap device"); 360 goto fail; 361 } 362 } 363 364 memset(&ifr, 0, sizeof(ifr)); 365 strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name)); 366 ioctl(sock, SIOCGIFFLAGS, &ifr); 367 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 368 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 369 pr_warning("Could not bring tap device up"); 370 371 close(sock); 372 373 return 1; 374 375 fail: 376 if (sock >= 0) 377 close(sock); 378 if (ndev.tap_fd >= 0) 379 close(ndev.tap_fd); 380 381 return 0; 382 } 383 384 static void virtio_net__io_thread_init(struct kvm *kvm) 385 { 386 pthread_mutex_init(&ndev.io_rx_lock, NULL); 387 pthread_cond_init(&ndev.io_tx_cond, NULL); 388 389 pthread_mutex_init(&ndev.io_rx_lock, NULL); 390 pthread_cond_init(&ndev.io_tx_cond, NULL); 391 392 pthread_create(&ndev.io_rx_thread, NULL, virtio_net_rx_thread, (void *)kvm); 393 pthread_create(&ndev.io_tx_thread, NULL, virtio_net_tx_thread, (void *)kvm); 394 } 395 396 void virtio_net__init(const struct virtio_net_parameters *params) 397 { 398 if (virtio_net__tap_init(params)) { 399 u8 dev, line, pin; 400 u16 net_base_addr; 401 u64 i; 402 struct ioevent ioevent; 403 404 if (irq__register_device(VIRTIO_ID_NET, &dev, &pin, &line) < 0) 405 return; 406 407 pci_header.irq_pin = pin; 408 pci_header.irq_line = line; 409 net_base_addr = ioport__register(IOPORT_EMPTY, &virtio_net_io_ops, IOPORT_SIZE, NULL); 410 pci_header.bar[0] = net_base_addr | PCI_BASE_ADDRESS_SPACE_IO; 411 ndev.base_addr = net_base_addr; 412 413 pci__register(&pci_header, dev); 414 415 virtio_net__io_thread_init(params->kvm); 416 417 for (i = 0; i < VIRTIO_NET_NUM_QUEUES; i++) { 418 ioevent = (struct ioevent) { 419 .io_addr = net_base_addr + VIRTIO_PCI_QUEUE_NOTIFY, 420 .io_len = sizeof(u16), 421 .fn = ioevent_callback, 422 .datamatch = i, 423 .fn_ptr = (void *)(long)i, 424 .fn_kvm = params->kvm, 425 .fd = eventfd(0, 0), 426 }; 427 428 ioeventfd__add_event(&ioevent); 429 } 430 } 431 } 432