1 #include "kvm/virtio-net.h" 2 #include "kvm/virtio-pci-dev.h" 3 #include "kvm/virtio.h" 4 #include "kvm/ioport.h" 5 #include "kvm/types.h" 6 #include "kvm/mutex.h" 7 #include "kvm/util.h" 8 #include "kvm/kvm.h" 9 #include "kvm/pci.h" 10 #include "kvm/irq.h" 11 #include "kvm/ioeventfd.h" 12 13 #include <linux/virtio_net.h> 14 #include <linux/if_tun.h> 15 16 #include <arpa/inet.h> 17 #include <net/if.h> 18 19 #include <unistd.h> 20 #include <assert.h> 21 #include <fcntl.h> 22 23 #include <sys/socket.h> 24 #include <sys/ioctl.h> 25 #include <sys/types.h> 26 #include <sys/wait.h> 27 28 #define VIRTIO_NET_QUEUE_SIZE 128 29 #define VIRTIO_NET_NUM_QUEUES 2 30 #define VIRTIO_NET_RX_QUEUE 0 31 #define VIRTIO_NET_TX_QUEUE 1 32 33 static struct pci_device_header pci_header = { 34 .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 35 .device_id = PCI_DEVICE_ID_VIRTIO_NET, 36 .header_type = PCI_HEADER_TYPE_NORMAL, 37 .revision_id = 0, 38 .class = 0x020000, 39 .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 40 .subsys_id = VIRTIO_ID_NET, 41 }; 42 43 struct net_device { 44 pthread_mutex_t mutex; 45 46 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 47 struct virtio_net_config config; 48 u32 host_features; 49 u32 guest_features; 50 u16 config_vector; 51 u8 status; 52 u8 isr; 53 u16 queue_selector; 54 u16 base_addr; 55 56 pthread_t io_rx_thread; 57 pthread_mutex_t io_rx_lock; 58 pthread_cond_t io_rx_cond; 59 60 pthread_t io_tx_thread; 61 pthread_mutex_t io_tx_lock; 62 pthread_cond_t io_tx_cond; 63 64 int tap_fd; 65 char tap_name[IFNAMSIZ]; 66 }; 67 68 static struct net_device ndev = { 69 .mutex = PTHREAD_MUTEX_INITIALIZER, 70 71 .config = { 72 .mac = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 }, 73 .status = VIRTIO_NET_S_LINK_UP, 74 }, 75 .host_features = 1UL << VIRTIO_NET_F_MAC 76 | 1UL << VIRTIO_NET_F_CSUM 77 | 1UL << VIRTIO_NET_F_HOST_UFO 78 | 1UL << VIRTIO_NET_F_HOST_TSO4 79 | 1UL << VIRTIO_NET_F_HOST_TSO6 80 | 1UL << VIRTIO_NET_F_GUEST_UFO 81 | 1UL << VIRTIO_NET_F_GUEST_TSO4 82 | 1UL << VIRTIO_NET_F_GUEST_TSO6, 83 }; 84 85 static void *virtio_net_rx_thread(void *p) 86 { 87 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 88 struct virt_queue *vq; 89 struct kvm *kvm; 90 u16 out, in; 91 u16 head; 92 int len; 93 94 kvm = p; 95 vq = &ndev.vqs[VIRTIO_NET_RX_QUEUE]; 96 97 while (1) { 98 mutex_lock(&ndev.io_rx_lock); 99 if (!virt_queue__available(vq)) 100 pthread_cond_wait(&ndev.io_rx_cond, &ndev.io_rx_lock); 101 mutex_unlock(&ndev.io_rx_lock); 102 103 while (virt_queue__available(vq)) { 104 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 105 len = readv(ndev.tap_fd, iov, in); 106 virt_queue__set_used_elem(vq, head, len); 107 108 /* We should interrupt guest right now, otherwise latency is huge. */ 109 virt_queue__trigger_irq(vq, pci_header.irq_line, &ndev.isr, kvm); 110 } 111 112 } 113 114 pthread_exit(NULL); 115 return NULL; 116 117 } 118 119 static void *virtio_net_tx_thread(void *p) 120 { 121 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 122 struct virt_queue *vq; 123 struct kvm *kvm; 124 u16 out, in; 125 u16 head; 126 int len; 127 128 kvm = p; 129 vq = &ndev.vqs[VIRTIO_NET_TX_QUEUE]; 130 131 while (1) { 132 mutex_lock(&ndev.io_tx_lock); 133 if (!virt_queue__available(vq)) 134 pthread_cond_wait(&ndev.io_tx_cond, &ndev.io_tx_lock); 135 mutex_unlock(&ndev.io_tx_lock); 136 137 while (virt_queue__available(vq)) { 138 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 139 len = writev(ndev.tap_fd, iov, out); 140 virt_queue__set_used_elem(vq, head, len); 141 } 142 143 virt_queue__trigger_irq(vq, pci_header.irq_line, &ndev.isr, kvm); 144 145 } 146 147 pthread_exit(NULL); 148 149 return NULL; 150 151 } 152 153 static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, u32 count) 154 { 155 u8 *config_space = (u8 *)&ndev.config; 156 157 if (size != 1 || count != 1) 158 return false; 159 160 if ((offset - VIRTIO_MSI_CONFIG_VECTOR) > sizeof(struct virtio_net_config)) 161 pr_error("config offset is too big: %li", offset - VIRTIO_MSI_CONFIG_VECTOR); 162 163 ioport__write8(data, config_space[offset - VIRTIO_MSI_CONFIG_VECTOR]); 164 165 return true; 166 } 167 168 static bool virtio_net_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) 169 { 170 unsigned long offset = port - ndev.base_addr; 171 bool ret = true; 172 173 mutex_lock(&ndev.mutex); 174 175 switch (offset) { 176 case VIRTIO_PCI_HOST_FEATURES: 177 ioport__write32(data, ndev.host_features); 178 break; 179 case VIRTIO_PCI_GUEST_FEATURES: 180 ret = false; 181 break; 182 case VIRTIO_PCI_QUEUE_PFN: 183 ioport__write32(data, ndev.vqs[ndev.queue_selector].pfn); 184 break; 185 case VIRTIO_PCI_QUEUE_NUM: 186 ioport__write16(data, VIRTIO_NET_QUEUE_SIZE); 187 break; 188 case VIRTIO_PCI_QUEUE_SEL: 189 case VIRTIO_PCI_QUEUE_NOTIFY: 190 ret = false; 191 break; 192 case VIRTIO_PCI_STATUS: 193 ioport__write8(data, ndev.status); 194 break; 195 case VIRTIO_PCI_ISR: 196 ioport__write8(data, ndev.isr); 197 kvm__irq_line(kvm, pci_header.irq_line, VIRTIO_IRQ_LOW); 198 ndev.isr = VIRTIO_IRQ_LOW; 199 break; 200 case VIRTIO_MSI_CONFIG_VECTOR: 201 ioport__write16(data, ndev.config_vector); 202 break; 203 default: 204 ret = virtio_net_pci_io_device_specific_in(data, offset, size, count); 205 }; 206 207 mutex_unlock(&ndev.mutex); 208 209 return ret; 210 } 211 212 static void virtio_net_handle_callback(struct kvm *kvm, u16 queue_index) 213 { 214 switch (queue_index) { 215 case VIRTIO_NET_TX_QUEUE: { 216 mutex_lock(&ndev.io_tx_lock); 217 pthread_cond_signal(&ndev.io_tx_cond); 218 mutex_unlock(&ndev.io_tx_lock); 219 break; 220 } 221 case VIRTIO_NET_RX_QUEUE: { 222 mutex_lock(&ndev.io_rx_lock); 223 pthread_cond_signal(&ndev.io_rx_cond); 224 mutex_unlock(&ndev.io_rx_lock); 225 break; 226 } 227 default: 228 pr_warning("Unknown queue index %u", queue_index); 229 } 230 } 231 232 static bool virtio_net_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) 233 { 234 unsigned long offset = port - ndev.base_addr; 235 bool ret = true; 236 237 mutex_lock(&ndev.mutex); 238 239 switch (offset) { 240 case VIRTIO_PCI_GUEST_FEATURES: 241 ndev.guest_features = ioport__read32(data); 242 break; 243 case VIRTIO_PCI_QUEUE_PFN: { 244 struct virt_queue *queue; 245 void *p; 246 247 assert(ndev.queue_selector < VIRTIO_NET_NUM_QUEUES); 248 249 queue = &ndev.vqs[ndev.queue_selector]; 250 queue->pfn = ioport__read32(data); 251 p = guest_pfn_to_host(kvm, queue->pfn); 252 253 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN); 254 255 break; 256 } 257 case VIRTIO_PCI_QUEUE_SEL: 258 ndev.queue_selector = ioport__read16(data); 259 break; 260 case VIRTIO_PCI_QUEUE_NOTIFY: { 261 u16 queue_index; 262 263 queue_index = ioport__read16(data); 264 virtio_net_handle_callback(kvm, queue_index); 265 break; 266 } 267 case VIRTIO_PCI_STATUS: 268 ndev.status = ioport__read8(data); 269 break; 270 case VIRTIO_MSI_CONFIG_VECTOR: 271 ndev.config_vector = VIRTIO_MSI_NO_VECTOR; 272 break; 273 case VIRTIO_MSI_QUEUE_VECTOR: 274 break; 275 default: 276 ret = false; 277 }; 278 279 mutex_unlock(&ndev.mutex); 280 281 return ret; 282 } 283 284 static void ioevent_callback(struct kvm *kvm, void *param) 285 { 286 virtio_net_handle_callback(kvm, (u64)param); 287 } 288 289 static struct ioport_operations virtio_net_io_ops = { 290 .io_in = virtio_net_pci_io_in, 291 .io_out = virtio_net_pci_io_out, 292 }; 293 294 static bool virtio_net__tap_init(const struct virtio_net_parameters *params) 295 { 296 int sock = socket(AF_INET, SOCK_STREAM, 0); 297 int i, pid, status, offload, hdr_len; 298 struct sockaddr_in sin = {0}; 299 struct ifreq ifr; 300 301 for (i = 0 ; i < 6 ; i++) 302 ndev.config.mac[i] = params->guest_mac[i]; 303 304 ndev.tap_fd = open("/dev/net/tun", O_RDWR); 305 if (ndev.tap_fd < 0) { 306 pr_warning("Unable to open /dev/net/tun"); 307 goto fail; 308 } 309 310 memset(&ifr, 0, sizeof(ifr)); 311 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 312 if (ioctl(ndev.tap_fd, TUNSETIFF, &ifr) < 0) { 313 pr_warning("Config tap device error. Are you root?"); 314 goto fail; 315 } 316 317 strncpy(ndev.tap_name, ifr.ifr_name, sizeof(ndev.tap_name)); 318 319 if (ioctl(ndev.tap_fd, TUNSETNOCSUM, 1) < 0) { 320 pr_warning("Config tap device TUNSETNOCSUM error"); 321 goto fail; 322 } 323 324 hdr_len = sizeof(struct virtio_net_hdr); 325 if (ioctl(ndev.tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) { 326 pr_warning("Config tap device TUNSETVNETHDRSZ error"); 327 goto fail; 328 } 329 330 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 331 if (ioctl(ndev.tap_fd, TUNSETOFFLOAD, offload) < 0) { 332 pr_warning("Config tap device TUNSETOFFLOAD error"); 333 goto fail; 334 } 335 336 if (strcmp(params->script, "none")) { 337 pid = fork(); 338 if (pid == 0) { 339 execl(params->script, params->script, ndev.tap_name, NULL); 340 _exit(1); 341 } else { 342 waitpid(pid, &status, 0); 343 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 344 pr_warning("Fail to setup tap by %s", params->script); 345 goto fail; 346 } 347 } 348 } else { 349 memset(&ifr, 0, sizeof(ifr)); 350 strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name)); 351 sin.sin_addr.s_addr = inet_addr(params->host_ip); 352 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 353 ifr.ifr_addr.sa_family = AF_INET; 354 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 355 pr_warning("Could not set ip address on tap device"); 356 goto fail; 357 } 358 } 359 360 memset(&ifr, 0, sizeof(ifr)); 361 strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name)); 362 ioctl(sock, SIOCGIFFLAGS, &ifr); 363 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 364 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 365 pr_warning("Could not bring tap device up"); 366 367 close(sock); 368 369 return 1; 370 371 fail: 372 if (sock >= 0) 373 close(sock); 374 if (ndev.tap_fd >= 0) 375 close(ndev.tap_fd); 376 377 return 0; 378 } 379 380 static void virtio_net__io_thread_init(struct kvm *kvm) 381 { 382 pthread_mutex_init(&ndev.io_rx_lock, NULL); 383 pthread_cond_init(&ndev.io_tx_cond, NULL); 384 385 pthread_mutex_init(&ndev.io_rx_lock, NULL); 386 pthread_cond_init(&ndev.io_tx_cond, NULL); 387 388 pthread_create(&ndev.io_rx_thread, NULL, virtio_net_rx_thread, (void *)kvm); 389 pthread_create(&ndev.io_tx_thread, NULL, virtio_net_tx_thread, (void *)kvm); 390 } 391 392 void virtio_net__init(const struct virtio_net_parameters *params) 393 { 394 if (virtio_net__tap_init(params)) { 395 u8 dev, line, pin; 396 u16 net_base_addr; 397 u64 i; 398 struct ioevent ioevent; 399 400 if (irq__register_device(VIRTIO_ID_NET, &dev, &pin, &line) < 0) 401 return; 402 403 pci_header.irq_pin = pin; 404 pci_header.irq_line = line; 405 net_base_addr = ioport__register(IOPORT_EMPTY, &virtio_net_io_ops, IOPORT_SIZE, NULL); 406 pci_header.bar[0] = net_base_addr | PCI_BASE_ADDRESS_SPACE_IO; 407 ndev.base_addr = net_base_addr; 408 409 pci__register(&pci_header, dev); 410 411 virtio_net__io_thread_init(params->kvm); 412 413 for (i = 0; i < VIRTIO_NET_NUM_QUEUES; i++) { 414 ioevent = (struct ioevent) { 415 .io_addr = net_base_addr + VIRTIO_PCI_QUEUE_NOTIFY, 416 .io_len = sizeof(u16), 417 .fn = ioevent_callback, 418 .datamatch = i, 419 .fn_ptr = (void *)i, 420 .fn_kvm = params->kvm, 421 .fd = eventfd(0, 0), 422 }; 423 424 ioeventfd__add_event(&ioevent); 425 } 426 } 427 } 428