1 #include "kvm/virtio-net.h" 2 #include "kvm/virtio-pci-dev.h" 3 #include "kvm/virtio.h" 4 #include "kvm/ioport.h" 5 #include "kvm/types.h" 6 #include "kvm/mutex.h" 7 #include "kvm/util.h" 8 #include "kvm/kvm.h" 9 #include "kvm/pci.h" 10 #include "kvm/irq.h" 11 12 #include <linux/virtio_net.h> 13 #include <linux/if_tun.h> 14 #include <net/if.h> 15 #include <sys/ioctl.h> 16 #include <assert.h> 17 #include <fcntl.h> 18 #include <arpa/inet.h> 19 #include <sys/types.h> 20 #include <sys/socket.h> 21 #include <unistd.h> 22 #include <sys/wait.h> 23 24 #define VIRTIO_NET_QUEUE_SIZE 128 25 #define VIRTIO_NET_NUM_QUEUES 2 26 #define VIRTIO_NET_RX_QUEUE 0 27 #define VIRTIO_NET_TX_QUEUE 1 28 29 static struct pci_device_header virtio_net_pci_device = { 30 .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 31 .device_id = PCI_DEVICE_ID_VIRTIO_NET, 32 .header_type = PCI_HEADER_TYPE_NORMAL, 33 .revision_id = 0, 34 .class = 0x020000, 35 .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 36 .subsys_id = PCI_SUBSYSTEM_ID_VIRTIO_NET, 37 .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO, 38 }; 39 40 struct net_device { 41 pthread_mutex_t mutex; 42 43 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 44 struct virtio_net_config net_config; 45 u32 host_features; 46 u32 guest_features; 47 u16 config_vector; 48 u8 status; 49 u8 isr; 50 u16 queue_selector; 51 52 pthread_t io_rx_thread; 53 pthread_mutex_t io_rx_mutex; 54 pthread_cond_t io_rx_cond; 55 56 pthread_t io_tx_thread; 57 pthread_mutex_t io_tx_mutex; 58 pthread_cond_t io_tx_cond; 59 60 int tap_fd; 61 char tap_name[IFNAMSIZ]; 62 }; 63 64 static struct net_device net_device = { 65 .mutex = PTHREAD_MUTEX_INITIALIZER, 66 67 .net_config = { 68 .mac = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 }, 69 .status = VIRTIO_NET_S_LINK_UP, 70 }, 71 .host_features = 1UL << VIRTIO_NET_F_MAC 72 | 1UL << VIRTIO_NET_F_CSUM 73 | 1UL << VIRTIO_NET_F_HOST_UFO 74 | 1UL << VIRTIO_NET_F_HOST_TSO4 75 | 1UL << VIRTIO_NET_F_HOST_TSO6 76 | 1UL << VIRTIO_NET_F_GUEST_UFO 77 | 1UL << VIRTIO_NET_F_GUEST_TSO4 78 | 1UL << VIRTIO_NET_F_GUEST_TSO6, 79 }; 80 81 static void *virtio_net_rx_thread(void *p) 82 { 83 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 84 struct virt_queue *vq; 85 struct kvm *self; 86 u16 out, in; 87 u16 head; 88 int len; 89 90 self = p; 91 vq = &net_device.vqs[VIRTIO_NET_RX_QUEUE]; 92 93 while (1) { 94 mutex_lock(&net_device.io_rx_mutex); 95 if (!virt_queue__available(vq)) 96 pthread_cond_wait(&net_device.io_rx_cond, &net_device.io_rx_mutex); 97 mutex_unlock(&net_device.io_rx_mutex); 98 99 while (virt_queue__available(vq)) { 100 head = virt_queue__get_iov(vq, iov, &out, &in, self); 101 len = readv(net_device.tap_fd, iov, in); 102 virt_queue__set_used_elem(vq, head, len); 103 104 /* We should interrupt guest right now, otherwise latency is huge. */ 105 virt_queue__trigger_irq(vq, virtio_net_pci_device.irq_line, &net_device.isr, self); 106 } 107 108 } 109 110 pthread_exit(NULL); 111 return NULL; 112 113 } 114 115 static void *virtio_net_tx_thread(void *p) 116 { 117 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 118 struct virt_queue *vq; 119 struct kvm *self; 120 u16 out, in; 121 u16 head; 122 int len; 123 124 self = p; 125 vq = &net_device.vqs[VIRTIO_NET_TX_QUEUE]; 126 127 while (1) { 128 mutex_lock(&net_device.io_tx_mutex); 129 if (!virt_queue__available(vq)) 130 pthread_cond_wait(&net_device.io_tx_cond, &net_device.io_tx_mutex); 131 mutex_unlock(&net_device.io_tx_mutex); 132 133 while (virt_queue__available(vq)) { 134 head = virt_queue__get_iov(vq, iov, &out, &in, self); 135 len = writev(net_device.tap_fd, iov, out); 136 virt_queue__set_used_elem(vq, head, len); 137 } 138 139 virt_queue__trigger_irq(vq, virtio_net_pci_device.irq_line, &net_device.isr, self); 140 141 } 142 143 pthread_exit(NULL); 144 145 return NULL; 146 147 } 148 149 static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, u32 count) 150 { 151 u8 *config_space = (u8 *) &net_device.net_config; 152 153 if (size != 1 || count != 1) 154 return false; 155 156 if ((offset - VIRTIO_MSI_CONFIG_VECTOR) > sizeof(struct virtio_net_config)) 157 error("config offset is too big: %li", offset - VIRTIO_MSI_CONFIG_VECTOR); 158 159 ioport__write8(data, config_space[offset - VIRTIO_MSI_CONFIG_VECTOR]); 160 161 return true; 162 } 163 164 static bool virtio_net_pci_io_in(struct kvm *self, u16 port, void *data, int size, u32 count) 165 { 166 unsigned long offset = port - IOPORT_VIRTIO_NET; 167 bool ret = true; 168 169 mutex_lock(&net_device.mutex); 170 171 switch (offset) { 172 case VIRTIO_PCI_HOST_FEATURES: 173 ioport__write32(data, net_device.host_features); 174 break; 175 case VIRTIO_PCI_GUEST_FEATURES: 176 ret = false; 177 break; 178 case VIRTIO_PCI_QUEUE_PFN: 179 ioport__write32(data, net_device.vqs[net_device.queue_selector].pfn); 180 break; 181 case VIRTIO_PCI_QUEUE_NUM: 182 ioport__write16(data, VIRTIO_NET_QUEUE_SIZE); 183 break; 184 case VIRTIO_PCI_QUEUE_SEL: 185 case VIRTIO_PCI_QUEUE_NOTIFY: 186 ret = false; 187 break; 188 case VIRTIO_PCI_STATUS: 189 ioport__write8(data, net_device.status); 190 break; 191 case VIRTIO_PCI_ISR: 192 ioport__write8(data, net_device.isr); 193 kvm__irq_line(self, virtio_net_pci_device.irq_line, VIRTIO_IRQ_LOW); 194 net_device.isr = VIRTIO_IRQ_LOW; 195 break; 196 case VIRTIO_MSI_CONFIG_VECTOR: 197 ioport__write16(data, net_device.config_vector); 198 break; 199 default: 200 ret = virtio_net_pci_io_device_specific_in(data, offset, size, count); 201 }; 202 203 mutex_unlock(&net_device.mutex); 204 205 return ret; 206 } 207 208 static void virtio_net_handle_callback(struct kvm *self, u16 queue_index) 209 { 210 switch (queue_index) { 211 case VIRTIO_NET_TX_QUEUE: { 212 mutex_lock(&net_device.io_tx_mutex); 213 pthread_cond_signal(&net_device.io_tx_cond); 214 mutex_unlock(&net_device.io_tx_mutex); 215 break; 216 } 217 case VIRTIO_NET_RX_QUEUE: { 218 mutex_lock(&net_device.io_rx_mutex); 219 pthread_cond_signal(&net_device.io_rx_cond); 220 mutex_unlock(&net_device.io_rx_mutex); 221 break; 222 } 223 default: 224 warning("Unknown queue index %u", queue_index); 225 } 226 } 227 228 static bool virtio_net_pci_io_out(struct kvm *self, u16 port, void *data, int size, u32 count) 229 { 230 unsigned long offset = port - IOPORT_VIRTIO_NET; 231 bool ret = true; 232 233 mutex_lock(&net_device.mutex); 234 235 switch (offset) { 236 case VIRTIO_PCI_GUEST_FEATURES: 237 net_device.guest_features = ioport__read32(data); 238 break; 239 case VIRTIO_PCI_QUEUE_PFN: { 240 struct virt_queue *queue; 241 void *p; 242 243 assert(net_device.queue_selector < VIRTIO_NET_NUM_QUEUES); 244 245 queue = &net_device.vqs[net_device.queue_selector]; 246 queue->pfn = ioport__read32(data); 247 p = guest_pfn_to_host(self, queue->pfn); 248 249 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN); 250 251 break; 252 } 253 case VIRTIO_PCI_QUEUE_SEL: 254 net_device.queue_selector = ioport__read16(data); 255 break; 256 case VIRTIO_PCI_QUEUE_NOTIFY: { 257 u16 queue_index; 258 queue_index = ioport__read16(data); 259 virtio_net_handle_callback(self, queue_index); 260 break; 261 } 262 case VIRTIO_PCI_STATUS: 263 net_device.status = ioport__read8(data); 264 break; 265 case VIRTIO_MSI_CONFIG_VECTOR: 266 net_device.config_vector = VIRTIO_MSI_NO_VECTOR; 267 break; 268 case VIRTIO_MSI_QUEUE_VECTOR: 269 break; 270 default: 271 ret = false; 272 }; 273 274 mutex_unlock(&net_device.mutex); 275 276 return ret; 277 } 278 279 static struct ioport_operations virtio_net_io_ops = { 280 .io_in = virtio_net_pci_io_in, 281 .io_out = virtio_net_pci_io_out, 282 }; 283 284 static bool virtio_net__tap_init(const struct virtio_net_parameters *params) 285 { 286 int sock = socket(AF_INET, SOCK_STREAM, 0); 287 int i, pid, status, offload, hdr_len; 288 struct sockaddr_in sin = {0}; 289 struct ifreq ifr; 290 291 for (i = 0 ; i < 6 ; i++) 292 net_device.net_config.mac[i] = params->guest_mac[i]; 293 294 net_device.tap_fd = open("/dev/net/tun", O_RDWR); 295 if (net_device.tap_fd < 0) { 296 warning("Unable to open /dev/net/tun"); 297 goto fail; 298 } 299 300 memset(&ifr, 0, sizeof(ifr)); 301 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 302 if (ioctl(net_device.tap_fd, TUNSETIFF, &ifr) < 0) { 303 warning("Config tap device error. Are you root?"); 304 goto fail; 305 } 306 307 strncpy(net_device.tap_name, ifr.ifr_name, sizeof(net_device.tap_name)); 308 309 if (ioctl(net_device.tap_fd, TUNSETNOCSUM, 1) < 0) { 310 warning("Config tap device TUNSETNOCSUM error"); 311 goto fail; 312 } 313 314 hdr_len = sizeof(struct virtio_net_hdr); 315 if (ioctl(net_device.tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) { 316 warning("Config tap device TUNSETVNETHDRSZ error"); 317 goto fail; 318 } 319 320 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 321 if (ioctl(net_device.tap_fd, TUNSETOFFLOAD, offload) < 0) { 322 warning("Config tap device TUNSETOFFLOAD error"); 323 goto fail; 324 } 325 326 if (strcmp(params->script, "none")) { 327 pid = fork(); 328 if (pid == 0) { 329 execl(params->script, params->script, net_device.tap_name, NULL); 330 _exit(1); 331 } else { 332 waitpid(pid, &status, 0); 333 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 334 warning("Fail to setup tap by %s", params->script); 335 goto fail; 336 } 337 } 338 } else { 339 memset(&ifr, 0, sizeof(ifr)); 340 strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 341 sin.sin_addr.s_addr = inet_addr(params->host_ip); 342 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 343 ifr.ifr_addr.sa_family = AF_INET; 344 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 345 warning("Could not set ip address on tap device"); 346 goto fail; 347 } 348 } 349 350 memset(&ifr, 0, sizeof(ifr)); 351 strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 352 ioctl(sock, SIOCGIFFLAGS, &ifr); 353 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 354 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 355 warning("Could not bring tap device up"); 356 357 close(sock); 358 359 return 1; 360 361 fail: 362 if (sock >= 0) 363 close(sock); 364 if (net_device.tap_fd >= 0) 365 close(net_device.tap_fd); 366 367 return 0; 368 } 369 370 static void virtio_net__io_thread_init(struct kvm *self) 371 { 372 pthread_mutex_init(&net_device.io_rx_mutex, NULL); 373 pthread_cond_init(&net_device.io_tx_cond, NULL); 374 375 pthread_mutex_init(&net_device.io_rx_mutex, NULL); 376 pthread_cond_init(&net_device.io_tx_cond, NULL); 377 378 pthread_create(&net_device.io_rx_thread, NULL, virtio_net_rx_thread, (void *)self); 379 pthread_create(&net_device.io_tx_thread, NULL, virtio_net_tx_thread, (void *)self); 380 } 381 382 void virtio_net__init(const struct virtio_net_parameters *params) 383 { 384 if (virtio_net__tap_init(params)) { 385 u8 dev, line, pin; 386 387 if (irq__register_device(PCI_DEVICE_ID_VIRTIO_NET, &dev, &pin, &line) < 0) 388 return; 389 390 virtio_net_pci_device.irq_pin = pin; 391 virtio_net_pci_device.irq_line = line; 392 pci__register(&virtio_net_pci_device, dev); 393 ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE); 394 395 virtio_net__io_thread_init(params->self); 396 } 397 } 398