1 #include "kvm/virtio-net.h" 2 #include "kvm/virtio-pci.h" 3 #include "kvm/virtio-pci-dev.h" 4 #include "kvm/virtio.h" 5 #include "kvm/ioport.h" 6 #include "kvm/types.h" 7 #include "kvm/mutex.h" 8 #include "kvm/util.h" 9 #include "kvm/kvm.h" 10 #include "kvm/pci.h" 11 #include "kvm/irq.h" 12 13 #include <linux/virtio_net.h> 14 #include <linux/if_tun.h> 15 #include <net/if.h> 16 #include <sys/ioctl.h> 17 #include <assert.h> 18 #include <fcntl.h> 19 #include <arpa/inet.h> 20 #include <sys/types.h> 21 #include <sys/socket.h> 22 #include <unistd.h> 23 #include <sys/wait.h> 24 25 #define VIRTIO_NET_QUEUE_SIZE 128 26 #define VIRTIO_NET_NUM_QUEUES 2 27 #define VIRTIO_NET_RX_QUEUE 0 28 #define VIRTIO_NET_TX_QUEUE 1 29 30 static struct pci_device_header virtio_net_pci_device = { 31 .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 32 .device_id = PCI_DEVICE_ID_VIRTIO_NET, 33 .header_type = PCI_HEADER_TYPE_NORMAL, 34 .revision_id = 0, 35 .class = 0x020000, 36 .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 37 .subsys_id = PCI_SUBSYSTEM_ID_VIRTIO_NET, 38 .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO, 39 }; 40 41 struct net_device { 42 pthread_mutex_t mutex; 43 44 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 45 struct virtio_net_config net_config; 46 u32 host_features; 47 u32 guest_features; 48 u16 config_vector; 49 u8 status; 50 u8 isr; 51 u16 queue_selector; 52 53 pthread_t io_rx_thread; 54 pthread_mutex_t io_rx_mutex; 55 pthread_cond_t io_rx_cond; 56 57 pthread_t io_tx_thread; 58 pthread_mutex_t io_tx_mutex; 59 pthread_cond_t io_tx_cond; 60 61 int tap_fd; 62 char tap_name[IFNAMSIZ]; 63 }; 64 65 static struct net_device net_device = { 66 .mutex = PTHREAD_MUTEX_INITIALIZER, 67 68 .net_config = { 69 .mac = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 }, 70 .status = VIRTIO_NET_S_LINK_UP, 71 }, 72 .host_features = 1UL << VIRTIO_NET_F_MAC 73 | 1UL << VIRTIO_NET_F_CSUM 74 | 1UL << VIRTIO_NET_F_HOST_UFO 75 | 1UL << VIRTIO_NET_F_HOST_TSO4 76 | 1UL << VIRTIO_NET_F_HOST_TSO6 77 | 1UL << VIRTIO_NET_F_GUEST_UFO 78 | 1UL << VIRTIO_NET_F_GUEST_TSO4 79 | 1UL << VIRTIO_NET_F_GUEST_TSO6, 80 }; 81 82 static void *virtio_net_rx_thread(void *p) 83 { 84 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 85 struct virt_queue *vq; 86 struct kvm *self; 87 u16 out, in; 88 u16 head; 89 int len; 90 91 self = p; 92 vq = &net_device.vqs[VIRTIO_NET_RX_QUEUE]; 93 94 while (1) { 95 mutex_lock(&net_device.io_rx_mutex); 96 if (!virt_queue__available(vq)) 97 pthread_cond_wait(&net_device.io_rx_cond, &net_device.io_rx_mutex); 98 mutex_unlock(&net_device.io_rx_mutex); 99 100 while (virt_queue__available(vq)) { 101 head = virt_queue__get_iov(vq, iov, &out, &in, self); 102 len = readv(net_device.tap_fd, iov, in); 103 virt_queue__set_used_elem(vq, head, len); 104 105 /* We should interrupt guest right now, otherwise latency is huge. */ 106 virt_queue__trigger_irq(vq, virtio_net_pci_device.irq_line, &net_device.isr, self); 107 } 108 109 } 110 111 pthread_exit(NULL); 112 return NULL; 113 114 } 115 116 static void *virtio_net_tx_thread(void *p) 117 { 118 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 119 struct virt_queue *vq; 120 struct kvm *self; 121 u16 out, in; 122 u16 head; 123 int len; 124 125 self = p; 126 vq = &net_device.vqs[VIRTIO_NET_TX_QUEUE]; 127 128 while (1) { 129 mutex_lock(&net_device.io_tx_mutex); 130 if (!virt_queue__available(vq)) 131 pthread_cond_wait(&net_device.io_tx_cond, &net_device.io_tx_mutex); 132 mutex_unlock(&net_device.io_tx_mutex); 133 134 while (virt_queue__available(vq)) { 135 head = virt_queue__get_iov(vq, iov, &out, &in, self); 136 len = writev(net_device.tap_fd, iov, out); 137 virt_queue__set_used_elem(vq, head, len); 138 } 139 140 virt_queue__trigger_irq(vq, virtio_net_pci_device.irq_line, &net_device.isr, self); 141 142 } 143 144 pthread_exit(NULL); 145 146 return NULL; 147 148 } 149 150 static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, u32 count) 151 { 152 u8 *config_space = (u8 *) &net_device.net_config; 153 154 if (size != 1 || count != 1) 155 return false; 156 157 if ((offset - VIRTIO_PCI_CONFIG_NOMSI) > sizeof(struct virtio_net_config)) 158 error("config offset is too big: %li", offset - VIRTIO_PCI_CONFIG_NOMSI); 159 160 ioport__write8(data, config_space[offset - VIRTIO_PCI_CONFIG_NOMSI]); 161 162 return true; 163 } 164 165 static bool virtio_net_pci_io_in(struct kvm *self, u16 port, void *data, int size, u32 count) 166 { 167 unsigned long offset = port - IOPORT_VIRTIO_NET; 168 bool ret = true; 169 170 mutex_lock(&net_device.mutex); 171 172 switch (offset) { 173 case VIRTIO_PCI_HOST_FEATURES: 174 ioport__write32(data, net_device.host_features); 175 break; 176 case VIRTIO_PCI_GUEST_FEATURES: 177 ret = false; 178 break; 179 case VIRTIO_PCI_QUEUE_PFN: 180 ioport__write32(data, net_device.vqs[net_device.queue_selector].pfn); 181 break; 182 case VIRTIO_PCI_QUEUE_NUM: 183 ioport__write16(data, VIRTIO_NET_QUEUE_SIZE); 184 break; 185 case VIRTIO_PCI_QUEUE_SEL: 186 case VIRTIO_PCI_QUEUE_NOTIFY: 187 ret = false; 188 break; 189 case VIRTIO_PCI_STATUS: 190 ioport__write8(data, net_device.status); 191 break; 192 case VIRTIO_PCI_ISR: 193 ioport__write8(data, net_device.isr); 194 kvm__irq_line(self, virtio_net_pci_device.irq_line, VIRTIO_IRQ_LOW); 195 net_device.isr = VIRTIO_IRQ_LOW; 196 break; 197 case VIRTIO_MSI_CONFIG_VECTOR: 198 ioport__write16(data, net_device.config_vector); 199 break; 200 default: 201 ret = virtio_net_pci_io_device_specific_in(data, offset, size, count); 202 }; 203 204 mutex_unlock(&net_device.mutex); 205 206 return ret; 207 } 208 209 static void virtio_net_handle_callback(struct kvm *self, u16 queue_index) 210 { 211 switch (queue_index) { 212 case VIRTIO_NET_TX_QUEUE: { 213 mutex_lock(&net_device.io_tx_mutex); 214 pthread_cond_signal(&net_device.io_tx_cond); 215 mutex_unlock(&net_device.io_tx_mutex); 216 break; 217 } 218 case VIRTIO_NET_RX_QUEUE: { 219 mutex_lock(&net_device.io_rx_mutex); 220 pthread_cond_signal(&net_device.io_rx_cond); 221 mutex_unlock(&net_device.io_rx_mutex); 222 break; 223 } 224 default: 225 warning("Unknown queue index %u", queue_index); 226 } 227 } 228 229 static bool virtio_net_pci_io_out(struct kvm *self, u16 port, void *data, int size, u32 count) 230 { 231 unsigned long offset = port - IOPORT_VIRTIO_NET; 232 bool ret = true; 233 234 mutex_lock(&net_device.mutex); 235 236 switch (offset) { 237 case VIRTIO_PCI_GUEST_FEATURES: 238 net_device.guest_features = ioport__read32(data); 239 break; 240 case VIRTIO_PCI_QUEUE_PFN: { 241 struct virt_queue *queue; 242 void *p; 243 244 assert(net_device.queue_selector < VIRTIO_NET_NUM_QUEUES); 245 246 queue = &net_device.vqs[net_device.queue_selector]; 247 queue->pfn = ioport__read32(data); 248 p = guest_flat_to_host(self, queue->pfn << 12); 249 250 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, 4096); 251 252 break; 253 } 254 case VIRTIO_PCI_QUEUE_SEL: 255 net_device.queue_selector = ioport__read16(data); 256 break; 257 case VIRTIO_PCI_QUEUE_NOTIFY: { 258 u16 queue_index; 259 queue_index = ioport__read16(data); 260 virtio_net_handle_callback(self, queue_index); 261 break; 262 } 263 case VIRTIO_PCI_STATUS: 264 net_device.status = ioport__read8(data); 265 break; 266 case VIRTIO_MSI_CONFIG_VECTOR: 267 net_device.config_vector = VIRTIO_MSI_NO_VECTOR; 268 break; 269 case VIRTIO_MSI_QUEUE_VECTOR: 270 break; 271 default: 272 ret = false; 273 }; 274 275 mutex_unlock(&net_device.mutex); 276 277 return ret; 278 } 279 280 static struct ioport_operations virtio_net_io_ops = { 281 .io_in = virtio_net_pci_io_in, 282 .io_out = virtio_net_pci_io_out, 283 }; 284 285 static bool virtio_net__tap_init(const struct virtio_net_parameters *params) 286 { 287 int sock = socket(AF_INET, SOCK_STREAM, 0); 288 int i, pid, status, offload, hdr_len; 289 struct sockaddr_in sin = {0}; 290 struct ifreq ifr; 291 292 for (i = 0 ; i < 6 ; i++) 293 net_device.net_config.mac[i] = params->guest_mac[i]; 294 295 net_device.tap_fd = open("/dev/net/tun", O_RDWR); 296 if (net_device.tap_fd < 0) { 297 warning("Unable to open /dev/net/tun"); 298 goto fail; 299 } 300 301 memset(&ifr, 0, sizeof(ifr)); 302 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 303 if (ioctl(net_device.tap_fd, TUNSETIFF, &ifr) < 0) { 304 warning("Config tap device error. Are you root?"); 305 goto fail; 306 } 307 308 strncpy(net_device.tap_name, ifr.ifr_name, sizeof(net_device.tap_name)); 309 310 if (ioctl(net_device.tap_fd, TUNSETNOCSUM, 1) < 0) { 311 warning("Config tap device TUNSETNOCSUM error"); 312 goto fail; 313 } 314 315 hdr_len = sizeof(struct virtio_net_hdr); 316 if (ioctl(net_device.tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) { 317 warning("Config tap device TUNSETVNETHDRSZ error"); 318 goto fail; 319 } 320 321 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 322 if (ioctl(net_device.tap_fd, TUNSETOFFLOAD, offload) < 0) { 323 warning("Config tap device TUNSETOFFLOAD error"); 324 goto fail; 325 } 326 327 if (strcmp(params->script, "none")) { 328 pid = fork(); 329 if (pid == 0) { 330 execl(params->script, params->script, net_device.tap_name, NULL); 331 _exit(1); 332 } else { 333 waitpid(pid, &status, 0); 334 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 335 warning("Fail to setup tap by %s", params->script); 336 goto fail; 337 } 338 } 339 } else { 340 memset(&ifr, 0, sizeof(ifr)); 341 strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 342 sin.sin_addr.s_addr = inet_addr(params->host_ip); 343 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 344 ifr.ifr_addr.sa_family = AF_INET; 345 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 346 warning("Could not set ip address on tap device"); 347 goto fail; 348 } 349 } 350 351 memset(&ifr, 0, sizeof(ifr)); 352 strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 353 ioctl(sock, SIOCGIFFLAGS, &ifr); 354 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 355 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 356 warning("Could not bring tap device up"); 357 358 close(sock); 359 360 return 1; 361 362 fail: 363 if (sock >= 0) 364 close(sock); 365 if (net_device.tap_fd >= 0) 366 close(net_device.tap_fd); 367 368 return 0; 369 } 370 371 static void virtio_net__io_thread_init(struct kvm *self) 372 { 373 pthread_mutex_init(&net_device.io_rx_mutex, NULL); 374 pthread_cond_init(&net_device.io_tx_cond, NULL); 375 376 pthread_mutex_init(&net_device.io_rx_mutex, NULL); 377 pthread_cond_init(&net_device.io_tx_cond, NULL); 378 379 pthread_create(&net_device.io_rx_thread, NULL, virtio_net_rx_thread, (void *)self); 380 pthread_create(&net_device.io_tx_thread, NULL, virtio_net_tx_thread, (void *)self); 381 } 382 383 void virtio_net__init(const struct virtio_net_parameters *params) 384 { 385 if (virtio_net__tap_init(params)) { 386 u8 dev, line, pin; 387 388 if (irq__register_device(PCI_DEVICE_ID_VIRTIO_NET, &dev, &pin, &line) < 0) 389 return; 390 391 virtio_net_pci_device.irq_pin = pin; 392 virtio_net_pci_device.irq_line = line; 393 pci__register(&virtio_net_pci_device, dev); 394 ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE); 395 396 virtio_net__io_thread_init(params->self); 397 } 398 } 399