1 #include "kvm/virtio-net.h" 2 #include "kvm/virtio-pci.h" 3 #include "kvm/virtio.h" 4 #include "kvm/ioport.h" 5 #include "kvm/types.h" 6 #include "kvm/mutex.h" 7 #include "kvm/util.h" 8 #include "kvm/kvm.h" 9 #include "kvm/pci.h" 10 11 #include <linux/virtio_net.h> 12 #include <linux/if_tun.h> 13 #include <net/if.h> 14 #include <sys/ioctl.h> 15 #include <assert.h> 16 #include <fcntl.h> 17 #include <arpa/inet.h> 18 #include <sys/types.h> 19 #include <sys/socket.h> 20 #include <unistd.h> 21 #include <sys/wait.h> 22 23 #define VIRTIO_NET_IRQ 14 24 #define VIRTIO_NET_QUEUE_SIZE 128 25 #define VIRTIO_NET_NUM_QUEUES 2 26 #define VIRTIO_NET_RX_QUEUE 0 27 #define VIRTIO_NET_TX_QUEUE 1 28 #define PCI_VIRTIO_NET_DEVNUM 3 29 30 struct net_device { 31 pthread_mutex_t mutex; 32 33 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 34 struct virtio_net_config net_config; 35 uint32_t host_features; 36 uint32_t guest_features; 37 uint16_t config_vector; 38 uint8_t status; 39 uint16_t queue_selector; 40 41 pthread_t io_rx_thread; 42 pthread_mutex_t io_rx_mutex; 43 pthread_cond_t io_rx_cond; 44 45 pthread_t io_tx_thread; 46 pthread_mutex_t io_tx_mutex; 47 pthread_cond_t io_tx_cond; 48 49 int tap_fd; 50 char tap_name[IFNAMSIZ]; 51 }; 52 53 static struct net_device net_device = { 54 .mutex = PTHREAD_MUTEX_INITIALIZER, 55 56 .net_config = { 57 .mac = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}, 58 .status = VIRTIO_NET_S_LINK_UP, 59 }, 60 .host_features = 1UL << VIRTIO_NET_F_MAC | 61 1UL << VIRTIO_NET_F_CSUM | 62 1UL << VIRTIO_NET_F_HOST_UFO | 63 1UL << VIRTIO_NET_F_HOST_TSO4 | 64 1UL << VIRTIO_NET_F_HOST_TSO6 | 65 1UL << VIRTIO_NET_F_GUEST_UFO | 66 1UL << VIRTIO_NET_F_GUEST_TSO4 | 67 1UL << VIRTIO_NET_F_GUEST_TSO6, 68 }; 69 70 static void *virtio_net_rx_thread(void *p) 71 { 72 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 73 struct virt_queue *vq; 74 struct kvm *self; 75 uint16_t out, in; 76 uint16_t head; 77 int len; 78 79 self = p; 80 vq = &net_device.vqs[VIRTIO_NET_RX_QUEUE]; 81 82 while (1) { 83 mutex_lock(&net_device.io_rx_mutex); 84 if (!virt_queue__available(vq)) 85 pthread_cond_wait(&net_device.io_rx_cond, &net_device.io_rx_mutex); 86 mutex_unlock(&net_device.io_rx_mutex); 87 88 while (virt_queue__available(vq)) { 89 head = virt_queue__get_iov(vq, iov, &out, &in, self); 90 len = readv(net_device.tap_fd, iov, in); 91 virt_queue__set_used_elem(vq, head, len); 92 /* We should interrupt guest right now, otherwise latency is huge. */ 93 kvm__irq_line(self, VIRTIO_NET_IRQ, 1); 94 } 95 96 } 97 98 pthread_exit(NULL); 99 return NULL; 100 101 } 102 103 static void *virtio_net_tx_thread(void *p) 104 { 105 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 106 struct virt_queue *vq; 107 struct kvm *self; 108 uint16_t out, in; 109 uint16_t head; 110 int len; 111 112 self = p; 113 vq = &net_device.vqs[VIRTIO_NET_TX_QUEUE]; 114 115 while (1) { 116 mutex_lock(&net_device.io_tx_mutex); 117 if (!virt_queue__available(vq)) 118 pthread_cond_wait(&net_device.io_tx_cond, &net_device.io_tx_mutex); 119 mutex_unlock(&net_device.io_tx_mutex); 120 121 while (virt_queue__available(vq)) { 122 head = virt_queue__get_iov(vq, iov, &out, &in, self); 123 len = writev(net_device.tap_fd, iov, out); 124 virt_queue__set_used_elem(vq, head, len); 125 } 126 127 kvm__irq_line(self, VIRTIO_NET_IRQ, 1); 128 } 129 130 pthread_exit(NULL); 131 return NULL; 132 133 } 134 static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, uint32_t count) 135 { 136 uint8_t *config_space = (uint8_t *) &net_device.net_config; 137 138 if (size != 1 || count != 1) 139 return false; 140 141 if ((offset - VIRTIO_PCI_CONFIG_NOMSI) > sizeof(struct virtio_net_config)) 142 error("config offset is too big: %li", offset - VIRTIO_PCI_CONFIG_NOMSI); 143 144 ioport__write8(data, config_space[offset - VIRTIO_PCI_CONFIG_NOMSI]); 145 146 return true; 147 } 148 149 static bool virtio_net_pci_io_in(struct kvm *self, uint16_t port, void *data, int size, uint32_t count) 150 { 151 unsigned long offset = port - IOPORT_VIRTIO_NET; 152 bool ret = true; 153 154 mutex_lock(&net_device.mutex); 155 156 switch (offset) { 157 case VIRTIO_PCI_HOST_FEATURES: 158 ioport__write32(data, net_device.host_features); 159 break; 160 case VIRTIO_PCI_GUEST_FEATURES: 161 ret = false; 162 break; 163 case VIRTIO_PCI_QUEUE_PFN: 164 ioport__write32(data, net_device.vqs[net_device.queue_selector].pfn); 165 break; 166 case VIRTIO_PCI_QUEUE_NUM: 167 ioport__write16(data, VIRTIO_NET_QUEUE_SIZE); 168 break; 169 case VIRTIO_PCI_QUEUE_SEL: 170 case VIRTIO_PCI_QUEUE_NOTIFY: 171 ret = false; 172 break; 173 case VIRTIO_PCI_STATUS: 174 ioport__write8(data, net_device.status); 175 break; 176 case VIRTIO_PCI_ISR: 177 ioport__write8(data, 0x1); 178 kvm__irq_line(self, VIRTIO_NET_IRQ, 0); 179 break; 180 case VIRTIO_MSI_CONFIG_VECTOR: 181 ioport__write16(data, net_device.config_vector); 182 break; 183 default: 184 ret = virtio_net_pci_io_device_specific_in(data, offset, size, count); 185 }; 186 187 mutex_unlock(&net_device.mutex); 188 189 return ret; 190 } 191 192 static void virtio_net_handle_callback(struct kvm *self, uint16_t queue_index) 193 { 194 if (queue_index == VIRTIO_NET_TX_QUEUE) { 195 196 mutex_lock(&net_device.io_tx_mutex); 197 pthread_cond_signal(&net_device.io_tx_cond); 198 mutex_unlock(&net_device.io_tx_mutex); 199 200 } else if (queue_index == VIRTIO_NET_RX_QUEUE) { 201 202 mutex_lock(&net_device.io_rx_mutex); 203 pthread_cond_signal(&net_device.io_rx_cond); 204 mutex_unlock(&net_device.io_rx_mutex); 205 206 } 207 } 208 209 static bool virtio_net_pci_io_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count) 210 { 211 unsigned long offset = port - IOPORT_VIRTIO_NET; 212 bool ret = true; 213 214 mutex_lock(&net_device.mutex); 215 216 switch (offset) { 217 case VIRTIO_PCI_GUEST_FEATURES: 218 net_device.guest_features = ioport__read32(data); 219 break; 220 case VIRTIO_PCI_QUEUE_PFN: { 221 struct virt_queue *queue; 222 void *p; 223 224 assert(net_device.queue_selector < VIRTIO_NET_NUM_QUEUES); 225 226 queue = &net_device.vqs[net_device.queue_selector]; 227 queue->pfn = ioport__read32(data); 228 p = guest_flat_to_host(self, queue->pfn << 12); 229 230 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, 4096); 231 232 break; 233 } 234 case VIRTIO_PCI_QUEUE_SEL: 235 net_device.queue_selector = ioport__read16(data); 236 break; 237 case VIRTIO_PCI_QUEUE_NOTIFY: { 238 uint16_t queue_index; 239 queue_index = ioport__read16(data); 240 virtio_net_handle_callback(self, queue_index); 241 break; 242 } 243 case VIRTIO_PCI_STATUS: 244 net_device.status = ioport__read8(data); 245 break; 246 case VIRTIO_MSI_CONFIG_VECTOR: 247 net_device.config_vector = VIRTIO_MSI_NO_VECTOR; 248 break; 249 case VIRTIO_MSI_QUEUE_VECTOR: 250 break; 251 default: 252 ret = false; 253 }; 254 255 mutex_unlock(&net_device.mutex); 256 return ret; 257 } 258 259 static struct ioport_operations virtio_net_io_ops = { 260 .io_in = virtio_net_pci_io_in, 261 .io_out = virtio_net_pci_io_out, 262 }; 263 264 #define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4 265 #define PCI_DEVICE_ID_VIRTIO_NET 0x1000 266 #define PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET 0x1af4 267 #define PCI_SUBSYSTEM_ID_VIRTIO_NET 0x0001 268 269 static struct pci_device_header virtio_net_pci_device = { 270 .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 271 .device_id = PCI_DEVICE_ID_VIRTIO_NET, 272 .header_type = PCI_HEADER_TYPE_NORMAL, 273 .revision_id = 0, 274 .class = 0x020000, 275 .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 276 .subsys_id = PCI_SUBSYSTEM_ID_VIRTIO_NET, 277 .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO, 278 .irq_pin = 3, 279 .irq_line = VIRTIO_NET_IRQ, 280 }; 281 282 static bool virtio_net__tap_init(const struct virtio_net_parameters *params) 283 { 284 int sock = socket(AF_INET, SOCK_STREAM, 0); 285 int i, pid, status, offload, hdr_len; 286 struct sockaddr_in sin = {0}; 287 struct ifreq ifr; 288 289 for (i = 0 ; i < 6 ; i++) 290 net_device.net_config.mac[i] = params->guest_mac[i]; 291 292 net_device.tap_fd = open("/dev/net/tun", O_RDWR); 293 if (net_device.tap_fd < 0) { 294 warning("Unable to open /dev/net/tun"); 295 goto fail; 296 } 297 298 memset(&ifr, 0, sizeof(ifr)); 299 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 300 if (ioctl(net_device.tap_fd, TUNSETIFF, &ifr) < 0) { 301 warning("Config tap device error. Are you root?"); 302 goto fail; 303 } 304 305 strncpy(net_device.tap_name, ifr.ifr_name, sizeof(net_device.tap_name)); 306 307 if (ioctl(net_device.tap_fd, TUNSETNOCSUM, 1) < 0) { 308 warning("Config tap device TUNSETNOCSUM error"); 309 goto fail; 310 } 311 312 hdr_len = sizeof(struct virtio_net_hdr); 313 if (ioctl(net_device.tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) { 314 warning("Config tap device TUNSETVNETHDRSZ error"); 315 goto fail; 316 } 317 318 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 319 if (ioctl(net_device.tap_fd, TUNSETOFFLOAD, offload) < 0) { 320 warning("Config tap device TUNSETOFFLOAD error"); 321 goto fail; 322 } 323 324 if (strcmp(params->script, "none")) { 325 pid = fork(); 326 if (pid == 0) { 327 execl(params->script, params->script, net_device.tap_name, NULL); 328 _exit(1); 329 } else { 330 waitpid(pid, &status, 0); 331 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 332 warning("Fail to setup tap by %s", params->script); 333 goto fail; 334 } 335 } 336 } else { 337 memset(&ifr, 0, sizeof(ifr)); 338 strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 339 sin.sin_addr.s_addr = inet_addr(params->host_ip); 340 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 341 ifr.ifr_addr.sa_family = AF_INET; 342 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 343 warning("Could not set ip address on tap device"); 344 goto fail; 345 } 346 } 347 348 memset(&ifr, 0, sizeof(ifr)); 349 strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 350 ioctl(sock, SIOCGIFFLAGS, &ifr); 351 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 352 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 353 warning("Could not bring tap device up"); 354 355 close(sock); 356 357 return 1; 358 359 fail: 360 if (sock >= 0) 361 close(sock); 362 if (net_device.tap_fd >= 0) 363 close(net_device.tap_fd); 364 365 return 0; 366 } 367 368 static void virtio_net__io_thread_init(struct kvm *self) 369 { 370 pthread_mutex_init(&net_device.io_rx_mutex, NULL); 371 pthread_cond_init(&net_device.io_tx_cond, NULL); 372 373 pthread_mutex_init(&net_device.io_rx_mutex, NULL); 374 pthread_cond_init(&net_device.io_tx_cond, NULL); 375 376 pthread_create(&net_device.io_rx_thread, NULL, virtio_net_rx_thread, (void *)self); 377 pthread_create(&net_device.io_tx_thread, NULL, virtio_net_tx_thread, (void *)self); 378 } 379 380 void virtio_net__init(const struct virtio_net_parameters *params) 381 { 382 if (virtio_net__tap_init(params)) { 383 pci__register(&virtio_net_pci_device, PCI_VIRTIO_NET_DEVNUM); 384 ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE); 385 386 virtio_net__io_thread_init(params->self); 387 } 388 } 389