1 #include "kvm/virtio-net.h" 2 #include "kvm/virtio-pci.h" 3 #include "kvm/virtio-pci-dev.h" 4 #include "kvm/virtio.h" 5 #include "kvm/ioport.h" 6 #include "kvm/types.h" 7 #include "kvm/mutex.h" 8 #include "kvm/util.h" 9 #include "kvm/kvm.h" 10 #include "kvm/pci.h" 11 12 #include <linux/virtio_net.h> 13 #include <linux/if_tun.h> 14 #include <net/if.h> 15 #include <sys/ioctl.h> 16 #include <assert.h> 17 #include <fcntl.h> 18 #include <arpa/inet.h> 19 #include <sys/types.h> 20 #include <sys/socket.h> 21 #include <unistd.h> 22 #include <sys/wait.h> 23 24 #define VIRTIO_NET_QUEUE_SIZE 128 25 #define VIRTIO_NET_NUM_QUEUES 2 26 #define VIRTIO_NET_RX_QUEUE 0 27 #define VIRTIO_NET_TX_QUEUE 1 28 29 struct net_device { 30 pthread_mutex_t mutex; 31 32 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 33 struct virtio_net_config net_config; 34 u32 host_features; 35 u32 guest_features; 36 u16 config_vector; 37 u8 status; 38 u8 isr; 39 u16 queue_selector; 40 41 pthread_t io_rx_thread; 42 pthread_mutex_t io_rx_mutex; 43 pthread_cond_t io_rx_cond; 44 45 pthread_t io_tx_thread; 46 pthread_mutex_t io_tx_mutex; 47 pthread_cond_t io_tx_cond; 48 49 int tap_fd; 50 char tap_name[IFNAMSIZ]; 51 }; 52 53 static struct net_device net_device = { 54 .mutex = PTHREAD_MUTEX_INITIALIZER, 55 56 .net_config = { 57 .mac = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 }, 58 .status = VIRTIO_NET_S_LINK_UP, 59 }, 60 .host_features = 1UL << VIRTIO_NET_F_MAC 61 | 1UL << VIRTIO_NET_F_CSUM 62 | 1UL << VIRTIO_NET_F_HOST_UFO 63 | 1UL << VIRTIO_NET_F_HOST_TSO4 64 | 1UL << VIRTIO_NET_F_HOST_TSO6 65 | 1UL << VIRTIO_NET_F_GUEST_UFO 66 | 1UL << VIRTIO_NET_F_GUEST_TSO4 67 | 1UL << VIRTIO_NET_F_GUEST_TSO6, 68 }; 69 70 static void *virtio_net_rx_thread(void *p) 71 { 72 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 73 struct virt_queue *vq; 74 struct kvm *self; 75 u16 out, in; 76 u16 head; 77 int len; 78 79 self = p; 80 vq = &net_device.vqs[VIRTIO_NET_RX_QUEUE]; 81 82 while (1) { 83 mutex_lock(&net_device.io_rx_mutex); 84 if (!virt_queue__available(vq)) 85 pthread_cond_wait(&net_device.io_rx_cond, &net_device.io_rx_mutex); 86 mutex_unlock(&net_device.io_rx_mutex); 87 88 while (virt_queue__available(vq)) { 89 head = virt_queue__get_iov(vq, iov, &out, &in, self); 90 len = readv(net_device.tap_fd, iov, in); 91 virt_queue__set_used_elem(vq, head, len); 92 93 /* We should interrupt guest right now, otherwise latency is huge. */ 94 virt_queue__trigger_irq(vq, VIRTIO_NET_IRQ, &net_device.isr, self); 95 } 96 97 } 98 99 pthread_exit(NULL); 100 return NULL; 101 102 } 103 104 static void *virtio_net_tx_thread(void *p) 105 { 106 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 107 struct virt_queue *vq; 108 struct kvm *self; 109 u16 out, in; 110 u16 head; 111 int len; 112 113 self = p; 114 vq = &net_device.vqs[VIRTIO_NET_TX_QUEUE]; 115 116 while (1) { 117 mutex_lock(&net_device.io_tx_mutex); 118 if (!virt_queue__available(vq)) 119 pthread_cond_wait(&net_device.io_tx_cond, &net_device.io_tx_mutex); 120 mutex_unlock(&net_device.io_tx_mutex); 121 122 while (virt_queue__available(vq)) { 123 head = virt_queue__get_iov(vq, iov, &out, &in, self); 124 len = writev(net_device.tap_fd, iov, out); 125 virt_queue__set_used_elem(vq, head, len); 126 } 127 128 virt_queue__trigger_irq(vq, VIRTIO_NET_IRQ, &net_device.isr, self); 129 130 } 131 132 pthread_exit(NULL); 133 134 return NULL; 135 136 } 137 138 static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, u32 count) 139 { 140 u8 *config_space = (u8 *) &net_device.net_config; 141 142 if (size != 1 || count != 1) 143 return false; 144 145 if ((offset - VIRTIO_PCI_CONFIG_NOMSI) > sizeof(struct virtio_net_config)) 146 error("config offset is too big: %li", offset - VIRTIO_PCI_CONFIG_NOMSI); 147 148 ioport__write8(data, config_space[offset - VIRTIO_PCI_CONFIG_NOMSI]); 149 150 return true; 151 } 152 153 static bool virtio_net_pci_io_in(struct kvm *self, u16 port, void *data, int size, u32 count) 154 { 155 unsigned long offset = port - IOPORT_VIRTIO_NET; 156 bool ret = true; 157 158 mutex_lock(&net_device.mutex); 159 160 switch (offset) { 161 case VIRTIO_PCI_HOST_FEATURES: 162 ioport__write32(data, net_device.host_features); 163 break; 164 case VIRTIO_PCI_GUEST_FEATURES: 165 ret = false; 166 break; 167 case VIRTIO_PCI_QUEUE_PFN: 168 ioport__write32(data, net_device.vqs[net_device.queue_selector].pfn); 169 break; 170 case VIRTIO_PCI_QUEUE_NUM: 171 ioport__write16(data, VIRTIO_NET_QUEUE_SIZE); 172 break; 173 case VIRTIO_PCI_QUEUE_SEL: 174 case VIRTIO_PCI_QUEUE_NOTIFY: 175 ret = false; 176 break; 177 case VIRTIO_PCI_STATUS: 178 ioport__write8(data, net_device.status); 179 break; 180 case VIRTIO_PCI_ISR: 181 ioport__write8(data, net_device.isr); 182 kvm__irq_line(self, VIRTIO_NET_IRQ, VIRTIO_IRQ_LOW); 183 net_device.isr = VIRTIO_IRQ_LOW; 184 break; 185 case VIRTIO_MSI_CONFIG_VECTOR: 186 ioport__write16(data, net_device.config_vector); 187 break; 188 default: 189 ret = virtio_net_pci_io_device_specific_in(data, offset, size, count); 190 }; 191 192 mutex_unlock(&net_device.mutex); 193 194 return ret; 195 } 196 197 static void virtio_net_handle_callback(struct kvm *self, u16 queue_index) 198 { 199 switch (queue_index) { 200 case VIRTIO_NET_TX_QUEUE: { 201 mutex_lock(&net_device.io_tx_mutex); 202 pthread_cond_signal(&net_device.io_tx_cond); 203 mutex_unlock(&net_device.io_tx_mutex); 204 break; 205 } 206 case VIRTIO_NET_RX_QUEUE: { 207 mutex_lock(&net_device.io_rx_mutex); 208 pthread_cond_signal(&net_device.io_rx_cond); 209 mutex_unlock(&net_device.io_rx_mutex); 210 break; 211 } 212 default: 213 warning("Unknown queue index %u", queue_index); 214 } 215 } 216 217 static bool virtio_net_pci_io_out(struct kvm *self, u16 port, void *data, int size, u32 count) 218 { 219 unsigned long offset = port - IOPORT_VIRTIO_NET; 220 bool ret = true; 221 222 mutex_lock(&net_device.mutex); 223 224 switch (offset) { 225 case VIRTIO_PCI_GUEST_FEATURES: 226 net_device.guest_features = ioport__read32(data); 227 break; 228 case VIRTIO_PCI_QUEUE_PFN: { 229 struct virt_queue *queue; 230 void *p; 231 232 assert(net_device.queue_selector < VIRTIO_NET_NUM_QUEUES); 233 234 queue = &net_device.vqs[net_device.queue_selector]; 235 queue->pfn = ioport__read32(data); 236 p = guest_flat_to_host(self, queue->pfn << 12); 237 238 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, 4096); 239 240 break; 241 } 242 case VIRTIO_PCI_QUEUE_SEL: 243 net_device.queue_selector = ioport__read16(data); 244 break; 245 case VIRTIO_PCI_QUEUE_NOTIFY: { 246 u16 queue_index; 247 queue_index = ioport__read16(data); 248 virtio_net_handle_callback(self, queue_index); 249 break; 250 } 251 case VIRTIO_PCI_STATUS: 252 net_device.status = ioport__read8(data); 253 break; 254 case VIRTIO_MSI_CONFIG_VECTOR: 255 net_device.config_vector = VIRTIO_MSI_NO_VECTOR; 256 break; 257 case VIRTIO_MSI_QUEUE_VECTOR: 258 break; 259 default: 260 ret = false; 261 }; 262 263 mutex_unlock(&net_device.mutex); 264 265 return ret; 266 } 267 268 static struct ioport_operations virtio_net_io_ops = { 269 .io_in = virtio_net_pci_io_in, 270 .io_out = virtio_net_pci_io_out, 271 }; 272 273 static struct pci_device_header virtio_net_pci_device = { 274 .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 275 .device_id = PCI_DEVICE_ID_VIRTIO_NET, 276 .header_type = PCI_HEADER_TYPE_NORMAL, 277 .revision_id = 0, 278 .class = 0x020000, 279 .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 280 .subsys_id = PCI_SUBSYSTEM_ID_VIRTIO_NET, 281 .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO, 282 .irq_pin = VIRTIO_NET_PIN, 283 .irq_line = VIRTIO_NET_IRQ, 284 }; 285 286 static bool virtio_net__tap_init(const struct virtio_net_parameters *params) 287 { 288 int sock = socket(AF_INET, SOCK_STREAM, 0); 289 int i, pid, status, offload, hdr_len; 290 struct sockaddr_in sin = {0}; 291 struct ifreq ifr; 292 293 for (i = 0 ; i < 6 ; i++) 294 net_device.net_config.mac[i] = params->guest_mac[i]; 295 296 net_device.tap_fd = open("/dev/net/tun", O_RDWR); 297 if (net_device.tap_fd < 0) { 298 warning("Unable to open /dev/net/tun"); 299 goto fail; 300 } 301 302 memset(&ifr, 0, sizeof(ifr)); 303 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 304 if (ioctl(net_device.tap_fd, TUNSETIFF, &ifr) < 0) { 305 warning("Config tap device error. Are you root?"); 306 goto fail; 307 } 308 309 strncpy(net_device.tap_name, ifr.ifr_name, sizeof(net_device.tap_name)); 310 311 if (ioctl(net_device.tap_fd, TUNSETNOCSUM, 1) < 0) { 312 warning("Config tap device TUNSETNOCSUM error"); 313 goto fail; 314 } 315 316 hdr_len = sizeof(struct virtio_net_hdr); 317 if (ioctl(net_device.tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) { 318 warning("Config tap device TUNSETVNETHDRSZ error"); 319 goto fail; 320 } 321 322 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 323 if (ioctl(net_device.tap_fd, TUNSETOFFLOAD, offload) < 0) { 324 warning("Config tap device TUNSETOFFLOAD error"); 325 goto fail; 326 } 327 328 if (strcmp(params->script, "none")) { 329 pid = fork(); 330 if (pid == 0) { 331 execl(params->script, params->script, net_device.tap_name, NULL); 332 _exit(1); 333 } else { 334 waitpid(pid, &status, 0); 335 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 336 warning("Fail to setup tap by %s", params->script); 337 goto fail; 338 } 339 } 340 } else { 341 memset(&ifr, 0, sizeof(ifr)); 342 strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 343 sin.sin_addr.s_addr = inet_addr(params->host_ip); 344 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 345 ifr.ifr_addr.sa_family = AF_INET; 346 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 347 warning("Could not set ip address on tap device"); 348 goto fail; 349 } 350 } 351 352 memset(&ifr, 0, sizeof(ifr)); 353 strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 354 ioctl(sock, SIOCGIFFLAGS, &ifr); 355 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 356 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 357 warning("Could not bring tap device up"); 358 359 close(sock); 360 361 return 1; 362 363 fail: 364 if (sock >= 0) 365 close(sock); 366 if (net_device.tap_fd >= 0) 367 close(net_device.tap_fd); 368 369 return 0; 370 } 371 372 static void virtio_net__io_thread_init(struct kvm *self) 373 { 374 pthread_mutex_init(&net_device.io_rx_mutex, NULL); 375 pthread_cond_init(&net_device.io_tx_cond, NULL); 376 377 pthread_mutex_init(&net_device.io_rx_mutex, NULL); 378 pthread_cond_init(&net_device.io_tx_cond, NULL); 379 380 pthread_create(&net_device.io_rx_thread, NULL, virtio_net_rx_thread, (void *)self); 381 pthread_create(&net_device.io_tx_thread, NULL, virtio_net_tx_thread, (void *)self); 382 } 383 384 void virtio_net__init(const struct virtio_net_parameters *params) 385 { 386 if (virtio_net__tap_init(params)) { 387 pci__register(&virtio_net_pci_device, PCI_VIRTIO_NET_DEVNUM); 388 ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE); 389 390 virtio_net__io_thread_init(params->self); 391 } 392 } 393