1 #include "kvm/virtio-net.h" 2 #include "kvm/virtio-pci.h" 3 #include "kvm/virtio.h" 4 #include "kvm/ioport.h" 5 #include "kvm/types.h" 6 #include "kvm/mutex.h" 7 #include "kvm/util.h" 8 #include "kvm/kvm.h" 9 #include "kvm/pci.h" 10 11 #include <linux/virtio_net.h> 12 #include <linux/if_tun.h> 13 #include <net/if.h> 14 #include <sys/ioctl.h> 15 #include <assert.h> 16 #include <fcntl.h> 17 #include <arpa/inet.h> 18 #include <sys/types.h> 19 #include <sys/socket.h> 20 #include <unistd.h> 21 #include <sys/wait.h> 22 23 #define VIRTIO_NET_IRQ 14 24 #define VIRTIO_NET_QUEUE_SIZE 128 25 #define VIRTIO_NET_NUM_QUEUES 2 26 #define VIRTIO_NET_RX_QUEUE 0 27 #define VIRTIO_NET_TX_QUEUE 1 28 #define PCI_VIRTIO_NET_DEVNUM 3 29 30 struct net_device { 31 pthread_mutex_t mutex; 32 33 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 34 struct virtio_net_config net_config; 35 uint32_t host_features; 36 uint32_t guest_features; 37 uint16_t config_vector; 38 uint8_t status; 39 uint16_t queue_selector; 40 41 pthread_t io_rx_thread; 42 pthread_mutex_t io_rx_mutex; 43 pthread_cond_t io_rx_cond; 44 45 pthread_t io_tx_thread; 46 pthread_mutex_t io_tx_mutex; 47 pthread_cond_t io_tx_cond; 48 49 int tap_fd; 50 char tap_name[IFNAMSIZ]; 51 }; 52 53 static struct net_device net_device = { 54 .mutex = PTHREAD_MUTEX_INITIALIZER, 55 56 .net_config = { 57 .mac = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}, 58 .status = VIRTIO_NET_S_LINK_UP, 59 }, 60 61 .host_features = 1UL << VIRTIO_NET_F_MAC, 62 }; 63 64 static void *virtio_net_rx_thread(void *p) 65 { 66 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 67 struct virt_queue *vq; 68 struct kvm *self; 69 uint16_t out, in; 70 uint16_t head; 71 int len; 72 73 self = p; 74 vq = &net_device.vqs[VIRTIO_NET_RX_QUEUE]; 75 76 while (1) { 77 mutex_lock(&net_device.io_rx_mutex); 78 if (!virt_queue__available(vq)) 79 pthread_cond_wait(&net_device.io_rx_cond, &net_device.io_rx_mutex); 80 mutex_unlock(&net_device.io_rx_mutex); 81 82 while (virt_queue__available(vq)) { 83 head = virt_queue__get_iov(vq, iov, &out, &in, self); 84 85 /* We do not specify GSO or CSUM features, So we can ignore virtio_net_hdr */ 86 len = readv(net_device.tap_fd, iov + 1, in - 1); 87 88 /* However, We have to tell guest we have write the virtio_net_hdr */ 89 virt_queue__set_used_elem(vq, head, sizeof(struct virtio_net_hdr) + len); 90 91 /* We should interrupt guest right now, otherwise latency is huge. */ 92 kvm__irq_line(self, VIRTIO_NET_IRQ, 1); 93 } 94 95 } 96 97 pthread_exit(NULL); 98 return NULL; 99 100 } 101 102 static void *virtio_net_tx_thread(void *p) 103 { 104 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 105 struct virt_queue *vq; 106 struct kvm *self; 107 uint16_t out, in; 108 uint16_t head; 109 int len; 110 111 self = p; 112 vq = &net_device.vqs[VIRTIO_NET_TX_QUEUE]; 113 114 while (1) { 115 mutex_lock(&net_device.io_tx_mutex); 116 if (!virt_queue__available(vq)) 117 pthread_cond_wait(&net_device.io_tx_cond, &net_device.io_tx_mutex); 118 mutex_unlock(&net_device.io_tx_mutex); 119 120 while (virt_queue__available(vq)) { 121 head = virt_queue__get_iov(vq, iov, &out, &in, self); 122 len = writev(net_device.tap_fd, iov + 1, out - 1); 123 virt_queue__set_used_elem(vq, head, len); 124 } 125 126 kvm__irq_line(self, VIRTIO_NET_IRQ, 1); 127 } 128 129 pthread_exit(NULL); 130 return NULL; 131 132 } 133 static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, uint32_t count) 134 { 135 uint8_t *config_space = (uint8_t *) &net_device.net_config; 136 137 if (size != 1 || count != 1) 138 return false; 139 140 if ((offset - VIRTIO_PCI_CONFIG_NOMSI) > sizeof(struct virtio_net_config)) 141 error("config offset is too big: %li", offset - VIRTIO_PCI_CONFIG_NOMSI); 142 143 ioport__write8(data, config_space[offset - VIRTIO_PCI_CONFIG_NOMSI]); 144 145 return true; 146 } 147 148 static bool virtio_net_pci_io_in(struct kvm *self, uint16_t port, void *data, int size, uint32_t count) 149 { 150 unsigned long offset = port - IOPORT_VIRTIO_NET; 151 bool ret = true; 152 153 mutex_lock(&net_device.mutex); 154 155 switch (offset) { 156 case VIRTIO_PCI_HOST_FEATURES: 157 ioport__write32(data, net_device.host_features); 158 break; 159 case VIRTIO_PCI_GUEST_FEATURES: 160 ret = false; 161 break; 162 case VIRTIO_PCI_QUEUE_PFN: 163 ioport__write32(data, net_device.vqs[net_device.queue_selector].pfn); 164 break; 165 case VIRTIO_PCI_QUEUE_NUM: 166 ioport__write16(data, VIRTIO_NET_QUEUE_SIZE); 167 break; 168 case VIRTIO_PCI_QUEUE_SEL: 169 case VIRTIO_PCI_QUEUE_NOTIFY: 170 ret = false; 171 break; 172 case VIRTIO_PCI_STATUS: 173 ioport__write8(data, net_device.status); 174 break; 175 case VIRTIO_PCI_ISR: 176 ioport__write8(data, 0x1); 177 kvm__irq_line(self, VIRTIO_NET_IRQ, 0); 178 break; 179 case VIRTIO_MSI_CONFIG_VECTOR: 180 ioport__write16(data, net_device.config_vector); 181 break; 182 default: 183 ret = virtio_net_pci_io_device_specific_in(data, offset, size, count); 184 }; 185 186 mutex_unlock(&net_device.mutex); 187 188 return ret; 189 } 190 191 static void virtio_net_handle_callback(struct kvm *self, uint16_t queue_index) 192 { 193 if (queue_index == VIRTIO_NET_TX_QUEUE) { 194 195 mutex_lock(&net_device.io_tx_mutex); 196 pthread_cond_signal(&net_device.io_tx_cond); 197 mutex_unlock(&net_device.io_tx_mutex); 198 199 } else if (queue_index == VIRTIO_NET_RX_QUEUE) { 200 201 mutex_lock(&net_device.io_rx_mutex); 202 pthread_cond_signal(&net_device.io_rx_cond); 203 mutex_unlock(&net_device.io_rx_mutex); 204 205 } 206 } 207 208 static bool virtio_net_pci_io_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count) 209 { 210 unsigned long offset = port - IOPORT_VIRTIO_NET; 211 bool ret = true; 212 213 mutex_lock(&net_device.mutex); 214 215 switch (offset) { 216 case VIRTIO_PCI_GUEST_FEATURES: 217 net_device.guest_features = ioport__read32(data); 218 break; 219 case VIRTIO_PCI_QUEUE_PFN: { 220 struct virt_queue *queue; 221 void *p; 222 223 assert(net_device.queue_selector < VIRTIO_NET_NUM_QUEUES); 224 225 queue = &net_device.vqs[net_device.queue_selector]; 226 queue->pfn = ioport__read32(data); 227 p = guest_flat_to_host(self, queue->pfn << 12); 228 229 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, 4096); 230 231 break; 232 } 233 case VIRTIO_PCI_QUEUE_SEL: 234 net_device.queue_selector = ioport__read16(data); 235 break; 236 case VIRTIO_PCI_QUEUE_NOTIFY: { 237 uint16_t queue_index; 238 queue_index = ioport__read16(data); 239 virtio_net_handle_callback(self, queue_index); 240 break; 241 } 242 case VIRTIO_PCI_STATUS: 243 net_device.status = ioport__read8(data); 244 break; 245 case VIRTIO_MSI_CONFIG_VECTOR: 246 net_device.config_vector = VIRTIO_MSI_NO_VECTOR; 247 break; 248 case VIRTIO_MSI_QUEUE_VECTOR: 249 break; 250 default: 251 ret = false; 252 }; 253 254 mutex_unlock(&net_device.mutex); 255 return ret; 256 } 257 258 static struct ioport_operations virtio_net_io_ops = { 259 .io_in = virtio_net_pci_io_in, 260 .io_out = virtio_net_pci_io_out, 261 }; 262 263 #define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4 264 #define PCI_DEVICE_ID_VIRTIO_NET 0x1000 265 #define PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET 0x1af4 266 #define PCI_SUBSYSTEM_ID_VIRTIO_NET 0x0001 267 268 static struct pci_device_header virtio_net_pci_device = { 269 .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 270 .device_id = PCI_DEVICE_ID_VIRTIO_NET, 271 .header_type = PCI_HEADER_TYPE_NORMAL, 272 .revision_id = 0, 273 .class = 0x020000, 274 .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 275 .subsys_id = PCI_SUBSYSTEM_ID_VIRTIO_NET, 276 .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO, 277 .irq_pin = 3, 278 .irq_line = VIRTIO_NET_IRQ, 279 }; 280 281 static bool virtio_net__tap_init(const struct virtio_net_parameters *params) 282 { 283 struct ifreq ifr; 284 int sock = socket(AF_INET, SOCK_STREAM, 0); 285 int i, pid, status; 286 struct sockaddr_in sin = {0}; 287 288 for (i = 0 ; i < 6 ; i++) 289 net_device.net_config.mac[i] = params->guest_mac[i]; 290 291 net_device.tap_fd = open("/dev/net/tun", O_RDWR); 292 if (net_device.tap_fd < 0) { 293 warning("Unable to open /dev/net/tun\n"); 294 goto fail; 295 } 296 297 memset(&ifr, 0, sizeof(ifr)); 298 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 299 300 if (ioctl(net_device.tap_fd, TUNSETIFF, &ifr) < 0) { 301 warning("Config tap device error. Are you root?"); 302 goto fail; 303 } 304 305 strncpy(net_device.tap_name, ifr.ifr_name, sizeof(net_device.tap_name)); 306 307 ioctl(net_device.tap_fd, TUNSETNOCSUM, 1); 308 309 if (strcmp(params->script, "none")) { 310 pid = fork(); 311 if (pid == 0) { 312 execl(params->script, params->script, net_device.tap_name, NULL); 313 _exit(1); 314 } else { 315 waitpid(pid, &status, 0); 316 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 317 warning("Fail to setup tap by %s", params->script); 318 goto fail; 319 } 320 } 321 } else { 322 memset(&ifr, 0, sizeof(ifr)); 323 324 strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 325 326 sin.sin_addr.s_addr = inet_addr(params->host_ip); 327 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 328 ifr.ifr_addr.sa_family = AF_INET; 329 330 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 331 warning("Can not set ip address on tap device"); 332 goto fail; 333 } 334 } 335 336 memset(&ifr, 0, sizeof(ifr)); 337 strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name)); 338 ioctl(sock, SIOCGIFFLAGS, &ifr); 339 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 340 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 341 warning("Could not bring tap device up"); 342 343 close(sock); 344 345 return 1; 346 347 fail: 348 if (sock >= 0) 349 close(sock); 350 if (net_device.tap_fd >= 0) 351 close(net_device.tap_fd); 352 353 return 0; 354 } 355 356 static void virtio_net__io_thread_init(struct kvm *self) 357 { 358 pthread_mutex_init(&net_device.io_rx_mutex, NULL); 359 pthread_cond_init(&net_device.io_tx_cond, NULL); 360 361 pthread_mutex_init(&net_device.io_rx_mutex, NULL); 362 pthread_cond_init(&net_device.io_tx_cond, NULL); 363 364 pthread_create(&net_device.io_rx_thread, NULL, virtio_net_rx_thread, (void *)self); 365 pthread_create(&net_device.io_tx_thread, NULL, virtio_net_tx_thread, (void *)self); 366 } 367 368 void virtio_net__init(const struct virtio_net_parameters *params) 369 { 370 if (virtio_net__tap_init(params)) { 371 pci__register(&virtio_net_pci_device, PCI_VIRTIO_NET_DEVNUM); 372 ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE); 373 374 virtio_net__io_thread_init(params->self); 375 } 376 } 377