1 #include "kvm/virtio-pci-dev.h" 2 #include "kvm/virtio-net.h" 3 #include "kvm/virtio.h" 4 #include "kvm/types.h" 5 #include "kvm/mutex.h" 6 #include "kvm/util.h" 7 #include "kvm/kvm.h" 8 #include "kvm/irq.h" 9 #include "kvm/uip.h" 10 #include "kvm/guest_compat.h" 11 #include "kvm/virtio-pci.h" 12 13 #include <linux/virtio_net.h> 14 #include <linux/if_tun.h> 15 #include <linux/types.h> 16 17 #include <arpa/inet.h> 18 #include <net/if.h> 19 20 #include <unistd.h> 21 #include <assert.h> 22 #include <fcntl.h> 23 24 #include <sys/socket.h> 25 #include <sys/ioctl.h> 26 #include <sys/types.h> 27 #include <sys/wait.h> 28 29 #define VIRTIO_NET_QUEUE_SIZE 128 30 #define VIRTIO_NET_NUM_QUEUES 2 31 #define VIRTIO_NET_RX_QUEUE 0 32 #define VIRTIO_NET_TX_QUEUE 1 33 34 struct net_dev; 35 36 extern struct kvm *kvm; 37 38 struct net_dev_operations { 39 int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); 40 int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); 41 }; 42 43 struct net_dev { 44 pthread_mutex_t mutex; 45 struct virtio_pci vpci; 46 struct list_head list; 47 48 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; 49 struct virtio_net_config config; 50 u32 features; 51 52 pthread_t io_rx_thread; 53 pthread_mutex_t io_rx_lock; 54 pthread_cond_t io_rx_cond; 55 56 pthread_t io_tx_thread; 57 pthread_mutex_t io_tx_lock; 58 pthread_cond_t io_tx_cond; 59 60 int tap_fd; 61 char tap_name[IFNAMSIZ]; 62 63 int mode; 64 65 struct uip_info info; 66 struct net_dev_operations *ops; 67 struct kvm *kvm; 68 }; 69 70 static LIST_HEAD(ndevs); 71 static int compat_id = -1; 72 73 static void *virtio_net_rx_thread(void *p) 74 { 75 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 76 struct virt_queue *vq; 77 struct kvm *kvm; 78 struct net_dev *ndev = p; 79 u16 out, in; 80 u16 head; 81 int len; 82 83 kvm = ndev->kvm; 84 vq = &ndev->vqs[VIRTIO_NET_RX_QUEUE]; 85 86 while (1) { 87 88 mutex_lock(&ndev->io_rx_lock); 89 if (!virt_queue__available(vq)) 90 pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock); 91 mutex_unlock(&ndev->io_rx_lock); 92 93 while (virt_queue__available(vq)) { 94 95 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 96 97 len = ndev->ops->rx(iov, in, ndev); 98 99 virt_queue__set_used_elem(vq, head, len); 100 101 /* We should interrupt guest right now, otherwise latency is huge. */ 102 virtio_pci__signal_vq(kvm, &ndev->vpci, VIRTIO_NET_RX_QUEUE); 103 } 104 105 } 106 107 pthread_exit(NULL); 108 return NULL; 109 110 } 111 112 static void *virtio_net_tx_thread(void *p) 113 { 114 struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; 115 struct virt_queue *vq; 116 struct kvm *kvm; 117 struct net_dev *ndev = p; 118 u16 out, in; 119 u16 head; 120 int len; 121 122 kvm = ndev->kvm; 123 vq = &ndev->vqs[VIRTIO_NET_TX_QUEUE]; 124 125 while (1) { 126 mutex_lock(&ndev->io_tx_lock); 127 if (!virt_queue__available(vq)) 128 pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock); 129 mutex_unlock(&ndev->io_tx_lock); 130 131 while (virt_queue__available(vq)) { 132 133 head = virt_queue__get_iov(vq, iov, &out, &in, kvm); 134 135 len = ndev->ops->tx(iov, out, ndev); 136 137 virt_queue__set_used_elem(vq, head, len); 138 } 139 140 virtio_pci__signal_vq(kvm, &ndev->vpci, VIRTIO_NET_TX_QUEUE); 141 } 142 143 pthread_exit(NULL); 144 145 return NULL; 146 147 } 148 149 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue) 150 { 151 switch (queue) { 152 case VIRTIO_NET_TX_QUEUE: 153 mutex_lock(&ndev->io_tx_lock); 154 pthread_cond_signal(&ndev->io_tx_cond); 155 mutex_unlock(&ndev->io_tx_lock); 156 break; 157 case VIRTIO_NET_RX_QUEUE: 158 mutex_lock(&ndev->io_rx_lock); 159 pthread_cond_signal(&ndev->io_rx_cond); 160 mutex_unlock(&ndev->io_rx_lock); 161 break; 162 default: 163 pr_warning("Unknown queue index %u", queue); 164 } 165 } 166 167 static bool virtio_net__tap_init(const struct virtio_net_params *params, 168 struct net_dev *ndev) 169 { 170 int sock = socket(AF_INET, SOCK_STREAM, 0); 171 int pid, status, offload, hdr_len; 172 struct sockaddr_in sin = {0}; 173 struct ifreq ifr; 174 175 ndev->tap_fd = open("/dev/net/tun", O_RDWR); 176 if (ndev->tap_fd < 0) { 177 pr_warning("Unable to open /dev/net/tun"); 178 goto fail; 179 } 180 181 memset(&ifr, 0, sizeof(ifr)); 182 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 183 if (ioctl(ndev->tap_fd, TUNSETIFF, &ifr) < 0) { 184 pr_warning("Config tap device error. Are you root?"); 185 goto fail; 186 } 187 188 strncpy(ndev->tap_name, ifr.ifr_name, sizeof(ndev->tap_name)); 189 190 if (ioctl(ndev->tap_fd, TUNSETNOCSUM, 1) < 0) { 191 pr_warning("Config tap device TUNSETNOCSUM error"); 192 goto fail; 193 } 194 195 hdr_len = sizeof(struct virtio_net_hdr); 196 if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) 197 pr_warning("Config tap device TUNSETVNETHDRSZ error"); 198 199 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; 200 if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) { 201 pr_warning("Config tap device TUNSETOFFLOAD error"); 202 goto fail; 203 } 204 205 if (strcmp(params->script, "none")) { 206 pid = fork(); 207 if (pid == 0) { 208 execl(params->script, params->script, ndev->tap_name, NULL); 209 _exit(1); 210 } else { 211 waitpid(pid, &status, 0); 212 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { 213 pr_warning("Fail to setup tap by %s", params->script); 214 goto fail; 215 } 216 } 217 } else { 218 memset(&ifr, 0, sizeof(ifr)); 219 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name)); 220 sin.sin_addr.s_addr = inet_addr(params->host_ip); 221 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr)); 222 ifr.ifr_addr.sa_family = AF_INET; 223 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) { 224 pr_warning("Could not set ip address on tap device"); 225 goto fail; 226 } 227 } 228 229 memset(&ifr, 0, sizeof(ifr)); 230 strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name)); 231 ioctl(sock, SIOCGIFFLAGS, &ifr); 232 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 233 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) 234 pr_warning("Could not bring tap device up"); 235 236 close(sock); 237 238 return 1; 239 240 fail: 241 if (sock >= 0) 242 close(sock); 243 if (ndev->tap_fd >= 0) 244 close(ndev->tap_fd); 245 246 return 0; 247 } 248 249 static void virtio_net__io_thread_init(struct kvm *kvm, struct net_dev *ndev) 250 { 251 pthread_mutex_init(&ndev->io_rx_lock, NULL); 252 pthread_cond_init(&ndev->io_tx_cond, NULL); 253 254 pthread_mutex_init(&ndev->io_rx_lock, NULL); 255 pthread_cond_init(&ndev->io_tx_cond, NULL); 256 257 pthread_create(&ndev->io_rx_thread, NULL, virtio_net_rx_thread, ndev); 258 pthread_create(&ndev->io_tx_thread, NULL, virtio_net_tx_thread, ndev); 259 } 260 261 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 262 { 263 return writev(ndev->tap_fd, iov, out); 264 } 265 266 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 267 { 268 return readv(ndev->tap_fd, iov, in); 269 } 270 271 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) 272 { 273 return uip_tx(iov, out, &ndev->info); 274 } 275 276 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) 277 { 278 return uip_rx(iov, in, &ndev->info); 279 } 280 281 static struct net_dev_operations tap_ops = { 282 .rx = tap_ops_rx, 283 .tx = tap_ops_tx, 284 }; 285 286 static struct net_dev_operations uip_ops = { 287 .rx = uip_ops_rx, 288 .tx = uip_ops_tx, 289 }; 290 291 static void set_config(struct kvm *kvm, void *dev, u8 data, u32 offset) 292 { 293 struct net_dev *ndev = dev; 294 295 ((u8 *)(&ndev->config))[offset] = data; 296 } 297 298 static u8 get_config(struct kvm *kvm, void *dev, u32 offset) 299 { 300 struct net_dev *ndev = dev; 301 302 return ((u8 *)(&ndev->config))[offset]; 303 } 304 305 static u32 get_host_features(struct kvm *kvm, void *dev) 306 { 307 return 1UL << VIRTIO_NET_F_MAC 308 | 1UL << VIRTIO_NET_F_CSUM 309 | 1UL << VIRTIO_NET_F_HOST_UFO 310 | 1UL << VIRTIO_NET_F_HOST_TSO4 311 | 1UL << VIRTIO_NET_F_HOST_TSO6 312 | 1UL << VIRTIO_NET_F_GUEST_UFO 313 | 1UL << VIRTIO_NET_F_GUEST_TSO4 314 | 1UL << VIRTIO_NET_F_GUEST_TSO6; 315 } 316 317 static void set_guest_features(struct kvm *kvm, void *dev, u32 features) 318 { 319 struct net_dev *ndev = dev; 320 321 ndev->features = features; 322 } 323 324 static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn) 325 { 326 struct net_dev *ndev = dev; 327 struct virt_queue *queue; 328 void *p; 329 330 compat__remove_message(compat_id); 331 332 queue = &ndev->vqs[vq]; 333 queue->pfn = pfn; 334 p = guest_pfn_to_host(kvm, queue->pfn); 335 336 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN); 337 338 return 0; 339 } 340 341 static int notify_vq(struct kvm *kvm, void *dev, u32 vq) 342 { 343 struct net_dev *ndev = dev; 344 345 virtio_net_handle_callback(kvm, ndev, vq); 346 347 return 0; 348 } 349 350 static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq) 351 { 352 struct net_dev *ndev = dev; 353 354 return ndev->vqs[vq].pfn; 355 } 356 357 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq) 358 { 359 return VIRTIO_NET_QUEUE_SIZE; 360 } 361 362 void virtio_net__init(const struct virtio_net_params *params) 363 { 364 int i; 365 struct net_dev *ndev; 366 367 if (!params) 368 return; 369 370 ndev = calloc(1, sizeof(struct net_dev)); 371 if (ndev == NULL) 372 die("Failed allocating ndev"); 373 374 list_add_tail(&ndev->list, &ndevs); 375 376 ndev->kvm = params->kvm; 377 378 mutex_init(&ndev->mutex); 379 ndev->config.status = VIRTIO_NET_S_LINK_UP; 380 381 for (i = 0 ; i < 6 ; i++) { 382 ndev->config.mac[i] = params->guest_mac[i]; 383 ndev->info.guest_mac.addr[i] = params->guest_mac[i]; 384 ndev->info.host_mac.addr[i] = params->host_mac[i]; 385 } 386 387 ndev->mode = params->mode; 388 if (ndev->mode == NET_MODE_TAP) { 389 if (!virtio_net__tap_init(params, ndev)) 390 die_perror("You have requested a TAP device, but creation of one has" 391 "failed because:"); 392 ndev->ops = &tap_ops; 393 } else { 394 ndev->info.host_ip = ntohl(inet_addr(params->host_ip)); 395 ndev->info.guest_ip = ntohl(inet_addr(params->guest_ip)); 396 ndev->info.guest_netmask = ntohl(inet_addr("255.255.255.0")); 397 ndev->info.buf_nr = 20, 398 uip_init(&ndev->info); 399 ndev->ops = &uip_ops; 400 } 401 402 virtio_pci__init(kvm, &ndev->vpci, ndev, PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET); 403 ndev->vpci.ops = (struct virtio_pci_ops) { 404 .set_config = set_config, 405 .get_config = get_config, 406 .get_host_features = get_host_features, 407 .set_guest_features = set_guest_features, 408 .init_vq = init_vq, 409 .notify_vq = notify_vq, 410 .get_pfn_vq = get_pfn_vq, 411 .get_size_vq = get_size_vq, 412 }; 413 414 virtio_net__io_thread_init(params->kvm, ndev); 415 416 if (compat_id != -1) 417 compat_id = compat__add_message("virtio-net device was not detected", 418 "While you have requested a virtio-net device, " 419 "the guest kernel did not initialize it.\n" 420 "Please make sure that the guest kernel was " 421 "compiled with CONFIG_VIRTIO_NET=y enabled " 422 "in its .config"); 423 } 424