xref: /kvmtool/virtio/net.c (revision 69205aa12bc4d49078ce129299c9d8f748a4f1c6)
1 #include "kvm/virtio-pci-dev.h"
2 #include "kvm/virtio-net.h"
3 #include "kvm/virtio.h"
4 #include "kvm/types.h"
5 #include "kvm/mutex.h"
6 #include "kvm/util.h"
7 #include "kvm/kvm.h"
8 #include "kvm/irq.h"
9 #include "kvm/uip.h"
10 #include "kvm/guest_compat.h"
11 
12 #include <linux/vhost.h>
13 #include <linux/virtio_net.h>
14 #include <linux/if_tun.h>
15 #include <linux/types.h>
16 
17 #include <arpa/inet.h>
18 #include <net/if.h>
19 
20 #include <unistd.h>
21 #include <fcntl.h>
22 
23 #include <sys/socket.h>
24 #include <sys/ioctl.h>
25 #include <sys/types.h>
26 #include <sys/wait.h>
27 #include <sys/eventfd.h>
28 
/* Ring size used for every virtqueue (see init_vq() and get_size_vq()). */
#define VIRTIO_NET_QUEUE_SIZE		128
/* Number of entries in net_dev::vqs. */
#define VIRTIO_NET_NUM_QUEUES		2
/* Indices of the receive and transmit queues inside net_dev::vqs. */
#define VIRTIO_NET_RX_QUEUE		0
#define VIRTIO_NET_TX_QUEUE		1
33 
/* Forward declaration: net_dev_operations refers to it before it is defined. */
struct net_dev;

/* Defined outside this file; handed to virtio_init() in virtio_net__init(). */
extern struct kvm *kvm;
37 
38 struct net_dev_operations {
39 	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
40 	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
41 };
42 
/* Per-device state of one virtio-net instance. */
struct net_dev {
	pthread_mutex_t			mutex;		/* initialised in virtio_net__init() */
	struct virtio_device		vdev;		/* handle registered through virtio_init() */
	struct list_head		list;		/* link in the file-level 'ndevs' list */

	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES];	/* indexed by VIRTIO_NET_{RX,TX}_QUEUE */
	struct virtio_net_config	config;		/* byte-accessed by get_config()/set_config() */
	u32				features;	/* value stored by set_guest_features() */

	/* RX worker thread and the lock/condition used to wake it. */
	pthread_t			io_rx_thread;
	pthread_mutex_t			io_rx_lock;
	pthread_cond_t			io_rx_cond;

	/* TX worker thread and the lock/condition used to wake it. */
	pthread_t			io_tx_thread;
	pthread_mutex_t			io_tx_lock;
	pthread_cond_t			io_tx_cond;

	int				vhost_fd;	/* 0 is treated as "vhost not in use" */
	int				tap_fd;		/* tap device used by tap_ops */
	char				tap_name[IFNAMSIZ];	/* interface name returned by TUNSETIFF */

	int				mode;		/* copied from params->mode, e.g. NET_MODE_TAP */

	struct uip_info			info;		/* state handed to uip_init()/uip_rx()/uip_tx() */
	struct net_dev_operations	*ops;		/* &tap_ops or &uip_ops */
	struct kvm			*kvm;		/* copied from params->kvm */
};
70 
/* Every net_dev created by virtio_net__init() is appended to this list. */
static LIST_HEAD(ndevs);
/* Handle for the "device was not detected" compat message; -1 means none registered. */
static int compat_id = -1;
73 
74 static void *virtio_net_rx_thread(void *p)
75 {
76 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
77 	struct virt_queue *vq;
78 	struct kvm *kvm;
79 	struct net_dev *ndev = p;
80 	u16 out, in;
81 	u16 head;
82 	int len;
83 
84 	kvm	= ndev->kvm;
85 	vq	= &ndev->vqs[VIRTIO_NET_RX_QUEUE];
86 
87 	while (1) {
88 		mutex_lock(&ndev->io_rx_lock);
89 		if (!virt_queue__available(vq))
90 			pthread_cond_wait(&ndev->io_rx_cond, &ndev->io_rx_lock);
91 		mutex_unlock(&ndev->io_rx_lock);
92 
93 		while (virt_queue__available(vq)) {
94 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
95 			len = ndev->ops->rx(iov, in, ndev);
96 			virt_queue__set_used_elem(vq, head, len);
97 
98 			/* We should interrupt guest right now, otherwise latency is huge. */
99 			if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_RX_QUEUE]))
100 				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev,
101 							   VIRTIO_NET_RX_QUEUE);
102 		}
103 	}
104 
105 	pthread_exit(NULL);
106 	return NULL;
107 
108 }
109 
110 static void *virtio_net_tx_thread(void *p)
111 {
112 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
113 	struct virt_queue *vq;
114 	struct kvm *kvm;
115 	struct net_dev *ndev = p;
116 	u16 out, in;
117 	u16 head;
118 	int len;
119 
120 	kvm	= ndev->kvm;
121 	vq	= &ndev->vqs[VIRTIO_NET_TX_QUEUE];
122 
123 	while (1) {
124 		mutex_lock(&ndev->io_tx_lock);
125 		if (!virt_queue__available(vq))
126 			pthread_cond_wait(&ndev->io_tx_cond, &ndev->io_tx_lock);
127 		mutex_unlock(&ndev->io_tx_lock);
128 
129 		while (virt_queue__available(vq)) {
130 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
131 			len = ndev->ops->tx(iov, out, ndev);
132 			virt_queue__set_used_elem(vq, head, len);
133 		}
134 
135 		if (virtio_queue__should_signal(&ndev->vqs[VIRTIO_NET_TX_QUEUE]))
136 			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, VIRTIO_NET_TX_QUEUE);
137 	}
138 
139 	pthread_exit(NULL);
140 
141 	return NULL;
142 
143 }
144 
145 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
146 {
147 	switch (queue) {
148 	case VIRTIO_NET_TX_QUEUE:
149 		mutex_lock(&ndev->io_tx_lock);
150 		pthread_cond_signal(&ndev->io_tx_cond);
151 		mutex_unlock(&ndev->io_tx_lock);
152 		break;
153 	case VIRTIO_NET_RX_QUEUE:
154 		mutex_lock(&ndev->io_rx_lock);
155 		pthread_cond_signal(&ndev->io_rx_cond);
156 		mutex_unlock(&ndev->io_rx_lock);
157 		break;
158 	default:
159 		pr_warning("Unknown queue index %u", queue);
160 	}
161 }
162 
163 static bool virtio_net__tap_init(const struct virtio_net_params *params,
164 					struct net_dev *ndev)
165 {
166 	int sock = socket(AF_INET, SOCK_STREAM, 0);
167 	int pid, status, offload, hdr_len;
168 	struct sockaddr_in sin = {0};
169 	struct ifreq ifr;
170 
171 	/* Did the user already gave us the FD? */
172 	if (params->fd) {
173 		ndev->tap_fd = params->fd;
174 		return 1;
175 	}
176 
177 	ndev->tap_fd = open("/dev/net/tun", O_RDWR);
178 	if (ndev->tap_fd < 0) {
179 		pr_warning("Unable to open /dev/net/tun");
180 		goto fail;
181 	}
182 
183 	memset(&ifr, 0, sizeof(ifr));
184 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
185 	if (ioctl(ndev->tap_fd, TUNSETIFF, &ifr) < 0) {
186 		pr_warning("Config tap device error. Are you root?");
187 		goto fail;
188 	}
189 
190 	strncpy(ndev->tap_name, ifr.ifr_name, sizeof(ndev->tap_name));
191 
192 	if (ioctl(ndev->tap_fd, TUNSETNOCSUM, 1) < 0) {
193 		pr_warning("Config tap device TUNSETNOCSUM error");
194 		goto fail;
195 	}
196 
197 	hdr_len = sizeof(struct virtio_net_hdr);
198 	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
199 		pr_warning("Config tap device TUNSETVNETHDRSZ error");
200 
201 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
202 	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
203 		pr_warning("Config tap device TUNSETOFFLOAD error");
204 		goto fail;
205 	}
206 
207 	if (strcmp(params->script, "none")) {
208 		pid = fork();
209 		if (pid == 0) {
210 			execl(params->script, params->script, ndev->tap_name, NULL);
211 			_exit(1);
212 		} else {
213 			waitpid(pid, &status, 0);
214 			if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
215 				pr_warning("Fail to setup tap by %s", params->script);
216 				goto fail;
217 			}
218 		}
219 	} else {
220 		memset(&ifr, 0, sizeof(ifr));
221 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
222 		sin.sin_addr.s_addr = inet_addr(params->host_ip);
223 		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
224 		ifr.ifr_addr.sa_family = AF_INET;
225 		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
226 			pr_warning("Could not set ip address on tap device");
227 			goto fail;
228 		}
229 	}
230 
231 	memset(&ifr, 0, sizeof(ifr));
232 	strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
233 	ioctl(sock, SIOCGIFFLAGS, &ifr);
234 	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
235 	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
236 		pr_warning("Could not bring tap device up");
237 
238 	close(sock);
239 
240 	return 1;
241 
242 fail:
243 	if (sock >= 0)
244 		close(sock);
245 	if (ndev->tap_fd >= 0)
246 		close(ndev->tap_fd);
247 
248 	return 0;
249 }
250 
251 static void virtio_net__io_thread_init(struct kvm *kvm, struct net_dev *ndev)
252 {
253 	pthread_mutex_init(&ndev->io_tx_lock, NULL);
254 	pthread_mutex_init(&ndev->io_rx_lock, NULL);
255 
256 	pthread_cond_init(&ndev->io_tx_cond, NULL);
257 	pthread_cond_init(&ndev->io_rx_cond, NULL);
258 
259 	pthread_create(&ndev->io_tx_thread, NULL, virtio_net_tx_thread, ndev);
260 	pthread_create(&ndev->io_rx_thread, NULL, virtio_net_rx_thread, ndev);
261 }
262 
263 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
264 {
265 	return writev(ndev->tap_fd, iov, out);
266 }
267 
268 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
269 {
270 	return readv(ndev->tap_fd, iov, in);
271 }
272 
273 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
274 {
275 	return uip_tx(iov, out, &ndev->info);
276 }
277 
278 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
279 {
280 	return uip_rx(iov, in, &ndev->info);
281 }
282 
283 static struct net_dev_operations tap_ops = {
284 	.rx	= tap_ops_rx,
285 	.tx	= tap_ops_tx,
286 };
287 
288 static struct net_dev_operations uip_ops = {
289 	.rx	= uip_ops_rx,
290 	.tx	= uip_ops_tx,
291 };
292 
293 static void set_config(struct kvm *kvm, void *dev, u8 data, u32 offset)
294 {
295 	struct net_dev *ndev = dev;
296 
297 	((u8 *)(&ndev->config))[offset] = data;
298 }
299 
300 static u8 get_config(struct kvm *kvm, void *dev, u32 offset)
301 {
302 	struct net_dev *ndev = dev;
303 
304 	return ((u8 *)(&ndev->config))[offset];
305 }
306 
307 static u32 get_host_features(struct kvm *kvm, void *dev)
308 {
309 	return 1UL << VIRTIO_NET_F_MAC
310 		| 1UL << VIRTIO_NET_F_CSUM
311 		| 1UL << VIRTIO_NET_F_HOST_UFO
312 		| 1UL << VIRTIO_NET_F_HOST_TSO4
313 		| 1UL << VIRTIO_NET_F_HOST_TSO6
314 		| 1UL << VIRTIO_NET_F_GUEST_UFO
315 		| 1UL << VIRTIO_NET_F_GUEST_TSO4
316 		| 1UL << VIRTIO_NET_F_GUEST_TSO6
317 		| 1UL << VIRTIO_RING_F_EVENT_IDX
318 		| 1UL << VIRTIO_RING_F_INDIRECT_DESC;
319 }
320 
321 static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
322 {
323 	struct net_dev *ndev = dev;
324 
325 	ndev->features = features;
326 }
327 
328 static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn)
329 {
330 	struct vhost_vring_state state = { .index = vq };
331 	struct vhost_vring_addr addr;
332 	struct net_dev *ndev = dev;
333 	struct virt_queue *queue;
334 	void *p;
335 	int r;
336 
337 	compat__remove_message(compat_id);
338 
339 	queue		= &ndev->vqs[vq];
340 	queue->pfn	= pfn;
341 	p		= guest_pfn_to_host(kvm, queue->pfn);
342 
343 	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN);
344 
345 	if (ndev->vhost_fd == 0)
346 		return 0;
347 
348 	state.num = queue->vring.num;
349 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
350 	if (r < 0)
351 		die_perror("VHOST_SET_VRING_NUM failed");
352 	state.num = 0;
353 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
354 	if (r < 0)
355 		die_perror("VHOST_SET_VRING_BASE failed");
356 
357 	addr = (struct vhost_vring_addr) {
358 		.index = vq,
359 		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
360 		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
361 		.used_user_addr = (u64)(unsigned long)queue->vring.used,
362 	};
363 
364 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
365 	if (r < 0)
366 		die_perror("VHOST_SET_VRING_ADDR failed");
367 
368 	return 0;
369 }
370 
371 static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
372 {
373 	struct net_dev *ndev = dev;
374 	struct kvm_irqfd irq;
375 	struct vhost_vring_file file;
376 	int r;
377 
378 	if (ndev->vhost_fd == 0)
379 		return;
380 
381 	irq = (struct kvm_irqfd) {
382 		.gsi	= gsi,
383 		.fd	= eventfd(0, 0),
384 	};
385 	file = (struct vhost_vring_file) {
386 		.index	= vq,
387 		.fd	= irq.fd,
388 	};
389 
390 	r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq);
391 	if (r < 0)
392 		die_perror("KVM_IRQFD failed");
393 
394 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
395 	if (r < 0)
396 		die_perror("VHOST_SET_VRING_CALL failed");
397 	file.fd = ndev->tap_fd;
398 	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
399 	if (r != 0)
400 		die("VHOST_NET_SET_BACKEND failed %d", errno);
401 
402 }
403 
404 static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
405 {
406 	struct net_dev *ndev = dev;
407 	struct vhost_vring_file file = {
408 		.index	= vq,
409 		.fd	= efd,
410 	};
411 	int r;
412 
413 	if (ndev->vhost_fd == 0)
414 		return;
415 
416 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
417 	if (r < 0)
418 		die_perror("VHOST_SET_VRING_KICK failed");
419 }
420 
421 static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
422 {
423 	struct net_dev *ndev = dev;
424 
425 	virtio_net_handle_callback(kvm, ndev, vq);
426 
427 	return 0;
428 }
429 
430 static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
431 {
432 	struct net_dev *ndev = dev;
433 
434 	return ndev->vqs[vq].pfn;
435 }
436 
437 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
438 {
439 	return VIRTIO_NET_QUEUE_SIZE;
440 }
441 
442 static struct virtio_ops net_dev_virtio_ops = (struct virtio_ops) {
443 	.set_config		= set_config,
444 	.get_config		= get_config,
445 	.get_host_features	= get_host_features,
446 	.set_guest_features	= set_guest_features,
447 	.init_vq		= init_vq,
448 	.notify_vq		= notify_vq,
449 	.get_pfn_vq		= get_pfn_vq,
450 	.get_size_vq		= get_size_vq,
451 	.notify_vq_gsi		= notify_vq_gsi,
452 	.notify_vq_eventfd	= notify_vq_eventfd,
453 };
454 
455 static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
456 {
457 	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
458 	struct vhost_memory *mem;
459 	int r;
460 
461 	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
462 	if (ndev->vhost_fd < 0)
463 		die_perror("Failed openning vhost-net device");
464 
465 	mem = malloc(sizeof(*mem) + sizeof(struct vhost_memory_region));
466 	if (mem == NULL)
467 		die("Failed allocating memory for vhost memory map");
468 
469 	mem->nregions = 1;
470 	mem->regions[0] = (struct vhost_memory_region) {
471 		.guest_phys_addr	= 0,
472 		.memory_size		= kvm->ram_size,
473 		.userspace_addr		= (unsigned long)kvm->ram_start,
474 	};
475 
476 	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
477 	if (r != 0)
478 		die_perror("VHOST_SET_OWNER failed");
479 
480 	r = ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
481 	if (r != 0)
482 		die_perror("VHOST_SET_FEATURES failed");
483 	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
484 	if (r != 0)
485 		die_perror("VHOST_SET_MEM_TABLE failed");
486 	free(mem);
487 }
488 
489 void virtio_net__init(const struct virtio_net_params *params)
490 {
491 	int i;
492 	struct net_dev *ndev;
493 
494 	if (!params)
495 		return;
496 
497 	ndev = calloc(1, sizeof(struct net_dev));
498 	if (ndev == NULL)
499 		die("Failed allocating ndev");
500 
501 	list_add_tail(&ndev->list, &ndevs);
502 
503 	ndev->kvm = params->kvm;
504 
505 	mutex_init(&ndev->mutex);
506 	ndev->config.status = VIRTIO_NET_S_LINK_UP;
507 
508 	for (i = 0 ; i < 6 ; i++) {
509 		ndev->config.mac[i]		= params->guest_mac[i];
510 		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
511 		ndev->info.host_mac.addr[i]	= params->host_mac[i];
512 	}
513 
514 	ndev->mode = params->mode;
515 	if (ndev->mode == NET_MODE_TAP) {
516 		if (!virtio_net__tap_init(params, ndev))
517 			die_perror("You have requested a TAP device, but creation of one has"
518 					"failed because:");
519 		ndev->ops = &tap_ops;
520 	} else {
521 		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
522 		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
523 		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
524 		ndev->info.buf_nr		= 20,
525 		uip_init(&ndev->info);
526 		ndev->ops = &uip_ops;
527 	}
528 
529 	if (params->trans && strcmp(params->trans, "mmio") == 0)
530 		virtio_init(kvm, ndev, &ndev->vdev, &net_dev_virtio_ops,
531 			    VIRTIO_MMIO, PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);
532 	else
533 		virtio_init(kvm, ndev, &ndev->vdev, &net_dev_virtio_ops,
534 			    VIRTIO_PCI, PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);
535 
536 	if (params->vhost)
537 		virtio_net__vhost_init(params->kvm, ndev);
538 	else
539 		virtio_net__io_thread_init(params->kvm, ndev);
540 
541 	if (compat_id != -1)
542 		compat_id = compat__add_message("virtio-net device was not detected",
543 						"While you have requested a virtio-net device, "
544 						"the guest kernel did not initialize it.\n"
545 						"Please make sure that the guest kernel was "
546 						"compiled with CONFIG_VIRTIO_NET=y enabled "
547 						"in its .config");
548 }
549