xref: /kvmtool/virtio/net.c (revision ad96e8676bfac56bcacaa3cb602e7107fb7a99fd)
1 #include "kvm/virtio-pci-dev.h"
2 #include "kvm/virtio-net.h"
3 #include "kvm/virtio.h"
4 #include "kvm/mutex.h"
5 #include "kvm/util.h"
6 #include "kvm/kvm.h"
7 #include "kvm/irq.h"
8 #include "kvm/uip.h"
9 #include "kvm/guest_compat.h"
10 #include "kvm/iovec.h"
11 
12 #include <linux/vhost.h>
13 #include <linux/virtio_net.h>
14 #include <linux/if_tun.h>
15 #include <linux/types.h>
16 
17 #include <arpa/inet.h>
18 #include <net/if.h>
19 
20 #include <unistd.h>
21 #include <fcntl.h>
22 
23 #include <sys/socket.h>
24 #include <sys/ioctl.h>
25 #include <sys/types.h>
26 #include <sys/wait.h>
27 #include <sys/eventfd.h>
28 
/* Ring size handed to vring_init(); also sizes the per-thread iovec arrays. */
#define VIRTIO_NET_QUEUE_SIZE		256
/* Upper bound on the number of RX/TX queue pairs one device may use. */
#define VIRTIO_NET_NUM_QUEUES		8

struct net_dev;
33 
/*
 * Backend hooks used by the RX/TX threads to exchange packets with the
 * host side. Each hook gets an iovec array, the number of entries in it
 * and the owning device; the threads treat a negative return as failure.
 */
struct net_dev_operations {
	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
	/* NOTE: the count is named "in" here, but the TX thread passes its "out" count. */
	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
};
38 
/* Per-virtqueue state: the ring itself plus the worker thread serving it. */
struct net_dev_queue {
	int				id;	/* virtqueue index, set in init_vq() */
	struct net_dev			*ndev;	/* owning device */
	struct virt_queue		vq;
	pthread_t			thread;	/* worker created in init_vq() */
	struct mutex			lock;	/* held around waits/signals on cond */
	pthread_cond_t			cond;	/* signalled when the guest notifies the queue */
};
47 
/* State of one virtio-net device instance. */
struct net_dev {
	struct mutex			mutex;	/* initialised in virtio_net__init_one() */
	struct virtio_device		vdev;
	struct list_head		list;	/* entry in the file-scope ndevs list */

	/*
	 * Even indices get an RX thread, odd indices a TX thread; index
	 * queue_pairs * 2 is the control queue (see init_vq()).
	 */
	struct net_dev_queue		queues[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	struct virtio_net_config	config;	/* returned by get_config() */
	/* features: mask stored by set_guest_features(); queue_pairs: clamped to 1..VIRTIO_NET_NUM_QUEUES */
	u32				features, queue_pairs;

	int				vhost_fd;	/* 0 means vhost is not in use */
	int				tap_fd;
	char				tap_name[IFNAMSIZ];
	bool				tap_ufo;	/* TUNSETOFFLOAD accepted TUN_F_UFO */

	int				mode;	/* NET_MODE_TAP, otherwise the uip backend is used */

	struct uip_info			info;	/* state passed to the uip_* functions */
	struct net_dev_operations	*ops;	/* tap_ops or uip_ops */
	struct kvm			*kvm;

	struct virtio_net_params	*params;
};
70 
/* Every device created by virtio_net__init_one() is appended here. */
static LIST_HEAD(ndevs);
/* Id returned by virtio_compat_add_message(); -1 until the message is registered. */
static int compat_id = -1;

/* Payload capacity of the RX bounce buffer (the virtio-net header is added on top). */
#define MAX_PACKET_SIZE 65550
75 
76 static bool has_virtio_feature(struct net_dev *ndev, u32 feature)
77 {
78 	return ndev->features & (1 << feature);
79 }
80 
81 static void virtio_net_fix_tx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev)
82 {
83 	hdr->hdr_len		= virtio_guest_to_host_u16(&ndev->vdev, hdr->hdr_len);
84 	hdr->gso_size		= virtio_guest_to_host_u16(&ndev->vdev, hdr->gso_size);
85 	hdr->csum_start		= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_start);
86 	hdr->csum_offset	= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_offset);
87 }
88 
89 static void virtio_net_fix_rx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev)
90 {
91 	hdr->hdr_len		= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr_len);
92 	hdr->gso_size		= virtio_host_to_guest_u16(&ndev->vdev, hdr->gso_size);
93 	hdr->csum_start		= virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_start);
94 	hdr->csum_offset	= virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_offset);
95 }
96 
/*
 * Worker thread for one receive virtqueue (@p is its struct net_dev_queue).
 *
 * Waits on the queue's condition variable until buffers are available,
 * then reads one packet at a time from the backend into a local bounce
 * buffer and copies it into guest buffers, using several descriptor
 * chains when one is too small. The thread exits when the backend's rx
 * hook returns a negative value.
 */
static void *virtio_net_rx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct net_dev_queue *queue = p;
	struct virt_queue *vq = &queue->vq;
	struct net_dev *ndev = queue->ndev;
	struct kvm *kvm;
	u16 out, in;
	u16 head;
	int len, copied;

	kvm__set_thread_name("virtio-net-rx");

	kvm = ndev->kvm;
	while (1) {
		/*
		 * A wakeup with nothing available is harmless: the inner loop
		 * below re-checks and we come back here to wait again.
		 */
		mutex_lock(&queue->lock);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
		mutex_unlock(&queue->lock);

		while (virt_queue__available(vq)) {
			/* Bounce buffer: the backend fills it before any guest buffer is taken. */
			unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)];
			struct iovec dummy_iov = {
				.iov_base = buffer,
				.iov_len  = sizeof(buffer),
			};
			struct virtio_net_hdr_mrg_rxbuf *hdr;
			u16 num_buffers;

			len = ndev->ops->rx(&dummy_iov, 1, ndev);
			if (len < 0) {
				pr_warning("%s: rx on vq %u failed (%d), exiting thread\n",
						__func__, queue->id, len);
				goto out_err;
			}

			copied = num_buffers = 0;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			/* The header lives at the start of the first chain only. */
			hdr = iov[0].iov_base;
			while (copied < len) {
				size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in));

				memcpy_toiovec(iov, buffer + copied, iovsize);
				copied += iovsize;
				/* Record the element now; the used index is advanced once below. */
				virt_queue__set_used_elem_no_update(vq, head, iovsize, num_buffers++);
				if (copied == len)
					break;
				/* Packet does not fit yet: spin until the guest posts another buffer. */
				while (!virt_queue__available(vq))
					sleep(0);
				head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			}

			virtio_net_fix_rx_hdr(&hdr->hdr, ndev);
			if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
				hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers);

			/* Publish all elements used for this packet in one step. */
			virt_queue__used_idx_advance(vq, num_buffers);

			/* We should interrupt guest right now, otherwise latency is huge. */
			if (virtio_queue__should_signal(vq))
				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
		}
	}

out_err:
	pthread_exit(NULL);
	return NULL;

}
166 
167 static void *virtio_net_tx_thread(void *p)
168 {
169 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
170 	struct net_dev_queue *queue = p;
171 	struct virt_queue *vq = &queue->vq;
172 	struct net_dev *ndev = queue->ndev;
173 	struct kvm *kvm;
174 	u16 out, in;
175 	u16 head;
176 	int len;
177 
178 	kvm__set_thread_name("virtio-net-tx");
179 
180 	kvm = ndev->kvm;
181 
182 	while (1) {
183 		mutex_lock(&queue->lock);
184 		if (!virt_queue__available(vq))
185 			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
186 		mutex_unlock(&queue->lock);
187 
188 		while (virt_queue__available(vq)) {
189 			struct virtio_net_hdr *hdr;
190 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
191 			hdr = iov[0].iov_base;
192 			virtio_net_fix_tx_hdr(hdr, ndev);
193 			len = ndev->ops->tx(iov, out, ndev);
194 			if (len < 0) {
195 				pr_warning("%s: tx on vq %u failed (%d)\n",
196 						__func__, queue->id, errno);
197 				goto out_err;
198 			}
199 
200 			virt_queue__set_used_elem(vq, head, len);
201 		}
202 
203 		if (virtio_queue__should_signal(vq))
204 			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
205 	}
206 
207 out_err:
208 	pthread_exit(NULL);
209 	return NULL;
210 }
211 
/*
 * Handle a VIRTIO_NET_CTRL_MQ control request. No state needs changing
 * here, so the request is always acknowledged with VIRTIO_NET_OK.
 */
static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm *kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl)
{
	const virtio_net_ctrl_ack status = VIRTIO_NET_OK;

	return status;
}
217 
218 static void *virtio_net_ctrl_thread(void *p)
219 {
220 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
221 	struct net_dev_queue *queue = p;
222 	struct virt_queue *vq = &queue->vq;
223 	struct net_dev *ndev = queue->ndev;
224 	u16 out, in, head;
225 	struct kvm *kvm = ndev->kvm;
226 	struct virtio_net_ctrl_hdr *ctrl;
227 	virtio_net_ctrl_ack *ack;
228 
229 	kvm__set_thread_name("virtio-net-ctrl");
230 
231 	while (1) {
232 		mutex_lock(&queue->lock);
233 		if (!virt_queue__available(vq))
234 			pthread_cond_wait(&queue->cond, &queue->lock.mutex);
235 		mutex_unlock(&queue->lock);
236 
237 		while (virt_queue__available(vq)) {
238 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
239 			ctrl = iov[0].iov_base;
240 			ack = iov[out].iov_base;
241 
242 			switch (ctrl->class) {
243 			case VIRTIO_NET_CTRL_MQ:
244 				*ack = virtio_net_handle_mq(kvm, ndev, ctrl);
245 				break;
246 			default:
247 				*ack = VIRTIO_NET_ERR;
248 				break;
249 			}
250 			virt_queue__set_used_elem(vq, head, iov[out].iov_len);
251 		}
252 
253 		if (virtio_queue__should_signal(vq))
254 			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, queue->id);
255 	}
256 
257 	pthread_exit(NULL);
258 
259 	return NULL;
260 }
261 
262 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
263 {
264 	struct net_dev_queue *net_queue = &ndev->queues[queue];
265 
266 	if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) {
267 		pr_warning("Unknown queue index %u", queue);
268 		return;
269 	}
270 
271 	mutex_lock(&net_queue->lock);
272 	pthread_cond_signal(&net_queue->cond);
273 	mutex_unlock(&net_queue->lock);
274 }
275 
276 static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr,
277 				  const char *tapname)
278 {
279 	int ret;
280 
281 	memset(ifr, 0, sizeof(*ifr));
282 	ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
283 	if (tapname)
284 		strncpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name));
285 
286 	ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr);
287 
288 	if (ret >= 0)
289 		strncpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name));
290 	return ret;
291 }
292 
/*
 * Run @script with @tap_name as its only argument and wait for it.
 *
 * Returns 0 when the script exited with status 0, and -1 when it could
 * not be started, could not be waited for, exited with a non-zero status
 * or was terminated abnormally.
 */
static int virtio_net_exec_script(const char *script, const char *tap_name)
{
	pid_t pid, waited;
	int status = 0;

	pid = fork();
	if (pid < 0) {
		/* Never pass -1 on to waitpid(): it would wait for any child. */
		pr_warning("Fail to fork for %s", script);
		return -1;
	}

	if (pid == 0) {
		/* A variadic sentinel must be a pointer, not a bare NULL. */
		execl(script, script, tap_name, (char *)NULL);
		_exit(1);
	}

	do {
		waited = waitpid(pid, &status, 0);
	} while (waited < 0 && errno == EINTR);

	if (waited < 0 || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
		pr_warning("Fail to setup tap by %s", script);
		return -1;
	}

	return 0;
}
311 
312 static bool virtio_net__tap_init(struct net_dev *ndev)
313 {
314 	int sock = socket(AF_INET, SOCK_STREAM, 0);
315 	int hdr_len;
316 	struct sockaddr_in sin = {0};
317 	struct ifreq ifr;
318 	const struct virtio_net_params *params = ndev->params;
319 	bool skipconf = !!params->tapif;
320 
321 	hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
322 			sizeof(struct virtio_net_hdr_mrg_rxbuf) :
323 			sizeof(struct virtio_net_hdr);
324 	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
325 		pr_warning("Config tap device TUNSETVNETHDRSZ error");
326 
327 	if (strcmp(params->script, "none")) {
328 		if (virtio_net_exec_script(params->script, ndev->tap_name) < 0)
329 			goto fail;
330 	} else if (!skipconf) {
331 		memset(&ifr, 0, sizeof(ifr));
332 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
333 		sin.sin_addr.s_addr = inet_addr(params->host_ip);
334 		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
335 		ifr.ifr_addr.sa_family = AF_INET;
336 		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
337 			pr_warning("Could not set ip address on tap device");
338 			goto fail;
339 		}
340 	}
341 
342 	if (!skipconf) {
343 		memset(&ifr, 0, sizeof(ifr));
344 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
345 		ioctl(sock, SIOCGIFFLAGS, &ifr);
346 		ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
347 		if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
348 			pr_warning("Could not bring tap device up");
349 	}
350 
351 	close(sock);
352 
353 	return 1;
354 
355 fail:
356 	if (sock >= 0)
357 		close(sock);
358 	if (ndev->tap_fd >= 0)
359 		close(ndev->tap_fd);
360 
361 	return 0;
362 }
363 
364 static bool virtio_net__tap_create(struct net_dev *ndev)
365 {
366 	int offload;
367 	struct ifreq ifr;
368 	const struct virtio_net_params *params = ndev->params;
369 	bool macvtap = (!!params->tapif) && (params->tapif[0] == '/');
370 
371 	/* Did the user already gave us the FD? */
372 	if (params->fd)
373 		ndev->tap_fd = params->fd;
374 	else {
375 		const char *tap_file = "/dev/net/tun";
376 
377 		/* Did the user ask us to use macvtap? */
378 		if (macvtap)
379 			tap_file = params->tapif;
380 
381 		ndev->tap_fd = open(tap_file, O_RDWR);
382 		if (ndev->tap_fd < 0) {
383 			pr_warning("Unable to open %s", tap_file);
384 			return 0;
385 		}
386 	}
387 
388 	if (!macvtap &&
389 	    virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) {
390 		pr_warning("Config tap device error. Are you root?");
391 		goto fail;
392 	}
393 
394 	/*
395 	 * The UFO support had been removed from kernel in commit:
396 	 * ID: fb652fdfe83710da0ca13448a41b7ed027d0a984
397 	 * https://www.spinics.net/lists/netdev/msg443562.html
398 	 * In oder to support the older kernels without this commit,
399 	 * we set the TUN_F_UFO to offload by default to test the status of
400 	 * UFO kernel support.
401 	 */
402 	ndev->tap_ufo = true;
403 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
404 	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
405 		/*
406 		 * Is this failure caused by kernel remove the UFO support?
407 		 * Try TUNSETOFFLOAD without TUN_F_UFO.
408 		 */
409 		offload &= ~TUN_F_UFO;
410 		if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
411 			pr_warning("Config tap device TUNSETOFFLOAD error");
412 			goto fail;
413 		}
414 		ndev->tap_ufo = false;
415 	}
416 
417 	return 1;
418 
419 fail:
420 	if ((ndev->tap_fd >= 0) || (!params->fd) )
421 		close(ndev->tap_fd);
422 
423 	return 0;
424 }
425 
426 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
427 {
428 	return writev(ndev->tap_fd, iov, out);
429 }
430 
431 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
432 {
433 	return readv(ndev->tap_fd, iov, in);
434 }
435 
436 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
437 {
438 	return uip_tx(iov, out, &ndev->info);
439 }
440 
441 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
442 {
443 	return uip_rx(iov, in, &ndev->info);
444 }
445 
446 static struct net_dev_operations tap_ops = {
447 	.rx	= tap_ops_rx,
448 	.tx	= tap_ops_tx,
449 };
450 
451 static struct net_dev_operations uip_ops = {
452 	.rx	= uip_ops_rx,
453 	.tx	= uip_ops_tx,
454 };
455 
456 static u8 *get_config(struct kvm *kvm, void *dev)
457 {
458 	struct net_dev *ndev = dev;
459 
460 	return ((u8 *)(&ndev->config));
461 }
462 
463 static u32 get_host_features(struct kvm *kvm, void *dev)
464 {
465 	u32 features;
466 	struct net_dev *ndev = dev;
467 
468 	features = 1UL << VIRTIO_NET_F_MAC
469 		| 1UL << VIRTIO_NET_F_CSUM
470 		| 1UL << VIRTIO_NET_F_HOST_TSO4
471 		| 1UL << VIRTIO_NET_F_HOST_TSO6
472 		| 1UL << VIRTIO_NET_F_GUEST_TSO4
473 		| 1UL << VIRTIO_NET_F_GUEST_TSO6
474 		| 1UL << VIRTIO_RING_F_EVENT_IDX
475 		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
476 		| 1UL << VIRTIO_NET_F_CTRL_VQ
477 		| 1UL << VIRTIO_NET_F_MRG_RXBUF
478 		| 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
479 
480 	/*
481 	 * The UFO feature for host and guest only can be enabled when the
482 	 * kernel has TAP UFO support.
483 	 */
484 	if (ndev->tap_ufo)
485 		features |= (1UL << VIRTIO_NET_F_HOST_UFO
486 				| 1UL << VIRTIO_NET_F_GUEST_UFO);
487 
488 	return features;
489 }
490 
491 static int virtio_net__vhost_set_features(struct net_dev *ndev)
492 {
493 	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
494 	u64 vhost_features;
495 
496 	if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0)
497 		die_perror("VHOST_GET_FEATURES failed");
498 
499 	/* make sure both side support mergable rx buffers */
500 	if (vhost_features & 1UL << VIRTIO_NET_F_MRG_RXBUF &&
501 			has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
502 		features |= 1UL << VIRTIO_NET_F_MRG_RXBUF;
503 
504 	return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
505 }
506 
507 static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
508 {
509 	struct net_dev *ndev = dev;
510 	struct virtio_net_config *conf = &ndev->config;
511 
512 	ndev->features = features;
513 
514 	conf->status = virtio_host_to_guest_u16(&ndev->vdev, conf->status);
515 	conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev,
516 							     conf->max_virtqueue_pairs);
517 }
518 
519 static void virtio_net_start(struct net_dev *ndev)
520 {
521 	if (ndev->mode == NET_MODE_TAP) {
522 		if (!virtio_net__tap_init(ndev))
523 			die_perror("TAP device initialized failed because");
524 
525 		if (ndev->vhost_fd &&
526 				virtio_net__vhost_set_features(ndev) != 0)
527 			die_perror("VHOST_SET_FEATURES failed");
528 	} else {
529 		ndev->info.vnet_hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
530 						sizeof(struct virtio_net_hdr_mrg_rxbuf) :
531 						sizeof(struct virtio_net_hdr);
532 		uip_init(&ndev->info);
533 	}
534 }
535 
536 static void notify_status(struct kvm *kvm, void *dev, u32 status)
537 {
538 	if (status & VIRTIO__STATUS_START)
539 		virtio_net_start(dev);
540 }
541 
542 static bool is_ctrl_vq(struct net_dev *ndev, u32 vq)
543 {
544 	return vq == (u32)(ndev->queue_pairs * 2);
545 }
546 
547 static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align,
548 		   u32 pfn)
549 {
550 	struct vhost_vring_state state = { .index = vq };
551 	struct net_dev_queue *net_queue;
552 	struct vhost_vring_addr addr;
553 	struct net_dev *ndev = dev;
554 	struct virt_queue *queue;
555 	void *p;
556 	int r;
557 
558 	compat__remove_message(compat_id);
559 
560 	net_queue	= &ndev->queues[vq];
561 	net_queue->id	= vq;
562 	net_queue->ndev	= ndev;
563 	queue		= &net_queue->vq;
564 	queue->pfn	= pfn;
565 	p		= virtio_get_vq(kvm, queue->pfn, page_size);
566 
567 	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, align);
568 	virtio_init_device_vq(&ndev->vdev, queue);
569 
570 	mutex_init(&net_queue->lock);
571 	pthread_cond_init(&net_queue->cond, NULL);
572 	if (is_ctrl_vq(ndev, vq)) {
573 		pthread_create(&net_queue->thread, NULL, virtio_net_ctrl_thread,
574 			       net_queue);
575 
576 		return 0;
577 	} else if (ndev->vhost_fd == 0 ) {
578 		if (vq & 1)
579 			pthread_create(&net_queue->thread, NULL,
580 				       virtio_net_tx_thread, net_queue);
581 		else
582 			pthread_create(&net_queue->thread, NULL,
583 				       virtio_net_rx_thread, net_queue);
584 
585 		return 0;
586 	}
587 
588 	if (queue->endian != VIRTIO_ENDIAN_HOST)
589 		die_perror("VHOST requires the same endianness in guest and host");
590 
591 	state.num = queue->vring.num;
592 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
593 	if (r < 0)
594 		die_perror("VHOST_SET_VRING_NUM failed");
595 	state.num = 0;
596 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
597 	if (r < 0)
598 		die_perror("VHOST_SET_VRING_BASE failed");
599 
600 	addr = (struct vhost_vring_addr) {
601 		.index = vq,
602 		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
603 		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
604 		.used_user_addr = (u64)(unsigned long)queue->vring.used,
605 	};
606 
607 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
608 	if (r < 0)
609 		die_perror("VHOST_SET_VRING_ADDR failed");
610 
611 	return 0;
612 }
613 
614 static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
615 {
616 	struct net_dev *ndev = dev;
617 	struct net_dev_queue *queue = &ndev->queues[vq];
618 	struct vhost_vring_file file;
619 	int r;
620 
621 	if (ndev->vhost_fd == 0)
622 		return;
623 
624 	file = (struct vhost_vring_file) {
625 		.index	= vq,
626 		.fd	= eventfd(0, 0),
627 	};
628 
629 	r = irq__add_irqfd(kvm, gsi, file.fd, -1);
630 	if (r < 0)
631 		die_perror("KVM_IRQFD failed");
632 
633 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
634 	if (r < 0)
635 		die_perror("VHOST_SET_VRING_CALL failed");
636 	file.fd = ndev->tap_fd;
637 	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
638 	if (r != 0)
639 		die("VHOST_NET_SET_BACKEND failed %d", errno);
640 
641 }
642 
643 static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
644 {
645 	struct net_dev *ndev = dev;
646 	struct vhost_vring_file file = {
647 		.index	= vq,
648 		.fd	= efd,
649 	};
650 	int r;
651 
652 	if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
653 		return;
654 
655 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
656 	if (r < 0)
657 		die_perror("VHOST_SET_VRING_KICK failed");
658 }
659 
660 static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
661 {
662 	struct net_dev *ndev = dev;
663 
664 	virtio_net_handle_callback(kvm, ndev, vq);
665 
666 	return 0;
667 }
668 
669 static struct virt_queue *get_vq(struct kvm *kvm, void *dev, u32 vq)
670 {
671 	struct net_dev *ndev = dev;
672 
673 	return &ndev->queues[vq].vq;
674 }
675 
676 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
677 {
678 	/* FIXME: dynamic */
679 	return VIRTIO_NET_QUEUE_SIZE;
680 }
681 
682 static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
683 {
684 	/* FIXME: dynamic */
685 	return size;
686 }
687 
688 static int get_vq_count(struct kvm *kvm, void *dev)
689 {
690 	struct net_dev *ndev = dev;
691 
692 	return ndev->queue_pairs * 2 + 1;
693 }
694 
695 static struct virtio_ops net_dev_virtio_ops = {
696 	.get_config		= get_config,
697 	.get_host_features	= get_host_features,
698 	.set_guest_features	= set_guest_features,
699 	.get_vq_count		= get_vq_count,
700 	.init_vq		= init_vq,
701 	.get_vq			= get_vq,
702 	.get_size_vq		= get_size_vq,
703 	.set_size_vq		= set_size_vq,
704 	.notify_vq		= notify_vq,
705 	.notify_vq_gsi		= notify_vq_gsi,
706 	.notify_vq_eventfd	= notify_vq_eventfd,
707 	.notify_status		= notify_status,
708 };
709 
710 static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
711 {
712 	struct kvm_mem_bank *bank;
713 	struct vhost_memory *mem;
714 	int r, i;
715 
716 	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
717 	if (ndev->vhost_fd < 0)
718 		die_perror("Failed openning vhost-net device");
719 
720 	mem = calloc(1, sizeof(*mem) + kvm->mem_slots * sizeof(struct vhost_memory_region));
721 	if (mem == NULL)
722 		die("Failed allocating memory for vhost memory map");
723 
724 	i = 0;
725 	list_for_each_entry(bank, &kvm->mem_banks, list) {
726 		mem->regions[i] = (struct vhost_memory_region) {
727 			.guest_phys_addr = bank->guest_phys_addr,
728 			.memory_size	 = bank->size,
729 			.userspace_addr	 = (unsigned long)bank->host_addr,
730 		};
731 		i++;
732 	}
733 	mem->nregions = i;
734 
735 	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
736 	if (r != 0)
737 		die_perror("VHOST_SET_OWNER failed");
738 
739 	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
740 	if (r != 0)
741 		die_perror("VHOST_SET_MEM_TABLE failed");
742 
743 	ndev->vdev.use_vhost = true;
744 
745 	free(mem);
746 }
747 
/*
 * Parse a "xx:xx:xx:xx:xx:xx" string into the six bytes at @mac. Bytes for
 * which no field could be parsed are left unchanged.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	unsigned char *octet = (unsigned char *)mac;

	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
	       &octet[0], &octet[1], &octet[2],
	       &octet[3], &octet[4], &octet[5]);
}
753 static int set_net_param(struct kvm *kvm, struct virtio_net_params *p,
754 			const char *param, const char *val)
755 {
756 	if (strcmp(param, "guest_mac") == 0) {
757 		str_to_mac(val, p->guest_mac);
758 	} else if (strcmp(param, "mode") == 0) {
759 		if (!strncmp(val, "user", 4)) {
760 			int i;
761 
762 			for (i = 0; i < kvm->cfg.num_net_devices; i++)
763 				if (kvm->cfg.net_params[i].mode == NET_MODE_USER)
764 					die("Only one usermode network device allowed at a time");
765 			p->mode = NET_MODE_USER;
766 		} else if (!strncmp(val, "tap", 3)) {
767 			p->mode = NET_MODE_TAP;
768 		} else if (!strncmp(val, "none", 4)) {
769 			kvm->cfg.no_net = 1;
770 			return -1;
771 		} else
772 			die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network);
773 	} else if (strcmp(param, "script") == 0) {
774 		p->script = strdup(val);
775 	} else if (strcmp(param, "downscript") == 0) {
776 		p->downscript = strdup(val);
777 	} else if (strcmp(param, "guest_ip") == 0) {
778 		p->guest_ip = strdup(val);
779 	} else if (strcmp(param, "host_ip") == 0) {
780 		p->host_ip = strdup(val);
781 	} else if (strcmp(param, "trans") == 0) {
782 		p->trans = strdup(val);
783 	} else if (strcmp(param, "tapif") == 0) {
784 		p->tapif = strdup(val);
785 	} else if (strcmp(param, "vhost") == 0) {
786 		p->vhost = atoi(val);
787 	} else if (strcmp(param, "fd") == 0) {
788 		p->fd = atoi(val);
789 	} else if (strcmp(param, "mq") == 0) {
790 		p->mq = atoi(val);
791 	} else
792 		die("Unknown network parameter %s", param);
793 
794 	return 0;
795 }
796 
797 int netdev_parser(const struct option *opt, const char *arg, int unset)
798 {
799 	struct virtio_net_params p;
800 	char *buf = NULL, *cmd = NULL, *cur = NULL;
801 	bool on_cmd = true;
802 	struct kvm *kvm = opt->ptr;
803 
804 	if (arg) {
805 		buf = strdup(arg);
806 		if (buf == NULL)
807 			die("Failed allocating new net buffer");
808 		cur = strtok(buf, ",=");
809 	}
810 
811 	p = (struct virtio_net_params) {
812 		.guest_ip	= DEFAULT_GUEST_ADDR,
813 		.host_ip	= DEFAULT_HOST_ADDR,
814 		.script		= DEFAULT_SCRIPT,
815 		.downscript	= DEFAULT_SCRIPT,
816 		.mode		= NET_MODE_TAP,
817 	};
818 
819 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
820 	p.guest_mac[5] += kvm->cfg.num_net_devices;
821 
822 	while (cur) {
823 		if (on_cmd) {
824 			cmd = cur;
825 		} else {
826 			if (set_net_param(kvm, &p, cmd, cur) < 0)
827 				goto done;
828 		}
829 		on_cmd = !on_cmd;
830 
831 		cur = strtok(NULL, ",=");
832 	};
833 
834 	kvm->cfg.num_net_devices++;
835 
836 	kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params));
837 	if (kvm->cfg.net_params == NULL)
838 		die("Failed adding new network device");
839 
840 	kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p;
841 
842 done:
843 	free(buf);
844 	return 0;
845 }
846 
847 static int virtio_net__init_one(struct virtio_net_params *params)
848 {
849 	int i, err;
850 	struct net_dev *ndev;
851 	struct virtio_ops *ops;
852 	enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm);
853 
854 	ndev = calloc(1, sizeof(struct net_dev));
855 	if (ndev == NULL)
856 		return -ENOMEM;
857 
858 	ops = malloc(sizeof(*ops));
859 	if (ops == NULL) {
860 		err = -ENOMEM;
861 		goto err_free_ndev;
862 	}
863 
864 	list_add_tail(&ndev->list, &ndevs);
865 
866 	ndev->kvm = params->kvm;
867 	ndev->params = params;
868 
869 	mutex_init(&ndev->mutex);
870 	ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq));
871 	ndev->config.status = VIRTIO_NET_S_LINK_UP;
872 	if (ndev->queue_pairs > 1)
873 		ndev->config.max_virtqueue_pairs = ndev->queue_pairs;
874 
875 	for (i = 0 ; i < 6 ; i++) {
876 		ndev->config.mac[i]		= params->guest_mac[i];
877 		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
878 		ndev->info.host_mac.addr[i]	= params->host_mac[i];
879 	}
880 
881 	ndev->mode = params->mode;
882 	if (ndev->mode == NET_MODE_TAP) {
883 		ndev->ops = &tap_ops;
884 		if (!virtio_net__tap_create(ndev))
885 			die_perror("You have requested a TAP device, but creation of one has failed because");
886 	} else {
887 		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
888 		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
889 		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
890 		ndev->info.buf_nr		= 20,
891 		ndev->ops = &uip_ops;
892 		uip_static_init(&ndev->info);
893 	}
894 
895 	*ops = net_dev_virtio_ops;
896 
897 	if (params->trans) {
898 		if (strcmp(params->trans, "mmio") == 0)
899 			trans = VIRTIO_MMIO;
900 		else if (strcmp(params->trans, "pci") == 0)
901 			trans = VIRTIO_PCI;
902 		else
903 			pr_warning("virtio-net: Unknown transport method : %s, "
904 				   "falling back to %s.", params->trans,
905 				   virtio_trans_name(trans));
906 	}
907 
908 	virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans,
909 		    PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);
910 
911 	if (params->vhost)
912 		virtio_net__vhost_init(params->kvm, ndev);
913 
914 	if (compat_id == -1)
915 		compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");
916 
917 	return 0;
918 
919 err_free_ndev:
920 	free(ndev);
921 	return err;
922 }
923 
924 int virtio_net__init(struct kvm *kvm)
925 {
926 	int i;
927 
928 	for (i = 0; i < kvm->cfg.num_net_devices; i++) {
929 		kvm->cfg.net_params[i].kvm = kvm;
930 		virtio_net__init_one(&kvm->cfg.net_params[i]);
931 	}
932 
933 	if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) {
934 		static struct virtio_net_params net_params;
935 
936 		net_params = (struct virtio_net_params) {
937 			.guest_ip	= kvm->cfg.guest_ip,
938 			.host_ip	= kvm->cfg.host_ip,
939 			.kvm		= kvm,
940 			.script		= kvm->cfg.script,
941 			.mode		= NET_MODE_USER,
942 		};
943 		str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac);
944 		str_to_mac(kvm->cfg.host_mac, net_params.host_mac);
945 
946 		virtio_net__init_one(&net_params);
947 	}
948 
949 	return 0;
950 }
951 virtio_dev_init(virtio_net__init);
952 
953 int virtio_net__exit(struct kvm *kvm)
954 {
955 	struct virtio_net_params *params;
956 	struct net_dev *ndev;
957 	struct list_head *ptr;
958 
959 	list_for_each(ptr, &ndevs) {
960 		ndev = list_entry(ptr, struct net_dev, list);
961 		params = ndev->params;
962 		/* Cleanup any tap device which attached to bridge */
963 		if (ndev->mode == NET_MODE_TAP &&
964 		    strcmp(params->downscript, "none"))
965 			virtio_net_exec_script(params->downscript, ndev->tap_name);
966 	}
967 	return 0;
968 }
969 virtio_dev_exit(virtio_net__exit);
970