xref: /kvmtool/virtio/net.c (revision 95242e44535bfb37b4ac4c90c353d5b73824a08e)
1 #include "kvm/virtio-pci-dev.h"
2 #include "kvm/virtio-net.h"
3 #include "kvm/virtio.h"
4 #include "kvm/mutex.h"
5 #include "kvm/util.h"
6 #include "kvm/kvm.h"
7 #include "kvm/irq.h"
8 #include "kvm/uip.h"
9 #include "kvm/guest_compat.h"
10 #include "kvm/iovec.h"
11 
12 #include <linux/vhost.h>
13 #include <linux/virtio_net.h>
14 #include <linux/if_tun.h>
15 #include <linux/types.h>
16 
17 #include <arpa/inet.h>
18 #include <net/if.h>
19 
20 #include <unistd.h>
21 #include <fcntl.h>
22 
23 #include <sys/socket.h>
24 #include <sys/ioctl.h>
25 #include <sys/types.h>
26 #include <sys/wait.h>
27 #include <sys/eventfd.h>
28 
29 #define VIRTIO_NET_QUEUE_SIZE		256
30 #define VIRTIO_NET_NUM_QUEUES		8
31 
32 struct net_dev;
33 
/*
 * Backend packet I/O hooks.  One implementation exists for tap mode and
 * one for user-mode (uip) networking; both return the number of bytes
 * transferred or a negative value on failure.
 */
struct net_dev_operations {
	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
};
38 
/*
 * Per-device state for one virtio-net instance.  The vqs/io_thread/
 * io_lock/io_cond arrays are sized for the maximum rx/tx queue pairs
 * plus one control queue.
 */
struct net_dev {
	struct mutex			mutex;		/* serializes rx_vqs/tx_vqs thread-id handout */
	struct virtio_device		vdev;
	struct list_head		list;		/* linkage on the global ndevs list */

	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	struct virtio_net_config	config;
	u32				features, rx_vqs, tx_vqs, queue_pairs;

	pthread_t			io_thread[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	struct mutex			io_lock[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	pthread_cond_t			io_cond[VIRTIO_NET_NUM_QUEUES * 2 + 1];

	int				vhost_fd;	/* 0 when vhost acceleration is disabled */
	int				tap_fd;
	char				tap_name[IFNAMSIZ];
	bool				tap_ufo;	/* kernel tap still supports TUN_F_UFO */

	int				mode;		/* NET_MODE_TAP or NET_MODE_USER */

	struct uip_info			info;		/* user-mode network stack state */
	struct net_dev_operations	*ops;
	struct kvm			*kvm;

	struct virtio_net_params	*params;
};
65 
66 static LIST_HEAD(ndevs);
67 static int compat_id = -1;
68 
69 #define MAX_PACKET_SIZE 65550
70 
71 static bool has_virtio_feature(struct net_dev *ndev, u32 feature)
72 {
73 	return ndev->features & (1 << feature);
74 }
75 
76 static void virtio_net_fix_tx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev)
77 {
78 	hdr->hdr_len		= virtio_guest_to_host_u16(&ndev->vdev, hdr->hdr_len);
79 	hdr->gso_size		= virtio_guest_to_host_u16(&ndev->vdev, hdr->gso_size);
80 	hdr->csum_start		= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_start);
81 	hdr->csum_offset	= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_offset);
82 }
83 
84 static void virtio_net_fix_rx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev)
85 {
86 	hdr->hdr_len		= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr_len);
87 	hdr->gso_size		= virtio_host_to_guest_u16(&ndev->vdev, hdr->gso_size);
88 	hdr->csum_start		= virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_start);
89 	hdr->csum_offset	= virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_offset);
90 }
91 
/*
 * Per-rx-queue worker thread: pulls packets from the backend (tap or
 * uip), scatters each packet into as many guest rx descriptors as it
 * needs, then signals the guest.  Exits on a backend read error.
 */
static void *virtio_net_rx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	struct net_dev *ndev = p;
	u16 out, in;
	u16 head;
	int len, copied;
	u32 id;

	/* Claim the next rx queue index; rx queues sit at even slots. */
	mutex_lock(&ndev->mutex);
	id = ndev->rx_vqs++ * 2;
	mutex_unlock(&ndev->mutex);

	kvm__set_thread_name("virtio-net-rx");

	kvm = ndev->kvm;
	vq = &ndev->vqs[id];

	while (1) {
		/* Block until the guest posts rx buffers and kicks us. */
		mutex_lock(&ndev->io_lock[id]);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
		mutex_unlock(&ndev->io_lock[id]);

		while (virt_queue__available(vq)) {
			/* Staging buffer large enough for a max-size packet plus header. */
			unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)];
			struct iovec dummy_iov = {
				.iov_base = buffer,
				.iov_len  = sizeof(buffer),
			};
			struct virtio_net_hdr_mrg_rxbuf *hdr;
			u16 num_buffers;

			len = ndev->ops->rx(&dummy_iov, 1, ndev);
			if (len < 0) {
				pr_warning("%s: rx on vq %u failed (%d), exiting thread\n",
						__func__, id, len);
				goto out_err;
			}

			/*
			 * Copy the packet into one or more descriptor chains,
			 * marking each chain used without advancing the used
			 * index until the whole packet is placed.
			 */
			copied = num_buffers = 0;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			hdr = iov[0].iov_base;
			while (copied < len) {
				size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in));

				memcpy_toiovec(iov, buffer + copied, iovsize);
				copied += iovsize;
				virt_queue__set_used_elem_no_update(vq, head, iovsize, num_buffers++);
				if (copied == len)
					break;
				/* Busy-wait for the guest to refill the ring. */
				while (!virt_queue__available(vq))
					sleep(0);
				head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			}

			virtio_net_fix_rx_hdr(&hdr->hdr, ndev);
			/* NOTE(review): other call sites pass &ndev->vdev here, not vq — confirm intended. */
			if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
				hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers);

			/* Publish all chains of this packet at once. */
			virt_queue__used_idx_advance(vq, num_buffers);

			/* We should interrupt guest right now, otherwise latency is huge. */
			if (virtio_queue__should_signal(vq))
				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
		}
	}

out_err:
	pthread_exit(NULL);
	return NULL;

}
167 
/*
 * Per-tx-queue worker thread: drains guest tx descriptors, fixes the
 * header endianness and pushes each packet to the backend.  Exits on a
 * backend write error.
 */
static void *virtio_net_tx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	struct net_dev *ndev = p;
	u16 out, in;
	u16 head;
	int len;
	u32 id;

	/* Claim the next tx queue index; tx queues sit at odd slots. */
	mutex_lock(&ndev->mutex);
	id = ndev->tx_vqs++ * 2 + 1;
	mutex_unlock(&ndev->mutex);

	kvm__set_thread_name("virtio-net-tx");

	kvm = ndev->kvm;
	vq = &ndev->vqs[id];

	while (1) {
		/* Block until the guest posts tx buffers and kicks us. */
		mutex_lock(&ndev->io_lock[id]);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
		mutex_unlock(&ndev->io_lock[id]);

		while (virt_queue__available(vq)) {
			struct virtio_net_hdr *hdr;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			hdr = iov[0].iov_base;
			virtio_net_fix_tx_hdr(hdr, ndev);
			len = ndev->ops->tx(iov, out, ndev);
			if (len < 0) {
				pr_warning("%s: tx on vq %u failed (%d)\n",
						__func__, id, errno);
				goto out_err;
			}

			virt_queue__set_used_elem(vq, head, len);
		}

		/* Signal once per batch; tx latency is less critical than rx. */
		if (virtio_queue__should_signal(vq))
			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
	}

out_err:
	pthread_exit(NULL);
	return NULL;
}
217 
/*
 * Handle a VIRTIO_NET_CTRL_MQ control request.  No device-side state
 * needs to change, so the request is simply acknowledged.
 * (kvm/ndev/ctrl are unused today but keep the handler signature uniform.)
 */
static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm* kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl)
{
	/* Not much to do here */
	return VIRTIO_NET_OK;
}
223 
224 static void *virtio_net_ctrl_thread(void *p)
225 {
226 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
227 	u16 out, in, head;
228 	struct net_dev *ndev = p;
229 	struct kvm *kvm = ndev->kvm;
230 	u32 id = ndev->queue_pairs * 2;
231 	struct virt_queue *vq = &ndev->vqs[id];
232 	struct virtio_net_ctrl_hdr *ctrl;
233 	virtio_net_ctrl_ack *ack;
234 
235 	kvm__set_thread_name("virtio-net-ctrl");
236 
237 	while (1) {
238 		mutex_lock(&ndev->io_lock[id]);
239 		if (!virt_queue__available(vq))
240 			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
241 		mutex_unlock(&ndev->io_lock[id]);
242 
243 		while (virt_queue__available(vq)) {
244 			head = virt_queue__get_iov(&ndev->vqs[id], iov, &out, &in, kvm);
245 			ctrl = iov[0].iov_base;
246 			ack = iov[out].iov_base;
247 
248 			switch (ctrl->class) {
249 			case VIRTIO_NET_CTRL_MQ:
250 				*ack = virtio_net_handle_mq(kvm, ndev, ctrl);
251 				break;
252 			default:
253 				*ack = VIRTIO_NET_ERR;
254 				break;
255 			}
256 			virt_queue__set_used_elem(&ndev->vqs[id], head, iov[out].iov_len);
257 		}
258 
259 		if (virtio_queue__should_signal(&ndev->vqs[id]))
260 			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
261 	}
262 
263 	pthread_exit(NULL);
264 
265 	return NULL;
266 }
267 
268 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
269 {
270 	if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) {
271 		pr_warning("Unknown queue index %u", queue);
272 		return;
273 	}
274 
275 	mutex_lock(&ndev->io_lock[queue]);
276 	pthread_cond_signal(&ndev->io_cond[queue]);
277 	mutex_unlock(&ndev->io_lock[queue]);
278 }
279 
280 static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr,
281 				  const char *tapname)
282 {
283 	int ret;
284 
285 	memset(ifr, 0, sizeof(*ifr));
286 	ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
287 	if (tapname)
288 		strncpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name));
289 
290 	ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr);
291 
292 	if (ret >= 0)
293 		strncpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name));
294 	return ret;
295 }
296 
/*
 * Run @script with @tap_name as its single argument and wait for it.
 * Returns 0 when the script exits with status 0, -1 otherwise.
 *
 * Fixes: check fork() for failure; treat a script killed by a signal
 * as a failure (the old WIFEXITED && status != 0 test reported success
 * in that case); pass a (char *) null terminator to the variadic
 * execl() as POSIX requires.
 */
static int virtio_net_exec_script(const char* script, const char *tap_name)
{
	pid_t pid;
	int status;

	pid = fork();
	if (pid < 0) {
		pr_warning("Failed to fork to run %s", script);
		return -1;
	}
	if (pid == 0) {
		execl(script, script, tap_name, (char *)NULL);
		_exit(1);	/* exec failed */
	}
	waitpid(pid, &status, 0);
	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
		pr_warning("Fail to setup tap by %s", script);
		return -1;
	}
	return 0;
}
315 
316 static bool virtio_net__tap_init(struct net_dev *ndev)
317 {
318 	int sock = socket(AF_INET, SOCK_STREAM, 0);
319 	int hdr_len;
320 	struct sockaddr_in sin = {0};
321 	struct ifreq ifr;
322 	const struct virtio_net_params *params = ndev->params;
323 	bool skipconf = !!params->tapif;
324 
325 	hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
326 			sizeof(struct virtio_net_hdr_mrg_rxbuf) :
327 			sizeof(struct virtio_net_hdr);
328 	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
329 		pr_warning("Config tap device TUNSETVNETHDRSZ error");
330 
331 	if (strcmp(params->script, "none")) {
332 		if (virtio_net_exec_script(params->script, ndev->tap_name) < 0)
333 			goto fail;
334 	} else if (!skipconf) {
335 		memset(&ifr, 0, sizeof(ifr));
336 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
337 		sin.sin_addr.s_addr = inet_addr(params->host_ip);
338 		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
339 		ifr.ifr_addr.sa_family = AF_INET;
340 		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
341 			pr_warning("Could not set ip address on tap device");
342 			goto fail;
343 		}
344 	}
345 
346 	if (!skipconf) {
347 		memset(&ifr, 0, sizeof(ifr));
348 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
349 		ioctl(sock, SIOCGIFFLAGS, &ifr);
350 		ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
351 		if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
352 			pr_warning("Could not bring tap device up");
353 	}
354 
355 	close(sock);
356 
357 	return 1;
358 
359 fail:
360 	if (sock >= 0)
361 		close(sock);
362 	if (ndev->tap_fd >= 0)
363 		close(ndev->tap_fd);
364 
365 	return 0;
366 }
367 
368 static bool virtio_net__tap_create(struct net_dev *ndev)
369 {
370 	int offload;
371 	struct ifreq ifr;
372 	const struct virtio_net_params *params = ndev->params;
373 	bool macvtap = (!!params->tapif) && (params->tapif[0] == '/');
374 
375 	/* Did the user already gave us the FD? */
376 	if (params->fd)
377 		ndev->tap_fd = params->fd;
378 	else {
379 		const char *tap_file = "/dev/net/tun";
380 
381 		/* Did the user ask us to use macvtap? */
382 		if (macvtap)
383 			tap_file = params->tapif;
384 
385 		ndev->tap_fd = open(tap_file, O_RDWR);
386 		if (ndev->tap_fd < 0) {
387 			pr_warning("Unable to open %s", tap_file);
388 			return 0;
389 		}
390 	}
391 
392 	if (!macvtap &&
393 	    virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) {
394 		pr_warning("Config tap device error. Are you root?");
395 		goto fail;
396 	}
397 
398 	/*
399 	 * The UFO support had been removed from kernel in commit:
400 	 * ID: fb652fdfe83710da0ca13448a41b7ed027d0a984
401 	 * https://www.spinics.net/lists/netdev/msg443562.html
402 	 * In oder to support the older kernels without this commit,
403 	 * we set the TUN_F_UFO to offload by default to test the status of
404 	 * UFO kernel support.
405 	 */
406 	ndev->tap_ufo = true;
407 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
408 	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
409 		/*
410 		 * Is this failure caused by kernel remove the UFO support?
411 		 * Try TUNSETOFFLOAD without TUN_F_UFO.
412 		 */
413 		offload &= ~TUN_F_UFO;
414 		if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
415 			pr_warning("Config tap device TUNSETOFFLOAD error");
416 			goto fail;
417 		}
418 		ndev->tap_ufo = false;
419 	}
420 
421 	return 1;
422 
423 fail:
424 	if ((ndev->tap_fd >= 0) || (!params->fd) )
425 		close(ndev->tap_fd);
426 
427 	return 0;
428 }
429 
/* Write a guest packet to the tap fd; returns bytes written or -1. */
static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return writev(ndev->tap_fd, iov, out);
}
434 
/* Read one packet from the tap fd into iov; returns bytes read or -1. */
static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return readv(ndev->tap_fd, iov, in);
}
439 
/* Hand a guest packet to the user-mode (uip) network stack. */
static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return uip_tx(iov, out, &ndev->info);
}
444 
/* Fetch a packet from the user-mode (uip) network stack into iov. */
static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return uip_rx(iov, in, &ndev->info);
}
449 
/* Backend callbacks for tap (kernel tap device) mode. */
static struct net_dev_operations tap_ops = {
	.rx	= tap_ops_rx,
	.tx	= tap_ops_tx,
};

/* Backend callbacks for user-mode (uip) networking. */
static struct net_dev_operations uip_ops = {
	.rx	= uip_ops_rx,
	.tx	= uip_ops_tx,
};
459 
460 static u8 *get_config(struct kvm *kvm, void *dev)
461 {
462 	struct net_dev *ndev = dev;
463 
464 	return ((u8 *)(&ndev->config));
465 }
466 
467 static u32 get_host_features(struct kvm *kvm, void *dev)
468 {
469 	u32 features;
470 	struct net_dev *ndev = dev;
471 
472 	features = 1UL << VIRTIO_NET_F_MAC
473 		| 1UL << VIRTIO_NET_F_CSUM
474 		| 1UL << VIRTIO_NET_F_HOST_TSO4
475 		| 1UL << VIRTIO_NET_F_HOST_TSO6
476 		| 1UL << VIRTIO_NET_F_GUEST_TSO4
477 		| 1UL << VIRTIO_NET_F_GUEST_TSO6
478 		| 1UL << VIRTIO_RING_F_EVENT_IDX
479 		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
480 		| 1UL << VIRTIO_NET_F_CTRL_VQ
481 		| 1UL << VIRTIO_NET_F_MRG_RXBUF
482 		| 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
483 
484 	/*
485 	 * The UFO feature for host and guest only can be enabled when the
486 	 * kernel has TAP UFO support.
487 	 */
488 	if (ndev->tap_ufo)
489 		features |= (1UL << VIRTIO_NET_F_HOST_UFO
490 				| 1UL << VIRTIO_NET_F_GUEST_UFO);
491 
492 	return features;
493 }
494 
/*
 * Negotiate features with the vhost-net backend: always request
 * EVENT_IDX, and enable mergeable rx buffers only when both the vhost
 * kernel side and the guest support them.  Returns the ioctl result.
 */
static int virtio_net__vhost_set_features(struct net_dev *ndev)
{
	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
	u64 vhost_features;

	if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0)
		die_perror("VHOST_GET_FEATURES failed");

	/* make sure both side support mergable rx buffers */
	if (vhost_features & 1UL << VIRTIO_NET_F_MRG_RXBUF &&
			has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
		features |= 1UL << VIRTIO_NET_F_MRG_RXBUF;

	return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
}
510 
/*
 * virtio_ops hook: record the feature bits the guest acked and convert
 * the multi-byte config fields to guest endianness.
 * NOTE(review): the conversion runs on every call — presumably this
 * hook fires once per negotiation; confirm the fields are not
 * double-swapped if it can be invoked twice.
 */
static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
{
	struct net_dev *ndev = dev;
	struct virtio_net_config *conf = &ndev->config;

	ndev->features = features;

	conf->status = virtio_host_to_guest_u16(&ndev->vdev, conf->status);
	conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev,
							     conf->max_virtqueue_pairs);
}
522 
523 static void virtio_net_start(struct net_dev *ndev)
524 {
525 	if (ndev->mode == NET_MODE_TAP) {
526 		if (!virtio_net__tap_init(ndev))
527 			die_perror("TAP device initialized failed because");
528 
529 		if (ndev->vhost_fd &&
530 				virtio_net__vhost_set_features(ndev) != 0)
531 			die_perror("VHOST_SET_FEATURES failed");
532 	} else {
533 		ndev->info.vnet_hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
534 						sizeof(struct virtio_net_hdr_mrg_rxbuf) :
535 						sizeof(struct virtio_net_hdr);
536 		uip_init(&ndev->info);
537 	}
538 }
539 
/* virtio_ops hook: start the backend when the guest flips DRIVER_OK. */
static void notify_status(struct kvm *kvm, void *dev, u32 status)
{
	if (status & VIRTIO__STATUS_START)
		virtio_net_start(dev);
}
545 
546 static bool is_ctrl_vq(struct net_dev *ndev, u32 vq)
547 {
548 	return vq == (u32)(ndev->queue_pairs * 2);
549 }
550 
/*
 * virtio_ops hook: set up virtqueue @vq at guest page frame @pfn.
 * In userspace mode this spawns the matching worker thread; in vhost
 * mode it registers the ring with the vhost-net kernel driver instead.
 */
static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align,
		   u32 pfn)
{
	struct vhost_vring_state state = { .index = vq };
	struct vhost_vring_addr addr;
	struct net_dev *ndev = dev;
	struct virt_queue *queue;
	void *p;
	int r;

	compat__remove_message(compat_id);

	/* Map the guest-provided ring memory. */
	queue		= &ndev->vqs[vq];
	queue->pfn	= pfn;
	p		= virtio_get_vq(kvm, queue->pfn, page_size);

	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, align);
	virtio_init_device_vq(&ndev->vdev, queue);

	mutex_init(&ndev->io_lock[vq]);
	pthread_cond_init(&ndev->io_cond[vq], NULL);
	if (is_ctrl_vq(ndev, vq)) {
		/* The control queue is always serviced in userspace. */
		pthread_create(&ndev->io_thread[vq], NULL, virtio_net_ctrl_thread, ndev);

		return 0;
	} else if (ndev->vhost_fd == 0 ) {
		/* Userspace data path: odd queues are tx, even are rx. */
		if (vq & 1)
			pthread_create(&ndev->io_thread[vq], NULL, virtio_net_tx_thread, ndev);
		else
			pthread_create(&ndev->io_thread[vq], NULL, virtio_net_rx_thread, ndev);

		return 0;
	}

	/* vhost path: the kernel accesses the ring directly, so byte order must match. */
	if (queue->endian != VIRTIO_ENDIAN_HOST)
		die_perror("VHOST requires the same endianness in guest and host");

	state.num = queue->vring.num;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_NUM failed");
	state.num = 0;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_BASE failed");

	/* Hand the ring's userspace addresses to vhost. */
	addr = (struct vhost_vring_addr) {
		.index = vq,
		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
		.used_user_addr = (u64)(unsigned long)queue->vring.used,
	};

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
	if (r < 0)
		die_perror("VHOST_SET_VRING_ADDR failed");

	return 0;
}
610 
611 static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
612 {
613 	struct net_dev *ndev = dev;
614 	struct vhost_vring_file file;
615 	int r;
616 
617 	if (ndev->vhost_fd == 0)
618 		return;
619 
620 	file = (struct vhost_vring_file) {
621 		.index	= vq,
622 		.fd	= eventfd(0, 0),
623 	};
624 
625 	r = irq__add_irqfd(kvm, gsi, file.fd, -1);
626 	if (r < 0)
627 		die_perror("KVM_IRQFD failed");
628 
629 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
630 	if (r < 0)
631 		die_perror("VHOST_SET_VRING_CALL failed");
632 	file.fd = ndev->tap_fd;
633 	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
634 	if (r != 0)
635 		die("VHOST_NET_SET_BACKEND failed %d", errno);
636 
637 }
638 
/*
 * virtio_ops hook (vhost only): hand the guest's kick eventfd to the
 * vhost kernel driver so guest notifications bypass userspace.  The
 * control queue is always handled in userspace, so it is skipped.
 */
static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
{
	struct net_dev *ndev = dev;
	struct vhost_vring_file file = {
		.index	= vq,
		.fd	= efd,
	};
	int r;

	if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
		return;

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
	if (r < 0)
		die_perror("VHOST_SET_VRING_KICK failed");
}
655 
656 static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
657 {
658 	struct net_dev *ndev = dev;
659 
660 	virtio_net_handle_callback(kvm, ndev, vq);
661 
662 	return 0;
663 }
664 
665 static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
666 {
667 	struct net_dev *ndev = dev;
668 
669 	return ndev->vqs[vq].pfn;
670 }
671 
/* virtio_ops hook: all queues currently share one fixed size. */
static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
{
	/* FIXME: dynamic */
	return VIRTIO_NET_QUEUE_SIZE;
}
677 
/* virtio_ops hook: queue resizing is not supported; echo the request. */
static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
{
	/* FIXME: dynamic */
	return size;
}
683 
/* virtio transport callbacks for the net device (copied per instance). */
static struct virtio_ops net_dev_virtio_ops = {
	.get_config		= get_config,
	.get_host_features	= get_host_features,
	.set_guest_features	= set_guest_features,
	.init_vq		= init_vq,
	.get_pfn_vq		= get_pfn_vq,
	.get_size_vq		= get_size_vq,
	.set_size_vq		= set_size_vq,
	.notify_vq		= notify_vq,
	.notify_vq_gsi		= notify_vq_gsi,
	.notify_vq_eventfd	= notify_vq_eventfd,
	.notify_status		= notify_status,
};
697 
/*
 * Open /dev/vhost-net, become its owner and hand it the guest memory
 * map so the kernel can translate guest physical addresses.  Any
 * failure is fatal (die).
 */
static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
{
	struct kvm_mem_bank *bank;
	struct vhost_memory *mem;
	int r, i;

	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
	if (ndev->vhost_fd < 0)
		die_perror("Failed openning vhost-net device");

	/* One vhost_memory_region per KVM memory slot, worst case. */
	mem = calloc(1, sizeof(*mem) + kvm->mem_slots * sizeof(struct vhost_memory_region));
	if (mem == NULL)
		die("Failed allocating memory for vhost memory map");

	i = 0;
	list_for_each_entry(bank, &kvm->mem_banks, list) {
		mem->regions[i] = (struct vhost_memory_region) {
			.guest_phys_addr = bank->guest_phys_addr,
			.memory_size	 = bank->size,
			.userspace_addr	 = (unsigned long)bank->host_addr,
		};
		i++;
	}
	mem->nregions = i;

	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
	if (r != 0)
		die_perror("VHOST_SET_OWNER failed");

	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
	if (r != 0)
		die_perror("VHOST_SET_MEM_TABLE failed");

	ndev->vdev.use_vhost = true;

	/* The kernel keeps its own copy of the table. */
	free(mem);
}
735 
/*
 * Parse an "aa:bb:cc:dd:ee:ff" string into a 6-byte MAC address.
 * Bytes past the first parse failure are left untouched (sscanf stops
 * at the first non-matching field).
 */
static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
		&mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]);
}
/*
 * Apply one "key=value" pair from the -n/--network command line option
 * to @p.  Unknown keys are fatal; "mode=none" disables networking and
 * returns -1 to stop parsing.  String values are strdup'ed (allocation
 * failure is not checked here; the strings live for the VM lifetime).
 */
static int set_net_param(struct kvm *kvm, struct virtio_net_params *p,
			const char *param, const char *val)
{
	if (strcmp(param, "guest_mac") == 0) {
		str_to_mac(val, p->guest_mac);
	} else if (strcmp(param, "mode") == 0) {
		if (!strncmp(val, "user", 4)) {
			int i;

			/* The uip stack keeps global state, so only one user-mode device works. */
			for (i = 0; i < kvm->cfg.num_net_devices; i++)
				if (kvm->cfg.net_params[i].mode == NET_MODE_USER)
					die("Only one usermode network device allowed at a time");
			p->mode = NET_MODE_USER;
		} else if (!strncmp(val, "tap", 3)) {
			p->mode = NET_MODE_TAP;
		} else if (!strncmp(val, "none", 4)) {
			kvm->cfg.no_net = 1;
			return -1;
		} else
			die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network);
	} else if (strcmp(param, "script") == 0) {
		p->script = strdup(val);
	} else if (strcmp(param, "downscript") == 0) {
		p->downscript = strdup(val);
	} else if (strcmp(param, "guest_ip") == 0) {
		p->guest_ip = strdup(val);
	} else if (strcmp(param, "host_ip") == 0) {
		p->host_ip = strdup(val);
	} else if (strcmp(param, "trans") == 0) {
		p->trans = strdup(val);
	} else if (strcmp(param, "tapif") == 0) {
		p->tapif = strdup(val);
	} else if (strcmp(param, "vhost") == 0) {
		p->vhost = atoi(val);
	} else if (strcmp(param, "fd") == 0) {
		p->fd = atoi(val);
	} else if (strcmp(param, "mq") == 0) {
		p->mq = atoi(val);
	} else
		die("Unknown network parameter %s", param);

	return 0;
}
784 
/*
 * Option callback for --network: parse a comma-separated list of
 * "key=value" pairs into a virtio_net_params entry and append it to
 * kvm->cfg.net_params.  Defaults are filled in first; the guest MAC's
 * last byte is offset by the device count to keep MACs unique.
 */
int netdev_parser(const struct option *opt, const char *arg, int unset)
{
	struct virtio_net_params p;
	char *buf = NULL, *cmd = NULL, *cur = NULL;
	bool on_cmd = true;
	struct kvm *kvm = opt->ptr;

	if (arg) {
		buf = strdup(arg);
		if (buf == NULL)
			die("Failed allocating new net buffer");
		/* strtok with ",=" splits both pairs and key/value in one pass. */
		cur = strtok(buf, ",=");
	}

	p = (struct virtio_net_params) {
		.guest_ip	= DEFAULT_GUEST_ADDR,
		.host_ip	= DEFAULT_HOST_ADDR,
		.script		= DEFAULT_SCRIPT,
		.downscript	= DEFAULT_SCRIPT,
		.mode		= NET_MODE_TAP,
	};

	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
	p.guest_mac[5] += kvm->cfg.num_net_devices;

	/* Tokens alternate between key (on_cmd) and value. */
	while (cur) {
		if (on_cmd) {
			cmd = cur;
		} else {
			if (set_net_param(kvm, &p, cmd, cur) < 0)
				goto done;
		}
		on_cmd = !on_cmd;

		cur = strtok(NULL, ",=");
	};

	kvm->cfg.num_net_devices++;

	kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params));
	if (kvm->cfg.net_params == NULL)
		die("Failed adding new network device");

	kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p;

done:
	free(buf);
	return 0;
}
834 
835 static int virtio_net__init_one(struct virtio_net_params *params)
836 {
837 	int i, err;
838 	struct net_dev *ndev;
839 	struct virtio_ops *ops;
840 	enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm);
841 
842 	ndev = calloc(1, sizeof(struct net_dev));
843 	if (ndev == NULL)
844 		return -ENOMEM;
845 
846 	ops = malloc(sizeof(*ops));
847 	if (ops == NULL) {
848 		err = -ENOMEM;
849 		goto err_free_ndev;
850 	}
851 
852 	list_add_tail(&ndev->list, &ndevs);
853 
854 	ndev->kvm = params->kvm;
855 	ndev->params = params;
856 
857 	mutex_init(&ndev->mutex);
858 	ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq));
859 	ndev->config.status = VIRTIO_NET_S_LINK_UP;
860 	if (ndev->queue_pairs > 1)
861 		ndev->config.max_virtqueue_pairs = ndev->queue_pairs;
862 
863 	for (i = 0 ; i < 6 ; i++) {
864 		ndev->config.mac[i]		= params->guest_mac[i];
865 		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
866 		ndev->info.host_mac.addr[i]	= params->host_mac[i];
867 	}
868 
869 	ndev->mode = params->mode;
870 	if (ndev->mode == NET_MODE_TAP) {
871 		ndev->ops = &tap_ops;
872 		if (!virtio_net__tap_create(ndev))
873 			die_perror("You have requested a TAP device, but creation of one has failed because");
874 	} else {
875 		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
876 		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
877 		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
878 		ndev->info.buf_nr		= 20,
879 		ndev->ops = &uip_ops;
880 		uip_static_init(&ndev->info);
881 	}
882 
883 	*ops = net_dev_virtio_ops;
884 
885 	if (params->trans) {
886 		if (strcmp(params->trans, "mmio") == 0)
887 			trans = VIRTIO_MMIO;
888 		else if (strcmp(params->trans, "pci") == 0)
889 			trans = VIRTIO_PCI;
890 		else
891 			pr_warning("virtio-net: Unknown transport method : %s, "
892 				   "falling back to %s.", params->trans,
893 				   virtio_trans_name(trans));
894 	}
895 
896 	virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans,
897 		    PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);
898 
899 	if (params->vhost)
900 		virtio_net__vhost_init(params->kvm, ndev);
901 
902 	if (compat_id == -1)
903 		compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");
904 
905 	return 0;
906 
907 err_free_ndev:
908 	free(ndev);
909 	return err;
910 }
911 
/*
 * Device-init entry point: instantiate every configured network device,
 * or fall back to a single default user-mode device when none were
 * requested and networking was not explicitly disabled.
 * NOTE(review): virtio_net__init_one()'s return value is ignored, so a
 * failed device is silently skipped — confirm that is intended.
 */
int virtio_net__init(struct kvm *kvm)
{
	int i;

	for (i = 0; i < kvm->cfg.num_net_devices; i++) {
		kvm->cfg.net_params[i].kvm = kvm;
		virtio_net__init_one(&kvm->cfg.net_params[i]);
	}

	if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) {
		/* static: the params struct must outlive this function. */
		static struct virtio_net_params net_params;

		net_params = (struct virtio_net_params) {
			.guest_ip	= kvm->cfg.guest_ip,
			.host_ip	= kvm->cfg.host_ip,
			.kvm		= kvm,
			.script		= kvm->cfg.script,
			.mode		= NET_MODE_USER,
		};
		str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac);
		str_to_mac(kvm->cfg.host_mac, net_params.host_mac);

		virtio_net__init_one(&net_params);
	}

	return 0;
}
939 virtio_dev_init(virtio_net__init);
940 
941 int virtio_net__exit(struct kvm *kvm)
942 {
943 	struct virtio_net_params *params;
944 	struct net_dev *ndev;
945 	struct list_head *ptr;
946 
947 	list_for_each(ptr, &ndevs) {
948 		ndev = list_entry(ptr, struct net_dev, list);
949 		params = ndev->params;
950 		/* Cleanup any tap device which attached to bridge */
951 		if (ndev->mode == NET_MODE_TAP &&
952 		    strcmp(params->downscript, "none"))
953 			virtio_net_exec_script(params->downscript, ndev->tap_name);
954 	}
955 	return 0;
956 }
957 virtio_dev_exit(virtio_net__exit);
958