xref: /kvmtool/virtio/net.c (revision eef27ae368562bcce4f8a2b65822b307da0d4146)
1 #include "kvm/virtio-pci-dev.h"
2 #include "kvm/virtio-net.h"
3 #include "kvm/virtio.h"
4 #include "kvm/mutex.h"
5 #include "kvm/util.h"
6 #include "kvm/kvm.h"
7 #include "kvm/irq.h"
8 #include "kvm/uip.h"
9 #include "kvm/guest_compat.h"
10 #include "kvm/iovec.h"
11 
12 #include <linux/vhost.h>
13 #include <linux/virtio_net.h>
14 #include <linux/if_tun.h>
15 #include <linux/types.h>
16 
17 #include <arpa/inet.h>
18 #include <net/if.h>
19 
20 #include <unistd.h>
21 #include <fcntl.h>
22 
23 #include <sys/socket.h>
24 #include <sys/ioctl.h>
25 #include <sys/types.h>
26 #include <sys/wait.h>
27 #include <sys/eventfd.h>
28 
/* Ring size used for every virtqueue of the device (passed to vring_init() and reported by get_size_vq()). */
29 #define VIRTIO_NET_QUEUE_SIZE		256
/* Upper bound on RX/TX queue pairs; per-queue arrays are sized as two queues per pair plus one extra slot. */
30 #define VIRTIO_NET_NUM_QUEUES		8
31 
32 struct net_dev;
33 
/*
 * Backend hooks used by the RX/TX worker threads to move packets.
 * Both take an iovec array, the number of entries to use and the owning
 * device; the worker threads treat a negative return value as failure and
 * otherwise use the return value as a byte count.
 */
34 struct net_dev_operations {
	/* Fill the iovec with one incoming packet. */
35 	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
	/* Send the packet described by the iovec (callers pass the readable-descriptor count). */
36 	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
37 };
38 
/* Per-device state of one virtio-net instance. */
39 struct net_dev {
	/* Guards the rx_vqs/tx_vqs counters that worker threads bump at start-up. */
40 	struct mutex			mutex;
	/* Generic virtio device handle registered through virtio_init(). */
41 	struct virtio_device		vdev;
	/* Link in the file-level ndevs list. */
42 	struct list_head		list;
43 
	/* Queue layout: even index = RX, odd index = TX, index queue_pairs * 2 = control queue. */
44 	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	/* Device configuration space handed to the guest via get_config(). */
45 	struct virtio_net_config	config;
	/*
	 * features: feature bits stored by set_guest_features().
	 * rx_vqs/tx_vqs: number of RX/TX worker threads started so far.
	 * queue_pairs: number of queue pairs, clamped to 1..VIRTIO_NET_NUM_QUEUES.
	 */
46 	u32				features, rx_vqs, tx_vqs, queue_pairs;
47 
	/* Per-queue worker thread plus the lock/condition pair used to wake it. */
48 	pthread_t			io_thread[VIRTIO_NET_NUM_QUEUES * 2 + 1];
49 	struct mutex			io_lock[VIRTIO_NET_NUM_QUEUES * 2 + 1];
50 	pthread_cond_t			io_cond[VIRTIO_NET_NUM_QUEUES * 2 + 1];
51 
	/* Descriptor of /dev/vhost-net; the code treats 0 as "vhost not in use". */
52 	int				vhost_fd;
	/* Descriptor and interface name of the TAP device (TAP mode only). */
53 	int				tap_fd;
54 	char				tap_name[IFNAMSIZ];
55 
	/* Copied from params->mode; compared against NET_MODE_TAP to select the backend. */
56 	int				mode;
57 
	/* State of the user-mode (uip) network stack, used when mode is not TAP. */
58 	struct uip_info			info;
	/* Backend packet hooks: tap_ops or uip_ops. */
59 	struct net_dev_operations	*ops;
60 	struct kvm			*kvm;
61 
	/* Parameters this device was created from; the pointer is kept, not copied. */
62 	struct virtio_net_params	*params;
63 };
64 
/* Every device created by virtio_net__init_one(); walked again in virtio_net__exit(). */
65 static LIST_HEAD(ndevs);
/* Id returned by virtio_compat_add_message(); -1 until the message has been registered. */
66 static int compat_id = -1;
67 
/* Payload capacity of the RX bounce buffer (the virtio-net header is added on top of this). */
68 #define MAX_PACKET_SIZE 65550
69 
70 static bool has_virtio_feature(struct net_dev *ndev, u32 feature)
71 {
72 	return ndev->features & (1 << feature);
73 }
74 
75 static void virtio_net_fix_tx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev)
76 {
77 	hdr->hdr_len		= virtio_guest_to_host_u16(&ndev->vdev, hdr->hdr_len);
78 	hdr->gso_size		= virtio_guest_to_host_u16(&ndev->vdev, hdr->gso_size);
79 	hdr->csum_start		= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_start);
80 	hdr->csum_offset	= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_offset);
81 }
82 
83 static void virtio_net_fix_rx_hdr(struct virtio_net_hdr_mrg_rxbuf *hdr, struct net_dev *ndev)
84 {
85 	hdr->hdr.hdr_len	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.hdr_len);
86 	hdr->hdr.gso_size	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.gso_size);
87 	hdr->hdr.csum_start	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.csum_start);
88 	hdr->hdr.csum_offset	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.csum_offset);
89 	if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
90 		hdr->num_buffers	= virtio_host_to_guest_u16(&ndev->vdev, hdr->num_buffers);
91 }
92 
93 static void *virtio_net_rx_thread(void *p)
94 {
95 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
96 	struct virt_queue *vq;
97 	struct kvm *kvm;
98 	struct net_dev *ndev = p;
99 	u16 out, in;
100 	u16 head;
101 	int len, copied;
102 	u32 id;
103 
104 	mutex_lock(&ndev->mutex);
105 	id = ndev->rx_vqs++ * 2;
106 	mutex_unlock(&ndev->mutex);
107 
108 	kvm__set_thread_name("virtio-net-rx");
109 
110 	kvm = ndev->kvm;
111 	vq = &ndev->vqs[id];
112 
113 	while (1) {
114 		mutex_lock(&ndev->io_lock[id]);
115 		if (!virt_queue__available(vq))
116 			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
117 		mutex_unlock(&ndev->io_lock[id]);
118 
119 		while (virt_queue__available(vq)) {
120 			unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)];
121 			struct iovec dummy_iov = {
122 				.iov_base = buffer,
123 				.iov_len  = sizeof(buffer),
124 			};
125 			struct virtio_net_hdr_mrg_rxbuf *hdr;
126 			int i;
127 
128 			len = ndev->ops->rx(&dummy_iov, 1, ndev);
129 			if (len < 0) {
130 				pr_warning("%s: rx on vq %u failed (%d), exiting thread\n",
131 						__func__, id, len);
132 				goto out_err;
133 			}
134 
135 			copied = i = 0;
136 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
137 			hdr = iov[0].iov_base;
138 			while (copied < len) {
139 				size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in));
140 
141 				memcpy_toiovec(iov, buffer + copied, iovsize);
142 				copied += iovsize;
143 				if (i++ == 0)
144 					virtio_net_fix_rx_hdr(hdr, ndev);
145 				if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF)) {
146 					u16 num_buffers = virtio_guest_to_host_u16(vq, hdr->num_buffers);
147 					hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers + 1);
148 				}
149 				virt_queue__set_used_elem(vq, head, iovsize);
150 				if (copied == len)
151 					break;
152 				while (!virt_queue__available(vq))
153 					sleep(0);
154 				head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
155 			}
156 			/* We should interrupt guest right now, otherwise latency is huge. */
157 			if (virtio_queue__should_signal(vq))
158 				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
159 		}
160 	}
161 
162 out_err:
163 	pthread_exit(NULL);
164 	return NULL;
165 
166 }
167 
168 static void *virtio_net_tx_thread(void *p)
169 {
170 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
171 	struct virt_queue *vq;
172 	struct kvm *kvm;
173 	struct net_dev *ndev = p;
174 	u16 out, in;
175 	u16 head;
176 	int len;
177 	u32 id;
178 
179 	mutex_lock(&ndev->mutex);
180 	id = ndev->tx_vqs++ * 2 + 1;
181 	mutex_unlock(&ndev->mutex);
182 
183 	kvm__set_thread_name("virtio-net-tx");
184 
185 	kvm = ndev->kvm;
186 	vq = &ndev->vqs[id];
187 
188 	while (1) {
189 		mutex_lock(&ndev->io_lock[id]);
190 		if (!virt_queue__available(vq))
191 			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
192 		mutex_unlock(&ndev->io_lock[id]);
193 
194 		while (virt_queue__available(vq)) {
195 			struct virtio_net_hdr *hdr;
196 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
197 			hdr = iov[0].iov_base;
198 			virtio_net_fix_tx_hdr(hdr, ndev);
199 			len = ndev->ops->tx(iov, out, ndev);
200 			if (len < 0) {
201 				pr_warning("%s: tx on vq %u failed (%d)\n",
202 						__func__, id, errno);
203 				goto out_err;
204 			}
205 
206 			virt_queue__set_used_elem(vq, head, len);
207 		}
208 
209 		if (virtio_queue__should_signal(vq))
210 			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
211 	}
212 
213 out_err:
214 	pthread_exit(NULL);
215 	return NULL;
216 }
217 
218 static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm* kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl)
219 {
220 	/* Not much to do here */
221 	return VIRTIO_NET_OK;
222 }
223 
224 static void *virtio_net_ctrl_thread(void *p)
225 {
226 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
227 	u16 out, in, head;
228 	struct net_dev *ndev = p;
229 	struct kvm *kvm = ndev->kvm;
230 	u32 id = ndev->queue_pairs * 2;
231 	struct virt_queue *vq = &ndev->vqs[id];
232 	struct virtio_net_ctrl_hdr *ctrl;
233 	virtio_net_ctrl_ack *ack;
234 
235 	kvm__set_thread_name("virtio-net-ctrl");
236 
237 	while (1) {
238 		mutex_lock(&ndev->io_lock[id]);
239 		if (!virt_queue__available(vq))
240 			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
241 		mutex_unlock(&ndev->io_lock[id]);
242 
243 		while (virt_queue__available(vq)) {
244 			head = virt_queue__get_iov(&ndev->vqs[id], iov, &out, &in, kvm);
245 			ctrl = iov[0].iov_base;
246 			ack = iov[out].iov_base;
247 
248 			switch (ctrl->class) {
249 			case VIRTIO_NET_CTRL_MQ:
250 				*ack = virtio_net_handle_mq(kvm, ndev, ctrl);
251 				break;
252 			default:
253 				*ack = VIRTIO_NET_ERR;
254 				break;
255 			}
256 			virt_queue__set_used_elem(&ndev->vqs[id], head, iov[out].iov_len);
257 		}
258 
259 		if (virtio_queue__should_signal(&ndev->vqs[id]))
260 			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
261 	}
262 
263 	pthread_exit(NULL);
264 
265 	return NULL;
266 }
267 
268 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
269 {
270 	if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) {
271 		pr_warning("Unknown queue index %u", queue);
272 		return;
273 	}
274 
275 	mutex_lock(&ndev->io_lock[queue]);
276 	pthread_cond_signal(&ndev->io_cond[queue]);
277 	mutex_unlock(&ndev->io_lock[queue]);
278 }
279 
280 static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr,
281 				  const char *tapname)
282 {
283 	int ret;
284 
285 	memset(ifr, 0, sizeof(*ifr));
286 	ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
287 	if (tapname)
288 		strncpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name));
289 
290 	ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr);
291 
292 	if (ret >= 0)
293 		strncpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name));
294 	return ret;
295 }
296 
/*
 * Run a helper script with the TAP interface name as its only argument and
 * wait for it to finish.
 *
 * @script:   path of the executable to run
 * @tap_name: interface name passed as argv[1]
 *
 * Returns 0 when the script exited with status 0, and -1 when it could not
 * be started, could not be waited for, exited non-zero or was terminated
 * abnormally.
 */
static int virtio_net_exec_script(const char* script, const char *tap_name)
{
	pid_t pid;
	int status;

	pid = fork();
	if (pid < 0) {
		/* Without this check waitpid(-1, ...) would reap an unrelated child. */
		pr_warning("Fail to setup tap by %s", script);
		return -1;
	}

	if (pid == 0) {
		execl(script, script, tap_name, (char *)NULL);
		/* Only reached when exec itself failed. */
		_exit(1);
	}

	/* Retry when interrupted by a signal so status is always valid below. */
	while (waitpid(pid, &status, 0) < 0) {
		if (errno != EINTR) {
			pr_warning("Fail to setup tap by %s", script);
			return -1;
		}
	}

	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
		pr_warning("Fail to setup tap by %s", script);
		return -1;
	}
	return 0;
}
315 
316 static bool virtio_net__tap_init(struct net_dev *ndev)
317 {
318 	int sock = socket(AF_INET, SOCK_STREAM, 0);
319 	int offload, hdr_len;
320 	struct sockaddr_in sin = {0};
321 	struct ifreq ifr;
322 	const struct virtio_net_params *params = ndev->params;
323 	bool skipconf = !!params->tapif;
324 	bool macvtap = skipconf && (params->tapif[0] == '/');
325 	const char *tap_file = "/dev/net/tun";
326 
327 	/* Did the user already gave us the FD? */
328 	if (params->fd) {
329 		ndev->tap_fd = params->fd;
330 		return 1;
331 	}
332 
333 	if (macvtap)
334 		tap_file = params->tapif;
335 
336 	ndev->tap_fd = open(tap_file, O_RDWR);
337 	if (ndev->tap_fd < 0) {
338 		pr_warning("Unable to open %s", tap_file);
339 		goto fail;
340 	}
341 
342 	if (!macvtap &&
343 	    virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) {
344 		pr_warning("Config tap device error. Are you root?");
345 		goto fail;
346 	}
347 
348 	hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
349 			sizeof(struct virtio_net_hdr_mrg_rxbuf) :
350 			sizeof(struct virtio_net_hdr);
351 	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
352 		pr_warning("Config tap device TUNSETVNETHDRSZ error");
353 
354 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
355 	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
356 		pr_warning("Config tap device TUNSETOFFLOAD error");
357 		goto fail;
358 	}
359 
360 	if (strcmp(params->script, "none")) {
361 		if (virtio_net_exec_script(params->script, ndev->tap_name) < 0)
362 			goto fail;
363 	} else if (!skipconf) {
364 		memset(&ifr, 0, sizeof(ifr));
365 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
366 		sin.sin_addr.s_addr = inet_addr(params->host_ip);
367 		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
368 		ifr.ifr_addr.sa_family = AF_INET;
369 		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
370 			pr_warning("Could not set ip address on tap device");
371 			goto fail;
372 		}
373 	}
374 
375 	if (!skipconf) {
376 		memset(&ifr, 0, sizeof(ifr));
377 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
378 		ioctl(sock, SIOCGIFFLAGS, &ifr);
379 		ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
380 		if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
381 			pr_warning("Could not bring tap device up");
382 	}
383 
384 	close(sock);
385 
386 	return 1;
387 
388 fail:
389 	if (sock >= 0)
390 		close(sock);
391 	if (ndev->tap_fd >= 0)
392 		close(ndev->tap_fd);
393 
394 	return 0;
395 }
396 
397 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
398 {
399 	return writev(ndev->tap_fd, iov, out);
400 }
401 
402 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
403 {
404 	return readv(ndev->tap_fd, iov, in);
405 }
406 
407 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
408 {
409 	return uip_tx(iov, out, &ndev->info);
410 }
411 
412 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
413 {
414 	return uip_rx(iov, in, &ndev->info);
415 }
416 
/* Packet hooks selected when the device mode is NET_MODE_TAP. */
417 static struct net_dev_operations tap_ops = {
418 	.rx	= tap_ops_rx,
419 	.tx	= tap_ops_tx,
420 };
421 
/* Packet hooks selected for every other mode (the user-mode uip stack). */
422 static struct net_dev_operations uip_ops = {
423 	.rx	= uip_ops_rx,
424 	.tx	= uip_ops_tx,
425 };
426 
427 static u8 *get_config(struct kvm *kvm, void *dev)
428 {
429 	struct net_dev *ndev = dev;
430 
431 	return ((u8 *)(&ndev->config));
432 }
433 
434 static u32 get_host_features(struct kvm *kvm, void *dev)
435 {
436 	struct net_dev *ndev = dev;
437 
438 	return 1UL << VIRTIO_NET_F_MAC
439 		| 1UL << VIRTIO_NET_F_CSUM
440 		| 1UL << VIRTIO_NET_F_HOST_UFO
441 		| 1UL << VIRTIO_NET_F_HOST_TSO4
442 		| 1UL << VIRTIO_NET_F_HOST_TSO6
443 		| 1UL << VIRTIO_NET_F_GUEST_UFO
444 		| 1UL << VIRTIO_NET_F_GUEST_TSO4
445 		| 1UL << VIRTIO_NET_F_GUEST_TSO6
446 		| 1UL << VIRTIO_RING_F_EVENT_IDX
447 		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
448 		| 1UL << VIRTIO_NET_F_CTRL_VQ
449 		| 1UL << VIRTIO_NET_F_MRG_RXBUF
450 		| 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
451 }
452 
453 static int virtio_net__vhost_set_features(struct net_dev *ndev)
454 {
455 	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
456 	u64 vhost_features;
457 
458 	if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0)
459 		die_perror("VHOST_GET_FEATURES failed");
460 
461 	/* make sure both side support mergable rx buffers */
462 	if (vhost_features & 1UL << VIRTIO_NET_F_MRG_RXBUF &&
463 			has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
464 		features |= 1UL << VIRTIO_NET_F_MRG_RXBUF;
465 
466 	return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
467 }
468 
469 static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
470 {
471 	struct net_dev *ndev = dev;
472 	struct virtio_net_config *conf = &ndev->config;
473 
474 	ndev->features = features;
475 
476 	conf->status = virtio_host_to_guest_u16(&ndev->vdev, conf->status);
477 	conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev,
478 							     conf->max_virtqueue_pairs);
479 
480 	if (ndev->mode == NET_MODE_TAP) {
481 		if (!virtio_net__tap_init(ndev))
482 			die_perror("You have requested a TAP device, but creation of one has failed because");
483 		if (ndev->vhost_fd &&
484 				virtio_net__vhost_set_features(ndev) != 0)
485 			die_perror("VHOST_SET_FEATURES failed");
486 	} else {
487 		ndev->info.vnet_hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
488 						sizeof(struct virtio_net_hdr_mrg_rxbuf) :
489 						sizeof(struct virtio_net_hdr);
490 		uip_init(&ndev->info);
491 	}
492 }
493 
494 static bool is_ctrl_vq(struct net_dev *ndev, u32 vq)
495 {
496 	return vq == (u32)(ndev->queue_pairs * 2);
497 }
498 
499 static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align,
500 		   u32 pfn)
501 {
502 	struct vhost_vring_state state = { .index = vq };
503 	struct vhost_vring_addr addr;
504 	struct net_dev *ndev = dev;
505 	struct virt_queue *queue;
506 	void *p;
507 	int r;
508 
509 	compat__remove_message(compat_id);
510 
511 	queue		= &ndev->vqs[vq];
512 	queue->pfn	= pfn;
513 	p		= virtio_get_vq(kvm, queue->pfn, page_size);
514 
515 	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, align);
516 	virtio_init_device_vq(&ndev->vdev, queue);
517 
518 	mutex_init(&ndev->io_lock[vq]);
519 	pthread_cond_init(&ndev->io_cond[vq], NULL);
520 	if (is_ctrl_vq(ndev, vq)) {
521 		pthread_create(&ndev->io_thread[vq], NULL, virtio_net_ctrl_thread, ndev);
522 
523 		return 0;
524 	} else if (ndev->vhost_fd == 0 ) {
525 		if (vq & 1)
526 			pthread_create(&ndev->io_thread[vq], NULL, virtio_net_tx_thread, ndev);
527 		else
528 			pthread_create(&ndev->io_thread[vq], NULL, virtio_net_rx_thread, ndev);
529 
530 		return 0;
531 	}
532 
533 	if (queue->endian != VIRTIO_ENDIAN_HOST)
534 		die_perror("VHOST requires VIRTIO_ENDIAN_HOST");
535 
536 	state.num = queue->vring.num;
537 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
538 	if (r < 0)
539 		die_perror("VHOST_SET_VRING_NUM failed");
540 	state.num = 0;
541 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
542 	if (r < 0)
543 		die_perror("VHOST_SET_VRING_BASE failed");
544 
545 	addr = (struct vhost_vring_addr) {
546 		.index = vq,
547 		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
548 		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
549 		.used_user_addr = (u64)(unsigned long)queue->vring.used,
550 	};
551 
552 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
553 	if (r < 0)
554 		die_perror("VHOST_SET_VRING_ADDR failed");
555 
556 	return 0;
557 }
558 
559 static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
560 {
561 	struct net_dev *ndev = dev;
562 	struct kvm_irqfd irq;
563 	struct vhost_vring_file file;
564 	int r;
565 
566 	if (ndev->vhost_fd == 0)
567 		return;
568 
569 	irq = (struct kvm_irqfd) {
570 		.gsi	= gsi,
571 		.fd	= eventfd(0, 0),
572 	};
573 	file = (struct vhost_vring_file) {
574 		.index	= vq,
575 		.fd	= irq.fd,
576 	};
577 
578 	r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq);
579 	if (r < 0)
580 		die_perror("KVM_IRQFD failed");
581 
582 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
583 	if (r < 0)
584 		die_perror("VHOST_SET_VRING_CALL failed");
585 	file.fd = ndev->tap_fd;
586 	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
587 	if (r != 0)
588 		die("VHOST_NET_SET_BACKEND failed %d", errno);
589 
590 }
591 
592 static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
593 {
594 	struct net_dev *ndev = dev;
595 	struct vhost_vring_file file = {
596 		.index	= vq,
597 		.fd	= efd,
598 	};
599 	int r;
600 
601 	if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
602 		return;
603 
604 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
605 	if (r < 0)
606 		die_perror("VHOST_SET_VRING_KICK failed");
607 }
608 
609 static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
610 {
611 	struct net_dev *ndev = dev;
612 
613 	virtio_net_handle_callback(kvm, ndev, vq);
614 
615 	return 0;
616 }
617 
618 static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
619 {
620 	struct net_dev *ndev = dev;
621 
622 	return ndev->vqs[vq].pfn;
623 }
624 
625 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
626 {
627 	/* FIXME: dynamic */
628 	return VIRTIO_NET_QUEUE_SIZE;
629 }
630 
631 static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
632 {
633 	/* FIXME: dynamic */
634 	return size;
635 }
636 
/*
 * Template of the virtio callbacks implemented in this file.
 * virtio_net__init_one() copies it into a per-device allocation before
 * handing it to virtio_init().
 */
637 static struct virtio_ops net_dev_virtio_ops = {
638 	.get_config		= get_config,
639 	.get_host_features	= get_host_features,
640 	.set_guest_features	= set_guest_features,
641 	.init_vq		= init_vq,
642 	.get_pfn_vq		= get_pfn_vq,
643 	.get_size_vq		= get_size_vq,
644 	.set_size_vq		= set_size_vq,
645 	.notify_vq		= notify_vq,
646 	.notify_vq_gsi		= notify_vq_gsi,
647 	.notify_vq_eventfd	= notify_vq_eventfd,
648 };
649 
650 static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
651 {
652 	struct kvm_mem_bank *bank;
653 	struct vhost_memory *mem;
654 	int r, i;
655 
656 	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
657 	if (ndev->vhost_fd < 0)
658 		die_perror("Failed openning vhost-net device");
659 
660 	mem = calloc(1, sizeof(*mem) + kvm->mem_slots * sizeof(struct vhost_memory_region));
661 	if (mem == NULL)
662 		die("Failed allocating memory for vhost memory map");
663 
664 	i = 0;
665 	list_for_each_entry(bank, &kvm->mem_banks, list) {
666 		mem->regions[i] = (struct vhost_memory_region) {
667 			.guest_phys_addr = bank->guest_phys_addr,
668 			.memory_size	 = bank->size,
669 			.userspace_addr	 = (unsigned long)bank->host_addr,
670 		};
671 		i++;
672 	}
673 	mem->nregions = i;
674 
675 	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
676 	if (r != 0)
677 		die_perror("VHOST_SET_OWNER failed");
678 
679 	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
680 	if (r != 0)
681 		die_perror("VHOST_SET_MEM_TABLE failed");
682 
683 	ndev->vdev.use_vhost = true;
684 
685 	free(mem);
686 }
687 
/*
 * Parse a MAC address written as six colon-separated hex bytes
 * ("aa:bb:cc:dd:ee:ff").
 *
 * @str: text to parse
 * @mac: 6-byte output buffer
 *
 * @mac is only written when all six bytes were parsed; malformed input
 * leaves it untouched instead of half-updated.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	/* %hhx requires unsigned char targets, so parse into temporaries. */
	unsigned char bytes[6];
	int i;

	if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
		   &bytes[0], &bytes[1], &bytes[2],
		   &bytes[3], &bytes[4], &bytes[5]) != 6)
		return;

	for (i = 0; i < 6; i++)
		mac[i] = (char)bytes[i];
}
693 static int set_net_param(struct kvm *kvm, struct virtio_net_params *p,
694 			const char *param, const char *val)
695 {
696 	if (strcmp(param, "guest_mac") == 0) {
697 		str_to_mac(val, p->guest_mac);
698 	} else if (strcmp(param, "mode") == 0) {
699 		if (!strncmp(val, "user", 4)) {
700 			int i;
701 
702 			for (i = 0; i < kvm->cfg.num_net_devices; i++)
703 				if (kvm->cfg.net_params[i].mode == NET_MODE_USER)
704 					die("Only one usermode network device allowed at a time");
705 			p->mode = NET_MODE_USER;
706 		} else if (!strncmp(val, "tap", 3)) {
707 			p->mode = NET_MODE_TAP;
708 		} else if (!strncmp(val, "none", 4)) {
709 			kvm->cfg.no_net = 1;
710 			return -1;
711 		} else
712 			die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network);
713 	} else if (strcmp(param, "script") == 0) {
714 		p->script = strdup(val);
715 	} else if (strcmp(param, "downscript") == 0) {
716 		p->downscript = strdup(val);
717 	} else if (strcmp(param, "guest_ip") == 0) {
718 		p->guest_ip = strdup(val);
719 	} else if (strcmp(param, "host_ip") == 0) {
720 		p->host_ip = strdup(val);
721 	} else if (strcmp(param, "trans") == 0) {
722 		p->trans = strdup(val);
723 	} else if (strcmp(param, "tapif") == 0) {
724 		p->tapif = strdup(val);
725 	} else if (strcmp(param, "vhost") == 0) {
726 		p->vhost = atoi(val);
727 	} else if (strcmp(param, "fd") == 0) {
728 		p->fd = atoi(val);
729 	} else if (strcmp(param, "mq") == 0) {
730 		p->mq = atoi(val);
731 	} else
732 		die("Unknown network parameter %s", param);
733 
734 	return 0;
735 }
736 
737 int netdev_parser(const struct option *opt, const char *arg, int unset)
738 {
739 	struct virtio_net_params p;
740 	char *buf = NULL, *cmd = NULL, *cur = NULL;
741 	bool on_cmd = true;
742 	struct kvm *kvm = opt->ptr;
743 
744 	if (arg) {
745 		buf = strdup(arg);
746 		if (buf == NULL)
747 			die("Failed allocating new net buffer");
748 		cur = strtok(buf, ",=");
749 	}
750 
751 	p = (struct virtio_net_params) {
752 		.guest_ip	= DEFAULT_GUEST_ADDR,
753 		.host_ip	= DEFAULT_HOST_ADDR,
754 		.script		= DEFAULT_SCRIPT,
755 		.downscript	= DEFAULT_SCRIPT,
756 		.mode		= NET_MODE_TAP,
757 	};
758 
759 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
760 	p.guest_mac[5] += kvm->cfg.num_net_devices;
761 
762 	while (cur) {
763 		if (on_cmd) {
764 			cmd = cur;
765 		} else {
766 			if (set_net_param(kvm, &p, cmd, cur) < 0)
767 				goto done;
768 		}
769 		on_cmd = !on_cmd;
770 
771 		cur = strtok(NULL, ",=");
772 	};
773 
774 	kvm->cfg.num_net_devices++;
775 
776 	kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params));
777 	if (kvm->cfg.net_params == NULL)
778 		die("Failed adding new network device");
779 
780 	kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p;
781 
782 done:
783 	free(buf);
784 	return 0;
785 }
786 
787 static int virtio_net__init_one(struct virtio_net_params *params)
788 {
789 	int i, err;
790 	struct net_dev *ndev;
791 	struct virtio_ops *ops;
792 	enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm);
793 
794 	ndev = calloc(1, sizeof(struct net_dev));
795 	if (ndev == NULL)
796 		return -ENOMEM;
797 
798 	ops = malloc(sizeof(*ops));
799 	if (ops == NULL) {
800 		err = -ENOMEM;
801 		goto err_free_ndev;
802 	}
803 
804 	list_add_tail(&ndev->list, &ndevs);
805 
806 	ndev->kvm = params->kvm;
807 	ndev->params = params;
808 
809 	mutex_init(&ndev->mutex);
810 	ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq));
811 	ndev->config.status = VIRTIO_NET_S_LINK_UP;
812 	if (ndev->queue_pairs > 1)
813 		ndev->config.max_virtqueue_pairs = ndev->queue_pairs;
814 
815 	for (i = 0 ; i < 6 ; i++) {
816 		ndev->config.mac[i]		= params->guest_mac[i];
817 		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
818 		ndev->info.host_mac.addr[i]	= params->host_mac[i];
819 	}
820 
821 	ndev->mode = params->mode;
822 	if (ndev->mode == NET_MODE_TAP) {
823 		ndev->ops = &tap_ops;
824 	} else {
825 		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
826 		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
827 		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
828 		ndev->info.buf_nr		= 20,
829 		ndev->ops = &uip_ops;
830 		uip_static_init(&ndev->info);
831 	}
832 
833 	*ops = net_dev_virtio_ops;
834 
835 	if (params->trans) {
836 		if (strcmp(params->trans, "mmio") == 0)
837 			trans = VIRTIO_MMIO;
838 		else if (strcmp(params->trans, "pci") == 0)
839 			trans = VIRTIO_PCI;
840 		else
841 			pr_warning("virtio-net: Unknown transport method : %s, "
842 				   "falling back to %s.", params->trans,
843 				   virtio_trans_name(trans));
844 	}
845 
846 	virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans,
847 		    PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);
848 
849 	if (params->vhost)
850 		virtio_net__vhost_init(params->kvm, ndev);
851 
852 	if (compat_id == -1)
853 		compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");
854 
855 	return 0;
856 
857 err_free_ndev:
858 	free(ndev);
859 	return err;
860 }
861 
862 int virtio_net__init(struct kvm *kvm)
863 {
864 	int i;
865 
866 	for (i = 0; i < kvm->cfg.num_net_devices; i++) {
867 		kvm->cfg.net_params[i].kvm = kvm;
868 		virtio_net__init_one(&kvm->cfg.net_params[i]);
869 	}
870 
871 	if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) {
872 		static struct virtio_net_params net_params;
873 
874 		net_params = (struct virtio_net_params) {
875 			.guest_ip	= kvm->cfg.guest_ip,
876 			.host_ip	= kvm->cfg.host_ip,
877 			.kvm		= kvm,
878 			.script		= kvm->cfg.script,
879 			.mode		= NET_MODE_USER,
880 		};
881 		str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac);
882 		str_to_mac(kvm->cfg.host_mac, net_params.host_mac);
883 
884 		virtio_net__init_one(&net_params);
885 	}
886 
887 	return 0;
888 }
889 virtio_dev_init(virtio_net__init);
890 
891 int virtio_net__exit(struct kvm *kvm)
892 {
893 	struct virtio_net_params *params;
894 	struct net_dev *ndev;
895 	struct list_head *ptr;
896 
897 	list_for_each(ptr, &ndevs) {
898 		ndev = list_entry(ptr, struct net_dev, list);
899 		params = ndev->params;
900 		/* Cleanup any tap device which attached to bridge */
901 		if (ndev->mode == NET_MODE_TAP &&
902 		    strcmp(params->downscript, "none"))
903 			virtio_net_exec_script(params->downscript, ndev->tap_name);
904 	}
905 	return 0;
906 }
907 virtio_dev_exit(virtio_net__exit);
908