xref: /kvmtool/virtio/net.c (revision f83dc816a9c76f87ad90723f366700077fb367ea)
#include "kvm/virtio-pci-dev.h"
#include "kvm/virtio-net.h"
#include "kvm/virtio.h"
#include "kvm/mutex.h"
#include "kvm/util.h"
#include "kvm/kvm.h"
#include "kvm/irq.h"
#include "kvm/uip.h"
#include "kvm/guest_compat.h"
#include "kvm/iovec.h"

#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <linux/if_tun.h>
#include <linux/types.h>

#include <arpa/inet.h>
#include <net/if.h>

#include <unistd.h>
#include <fcntl.h>

#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/eventfd.h>

#define VIRTIO_NET_QUEUE_SIZE		256
#define VIRTIO_NET_NUM_QUEUES		8

struct net_dev;

struct net_dev_operations {
	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
	int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev);
};

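/*
 * Virtqueue layout: for each queue pair, the even index is the RX queue
 * and the odd index is the TX queue; the single extra slot at index
 * queue_pairs * 2 is the control queue.
 */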
struct net_dev {
	struct mutex			mutex;
	struct virtio_device		vdev;
	struct list_head		list;

	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	struct virtio_net_config	config;
	u32				features, rx_vqs, tx_vqs, queue_pairs;

	pthread_t			io_thread[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	struct mutex			io_lock[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	pthread_cond_t			io_cond[VIRTIO_NET_NUM_QUEUES * 2 + 1];

	int				vhost_fd;
	int				tap_fd;
	char				tap_name[IFNAMSIZ];

	int				mode;

	struct uip_info			info;
	struct net_dev_operations	*ops;
	struct kvm			*kvm;

	struct virtio_net_params	*params;
};

static LIST_HEAD(ndevs);
static int compat_id = -1;

#define MAX_PACKET_SIZE 65550

static bool has_virtio_feature(struct net_dev *ndev, u32 feature)
{
	return ndev->features & (1 << feature);
}

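/*
 * The virtio-net header is carried over the ring in guest byte order:
 * convert its multi-byte fields to host order on TX and back to guest
 * order on RX before either side looks at them.
 */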
static void virtio_net_fix_tx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev)
{
	hdr->hdr_len		= virtio_guest_to_host_u16(&ndev->vdev, hdr->hdr_len);
	hdr->gso_size		= virtio_guest_to_host_u16(&ndev->vdev, hdr->gso_size);
	hdr->csum_start		= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_start);
	hdr->csum_offset	= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_offset);
}

static void virtio_net_fix_rx_hdr(struct virtio_net_hdr_mrg_rxbuf *hdr, struct net_dev *ndev)
{
	hdr->hdr.hdr_len	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.hdr_len);
	hdr->hdr.gso_size	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.gso_size);
	hdr->hdr.csum_start	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.csum_start);
	hdr->hdr.csum_offset	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.csum_offset);
	if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
		hdr->num_buffers	= virtio_host_to_guest_u16(&ndev->vdev, hdr->num_buffers);
}

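/*
 * One RX thread per queue pair: sleep on the queue's condvar until the
 * guest kicks, then pull each packet from the backend into a bounce
 * buffer and scatter it over as many descriptor chains as it needs,
 * bumping num_buffers when mergeable RX buffers were negotiated.
 */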
static void *virtio_net_rx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	struct net_dev *ndev = p;
	u16 out, in;
	u16 head;
	int len, copied;
	u32 id;

	mutex_lock(&ndev->mutex);
	id = ndev->rx_vqs++ * 2;
	mutex_unlock(&ndev->mutex);

	kvm__set_thread_name("virtio-net-rx");

	kvm = ndev->kvm;
	vq = &ndev->vqs[id];

	while (1) {
		mutex_lock(&ndev->io_lock[id]);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
		mutex_unlock(&ndev->io_lock[id]);

		while (virt_queue__available(vq)) {
			unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)];
			struct iovec dummy_iov = {
				.iov_base = buffer,
				.iov_len  = sizeof(buffer),
			};
			struct virtio_net_hdr_mrg_rxbuf *hdr;
			int i;

			len = ndev->ops->rx(&dummy_iov, 1, ndev);
			if (len < 0) {
				pr_warning("%s: rx on vq %u failed (%d), exiting thread\n",
						__func__, id, len);
				goto out_err;
			}

			copied = i = 0;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			hdr = iov[0].iov_base;
			while (copied < len) {
				size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in));

				memcpy_toiovec(iov, buffer + copied, iovsize);
				copied += iovsize;
				if (i++ == 0)
					virtio_net_fix_rx_hdr(hdr, ndev);
				if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF)) {
					u16 num_buffers = virtio_guest_to_host_u16(vq, hdr->num_buffers);
					hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers + 1);
				}
				virt_queue__set_used_elem(vq, head, iovsize);
				if (copied == len)
					break;
				while (!virt_queue__available(vq))
					sleep(0);
				head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			}
			/* Signal the guest right away, otherwise RX latency is huge. */
			if (virtio_queue__should_signal(vq))
				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
		}
	}

out_err:
	pthread_exit(NULL);
	return NULL;
}

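/*
 * One TX thread per queue pair: wait for a kick, then hand every
 * pending descriptor chain to the backend's tx op.
 */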
static void *virtio_net_tx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	struct net_dev *ndev = p;
	u16 out, in;
	u16 head;
	int len;
	u32 id;

	mutex_lock(&ndev->mutex);
	id = ndev->tx_vqs++ * 2 + 1;
	mutex_unlock(&ndev->mutex);

	kvm__set_thread_name("virtio-net-tx");

	kvm = ndev->kvm;
	vq = &ndev->vqs[id];

	while (1) {
		mutex_lock(&ndev->io_lock[id]);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
		mutex_unlock(&ndev->io_lock[id]);

		while (virt_queue__available(vq)) {
			struct virtio_net_hdr *hdr;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			hdr = iov[0].iov_base;
			virtio_net_fix_tx_hdr(hdr, ndev);
			len = ndev->ops->tx(iov, out, ndev);
			if (len < 0) {
				pr_warning("%s: tx on vq %u failed (%d)\n",
						__func__, id, errno);
				goto out_err;
			}

			virt_queue__set_used_elem(vq, head, len);
		}

		if (virtio_queue__should_signal(vq))
			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
	}

out_err:
	pthread_exit(NULL);
	return NULL;
}

static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm *kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl)
{
	/* Not much to do here */
	return VIRTIO_NET_OK;
}

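/*
 * Control queue thread: the command header sits in the first
 * descriptor of each chain and the ack status byte in the last one.
 */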
static void *virtio_net_ctrl_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	u16 out, in, head;
	struct net_dev *ndev = p;
	struct kvm *kvm = ndev->kvm;
	u32 id = ndev->queue_pairs * 2;
	struct virt_queue *vq = &ndev->vqs[id];
	struct virtio_net_ctrl_hdr *ctrl;
	virtio_net_ctrl_ack *ack;

	kvm__set_thread_name("virtio-net-ctrl");

	while (1) {
		mutex_lock(&ndev->io_lock[id]);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
		mutex_unlock(&ndev->io_lock[id]);

		while (virt_queue__available(vq)) {
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			ctrl = iov[0].iov_base;
			ack = iov[out].iov_base;

			switch (ctrl->class) {
			case VIRTIO_NET_CTRL_MQ:
				*ack = virtio_net_handle_mq(kvm, ndev, ctrl);
				break;
			default:
				*ack = VIRTIO_NET_ERR;
				break;
			}
			virt_queue__set_used_elem(vq, head, iov[out].iov_len);
		}

		if (virtio_queue__should_signal(vq))
			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
	}

	pthread_exit(NULL);

	return NULL;
}

static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
{
	if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) {
		pr_warning("Unknown queue index %u", queue);
		return;
	}

	mutex_lock(&ndev->io_lock[queue]);
	pthread_cond_signal(&ndev->io_cond[queue]);
	mutex_unlock(&ndev->io_lock[queue]);
}

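/* Attach the fd to a tap interface, naming it after the user's request when one was given. */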
static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr,
				  const char *tapname)
{
	int ret;

	memset(ifr, 0, sizeof(*ifr));
	ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
	if (tapname)
		strncpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name));

	ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr);

	if (ret >= 0)
		strncpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name));
	return ret;
}

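/*
 * Bring up the tap backend: open /dev/net/tun (or a macvtap device
 * node), set the vnet header size and offloads, then either run the
 * user-supplied script or configure the host-side address and bring
 * the interface up ourselves.
 */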
static bool virtio_net__tap_init(struct net_dev *ndev)
{
	int sock = socket(AF_INET, SOCK_STREAM, 0);
	int pid, status, offload, hdr_len;
	struct sockaddr_in sin = {0};
	struct ifreq ifr;
	const struct virtio_net_params *params = ndev->params;
	bool skipconf = !!params->tapif;
	bool macvtap = skipconf && (params->tapif[0] == '/');
	const char *tap_file = "/dev/net/tun";

	/* Did the user already give us the FD? */
	if (params->fd) {
		ndev->tap_fd = params->fd;
		return true;
	}

	if (macvtap)
		tap_file = params->tapif;

	ndev->tap_fd = open(tap_file, O_RDWR);
	if (ndev->tap_fd < 0) {
		pr_warning("Unable to open %s", tap_file);
		goto fail;
	}

	if (!macvtap &&
	    virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) {
		pr_warning("Config tap device error. Are you root?");
		goto fail;
	}

	hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
			sizeof(struct virtio_net_hdr_mrg_rxbuf) :
			sizeof(struct virtio_net_hdr);
	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
		pr_warning("Config tap device TUNSETVNETHDRSZ error");

	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
		pr_warning("Config tap device TUNSETOFFLOAD error");
		goto fail;
	}

	if (strcmp(params->script, "none")) {
		pid = fork();
		if (pid == 0) {
			execl(params->script, params->script, ndev->tap_name, (char *)NULL);
			_exit(1);
		} else {
			waitpid(pid, &status, 0);
			if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
				pr_warning("Failed to set up tap with %s", params->script);
				goto fail;
			}
		}
	} else if (!skipconf) {
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
		sin.sin_addr.s_addr = inet_addr(params->host_ip);
		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
		ifr.ifr_addr.sa_family = AF_INET;
		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
			pr_warning("Could not set ip address on tap device");
			goto fail;
		}
	}

	if (!skipconf) {
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
		ioctl(sock, SIOCGIFFLAGS, &ifr);
		ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
		if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
			pr_warning("Could not bring tap device up");
	}

	close(sock);

	return true;

fail:
	if (sock >= 0)
		close(sock);
	if (ndev->tap_fd >= 0)
		close(ndev->tap_fd);

	return false;
}

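/* Thin trampolines mapping the generic rx/tx ops onto either the tap fd or the user-mode (uip) stack. */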
static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return writev(ndev->tap_fd, iov, out);
}

static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return readv(ndev->tap_fd, iov, in);
}

static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return uip_tx(iov, out, &ndev->info);
}

static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return uip_rx(iov, in, &ndev->info);
}

static struct net_dev_operations tap_ops = {
	.rx	= tap_ops_rx,
	.tx	= tap_ops_tx,
};

static struct net_dev_operations uip_ops = {
	.rx	= uip_ops_rx,
	.tx	= uip_ops_tx,
};

static u8 *get_config(struct kvm *kvm, void *dev)
{
	struct net_dev *ndev = dev;

	return ((u8 *)(&ndev->config));
}

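/*
 * Feature bits we offer to the guest. VIRTIO_NET_F_MQ is only
 * advertised with more than one queue pair; otherwise the expression
 * ORs in bit 0, which VIRTIO_NET_F_CSUM has already set.
 */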
static u32 get_host_features(struct kvm *kvm, void *dev)
{
	struct net_dev *ndev = dev;

	return 1UL << VIRTIO_NET_F_MAC
		| 1UL << VIRTIO_NET_F_CSUM
		| 1UL << VIRTIO_NET_F_HOST_UFO
		| 1UL << VIRTIO_NET_F_HOST_TSO4
		| 1UL << VIRTIO_NET_F_HOST_TSO6
		| 1UL << VIRTIO_NET_F_GUEST_UFO
		| 1UL << VIRTIO_NET_F_GUEST_TSO4
		| 1UL << VIRTIO_NET_F_GUEST_TSO6
		| 1UL << VIRTIO_RING_F_EVENT_IDX
		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
		| 1UL << VIRTIO_NET_F_CTRL_VQ
		| 1UL << VIRTIO_NET_F_MRG_RXBUF
		| 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
}

static int virtio_net__vhost_set_features(struct net_dev *ndev)
{
	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
	u64 vhost_features;

	if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0)
		die_perror("VHOST_GET_FEATURES failed");

	/* Make sure both sides support mergeable rx buffers */
	if (vhost_features & 1UL << VIRTIO_NET_F_MRG_RXBUF &&
			has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
		features |= 1UL << VIRTIO_NET_F_MRG_RXBUF;

	return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
}

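/*
 * Called once the guest has acked its feature set: record the features,
 * put the config space into guest byte order, and bring up the chosen
 * backend (tap, or the built-in uip stack in user mode).
 */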
static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
{
	struct net_dev *ndev = dev;
	struct virtio_net_config *conf = &ndev->config;

	ndev->features = features;

	conf->status = virtio_host_to_guest_u16(&ndev->vdev, conf->status);
	conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev,
							     conf->max_virtqueue_pairs);

	if (ndev->mode == NET_MODE_TAP) {
		if (!virtio_net__tap_init(ndev))
			die_perror("You have requested a TAP device, but creation of one has failed because");
		if (ndev->vhost_fd &&
				virtio_net__vhost_set_features(ndev) != 0)
			die_perror("VHOST_SET_FEATURES failed");
	} else {
		ndev->info.vnet_hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
						sizeof(struct virtio_net_hdr_mrg_rxbuf) :
						sizeof(struct virtio_net_hdr);
		uip_init(&ndev->info);
	}
}

static bool is_ctrl_vq(struct net_dev *ndev, u32 vq)
{
	return vq == (u32)(ndev->queue_pairs * 2);
}

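/*
 * Queue setup takes one of three paths: the control queue gets its own
 * thread, data queues get rx/tx threads when vhost is off, and with
 * vhost the ring addresses are handed straight to the kernel.
 */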
static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align,
		   u32 pfn)
{
	struct vhost_vring_state state = { .index = vq };
	struct vhost_vring_addr addr;
	struct net_dev *ndev = dev;
	struct virt_queue *queue;
	void *p;
	int r;

	compat__remove_message(compat_id);

	queue		= &ndev->vqs[vq];
	queue->pfn	= pfn;
	p		= virtio_get_vq(kvm, queue->pfn, page_size);

	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, align);
	virtio_init_device_vq(&ndev->vdev, queue);

	mutex_init(&ndev->io_lock[vq]);
	pthread_cond_init(&ndev->io_cond[vq], NULL);
	if (is_ctrl_vq(ndev, vq)) {
		pthread_create(&ndev->io_thread[vq], NULL, virtio_net_ctrl_thread, ndev);

		return 0;
	} else if (ndev->vhost_fd == 0) {
		if (vq & 1)
			pthread_create(&ndev->io_thread[vq], NULL, virtio_net_tx_thread, ndev);
		else
			pthread_create(&ndev->io_thread[vq], NULL, virtio_net_rx_thread, ndev);

		return 0;
	}

	if (queue->endian != VIRTIO_ENDIAN_HOST)
		die_perror("VHOST requires VIRTIO_ENDIAN_HOST");

	state.num = queue->vring.num;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_NUM failed");
	state.num = 0;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_BASE failed");

	addr = (struct vhost_vring_addr) {
		.index = vq,
		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
		.used_user_addr = (u64)(unsigned long)queue->vring.used,
	};

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
	if (r < 0)
		die_perror("VHOST_SET_VRING_ADDR failed");

	return 0;
}

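/*
 * With vhost, wire interrupts up directly: register an eventfd as a KVM
 * irqfd for the queue's GSI, hand it to vhost as the call fd, and
 * attach the tap fd as the queue's backend.
 */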
static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
{
	struct net_dev *ndev = dev;
	struct kvm_irqfd irq;
	struct vhost_vring_file file;
	int r;

	if (ndev->vhost_fd == 0)
		return;

	irq = (struct kvm_irqfd) {
		.gsi	= gsi,
		.fd	= eventfd(0, 0),
	};
	file = (struct vhost_vring_file) {
		.index	= vq,
		.fd	= irq.fd,
	};

	r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq);
	if (r < 0)
		die_perror("KVM_IRQFD failed");

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
	if (r < 0)
		die_perror("VHOST_SET_VRING_CALL failed");
	file.fd = ndev->tap_fd;
	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
	if (r != 0)
		die("VHOST_NET_SET_BACKEND failed %d", errno);
}

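/* Give the guest's kick eventfd to vhost for data queues; the control queue is still served in userspace. */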
static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
{
	struct net_dev *ndev = dev;
	struct vhost_vring_file file = {
		.index	= vq,
		.fd	= efd,
	};
	int r;

	if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
		return;

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
	if (r < 0)
		die_perror("VHOST_SET_VRING_KICK failed");
}

static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	virtio_net_handle_callback(kvm, ndev, vq);

	return 0;
}

static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	return ndev->vqs[vq].pfn;
}

static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
{
	/* FIXME: dynamic */
	return VIRTIO_NET_QUEUE_SIZE;
}

static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
{
	/* FIXME: dynamic */
	return size;
}

static struct virtio_ops net_dev_virtio_ops = (struct virtio_ops) {
	.get_config		= get_config,
	.get_host_features	= get_host_features,
	.set_guest_features	= set_guest_features,
	.init_vq		= init_vq,
	.get_pfn_vq		= get_pfn_vq,
	.get_size_vq		= get_size_vq,
	.set_size_vq		= set_size_vq,
	.notify_vq		= notify_vq,
	.notify_vq_gsi		= notify_vq_gsi,
	.notify_vq_eventfd	= notify_vq_eventfd,
};

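/*
 * Open /dev/vhost-net, take ownership, and describe guest RAM to the
 * kernel as a single region so vhost can translate guest physical
 * addresses.
 */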
static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
{
	struct vhost_memory *mem;
	int r;

	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
	if (ndev->vhost_fd < 0)
		die_perror("Failed opening vhost-net device");

	mem = calloc(1, sizeof(*mem) + sizeof(struct vhost_memory_region));
	if (mem == NULL)
		die("Failed allocating memory for vhost memory map");

	mem->nregions = 1;
	mem->regions[0] = (struct vhost_memory_region) {
		.guest_phys_addr	= 0,
		.memory_size		= kvm->ram_size,
		.userspace_addr		= (unsigned long)kvm->ram_start,
	};

	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
	if (r != 0)
		die_perror("VHOST_SET_OWNER failed");

	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
	if (r != 0)
		die_perror("VHOST_SET_MEM_TABLE failed");

	ndev->vdev.use_vhost = true;

	free(mem);
}

static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
		mac, mac+1, mac+2, mac+3, mac+4, mac+5);
}

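/* Parse a single key=value pair from a --network option into the params struct. */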
static int set_net_param(struct kvm *kvm, struct virtio_net_params *p,
			const char *param, const char *val)
{
	if (strcmp(param, "guest_mac") == 0) {
		str_to_mac(val, p->guest_mac);
	} else if (strcmp(param, "mode") == 0) {
		if (!strncmp(val, "user", 4)) {
			int i;

			for (i = 0; i < kvm->cfg.num_net_devices; i++)
				if (kvm->cfg.net_params[i].mode == NET_MODE_USER)
					die("Only one usermode network device allowed at a time");
			p->mode = NET_MODE_USER;
		} else if (!strncmp(val, "tap", 3)) {
			p->mode = NET_MODE_TAP;
		} else if (!strncmp(val, "none", 4)) {
			kvm->cfg.no_net = 1;
			return -1;
		} else
			die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network);
	} else if (strcmp(param, "script") == 0) {
		p->script = strdup(val);
	} else if (strcmp(param, "guest_ip") == 0) {
		p->guest_ip = strdup(val);
	} else if (strcmp(param, "host_ip") == 0) {
		p->host_ip = strdup(val);
	} else if (strcmp(param, "trans") == 0) {
		p->trans = strdup(val);
	} else if (strcmp(param, "tapif") == 0) {
		p->tapif = strdup(val);
	} else if (strcmp(param, "vhost") == 0) {
		p->vhost = atoi(val);
	} else if (strcmp(param, "fd") == 0) {
		p->fd = atoi(val);
	} else if (strcmp(param, "mq") == 0) {
		p->mq = atoi(val);
	} else
		die("Unknown network parameter %s", param);

	return 0;
}

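/*
 * Option callback for --network: tokenize the "key=val,key=val" string,
 * fill in defaults, and append the result to kvm->cfg.net_params.
 * For example, using the keys accepted by set_net_param() above, an
 * invocation might look like:
 *
 *	--network mode=tap,tapif=tap0,vhost=1,mq=4
 */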
int netdev_parser(const struct option *opt, const char *arg, int unset)
{
	struct virtio_net_params p;
	char *buf = NULL, *cmd = NULL, *cur = NULL;
	bool on_cmd = true;
	struct kvm *kvm = opt->ptr;

	if (arg) {
		buf = strdup(arg);
		if (buf == NULL)
			die("Failed allocating new net buffer");
		cur = strtok(buf, ",=");
	}

	p = (struct virtio_net_params) {
		.guest_ip	= DEFAULT_GUEST_ADDR,
		.host_ip	= DEFAULT_HOST_ADDR,
		.script		= DEFAULT_SCRIPT,
		.mode		= NET_MODE_TAP,
	};

	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
	p.guest_mac[5] += kvm->cfg.num_net_devices;

	while (cur) {
		if (on_cmd) {
			cmd = cur;
		} else {
			if (set_net_param(kvm, &p, cmd, cur) < 0)
				goto done;
		}
		on_cmd = !on_cmd;

		cur = strtok(NULL, ",=");
	}

	kvm->cfg.num_net_devices++;

	kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params));
	if (kvm->cfg.net_params == NULL)
		die("Failed adding new network device");

	kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p;

done:
	free(buf);
	return 0;
}

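/*
 * Instantiate one virtio-net device: pick the backend from the mode,
 * select the transport, and register the device with the virtio core.
 */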
static int virtio_net__init_one(struct virtio_net_params *params)
{
	int i, err;
	struct net_dev *ndev;
	struct virtio_ops *ops;
	enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm);

	ndev = calloc(1, sizeof(struct net_dev));
	if (ndev == NULL)
		return -ENOMEM;

	ops = malloc(sizeof(*ops));
	if (ops == NULL) {
		err = -ENOMEM;
		goto err_free_ndev;
	}

	list_add_tail(&ndev->list, &ndevs);

	ndev->kvm = params->kvm;
	ndev->params = params;

	mutex_init(&ndev->mutex);
	ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq));
	ndev->config.status = VIRTIO_NET_S_LINK_UP;
	if (ndev->queue_pairs > 1)
		ndev->config.max_virtqueue_pairs = ndev->queue_pairs;

	for (i = 0 ; i < 6 ; i++) {
		ndev->config.mac[i]		= params->guest_mac[i];
		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
		ndev->info.host_mac.addr[i]	= params->host_mac[i];
	}

	ndev->mode = params->mode;
	if (ndev->mode == NET_MODE_TAP) {
		ndev->ops = &tap_ops;
	} else {
		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
		ndev->info.buf_nr		= 20;
		ndev->ops = &uip_ops;
		uip_static_init(&ndev->info);
	}

	*ops = net_dev_virtio_ops;

	if (params->trans) {
		if (strcmp(params->trans, "mmio") == 0)
			trans = VIRTIO_MMIO;
		else if (strcmp(params->trans, "pci") == 0)
			trans = VIRTIO_PCI;
		else
			pr_warning("virtio-net: Unknown transport method: %s, "
				   "falling back to %s.", params->trans,
				   virtio_trans_name(trans));
	}

	virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans,
		    PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);

	if (params->vhost)
		virtio_net__vhost_init(params->kvm, ndev);

	if (compat_id == -1)
		compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");

	return 0;

err_free_ndev:
	free(ndev);
	return err;
}

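/*
 * Create every device described on the command line; if none were given
 * and networking was not disabled, fall back to a single user-mode
 * (uip) device built from the top-level kvm->cfg defaults.
 */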
int virtio_net__init(struct kvm *kvm)
{
	int i;

	for (i = 0; i < kvm->cfg.num_net_devices; i++) {
		kvm->cfg.net_params[i].kvm = kvm;
		virtio_net__init_one(&kvm->cfg.net_params[i]);
	}

	if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) {
		static struct virtio_net_params net_params;

		net_params = (struct virtio_net_params) {
			.guest_ip	= kvm->cfg.guest_ip,
			.host_ip	= kvm->cfg.host_ip,
			.kvm		= kvm,
			.script		= kvm->cfg.script,
			.mode		= NET_MODE_USER,
		};
		str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac);
		str_to_mac(kvm->cfg.host_mac, net_params.host_mac);

		virtio_net__init_one(&net_params);
	}

	return 0;
}
virtio_dev_init(virtio_net__init);

int virtio_net__exit(struct kvm *kvm)
{
	return 0;
}
virtio_dev_exit(virtio_net__exit);