xref: /kvmtool/virtio/net.c (revision eaf4f9656170b6e6153b52cd2c158facb9e11c5d)
#include "kvm/virtio-pci-dev.h"
#include "kvm/virtio-net.h"
#include "kvm/virtio.h"
#include "kvm/mutex.h"
#include "kvm/util.h"
#include "kvm/kvm.h"
#include "kvm/irq.h"
#include "kvm/uip.h"
#include "kvm/guest_compat.h"
#include "kvm/iovec.h"

#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <linux/if_tun.h>
#include <linux/types.h>

#include <arpa/inet.h>
#include <net/if.h>

#include <unistd.h>
#include <fcntl.h>

#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/eventfd.h>

#define VIRTIO_NET_QUEUE_SIZE		256
#define VIRTIO_NET_NUM_QUEUES		8

struct net_dev;

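/*
 * Backend operations: rx() pulls one packet from the backend (tap or the
 * built-in uip stack) into the supplied iovec, tx() pushes one out. Both
 * return the number of bytes transferred, or a negative value on error.
 */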
struct net_dev_operations {
	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
};

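/*
 * Virtqueue layout: for each of the queue_pairs pairs, vq 2i is RX and
 * vq 2i + 1 is TX; vq queue_pairs * 2 is the control queue. Hence the
 * VIRTIO_NET_NUM_QUEUES * 2 + 1 sizing of the arrays below.
 */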
struct net_dev {
	struct mutex			mutex;
	struct virtio_device		vdev;
	struct list_head		list;

	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	struct virtio_net_config	config;
	u32				features, rx_vqs, tx_vqs, queue_pairs;

	pthread_t			io_thread[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	struct mutex			io_lock[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	pthread_cond_t			io_cond[VIRTIO_NET_NUM_QUEUES * 2 + 1];

	int				vhost_fd;
	int				tap_fd;
	char				tap_name[IFNAMSIZ];

	int				mode;

	struct uip_info			info;
	struct net_dev_operations	*ops;
	struct kvm			*kvm;

	struct virtio_net_params	*params;
};

static LIST_HEAD(ndevs);
static int compat_id = -1;

#define MAX_PACKET_SIZE 65550

static bool has_virtio_feature(struct net_dev *ndev, u32 feature)
{
	return ndev->features & (1 << feature);
}

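/*
 * Virtio-net header fields are guest-endian: convert them to host byte
 * order on the TX path and back to guest byte order on the RX path.
 */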
static void virtio_net_fix_tx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev)
{
	hdr->hdr_len		= virtio_guest_to_host_u16(&ndev->vdev, hdr->hdr_len);
	hdr->gso_size		= virtio_guest_to_host_u16(&ndev->vdev, hdr->gso_size);
	hdr->csum_start		= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_start);
	hdr->csum_offset	= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_offset);
}

static void virtio_net_fix_rx_hdr(struct virtio_net_hdr_mrg_rxbuf *hdr, struct net_dev *ndev)
{
	hdr->hdr.hdr_len	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.hdr_len);
	hdr->hdr.gso_size	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.gso_size);
	hdr->hdr.csum_start	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.csum_start);
	hdr->hdr.csum_offset	= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr.csum_offset);
	if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
		hdr->num_buffers	= virtio_host_to_guest_u16(&ndev->vdev, hdr->num_buffers);
}

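/*
 * One RX and one TX thread is started per queue pair, plus one for the
 * control queue; notify_vq() wakes the matching thread through its
 * condition variable when the guest kicks a queue.
 */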
static void *virtio_net_rx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	struct net_dev *ndev = p;
	u16 out, in;
	u16 head;
	int len, copied;
	u32 id;

	mutex_lock(&ndev->mutex);
	id = ndev->rx_vqs++ * 2;
	mutex_unlock(&ndev->mutex);

	kvm__set_thread_name("virtio-net-rx");

	kvm = ndev->kvm;
	vq = &ndev->vqs[id];

	while (1) {
		mutex_lock(&ndev->io_lock[id]);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
		mutex_unlock(&ndev->io_lock[id]);

		while (virt_queue__available(vq)) {
			unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)];
			struct iovec dummy_iov = {
				.iov_base = buffer,
				.iov_len  = sizeof(buffer),
			};
			struct virtio_net_hdr_mrg_rxbuf *hdr;
			int i;

			len = ndev->ops->rx(&dummy_iov, 1, ndev);
			if (len < 0) {
				pr_warning("%s: rx on vq %u failed (%d), exiting thread",
						__func__, id, len);
				goto out_err;
			}

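			/*
			 * Spread the packet over as many descriptor chains
			 * as it needs; with VIRTIO_NET_F_MRG_RXBUF the first
			 * header's num_buffers field counts the chains used.
			 */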
			copied = i = 0;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			hdr = iov[0].iov_base;
			while (copied < len) {
				size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in));

				memcpy_toiovec(iov, buffer + copied, iovsize);
				copied += iovsize;
				if (i++ == 0)
					virtio_net_fix_rx_hdr(hdr, ndev);
				if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF)) {
					u16 num_buffers = virtio_guest_to_host_u16(vq, hdr->num_buffers);
					hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers + 1);
				}
				virt_queue__set_used_elem(vq, head, iovsize);
				if (copied == len)
					break;
				while (!virt_queue__available(vq))
					sleep(0);
				head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			}
			/* Signal the guest right away, otherwise RX latency suffers. */
			if (virtio_queue__should_signal(vq))
				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
		}
	}

out_err:
	pthread_exit(NULL);
	return NULL;
}

static void *virtio_net_tx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	struct net_dev *ndev = p;
	u16 out, in;
	u16 head;
	int len;
	u32 id;

	mutex_lock(&ndev->mutex);
	id = ndev->tx_vqs++ * 2 + 1;
	mutex_unlock(&ndev->mutex);

	kvm__set_thread_name("virtio-net-tx");

	kvm = ndev->kvm;
	vq = &ndev->vqs[id];

	while (1) {
		mutex_lock(&ndev->io_lock[id]);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
		mutex_unlock(&ndev->io_lock[id]);

		while (virt_queue__available(vq)) {
			struct virtio_net_hdr *hdr;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			hdr = iov[0].iov_base;
			virtio_net_fix_tx_hdr(hdr, ndev);
			len = ndev->ops->tx(iov, out, ndev);
			if (len < 0) {
				pr_warning("%s: tx on vq %u failed (%d)",
						__func__, id, errno);
				goto out_err;
			}

			virt_queue__set_used_elem(vq, head, len);
		}

		if (virtio_queue__should_signal(vq))
			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
	}

out_err:
	pthread_exit(NULL);
	return NULL;
}

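/*
 * VIRTIO_NET_CTRL_MQ is simply acknowledged: backend threads are created
 * as the guest initializes each virtqueue, so nothing is left to do when
 * it sets the number of active queue pairs.
 */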
static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm *kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl)
{
	/* Not much to do here */
	return VIRTIO_NET_OK;
}

static void *virtio_net_ctrl_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	u16 out, in, head;
	struct net_dev *ndev = p;
	struct kvm *kvm = ndev->kvm;
	u32 id = ndev->queue_pairs * 2;
	struct virt_queue *vq = &ndev->vqs[id];
	struct virtio_net_ctrl_hdr *ctrl;
	virtio_net_ctrl_ack *ack;

	while (1) {
		mutex_lock(&ndev->io_lock[id]);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
		mutex_unlock(&ndev->io_lock[id]);

		while (virt_queue__available(vq)) {
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			ctrl = iov[0].iov_base;
			ack = iov[out].iov_base;

			switch (ctrl->class) {
			case VIRTIO_NET_CTRL_MQ:
				*ack = virtio_net_handle_mq(kvm, ndev, ctrl);
				break;
			default:
				*ack = VIRTIO_NET_ERR;
				break;
			}
			virt_queue__set_used_elem(vq, head, iov[out].iov_len);
		}

		if (virtio_queue__should_signal(vq))
			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
	}

	pthread_exit(NULL);
	return NULL;
}

static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
{
	if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) {
		pr_warning("Unknown queue index %u", queue);
		return;
	}

	mutex_lock(&ndev->io_lock[queue]);
	pthread_cond_signal(&ndev->io_cond[queue]);
	mutex_unlock(&ndev->io_lock[queue]);
}

static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr,
				  const char *tapname)
{
	int ret;

	memset(ifr, 0, sizeof(*ifr));
	ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
	if (tapname)
		strncpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name));

	ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr);

	if (ret >= 0)
		strncpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name));
	return ret;
}

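/*
 * Open the tap (or macvtap) device, negotiate the vnet header size and
 * offloads, then either delegate configuration to the user's script or
 * assign the host-side IP and bring the interface up ourselves.
 */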
static bool virtio_net__tap_init(struct net_dev *ndev)
{
	int sock = socket(AF_INET, SOCK_STREAM, 0);
	int pid, status, offload, hdr_len;
	struct sockaddr_in sin = {0};
	struct ifreq ifr;
	const struct virtio_net_params *params = ndev->params;
	bool skipconf = !!params->tapif;
	bool macvtap = skipconf && (params->tapif[0] == '/');
	const char *tap_file = "/dev/net/tun";

	/* Did the user already give us the FD? */
	if (params->fd) {
		ndev->tap_fd = params->fd;
		return true;
	}

	if (macvtap)
		tap_file = params->tapif;

	ndev->tap_fd = open(tap_file, O_RDWR);
	if (ndev->tap_fd < 0) {
		pr_warning("Unable to open %s", tap_file);
		goto fail;
	}

	if (!macvtap &&
	    virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) {
		pr_warning("Failed to configure tap device. Are you root?");
		goto fail;
	}

	hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
			sizeof(struct virtio_net_hdr_mrg_rxbuf) :
			sizeof(struct virtio_net_hdr);
	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
		pr_warning("Failed to set vnet header size on tap device");

	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
		pr_warning("Failed to set offloads on tap device");
		goto fail;
	}

	if (strcmp(params->script, "none")) {
		pid = fork();
		if (pid == 0) {
			execl(params->script, params->script, ndev->tap_name, (char *)NULL);
			_exit(1);
		} else {
			waitpid(pid, &status, 0);
			if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
				pr_warning("Failed to set up tap device with %s", params->script);
				goto fail;
			}
		}
	} else if (!skipconf) {
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
		sin.sin_addr.s_addr = inet_addr(params->host_ip);
		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
		ifr.ifr_addr.sa_family = AF_INET;
		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
			pr_warning("Could not set IP address on tap device");
			goto fail;
		}
	}

	if (!skipconf) {
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ifr.ifr_name));
		ioctl(sock, SIOCGIFFLAGS, &ifr);
		ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
		if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
			pr_warning("Could not bring tap device up");
	}

	close(sock);

	return true;

fail:
	if (sock >= 0)
		close(sock);
	if (ndev->tap_fd >= 0)
		close(ndev->tap_fd);

	return false;
}

static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return writev(ndev->tap_fd, iov, out);
}

static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return readv(ndev->tap_fd, iov, in);
}

static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
{
	return uip_tx(iov, out, &ndev->info);
}

static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
{
	return uip_rx(iov, in, &ndev->info);
}

static struct net_dev_operations tap_ops = {
	.rx	= tap_ops_rx,
	.tx	= tap_ops_tx,
};

static struct net_dev_operations uip_ops = {
	.rx	= uip_ops_rx,
	.tx	= uip_ops_tx,
};

static u8 *get_config(struct kvm *kvm, void *dev)
{
	struct net_dev *ndev = dev;

	return (u8 *)&ndev->config;
}

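/*
 * Features advertised to the guest. VIRTIO_NET_F_MQ is only offered when
 * more than one queue pair was configured.
 */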
static u32 get_host_features(struct kvm *kvm, void *dev)
{
	struct net_dev *ndev = dev;

	return 1UL << VIRTIO_NET_F_MAC
		| 1UL << VIRTIO_NET_F_CSUM
		| 1UL << VIRTIO_NET_F_HOST_UFO
		| 1UL << VIRTIO_NET_F_HOST_TSO4
		| 1UL << VIRTIO_NET_F_HOST_TSO6
		| 1UL << VIRTIO_NET_F_GUEST_UFO
		| 1UL << VIRTIO_NET_F_GUEST_TSO4
		| 1UL << VIRTIO_NET_F_GUEST_TSO6
		| 1UL << VIRTIO_RING_F_EVENT_IDX
		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
		| 1UL << VIRTIO_NET_F_CTRL_VQ
		| 1UL << VIRTIO_NET_F_MRG_RXBUF
		| (ndev->queue_pairs > 1 ? 1UL << VIRTIO_NET_F_MQ : 0);
}

static int virtio_net__vhost_set_features(struct net_dev *ndev)
{
	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
	u64 vhost_features;

	if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0)
		die_perror("VHOST_GET_FEATURES failed");

	/* make sure both sides support mergeable rx buffers */
	if (vhost_features & 1UL << VIRTIO_NET_F_MRG_RXBUF &&
			has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
		features |= 1UL << VIRTIO_NET_F_MRG_RXBUF;

	return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
}

static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
{
	struct net_dev *ndev = dev;
	struct virtio_net_config *conf = &ndev->config;

	ndev->features = features;

	conf->status = virtio_host_to_guest_u16(&ndev->vdev, conf->status);
	conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev,
							     conf->max_virtqueue_pairs);

	if (ndev->mode == NET_MODE_TAP) {
		if (!virtio_net__tap_init(ndev))
			die_perror("You have requested a TAP device, but creation of one has failed because");
		if (ndev->vhost_fd &&
				virtio_net__vhost_set_features(ndev) != 0)
			die_perror("VHOST_SET_FEATURES failed");
	} else {
		ndev->info.vnet_hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
						sizeof(struct virtio_net_hdr_mrg_rxbuf) :
						sizeof(struct virtio_net_hdr);
		uip_init(&ndev->info);
	}
}

static bool is_ctrl_vq(struct net_dev *ndev, u32 vq)
{
	return vq == (u32)(ndev->queue_pairs * 2);
}

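/*
 * Three cases: the control queue gets a dedicated thread; without vhost,
 * each RX/TX queue gets a userspace I/O thread; with vhost, the vring
 * layout is programmed into the kernel and no thread is needed.
 */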
static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align,
		   u32 pfn)
{
	struct vhost_vring_state state = { .index = vq };
	struct vhost_vring_addr addr;
	struct net_dev *ndev = dev;
	struct virt_queue *queue;
	void *p;
	int r;

	compat__remove_message(compat_id);

	queue		= &ndev->vqs[vq];
	queue->pfn	= pfn;
	p		= virtio_get_vq(kvm, queue->pfn, page_size);

	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, align);
	virtio_init_device_vq(&ndev->vdev, queue);

	mutex_init(&ndev->io_lock[vq]);
	pthread_cond_init(&ndev->io_cond[vq], NULL);
	if (is_ctrl_vq(ndev, vq)) {
		pthread_create(&ndev->io_thread[vq], NULL, virtio_net_ctrl_thread, ndev);

		return 0;
	} else if (ndev->vhost_fd == 0) {
		if (vq & 1)
			pthread_create(&ndev->io_thread[vq], NULL, virtio_net_tx_thread, ndev);
		else
			pthread_create(&ndev->io_thread[vq], NULL, virtio_net_rx_thread, ndev);

		return 0;
	}

	if (queue->endian != VIRTIO_ENDIAN_HOST)
		die_perror("VHOST requires VIRTIO_ENDIAN_HOST");

	state.num = queue->vring.num;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_NUM failed");
	state.num = 0;
	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
	if (r < 0)
		die_perror("VHOST_SET_VRING_BASE failed");

	addr = (struct vhost_vring_addr) {
		.index = vq,
		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
		.used_user_addr = (u64)(unsigned long)queue->vring.used,
	};

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
	if (r < 0)
		die_perror("VHOST_SET_VRING_ADDR failed");

	return 0;
}

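/*
 * Wire up vhost interrupt delivery: an eventfd registered via KVM_IRQFD
 * injects the queue's GSI when signalled, and the same fd is handed to
 * vhost with VHOST_SET_VRING_CALL. The tap fd then becomes the vhost-net
 * backend.
 */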
static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
{
	struct net_dev *ndev = dev;
	struct kvm_irqfd irq;
	struct vhost_vring_file file;
	int r;

	if (ndev->vhost_fd == 0)
		return;

	irq = (struct kvm_irqfd) {
		.gsi	= gsi,
		.fd	= eventfd(0, 0),
	};
	file = (struct vhost_vring_file) {
		.index	= vq,
		.fd	= irq.fd,
	};

	r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq);
	if (r < 0)
		die_perror("KVM_IRQFD failed");

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
	if (r < 0)
		die_perror("VHOST_SET_VRING_CALL failed");
	file.fd = ndev->tap_fd;
	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
	if (r != 0)
		die("VHOST_NET_SET_BACKEND failed %d", errno);
}

static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
{
	struct net_dev *ndev = dev;
	struct vhost_vring_file file = {
		.index	= vq,
		.fd	= efd,
	};
	int r;

	if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
		return;

	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
	if (r < 0)
		die_perror("VHOST_SET_VRING_KICK failed");
}

static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	virtio_net_handle_callback(kvm, ndev, vq);

	return 0;
}

static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
{
	struct net_dev *ndev = dev;

	return ndev->vqs[vq].pfn;
}

static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
{
	/* FIXME: dynamic */
	return VIRTIO_NET_QUEUE_SIZE;
}

static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
{
	/* FIXME: dynamic */
	return size;
}

static struct virtio_ops net_dev_virtio_ops = (struct virtio_ops) {
	.get_config		= get_config,
	.get_host_features	= get_host_features,
	.set_guest_features	= set_guest_features,
	.init_vq		= init_vq,
	.get_pfn_vq		= get_pfn_vq,
	.get_size_vq		= get_size_vq,
	.set_size_vq		= set_size_vq,
	.notify_vq		= notify_vq,
	.notify_vq_gsi		= notify_vq_gsi,
	.notify_vq_eventfd	= notify_vq_eventfd,
};

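/*
 * Hand the datapath over to the in-kernel vhost-net driver. It is given
 * the guest-physical to userspace-virtual memory map so it can translate
 * the addresses it finds in the vrings by itself.
 */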
static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
{
	struct vhost_memory *mem;
	int r;

	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
	if (ndev->vhost_fd < 0)
		die_perror("Failed opening vhost-net device");

	mem = calloc(1, sizeof(*mem) + sizeof(struct vhost_memory_region));
	if (mem == NULL)
		die("Failed allocating memory for vhost memory map");

	mem->nregions = 1;
	mem->regions[0] = (struct vhost_memory_region) {
		.guest_phys_addr	= 0,
		.memory_size		= kvm->ram_size,
		.userspace_addr		= (unsigned long)kvm->ram_start,
	};

	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
	if (r != 0)
		die_perror("VHOST_SET_OWNER failed");

	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
	if (r != 0)
		die_perror("VHOST_SET_MEM_TABLE failed");

	ndev->vdev.use_vhost = true;

	free(mem);
}

static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
		mac, mac+1, mac+2, mac+3, mac+4, mac+5);
}

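/*
 * Parse a single key=value pair from a --network option string. As an
 * illustrative (not exhaustive) example, an invocation along the lines of
 *
 *	lkvm run --network mode=tap,tapif=tap0,vhost=1,mq=4
 *
 * would pass each pair below in turn.
 */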
static int set_net_param(struct kvm *kvm, struct virtio_net_params *p,
			const char *param, const char *val)
{
	if (strcmp(param, "guest_mac") == 0) {
		str_to_mac(val, p->guest_mac);
	} else if (strcmp(param, "mode") == 0) {
		if (!strncmp(val, "user", 4)) {
			int i;

			for (i = 0; i < kvm->cfg.num_net_devices; i++)
				if (kvm->cfg.net_params[i].mode == NET_MODE_USER)
					die("Only one usermode network device allowed at a time");
			p->mode = NET_MODE_USER;
		} else if (!strncmp(val, "tap", 3)) {
			p->mode = NET_MODE_TAP;
		} else if (!strncmp(val, "none", 4)) {
			kvm->cfg.no_net = 1;
			return -1;
		} else
			die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network);
	} else if (strcmp(param, "script") == 0) {
		p->script = strdup(val);
	} else if (strcmp(param, "guest_ip") == 0) {
		p->guest_ip = strdup(val);
	} else if (strcmp(param, "host_ip") == 0) {
		p->host_ip = strdup(val);
	} else if (strcmp(param, "trans") == 0) {
		p->trans = strdup(val);
	} else if (strcmp(param, "tapif") == 0) {
		p->tapif = strdup(val);
	} else if (strcmp(param, "vhost") == 0) {
		p->vhost = atoi(val);
	} else if (strcmp(param, "fd") == 0) {
		p->fd = atoi(val);
	} else if (strcmp(param, "mq") == 0) {
		p->mq = atoi(val);
	} else
		die("Unknown network parameter %s", param);

	return 0;
}

int netdev_parser(const struct option *opt, const char *arg, int unset)
{
	struct virtio_net_params p;
	char *buf = NULL, *cmd = NULL, *cur = NULL;
	bool on_cmd = true;
	struct kvm *kvm = opt->ptr;

	if (arg) {
		buf = strdup(arg);
		if (buf == NULL)
			die("Failed allocating new net buffer");
		cur = strtok(buf, ",=");
	}

	p = (struct virtio_net_params) {
		.guest_ip	= DEFAULT_GUEST_ADDR,
		.host_ip	= DEFAULT_HOST_ADDR,
		.script		= DEFAULT_SCRIPT,
		.mode		= NET_MODE_TAP,
	};

	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
	p.guest_mac[5] += kvm->cfg.num_net_devices;

	while (cur) {
		if (on_cmd) {
			cmd = cur;
		} else {
			if (set_net_param(kvm, &p, cmd, cur) < 0)
				goto done;
		}
		on_cmd = !on_cmd;

		cur = strtok(NULL, ",=");
	}

	kvm->cfg.num_net_devices++;

	kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params));
	if (kvm->cfg.net_params == NULL)
		die("Failed adding new network device");

	kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p;

done:
	free(buf);
	return 0;
}

static int virtio_net__init_one(struct virtio_net_params *params)
{
	int i, err;
	struct net_dev *ndev;
	struct virtio_ops *ops;
	enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm);

	ndev = calloc(1, sizeof(struct net_dev));
	if (ndev == NULL)
		return -ENOMEM;

	ops = malloc(sizeof(*ops));
	if (ops == NULL) {
		err = -ENOMEM;
		goto err_free_ndev;
	}

	list_add_tail(&ndev->list, &ndevs);

	ndev->kvm = params->kvm;
	ndev->params = params;

	mutex_init(&ndev->mutex);
	ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq));
	ndev->config.status = VIRTIO_NET_S_LINK_UP;
	if (ndev->queue_pairs > 1)
		ndev->config.max_virtqueue_pairs = ndev->queue_pairs;

	for (i = 0; i < 6; i++) {
		ndev->config.mac[i]		= params->guest_mac[i];
		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
		ndev->info.host_mac.addr[i]	= params->host_mac[i];
	}

	ndev->mode = params->mode;
	if (ndev->mode == NET_MODE_TAP) {
		ndev->ops = &tap_ops;
	} else {
		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
		ndev->info.buf_nr		= 20;
		ndev->ops = &uip_ops;
		uip_static_init(&ndev->info);
	}

	*ops = net_dev_virtio_ops;

	if (params->trans) {
		if (strcmp(params->trans, "mmio") == 0)
			trans = VIRTIO_MMIO;
		else if (strcmp(params->trans, "pci") == 0)
			trans = VIRTIO_PCI;
		else
			pr_warning("virtio-net: Unknown transport method: %s, "
				   "falling back to %s.", params->trans,
				   virtio_trans_name(trans));
	}

	virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans,
		    PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);

	if (params->vhost)
		virtio_net__vhost_init(params->kvm, ndev);

	if (compat_id == -1)
		compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");

	return 0;

err_free_ndev:
	free(ndev);
	return err;
}

int virtio_net__init(struct kvm *kvm)
{
	int i;

	for (i = 0; i < kvm->cfg.num_net_devices; i++) {
		kvm->cfg.net_params[i].kvm = kvm;
		virtio_net__init_one(&kvm->cfg.net_params[i]);
	}

	if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) {
		static struct virtio_net_params net_params;

		net_params = (struct virtio_net_params) {
			.guest_ip	= kvm->cfg.guest_ip,
			.host_ip	= kvm->cfg.host_ip,
			.kvm		= kvm,
			.script		= kvm->cfg.script,
			.mode		= NET_MODE_USER,
		};
		str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac);
		str_to_mac(kvm->cfg.host_mac, net_params.host_mac);

		virtio_net__init_one(&net_params);
	}

	return 0;
}
virtio_dev_init(virtio_net__init);

int virtio_net__exit(struct kvm *kvm)
{
	return 0;
}
virtio_dev_exit(virtio_net__exit);