xref: /kvmtool/virtio/net.c (revision 3fea89a924511f9f8fe05a892098fad77c1eca0d)
1 #include "kvm/virtio-pci-dev.h"
2 #include "kvm/virtio-net.h"
3 #include "kvm/virtio.h"
4 #include "kvm/mutex.h"
5 #include "kvm/util.h"
6 #include "kvm/kvm.h"
7 #include "kvm/irq.h"
8 #include "kvm/uip.h"
9 #include "kvm/guest_compat.h"
10 #include "kvm/iovec.h"
11 
12 #include <linux/vhost.h>
13 #include <linux/virtio_net.h>
14 #include <linux/if_tun.h>
15 #include <linux/types.h>
16 
17 #include <arpa/inet.h>
18 #include <net/if.h>
19 
20 #include <unistd.h>
21 #include <fcntl.h>
22 
23 #include <sys/socket.h>
24 #include <sys/ioctl.h>
25 #include <sys/types.h>
26 #include <sys/wait.h>
27 #include <sys/eventfd.h>
28 
/* Ring size used for every virtqueue; also sizes the iovec arrays in the worker threads. */
#define VIRTIO_NET_QUEUE_SIZE		256
/* Upper bound on rx/tx queue pairs; per-queue arrays hold 2 * this + 1 entries. */
#define VIRTIO_NET_NUM_QUEUES		8
31 
32 struct net_dev;
33 
/*
 * Backend I/O hooks, called by the rx/tx worker threads through ndev->ops.
 * The workers treat a negative return value as failure and otherwise use
 * the return value as a byte count.
 */
struct net_dev_operations {
	/* Fill the given iovecs with received data. */
	int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev);
	/* Send the data described by the given iovecs (second argument is the iovec count). */
	int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev);
};
38 
/* Per-device state of one emulated virtio-net device. */
struct net_dev {
	/* Protects the rx_vqs/tx_vqs counters while worker threads claim their queue. */
	struct mutex			mutex;
	struct virtio_device		vdev;
	/* Link in the file-scope ndevs list. */
	struct list_head		list;

	/*
	 * Queue layout used by the worker threads: even indices are rx,
	 * odd indices are tx, and index queue_pairs * 2 is the control queue.
	 */
	struct virt_queue		vqs[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	struct virtio_net_config	config;
	/*
	 * features:    feature bits acked by the guest
	 * rx_vqs:      number of rx threads started so far
	 * tx_vqs:      number of tx threads started so far
	 * queue_pairs: number of rx/tx queue pairs in use
	 */
	u32				features, rx_vqs, tx_vqs, queue_pairs;

	/* Per-queue worker thread, and the lock/condvar used to wake it. */
	pthread_t			io_thread[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	struct mutex			io_lock[VIRTIO_NET_NUM_QUEUES * 2 + 1];
	pthread_cond_t			io_cond[VIRTIO_NET_NUM_QUEUES * 2 + 1];

	/* vhost-net descriptor; the code treats 0 as "vhost not in use". */
	int				vhost_fd;
	int				tap_fd;
	char				tap_name[IFNAMSIZ];

	/* NET_MODE_TAP selects the TAP backend; any other value uses uip. */
	int				mode;

	/* State for the uip (user mode) backend. */
	struct uip_info			info;
	struct net_dev_operations	*ops;
	struct kvm			*kvm;

	/* Parameters this device was created from (not copied). */
	struct virtio_net_params	*params;
};
64 
/* All net devices created by virtio_net__init_one(). */
static LIST_HEAD(ndevs);
/* Id of the compat message registered at init; -1 until registered. */
static int compat_id = -1;

/* Payload capacity of the bounce buffer used by the rx thread. */
#define MAX_PACKET_SIZE 65550
69 
70 static bool has_virtio_feature(struct net_dev *ndev, u32 feature)
71 {
72 	return ndev->features & (1 << feature);
73 }
74 
75 static void virtio_net_fix_tx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev)
76 {
77 	hdr->hdr_len		= virtio_guest_to_host_u16(&ndev->vdev, hdr->hdr_len);
78 	hdr->gso_size		= virtio_guest_to_host_u16(&ndev->vdev, hdr->gso_size);
79 	hdr->csum_start		= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_start);
80 	hdr->csum_offset	= virtio_guest_to_host_u16(&ndev->vdev, hdr->csum_offset);
81 }
82 
83 static void virtio_net_fix_rx_hdr(struct virtio_net_hdr *hdr, struct net_dev *ndev)
84 {
85 	hdr->hdr_len		= virtio_host_to_guest_u16(&ndev->vdev, hdr->hdr_len);
86 	hdr->gso_size		= virtio_host_to_guest_u16(&ndev->vdev, hdr->gso_size);
87 	hdr->csum_start		= virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_start);
88 	hdr->csum_offset	= virtio_host_to_guest_u16(&ndev->vdev, hdr->csum_offset);
89 }
90 
/*
 * Worker thread for one receive queue.
 *
 * @p: the struct net_dev this thread serves.
 *
 * Each packet is first read into a local bounce buffer through ops->rx
 * and then copied into guest buffers, using as many descriptor chains as
 * needed. The thread exits when ops->rx reports an error.
 */
static void *virtio_net_rx_thread(void *p)
{
	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
	struct virt_queue *vq;
	struct kvm *kvm;
	struct net_dev *ndev = p;
	u16 out, in;
	u16 head;
	int len, copied;
	u32 id;

	/* Claim the next rx queue: rx queues live at even indices. */
	mutex_lock(&ndev->mutex);
	id = ndev->rx_vqs++ * 2;
	mutex_unlock(&ndev->mutex);

	kvm__set_thread_name("virtio-net-rx");

	kvm = ndev->kvm;
	vq = &ndev->vqs[id];

	while (1) {
		/* Sleep until notified, unless the guest already queued buffers. */
		mutex_lock(&ndev->io_lock[id]);
		if (!virt_queue__available(vq))
			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
		mutex_unlock(&ndev->io_lock[id]);

		while (virt_queue__available(vq)) {
			/* Bounce buffer: room for the largest packet plus the larger header variant. */
			unsigned char buffer[MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf)];
			struct iovec dummy_iov = {
				.iov_base = buffer,
				.iov_len  = sizeof(buffer),
			};
			struct virtio_net_hdr_mrg_rxbuf *hdr;
			u16 num_buffers;

			len = ndev->ops->rx(&dummy_iov, 1, ndev);
			if (len < 0) {
				pr_warning("%s: rx on vq %u failed (%d), exiting thread\n",
						__func__, id, len);
				goto out_err;
			}

			copied = num_buffers = 0;
			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			/* The header sits at the start of the first chain; remember it for the fix-up below. */
			hdr = iov[0].iov_base;
			while (copied < len) {
				/* Copy as much as fits into the current descriptor chain. */
				size_t iovsize = min_t(size_t, len - copied, iov_size(iov, in));

				memcpy_toiovec(iov, buffer + copied, iovsize);
				copied += iovsize;
				/* Record the chain as used without publishing the used index yet. */
				virt_queue__set_used_elem_no_update(vq, head, iovsize, num_buffers++);
				if (copied == len)
					break;
				/* Packet needs another chain: spin until the guest provides one. */
				while (!virt_queue__available(vq))
					sleep(0);
				head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
			}

			virtio_net_fix_rx_hdr(&hdr->hdr, ndev);
			/* num_buffers is only written when mergeable rx buffers were negotiated. */
			if (has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
				hdr->num_buffers = virtio_host_to_guest_u16(vq, num_buffers);

			/* Publish all chains of this packet to the guest at once. */
			virt_queue__used_idx_advance(vq, num_buffers);

			/* We should interrupt guest right now, otherwise latency is huge. */
			if (virtio_queue__should_signal(vq))
				ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
		}
	}

out_err:
	pthread_exit(NULL);
	return NULL;

}
166 
167 static void *virtio_net_tx_thread(void *p)
168 {
169 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
170 	struct virt_queue *vq;
171 	struct kvm *kvm;
172 	struct net_dev *ndev = p;
173 	u16 out, in;
174 	u16 head;
175 	int len;
176 	u32 id;
177 
178 	mutex_lock(&ndev->mutex);
179 	id = ndev->tx_vqs++ * 2 + 1;
180 	mutex_unlock(&ndev->mutex);
181 
182 	kvm__set_thread_name("virtio-net-tx");
183 
184 	kvm = ndev->kvm;
185 	vq = &ndev->vqs[id];
186 
187 	while (1) {
188 		mutex_lock(&ndev->io_lock[id]);
189 		if (!virt_queue__available(vq))
190 			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
191 		mutex_unlock(&ndev->io_lock[id]);
192 
193 		while (virt_queue__available(vq)) {
194 			struct virtio_net_hdr *hdr;
195 			head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
196 			hdr = iov[0].iov_base;
197 			virtio_net_fix_tx_hdr(hdr, ndev);
198 			len = ndev->ops->tx(iov, out, ndev);
199 			if (len < 0) {
200 				pr_warning("%s: tx on vq %u failed (%d)\n",
201 						__func__, id, errno);
202 				goto out_err;
203 			}
204 
205 			virt_queue__set_used_elem(vq, head, len);
206 		}
207 
208 		if (virtio_queue__should_signal(vq))
209 			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
210 	}
211 
212 out_err:
213 	pthread_exit(NULL);
214 	return NULL;
215 }
216 
217 static virtio_net_ctrl_ack virtio_net_handle_mq(struct kvm* kvm, struct net_dev *ndev, struct virtio_net_ctrl_hdr *ctrl)
218 {
219 	/* Not much to do here */
220 	return VIRTIO_NET_OK;
221 }
222 
223 static void *virtio_net_ctrl_thread(void *p)
224 {
225 	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
226 	u16 out, in, head;
227 	struct net_dev *ndev = p;
228 	struct kvm *kvm = ndev->kvm;
229 	u32 id = ndev->queue_pairs * 2;
230 	struct virt_queue *vq = &ndev->vqs[id];
231 	struct virtio_net_ctrl_hdr *ctrl;
232 	virtio_net_ctrl_ack *ack;
233 
234 	kvm__set_thread_name("virtio-net-ctrl");
235 
236 	while (1) {
237 		mutex_lock(&ndev->io_lock[id]);
238 		if (!virt_queue__available(vq))
239 			pthread_cond_wait(&ndev->io_cond[id], &ndev->io_lock[id].mutex);
240 		mutex_unlock(&ndev->io_lock[id]);
241 
242 		while (virt_queue__available(vq)) {
243 			head = virt_queue__get_iov(&ndev->vqs[id], iov, &out, &in, kvm);
244 			ctrl = iov[0].iov_base;
245 			ack = iov[out].iov_base;
246 
247 			switch (ctrl->class) {
248 			case VIRTIO_NET_CTRL_MQ:
249 				*ack = virtio_net_handle_mq(kvm, ndev, ctrl);
250 				break;
251 			default:
252 				*ack = VIRTIO_NET_ERR;
253 				break;
254 			}
255 			virt_queue__set_used_elem(&ndev->vqs[id], head, iov[out].iov_len);
256 		}
257 
258 		if (virtio_queue__should_signal(&ndev->vqs[id]))
259 			ndev->vdev.ops->signal_vq(kvm, &ndev->vdev, id);
260 	}
261 
262 	pthread_exit(NULL);
263 
264 	return NULL;
265 }
266 
267 static void virtio_net_handle_callback(struct kvm *kvm, struct net_dev *ndev, int queue)
268 {
269 	if ((u32)queue >= (ndev->queue_pairs * 2 + 1)) {
270 		pr_warning("Unknown queue index %u", queue);
271 		return;
272 	}
273 
274 	mutex_lock(&ndev->io_lock[queue]);
275 	pthread_cond_signal(&ndev->io_cond[queue]);
276 	mutex_unlock(&ndev->io_lock[queue]);
277 }
278 
279 static int virtio_net_request_tap(struct net_dev *ndev, struct ifreq *ifr,
280 				  const char *tapname)
281 {
282 	int ret;
283 
284 	memset(ifr, 0, sizeof(*ifr));
285 	ifr->ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
286 	if (tapname)
287 		strncpy(ifr->ifr_name, tapname, sizeof(ifr->ifr_name));
288 
289 	ret = ioctl(ndev->tap_fd, TUNSETIFF, ifr);
290 
291 	if (ret >= 0)
292 		strncpy(ndev->tap_name, ifr->ifr_name, sizeof(ndev->tap_name));
293 	return ret;
294 }
295 
/*
 * Run a helper script with the tap interface name as its only argument
 * and wait for it to finish.
 *
 * Returns 0 if the script exited with status 0, -1 otherwise. That
 * includes fork/wait failures and a script terminated by a signal.
 */
static int virtio_net_exec_script(const char* script, const char *tap_name)
{
	pid_t pid;
	int status;

	pid = fork();
	if (pid < 0) {
		/* Without this check, waitpid(-1, ...) would reap an arbitrary child. */
		pr_warning("Fail to setup tap by %s", script);
		return -1;
	}

	if (pid == 0) {
		/* The variadic sentinel must be a typed null pointer. */
		execl(script, script, tap_name, (char *)NULL);
		_exit(1);
	}

	/* Retry when interrupted so that status is always valid below. */
	while (waitpid(pid, &status, 0) < 0) {
		if (errno == EINTR)
			continue;
		pr_warning("Fail to setup tap by %s", script);
		return -1;
	}

	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
		pr_warning("Fail to setup tap by %s", script);
		return -1;
	}

	return 0;
}
314 
315 static bool virtio_net__tap_init(struct net_dev *ndev)
316 {
317 	int sock = socket(AF_INET, SOCK_STREAM, 0);
318 	int offload, hdr_len;
319 	struct sockaddr_in sin = {0};
320 	struct ifreq ifr;
321 	const struct virtio_net_params *params = ndev->params;
322 	bool skipconf = !!params->tapif;
323 	bool macvtap = skipconf && (params->tapif[0] == '/');
324 	const char *tap_file = "/dev/net/tun";
325 
326 	/* Did the user already gave us the FD? */
327 	if (params->fd) {
328 		ndev->tap_fd = params->fd;
329 		return 1;
330 	}
331 
332 	if (macvtap)
333 		tap_file = params->tapif;
334 
335 	ndev->tap_fd = open(tap_file, O_RDWR);
336 	if (ndev->tap_fd < 0) {
337 		pr_warning("Unable to open %s", tap_file);
338 		goto fail;
339 	}
340 
341 	if (!macvtap &&
342 	    virtio_net_request_tap(ndev, &ifr, params->tapif) < 0) {
343 		pr_warning("Config tap device error. Are you root?");
344 		goto fail;
345 	}
346 
347 	hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
348 			sizeof(struct virtio_net_hdr_mrg_rxbuf) :
349 			sizeof(struct virtio_net_hdr);
350 	if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0)
351 		pr_warning("Config tap device TUNSETVNETHDRSZ error");
352 
353 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
354 	if (ioctl(ndev->tap_fd, TUNSETOFFLOAD, offload) < 0) {
355 		pr_warning("Config tap device TUNSETOFFLOAD error");
356 		goto fail;
357 	}
358 
359 	if (strcmp(params->script, "none")) {
360 		if (virtio_net_exec_script(params->script, ndev->tap_name) < 0)
361 			goto fail;
362 	} else if (!skipconf) {
363 		memset(&ifr, 0, sizeof(ifr));
364 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
365 		sin.sin_addr.s_addr = inet_addr(params->host_ip);
366 		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
367 		ifr.ifr_addr.sa_family = AF_INET;
368 		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
369 			pr_warning("Could not set ip address on tap device");
370 			goto fail;
371 		}
372 	}
373 
374 	if (!skipconf) {
375 		memset(&ifr, 0, sizeof(ifr));
376 		strncpy(ifr.ifr_name, ndev->tap_name, sizeof(ndev->tap_name));
377 		ioctl(sock, SIOCGIFFLAGS, &ifr);
378 		ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
379 		if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
380 			pr_warning("Could not bring tap device up");
381 	}
382 
383 	close(sock);
384 
385 	return 1;
386 
387 fail:
388 	if (sock >= 0)
389 		close(sock);
390 	if (ndev->tap_fd >= 0)
391 		close(ndev->tap_fd);
392 
393 	return 0;
394 }
395 
396 static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
397 {
398 	return writev(ndev->tap_fd, iov, out);
399 }
400 
401 static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
402 {
403 	return readv(ndev->tap_fd, iov, in);
404 }
405 
406 static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev)
407 {
408 	return uip_tx(iov, out, &ndev->info);
409 }
410 
411 static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev)
412 {
413 	return uip_rx(iov, in, &ndev->info);
414 }
415 
/* Backend hooks used when the device runs in NET_MODE_TAP. */
static struct net_dev_operations tap_ops = {
	.rx	= tap_ops_rx,
	.tx	= tap_ops_tx,
};

/* Backend hooks used for every other mode (uip based networking). */
static struct net_dev_operations uip_ops = {
	.rx	= uip_ops_rx,
	.tx	= uip_ops_tx,
};
425 
426 static u8 *get_config(struct kvm *kvm, void *dev)
427 {
428 	struct net_dev *ndev = dev;
429 
430 	return ((u8 *)(&ndev->config));
431 }
432 
433 static u32 get_host_features(struct kvm *kvm, void *dev)
434 {
435 	struct net_dev *ndev = dev;
436 
437 	return 1UL << VIRTIO_NET_F_MAC
438 		| 1UL << VIRTIO_NET_F_CSUM
439 		| 1UL << VIRTIO_NET_F_HOST_UFO
440 		| 1UL << VIRTIO_NET_F_HOST_TSO4
441 		| 1UL << VIRTIO_NET_F_HOST_TSO6
442 		| 1UL << VIRTIO_NET_F_GUEST_UFO
443 		| 1UL << VIRTIO_NET_F_GUEST_TSO4
444 		| 1UL << VIRTIO_NET_F_GUEST_TSO6
445 		| 1UL << VIRTIO_RING_F_EVENT_IDX
446 		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
447 		| 1UL << VIRTIO_NET_F_CTRL_VQ
448 		| 1UL << VIRTIO_NET_F_MRG_RXBUF
449 		| 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0);
450 }
451 
452 static int virtio_net__vhost_set_features(struct net_dev *ndev)
453 {
454 	u64 features = 1UL << VIRTIO_RING_F_EVENT_IDX;
455 	u64 vhost_features;
456 
457 	if (ioctl(ndev->vhost_fd, VHOST_GET_FEATURES, &vhost_features) != 0)
458 		die_perror("VHOST_GET_FEATURES failed");
459 
460 	/* make sure both side support mergable rx buffers */
461 	if (vhost_features & 1UL << VIRTIO_NET_F_MRG_RXBUF &&
462 			has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF))
463 		features |= 1UL << VIRTIO_NET_F_MRG_RXBUF;
464 
465 	return ioctl(ndev->vhost_fd, VHOST_SET_FEATURES, &features);
466 }
467 
468 static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
469 {
470 	struct net_dev *ndev = dev;
471 	struct virtio_net_config *conf = &ndev->config;
472 
473 	ndev->features = features;
474 
475 	conf->status = virtio_host_to_guest_u16(&ndev->vdev, conf->status);
476 	conf->max_virtqueue_pairs = virtio_host_to_guest_u16(&ndev->vdev,
477 							     conf->max_virtqueue_pairs);
478 
479 	if (ndev->mode == NET_MODE_TAP) {
480 		if (!virtio_net__tap_init(ndev))
481 			die_perror("You have requested a TAP device, but creation of one has failed because");
482 		if (ndev->vhost_fd &&
483 				virtio_net__vhost_set_features(ndev) != 0)
484 			die_perror("VHOST_SET_FEATURES failed");
485 	} else {
486 		ndev->info.vnet_hdr_len = has_virtio_feature(ndev, VIRTIO_NET_F_MRG_RXBUF) ?
487 						sizeof(struct virtio_net_hdr_mrg_rxbuf) :
488 						sizeof(struct virtio_net_hdr);
489 		uip_init(&ndev->info);
490 	}
491 }
492 
493 static bool is_ctrl_vq(struct net_dev *ndev, u32 vq)
494 {
495 	return vq == (u32)(ndev->queue_pairs * 2);
496 }
497 
498 static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align,
499 		   u32 pfn)
500 {
501 	struct vhost_vring_state state = { .index = vq };
502 	struct vhost_vring_addr addr;
503 	struct net_dev *ndev = dev;
504 	struct virt_queue *queue;
505 	void *p;
506 	int r;
507 
508 	compat__remove_message(compat_id);
509 
510 	queue		= &ndev->vqs[vq];
511 	queue->pfn	= pfn;
512 	p		= virtio_get_vq(kvm, queue->pfn, page_size);
513 
514 	vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, align);
515 	virtio_init_device_vq(&ndev->vdev, queue);
516 
517 	mutex_init(&ndev->io_lock[vq]);
518 	pthread_cond_init(&ndev->io_cond[vq], NULL);
519 	if (is_ctrl_vq(ndev, vq)) {
520 		pthread_create(&ndev->io_thread[vq], NULL, virtio_net_ctrl_thread, ndev);
521 
522 		return 0;
523 	} else if (ndev->vhost_fd == 0 ) {
524 		if (vq & 1)
525 			pthread_create(&ndev->io_thread[vq], NULL, virtio_net_tx_thread, ndev);
526 		else
527 			pthread_create(&ndev->io_thread[vq], NULL, virtio_net_rx_thread, ndev);
528 
529 		return 0;
530 	}
531 
532 	if (queue->endian != VIRTIO_ENDIAN_HOST)
533 		die_perror("VHOST requires VIRTIO_ENDIAN_HOST");
534 
535 	state.num = queue->vring.num;
536 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_NUM, &state);
537 	if (r < 0)
538 		die_perror("VHOST_SET_VRING_NUM failed");
539 	state.num = 0;
540 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_BASE, &state);
541 	if (r < 0)
542 		die_perror("VHOST_SET_VRING_BASE failed");
543 
544 	addr = (struct vhost_vring_addr) {
545 		.index = vq,
546 		.desc_user_addr = (u64)(unsigned long)queue->vring.desc,
547 		.avail_user_addr = (u64)(unsigned long)queue->vring.avail,
548 		.used_user_addr = (u64)(unsigned long)queue->vring.used,
549 	};
550 
551 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
552 	if (r < 0)
553 		die_perror("VHOST_SET_VRING_ADDR failed");
554 
555 	return 0;
556 }
557 
558 static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
559 {
560 	struct net_dev *ndev = dev;
561 	struct kvm_irqfd irq;
562 	struct vhost_vring_file file;
563 	int r;
564 
565 	if (ndev->vhost_fd == 0)
566 		return;
567 
568 	irq = (struct kvm_irqfd) {
569 		.gsi	= gsi,
570 		.fd	= eventfd(0, 0),
571 	};
572 	file = (struct vhost_vring_file) {
573 		.index	= vq,
574 		.fd	= irq.fd,
575 	};
576 
577 	r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq);
578 	if (r < 0)
579 		die_perror("KVM_IRQFD failed");
580 
581 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_CALL, &file);
582 	if (r < 0)
583 		die_perror("VHOST_SET_VRING_CALL failed");
584 	file.fd = ndev->tap_fd;
585 	r = ioctl(ndev->vhost_fd, VHOST_NET_SET_BACKEND, &file);
586 	if (r != 0)
587 		die("VHOST_NET_SET_BACKEND failed %d", errno);
588 
589 }
590 
591 static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
592 {
593 	struct net_dev *ndev = dev;
594 	struct vhost_vring_file file = {
595 		.index	= vq,
596 		.fd	= efd,
597 	};
598 	int r;
599 
600 	if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
601 		return;
602 
603 	r = ioctl(ndev->vhost_fd, VHOST_SET_VRING_KICK, &file);
604 	if (r < 0)
605 		die_perror("VHOST_SET_VRING_KICK failed");
606 }
607 
608 static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
609 {
610 	struct net_dev *ndev = dev;
611 
612 	virtio_net_handle_callback(kvm, ndev, vq);
613 
614 	return 0;
615 }
616 
617 static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
618 {
619 	struct net_dev *ndev = dev;
620 
621 	return ndev->vqs[vq].pfn;
622 }
623 
624 static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
625 {
626 	/* FIXME: dynamic */
627 	return VIRTIO_NET_QUEUE_SIZE;
628 }
629 
630 static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
631 {
632 	/* FIXME: dynamic */
633 	return size;
634 }
635 
/*
 * Template of the virtio callbacks implemented by this file.
 * virtio_net__init_one() copies it into a per-device allocation.
 */
static struct virtio_ops net_dev_virtio_ops = {
	.get_config		= get_config,
	.get_host_features	= get_host_features,
	.set_guest_features	= set_guest_features,
	.init_vq		= init_vq,
	.get_pfn_vq		= get_pfn_vq,
	.get_size_vq		= get_size_vq,
	.set_size_vq		= set_size_vq,
	.notify_vq		= notify_vq,
	.notify_vq_gsi		= notify_vq_gsi,
	.notify_vq_eventfd	= notify_vq_eventfd,
};
648 
649 static void virtio_net__vhost_init(struct kvm *kvm, struct net_dev *ndev)
650 {
651 	struct kvm_mem_bank *bank;
652 	struct vhost_memory *mem;
653 	int r, i;
654 
655 	ndev->vhost_fd = open("/dev/vhost-net", O_RDWR);
656 	if (ndev->vhost_fd < 0)
657 		die_perror("Failed openning vhost-net device");
658 
659 	mem = calloc(1, sizeof(*mem) + kvm->mem_slots * sizeof(struct vhost_memory_region));
660 	if (mem == NULL)
661 		die("Failed allocating memory for vhost memory map");
662 
663 	i = 0;
664 	list_for_each_entry(bank, &kvm->mem_banks, list) {
665 		mem->regions[i] = (struct vhost_memory_region) {
666 			.guest_phys_addr = bank->guest_phys_addr,
667 			.memory_size	 = bank->size,
668 			.userspace_addr	 = (unsigned long)bank->host_addr,
669 		};
670 		i++;
671 	}
672 	mem->nregions = i;
673 
674 	r = ioctl(ndev->vhost_fd, VHOST_SET_OWNER);
675 	if (r != 0)
676 		die_perror("VHOST_SET_OWNER failed");
677 
678 	r = ioctl(ndev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
679 	if (r != 0)
680 		die_perror("VHOST_SET_MEM_TABLE failed");
681 
682 	ndev->vdev.use_vhost = true;
683 
684 	free(mem);
685 }
686 
/*
 * Parse a MAC address written as six colon-separated hex bytes
 * ("aa:bb:cc:dd:ee:ff") into the six bytes at @mac. Bytes that cannot
 * be parsed are left untouched.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
	       &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]);
}
692 static int set_net_param(struct kvm *kvm, struct virtio_net_params *p,
693 			const char *param, const char *val)
694 {
695 	if (strcmp(param, "guest_mac") == 0) {
696 		str_to_mac(val, p->guest_mac);
697 	} else if (strcmp(param, "mode") == 0) {
698 		if (!strncmp(val, "user", 4)) {
699 			int i;
700 
701 			for (i = 0; i < kvm->cfg.num_net_devices; i++)
702 				if (kvm->cfg.net_params[i].mode == NET_MODE_USER)
703 					die("Only one usermode network device allowed at a time");
704 			p->mode = NET_MODE_USER;
705 		} else if (!strncmp(val, "tap", 3)) {
706 			p->mode = NET_MODE_TAP;
707 		} else if (!strncmp(val, "none", 4)) {
708 			kvm->cfg.no_net = 1;
709 			return -1;
710 		} else
711 			die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network);
712 	} else if (strcmp(param, "script") == 0) {
713 		p->script = strdup(val);
714 	} else if (strcmp(param, "downscript") == 0) {
715 		p->downscript = strdup(val);
716 	} else if (strcmp(param, "guest_ip") == 0) {
717 		p->guest_ip = strdup(val);
718 	} else if (strcmp(param, "host_ip") == 0) {
719 		p->host_ip = strdup(val);
720 	} else if (strcmp(param, "trans") == 0) {
721 		p->trans = strdup(val);
722 	} else if (strcmp(param, "tapif") == 0) {
723 		p->tapif = strdup(val);
724 	} else if (strcmp(param, "vhost") == 0) {
725 		p->vhost = atoi(val);
726 	} else if (strcmp(param, "fd") == 0) {
727 		p->fd = atoi(val);
728 	} else if (strcmp(param, "mq") == 0) {
729 		p->mq = atoi(val);
730 	} else
731 		die("Unknown network parameter %s", param);
732 
733 	return 0;
734 }
735 
736 int netdev_parser(const struct option *opt, const char *arg, int unset)
737 {
738 	struct virtio_net_params p;
739 	char *buf = NULL, *cmd = NULL, *cur = NULL;
740 	bool on_cmd = true;
741 	struct kvm *kvm = opt->ptr;
742 
743 	if (arg) {
744 		buf = strdup(arg);
745 		if (buf == NULL)
746 			die("Failed allocating new net buffer");
747 		cur = strtok(buf, ",=");
748 	}
749 
750 	p = (struct virtio_net_params) {
751 		.guest_ip	= DEFAULT_GUEST_ADDR,
752 		.host_ip	= DEFAULT_HOST_ADDR,
753 		.script		= DEFAULT_SCRIPT,
754 		.downscript	= DEFAULT_SCRIPT,
755 		.mode		= NET_MODE_TAP,
756 	};
757 
758 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
759 	p.guest_mac[5] += kvm->cfg.num_net_devices;
760 
761 	while (cur) {
762 		if (on_cmd) {
763 			cmd = cur;
764 		} else {
765 			if (set_net_param(kvm, &p, cmd, cur) < 0)
766 				goto done;
767 		}
768 		on_cmd = !on_cmd;
769 
770 		cur = strtok(NULL, ",=");
771 	};
772 
773 	kvm->cfg.num_net_devices++;
774 
775 	kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params));
776 	if (kvm->cfg.net_params == NULL)
777 		die("Failed adding new network device");
778 
779 	kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p;
780 
781 done:
782 	free(buf);
783 	return 0;
784 }
785 
786 static int virtio_net__init_one(struct virtio_net_params *params)
787 {
788 	int i, err;
789 	struct net_dev *ndev;
790 	struct virtio_ops *ops;
791 	enum virtio_trans trans = VIRTIO_DEFAULT_TRANS(params->kvm);
792 
793 	ndev = calloc(1, sizeof(struct net_dev));
794 	if (ndev == NULL)
795 		return -ENOMEM;
796 
797 	ops = malloc(sizeof(*ops));
798 	if (ops == NULL) {
799 		err = -ENOMEM;
800 		goto err_free_ndev;
801 	}
802 
803 	list_add_tail(&ndev->list, &ndevs);
804 
805 	ndev->kvm = params->kvm;
806 	ndev->params = params;
807 
808 	mutex_init(&ndev->mutex);
809 	ndev->queue_pairs = max(1, min(VIRTIO_NET_NUM_QUEUES, params->mq));
810 	ndev->config.status = VIRTIO_NET_S_LINK_UP;
811 	if (ndev->queue_pairs > 1)
812 		ndev->config.max_virtqueue_pairs = ndev->queue_pairs;
813 
814 	for (i = 0 ; i < 6 ; i++) {
815 		ndev->config.mac[i]		= params->guest_mac[i];
816 		ndev->info.guest_mac.addr[i]	= params->guest_mac[i];
817 		ndev->info.host_mac.addr[i]	= params->host_mac[i];
818 	}
819 
820 	ndev->mode = params->mode;
821 	if (ndev->mode == NET_MODE_TAP) {
822 		ndev->ops = &tap_ops;
823 	} else {
824 		ndev->info.host_ip		= ntohl(inet_addr(params->host_ip));
825 		ndev->info.guest_ip		= ntohl(inet_addr(params->guest_ip));
826 		ndev->info.guest_netmask	= ntohl(inet_addr("255.255.255.0"));
827 		ndev->info.buf_nr		= 20,
828 		ndev->ops = &uip_ops;
829 		uip_static_init(&ndev->info);
830 	}
831 
832 	*ops = net_dev_virtio_ops;
833 
834 	if (params->trans) {
835 		if (strcmp(params->trans, "mmio") == 0)
836 			trans = VIRTIO_MMIO;
837 		else if (strcmp(params->trans, "pci") == 0)
838 			trans = VIRTIO_PCI;
839 		else
840 			pr_warning("virtio-net: Unknown transport method : %s, "
841 				   "falling back to %s.", params->trans,
842 				   virtio_trans_name(trans));
843 	}
844 
845 	virtio_init(params->kvm, ndev, &ndev->vdev, ops, trans,
846 		    PCI_DEVICE_ID_VIRTIO_NET, VIRTIO_ID_NET, PCI_CLASS_NET);
847 
848 	if (params->vhost)
849 		virtio_net__vhost_init(params->kvm, ndev);
850 
851 	if (compat_id == -1)
852 		compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");
853 
854 	return 0;
855 
856 err_free_ndev:
857 	free(ndev);
858 	return err;
859 }
860 
861 int virtio_net__init(struct kvm *kvm)
862 {
863 	int i;
864 
865 	for (i = 0; i < kvm->cfg.num_net_devices; i++) {
866 		kvm->cfg.net_params[i].kvm = kvm;
867 		virtio_net__init_one(&kvm->cfg.net_params[i]);
868 	}
869 
870 	if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) {
871 		static struct virtio_net_params net_params;
872 
873 		net_params = (struct virtio_net_params) {
874 			.guest_ip	= kvm->cfg.guest_ip,
875 			.host_ip	= kvm->cfg.host_ip,
876 			.kvm		= kvm,
877 			.script		= kvm->cfg.script,
878 			.mode		= NET_MODE_USER,
879 		};
880 		str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac);
881 		str_to_mac(kvm->cfg.host_mac, net_params.host_mac);
882 
883 		virtio_net__init_one(&net_params);
884 	}
885 
886 	return 0;
887 }
888 virtio_dev_init(virtio_net__init);
889 
890 int virtio_net__exit(struct kvm *kvm)
891 {
892 	struct virtio_net_params *params;
893 	struct net_dev *ndev;
894 	struct list_head *ptr;
895 
896 	list_for_each(ptr, &ndevs) {
897 		ndev = list_entry(ptr, struct net_dev, list);
898 		params = ndev->params;
899 		/* Cleanup any tap device which attached to bridge */
900 		if (ndev->mode == NET_MODE_TAP &&
901 		    strcmp(params->downscript, "none"))
902 			virtio_net_exec_script(params->downscript, ndev->tap_name);
903 	}
904 	return 0;
905 }
906 virtio_dev_exit(virtio_net__exit);
907